@oh-my-pi/pi-ai 3.37.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -909,12 +909,48 @@ Several providers require OAuth authentication instead of static API keys:
909
909
  - **Anthropic** (Claude Pro/Max subscription)
910
910
  - **OpenAI Codex** (ChatGPT Plus/Pro subscription, access to GPT-5.x Codex models)
911
911
  - **GitHub Copilot** (Copilot subscription)
912
- - **Google Gemini CLI** (Free Gemini 2.0/2.5 via Google Cloud Code Assist)
912
+ - **Google Gemini CLI** (Gemini 2.0/2.5 via Google Cloud Code Assist; free tier or paid subscription)
913
913
  - **Antigravity** (Free Gemini 3, Claude, GPT-OSS via Google Cloud)
914
914
 
915
+ For paid Cloud Code Assist subscriptions, set `GOOGLE_CLOUD_PROJECT` or `GOOGLE_CLOUD_PROJECT_ID` to your Google Cloud project ID (`GOOGLE_CLOUD_PROJECT` takes precedence when both are set).
916
+
915
917
  ### Vertex AI (ADC)
916
918
 
917
- Vertex AI models use Application Default Credentials. Run `gcloud auth application-default login`, set `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`), and `GOOGLE_CLOUD_LOCATION`. You can also pass `project`/`location` in the call options.
919
+ Vertex AI models use Application Default Credentials (ADC):
920
+
921
+ - **Local development**: Run `gcloud auth application-default login`
922
+ - **CI/Production**: Set `GOOGLE_APPLICATION_CREDENTIALS` to point to a service account JSON key file
923
+
924
+ In both cases, set `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) and `GOOGLE_CLOUD_LOCATION`. You can also pass `project`/`location` in the call options.
925
+
926
+ Example:
927
+
928
+ ```bash
929
+ # Local (uses your user credentials)
930
+ gcloud auth application-default login
931
+ export GOOGLE_CLOUD_PROJECT="my-project"
932
+ export GOOGLE_CLOUD_LOCATION="us-central1"
933
+
934
+ # CI/Production (service account key file)
935
+ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
936
+ ```
937
+
938
+ ```typescript
939
+ import { getModel, complete } from "@oh-my-pi/pi-ai";
940
+
941
+ (async () => {
942
+ const model = getModel("google-vertex", "gemini-2.5-flash");
943
+ const response = await complete(model, {
944
+ messages: [{ role: "user", content: "Hello from Vertex AI" }],
945
+ });
946
+
947
+ for (const block of response.content) {
948
+ if (block.type === "text") console.log(block.text);
949
+ }
950
+ })().catch(console.error);
951
+ ```
952
+
953
+ Official docs: [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials)
918
954
 
919
955
  ### CLI Login
920
956
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "3.37.1",
3
+ "version": "4.0.1",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
package/src/cli.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { existsSync, readFileSync, writeFileSync } from "fs";
3
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
4
4
  import { createInterface } from "readline";
5
5
  import { loginAnthropic } from "./utils/oauth/anthropic";
6
6
  import { loginGitHubCopilot } from "./utils/oauth/github-copilot";
@@ -1156,10 +1156,10 @@ export const MODELS = {
1156
1156
  reasoning: true,
1157
1157
  input: ["text", "image"],
1158
1158
  cost: {
1159
- input: 0,
1160
- output: 0,
1161
- cacheRead: 0,
1162
- cacheWrite: 0,
1159
+ input: 5,
1160
+ output: 25,
1161
+ cacheRead: 0.5,
1162
+ cacheWrite: 6.25,
1163
1163
  },
1164
1164
  contextWindow: 200000,
1165
1165
  maxTokens: 64000,
@@ -1173,10 +1173,10 @@ export const MODELS = {
1173
1173
  reasoning: false,
1174
1174
  input: ["text", "image"],
1175
1175
  cost: {
1176
- input: 0,
1177
- output: 0,
1178
- cacheRead: 0,
1179
- cacheWrite: 0,
1176
+ input: 3,
1177
+ output: 15,
1178
+ cacheRead: 0.3,
1179
+ cacheWrite: 3.75,
1180
1180
  },
1181
1181
  contextWindow: 200000,
1182
1182
  maxTokens: 64000,
@@ -1190,10 +1190,10 @@ export const MODELS = {
1190
1190
  reasoning: true,
1191
1191
  input: ["text", "image"],
1192
1192
  cost: {
1193
- input: 0,
1194
- output: 0,
1195
- cacheRead: 0,
1196
- cacheWrite: 0,
1193
+ input: 3,
1194
+ output: 15,
1195
+ cacheRead: 0.3,
1196
+ cacheWrite: 3.75,
1197
1197
  },
1198
1198
  contextWindow: 200000,
1199
1199
  maxTokens: 64000,
@@ -1207,9 +1207,9 @@ export const MODELS = {
1207
1207
  reasoning: true,
1208
1208
  input: ["text", "image"],
1209
1209
  cost: {
1210
- input: 0,
1211
- output: 0,
1212
- cacheRead: 0,
1210
+ input: 0.5,
1211
+ output: 3,
1212
+ cacheRead: 0.5,
1213
1213
  cacheWrite: 0,
1214
1214
  },
1215
1215
  contextWindow: 1048576,
@@ -1224,10 +1224,10 @@ export const MODELS = {
1224
1224
  reasoning: true,
1225
1225
  input: ["text", "image"],
1226
1226
  cost: {
1227
- input: 0,
1228
- output: 0,
1229
- cacheRead: 0,
1230
- cacheWrite: 0,
1227
+ input: 2,
1228
+ output: 12,
1229
+ cacheRead: 0.2,
1230
+ cacheWrite: 2.375,
1231
1231
  },
1232
1232
  contextWindow: 1048576,
1233
1233
  maxTokens: 65535,
@@ -1241,10 +1241,10 @@ export const MODELS = {
1241
1241
  reasoning: true,
1242
1242
  input: ["text", "image"],
1243
1243
  cost: {
1244
- input: 0,
1245
- output: 0,
1246
- cacheRead: 0,
1247
- cacheWrite: 0,
1244
+ input: 2,
1245
+ output: 12,
1246
+ cacheRead: 0.2,
1247
+ cacheWrite: 2.375,
1248
1248
  },
1249
1249
  contextWindow: 1048576,
1250
1250
  maxTokens: 65535,
@@ -1258,8 +1258,8 @@ export const MODELS = {
1258
1258
  reasoning: false,
1259
1259
  input: ["text"],
1260
1260
  cost: {
1261
- input: 0,
1262
- output: 0,
1261
+ input: 0.09,
1262
+ output: 0.36,
1263
1263
  cacheRead: 0,
1264
1264
  cacheWrite: 0,
1265
1265
  },
@@ -5192,17 +5192,17 @@ export const MODELS = {
5192
5192
  contextWindow: 262144,
5193
5193
  maxTokens: 65535,
5194
5194
  } satisfies Model<"openai-completions">,
5195
- "nex-agi/deepseek-v3.1-nex-n1:free": {
5196
- id: "nex-agi/deepseek-v3.1-nex-n1:free",
5197
- name: "Nex AGI: DeepSeek V3.1 Nex N1 (free)",
5195
+ "nex-agi/deepseek-v3.1-nex-n1": {
5196
+ id: "nex-agi/deepseek-v3.1-nex-n1",
5197
+ name: "Nex AGI: DeepSeek V3.1 Nex N1",
5198
5198
  api: "openai-completions",
5199
5199
  provider: "openrouter",
5200
5200
  baseUrl: "https://openrouter.ai/api/v1",
5201
5201
  reasoning: false,
5202
5202
  input: ["text"],
5203
5203
  cost: {
5204
- input: 0,
5205
- output: 0,
5204
+ input: 0.27,
5205
+ output: 1,
5206
5206
  cacheRead: 0,
5207
5207
  cacheWrite: 0,
5208
5208
  },
@@ -29,15 +29,32 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
29
29
 
30
30
  import { transformMessages } from "./transorm-messages";
31
31
 
32
- // Stealth mode: Mimic Claude Code's tool naming exactly
33
- const claudeCodeVersion = "2.1.2";
34
-
35
- // Prefix all tool names to avoid collisions with Claude Code's built-in tools
36
- const toolNamePrefix = "cli_";
32
+ // Stealth mode: Mimic Claude Code headers while avoiding tool name collisions.
33
+ export const claudeCodeVersion = "1.0.83";
34
+ export const claudeToolPrefix = "proxy_";
35
+ export const claudeCodeSystemInstruction = "You are Claude Code, Anthropic's official CLI for Claude.";
36
+ export const claudeCodeHeaders = {
37
+ "anthropic-version": "2023-06-01",
38
+ "x-stainless-helper-method": "stream",
39
+ "x-stainless-retry-count": "0",
40
+ "x-stainless-runtime-version": "v24.3.0",
41
+ "x-stainless-package-version": "0.55.1",
42
+ "x-stainless-runtime": "node",
43
+ "x-stainless-lang": "js",
44
+ "x-stainless-arch": "arm64",
45
+ "x-stainless-os": "MacOS",
46
+ "x-stainless-timeout": "60",
47
+ } as const;
48
+
49
+ export const applyClaudeToolPrefix = (name: string) => {
50
+ if (!claudeToolPrefix || name.startsWith(claudeToolPrefix)) return name;
51
+ return `${claudeToolPrefix}${name}`;
52
+ };
37
53
 
38
- const toClaudeCodeName = (name: string) => toolNamePrefix + name;
39
- const fromClaudeCodeName = (name: string) =>
40
- name.startsWith(toolNamePrefix) ? name.slice(toolNamePrefix.length) : name;
54
+ export const stripClaudeToolPrefix = (name: string) => {
55
+ if (!claudeToolPrefix || !name.startsWith(claudeToolPrefix)) return name;
56
+ return name.slice(claudeToolPrefix.length);
57
+ };
41
58
 
42
59
  /**
43
60
  * Convert content blocks to Anthropic API format
@@ -96,6 +113,7 @@ export interface AnthropicOptions extends StreamOptions {
96
113
  thinkingBudgetTokens?: number;
97
114
  interleavedThinking?: boolean;
98
115
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
116
+ betas?: string[] | string;
99
117
  }
100
118
 
101
119
  export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
@@ -126,7 +144,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
126
144
 
127
145
  try {
128
146
  const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
129
- const { client, isOAuthToken } = createClient(model, apiKey, options?.interleavedThinking ?? true);
147
+ const extraBetas = normalizeExtraBetas(options?.betas);
148
+ const { client, isOAuthToken } = createClient(model, apiKey, options?.interleavedThinking ?? true, extraBetas);
130
149
  const params = buildParams(model, context, isOAuthToken, options);
131
150
  const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
132
151
  stream.push({ type: "start", partial: output });
@@ -168,7 +187,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
168
187
  const block: Block = {
169
188
  type: "toolCall",
170
189
  id: event.content_block.id,
171
- name: isOAuthToken ? fromClaudeCodeName(event.content_block.name) : event.content_block.name,
190
+ name: isOAuthToken ? stripClaudeToolPrefix(event.content_block.name) : event.content_block.name,
172
191
  arguments: event.content_block.input as Record<string, any>,
173
192
  partialJson: "",
174
193
  index: event.index,
@@ -293,8 +312,14 @@ function isOAuthToken(apiKey: string): boolean {
293
312
  return apiKey.includes("sk-ant-oat");
294
313
  }
295
314
 
315
+ export function normalizeExtraBetas(betas?: string[] | string): string[] {
316
+ if (!betas) return [];
317
+ const raw = Array.isArray(betas) ? betas : betas.split(",");
318
+ return raw.map((beta) => beta.trim()).filter((beta) => beta.length > 0);
319
+ }
320
+
296
321
  // Build deduplicated beta header string
297
- function buildBetaHeader(baseBetas: string[], extraBetas: string[]): string {
322
+ export function buildBetaHeader(baseBetas: string[], extraBetas: string[]): string {
298
323
  const seen = new Set<string>();
299
324
  const result: string[] = [];
300
325
  for (const beta of [...baseBetas, ...extraBetas]) {
@@ -311,32 +336,33 @@ function createClient(
311
336
  model: Model<"anthropic-messages">,
312
337
  apiKey: string,
313
338
  interleavedThinking: boolean,
339
+ extraBetas: string[],
314
340
  ): { client: Anthropic; isOAuthToken: boolean } {
315
341
  const oauthToken = isOAuthToken(apiKey);
316
342
 
317
343
  // Base betas required for Claude Code compatibility
318
344
  const baseBetas = oauthToken
319
- ? [
320
- "claude-code-20250219",
321
- "oauth-2025-04-20",
322
- "interleaved-thinking-2025-05-14",
323
- "fine-grained-tool-streaming-2025-05-14",
324
- ]
345
+ ? ["claude-code-20250219", "oauth-2025-04-20", "fine-grained-tool-streaming-2025-05-14"]
325
346
  : ["fine-grained-tool-streaming-2025-05-14"];
326
347
 
327
- // Add interleaved thinking if requested (and not already in base)
328
- const extraBetas: string[] = [];
329
- if (interleavedThinking && !oauthToken) {
330
- extraBetas.push("interleaved-thinking-2025-05-14");
348
+ // Add interleaved thinking if requested
349
+ const mergedBetas: string[] = [];
350
+ if (interleavedThinking) {
351
+ mergedBetas.push("interleaved-thinking-2025-05-14");
331
352
  }
332
353
 
333
354
  // Include any betas from model headers
334
355
  const modelBeta = model.headers?.["anthropic-beta"];
335
356
  if (modelBeta) {
336
- extraBetas.push(...modelBeta.split(","));
357
+ mergedBetas.push(...normalizeExtraBetas(modelBeta));
337
358
  }
338
359
 
339
- const betaHeader = buildBetaHeader(baseBetas, extraBetas);
360
+ // Include any betas passed via options
361
+ if (extraBetas.length > 0) {
362
+ mergedBetas.push(...extraBetas);
363
+ }
364
+
365
+ const betaHeader = buildBetaHeader(baseBetas, mergedBetas);
340
366
 
341
367
  if (oauthToken) {
342
368
  // Stealth mode: Mimic Claude Code's headers exactly
@@ -346,6 +372,7 @@ function createClient(
346
372
  "anthropic-beta": betaHeader,
347
373
  "user-agent": `claude-cli/${claudeCodeVersion} (external, cli)`,
348
374
  "x-app": "cli",
375
+ ...claudeCodeHeaders,
349
376
  ...(model.headers || {}),
350
377
  };
351
378
  // Don't duplicate anthropic-beta from model.headers
@@ -367,6 +394,9 @@ function createClient(
367
394
  accept: "application/json",
368
395
  "anthropic-dangerous-direct-browser-access": "true",
369
396
  "anthropic-beta": betaHeader,
397
+ "user-agent": `claude-cli/${claudeCodeVersion} (external, cli)`,
398
+ "x-app": "cli",
399
+ ...claudeCodeHeaders,
370
400
  ...(model.headers || {}),
371
401
  };
372
402
  // Ensure our beta header takes precedence
@@ -382,6 +412,80 @@ function createClient(
382
412
  return { client, isOAuthToken: false };
383
413
  }
384
414
 
415
+ export type AnthropicSystemBlock = {
416
+ type: "text";
417
+ text: string;
418
+ cache_control?: { type: "ephemeral" };
419
+ };
420
+
421
+ type SystemBlockOptions = {
422
+ includeClaudeCodeInstruction?: boolean;
423
+ includeCacheControl?: boolean;
424
+ extraInstructions?: string[];
425
+ };
426
+
427
+ export function buildAnthropicSystemBlocks(
428
+ systemPrompt: string | undefined,
429
+ options: SystemBlockOptions = {},
430
+ ): AnthropicSystemBlock[] | undefined {
431
+ const { includeClaudeCodeInstruction = false, includeCacheControl = true, extraInstructions = [] } = options;
432
+ const blocks: AnthropicSystemBlock[] = [];
433
+ const sanitizedPrompt = systemPrompt ? sanitizeSurrogates(systemPrompt) : "";
434
+ const hasClaudeCodeInstruction = sanitizedPrompt.includes(claudeCodeSystemInstruction);
435
+ const cacheControl = includeCacheControl ? { type: "ephemeral" as const } : undefined;
436
+
437
+ if (includeClaudeCodeInstruction && !hasClaudeCodeInstruction) {
438
+ blocks.push({
439
+ type: "text",
440
+ text: claudeCodeSystemInstruction,
441
+ ...(cacheControl ? { cache_control: cacheControl } : {}),
442
+ });
443
+ }
444
+
445
+ for (const instruction of extraInstructions) {
446
+ const trimmed = instruction.trim();
447
+ if (!trimmed) continue;
448
+ blocks.push({
449
+ type: "text",
450
+ text: trimmed,
451
+ ...(cacheControl ? { cache_control: cacheControl } : {}),
452
+ });
453
+ }
454
+
455
+ if (systemPrompt) {
456
+ blocks.push({
457
+ type: "text",
458
+ text: sanitizedPrompt,
459
+ ...(cacheControl ? { cache_control: cacheControl } : {}),
460
+ });
461
+ }
462
+
463
+ return blocks.length > 0 ? blocks : undefined;
464
+ }
465
+
466
+ function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming): void {
467
+ const toolChoice = params.tool_choice;
468
+ if (!toolChoice) return;
469
+ if (toolChoice.type === "any" || toolChoice.type === "tool") {
470
+ delete params.thinking;
471
+ }
472
+ }
473
+
474
+ function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model: Model<"anthropic-messages">): void {
475
+ const thinking = params.thinking;
476
+ if (!thinking || thinking.type !== "enabled") return;
477
+
478
+ const budgetTokens = thinking.budget_tokens ?? 0;
479
+ if (budgetTokens <= 0) return;
480
+
481
+ const maxTokens = params.max_tokens ?? 0;
482
+ const fallbackBuffer = 4000;
483
+ const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + fallbackBuffer;
484
+ if (maxTokens < requiredMaxTokens) {
485
+ params.max_tokens = requiredMaxTokens;
486
+ }
487
+ }
488
+
385
489
  function buildParams(
386
490
  model: Model<"anthropic-messages">,
387
491
  context: Context,
@@ -395,37 +499,13 @@ function buildParams(
395
499
  stream: true,
396
500
  };
397
501
 
398
- // For OAuth tokens, we MUST include Claude Code identity
399
- if (isOAuthToken) {
400
- params.system = [
401
- {
402
- type: "text",
403
- text: "You are Claude Code, Anthropic's official CLI for Claude.",
404
- cache_control: {
405
- type: "ephemeral",
406
- },
407
- },
408
- ];
409
- if (context.systemPrompt) {
410
- params.system.push({
411
- type: "text",
412
- text: sanitizeSurrogates(context.systemPrompt),
413
- cache_control: {
414
- type: "ephemeral",
415
- },
416
- });
417
- }
418
- } else if (context.systemPrompt) {
419
- // Add cache control to system prompt for non-OAuth tokens
420
- params.system = [
421
- {
422
- type: "text",
423
- text: sanitizeSurrogates(context.systemPrompt),
424
- cache_control: {
425
- type: "ephemeral",
426
- },
427
- },
428
- ];
502
+ const includeClaudeCodeSystem = !model.id.startsWith("claude-3-5-haiku");
503
+ const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
504
+ includeClaudeCodeInstruction: includeClaudeCodeSystem,
505
+ includeCacheControl: true,
506
+ });
507
+ if (systemBlocks) {
508
+ params.system = systemBlocks;
429
509
  }
430
510
 
431
511
  if (options?.temperature !== undefined) {
@@ -448,12 +528,15 @@ function buildParams(
448
528
  params.tool_choice = { type: options.toolChoice };
449
529
  } else if (isOAuthToken && options.toolChoice.name) {
450
530
  // Prefix tool name in tool_choice for OAuth mode
451
- params.tool_choice = { ...options.toolChoice, name: toClaudeCodeName(options.toolChoice.name) };
531
+ params.tool_choice = { ...options.toolChoice, name: applyClaudeToolPrefix(options.toolChoice.name) };
452
532
  } else {
453
533
  params.tool_choice = options.toolChoice;
454
534
  }
455
535
  }
456
536
 
537
+ disableThinkingIfToolChoiceForced(params);
538
+ ensureMaxTokensForThinking(params, model);
539
+
457
540
  return params;
458
541
  }
459
542
 
@@ -546,7 +629,7 @@ function convertMessages(
546
629
  blocks.push({
547
630
  type: "tool_use",
548
631
  id: sanitizeToolCallId(block.id),
549
- name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
632
+ name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
550
633
  input: block.arguments,
551
634
  });
552
635
  }
@@ -619,7 +702,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.
619
702
  const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema
620
703
 
621
704
  return {
622
- name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
705
+ name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
623
706
  description: tool.description,
624
707
  input_schema: {
625
708
  type: "object" as const,
@@ -308,6 +308,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
308
308
  for (const block of output.content) delete (block as any).index;
309
309
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
310
310
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
311
+ // Some providers via OpenRouter include extra details here.
312
+ const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
313
+ if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
311
314
  stream.push({ type: "error", reason: output.stopReason, error: output });
312
315
  stream.end();
313
316
  }
@@ -368,9 +371,12 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
368
371
  model: model.id,
369
372
  messages,
370
373
  stream: true,
371
- stream_options: { include_usage: true },
372
374
  };
373
375
 
376
+ if (compat.supportsUsageInStreaming !== false) {
377
+ (params as { stream_options?: { include_usage: boolean } }).stream_options = { include_usage: true };
378
+ }
379
+
374
380
  if (compat.supportsStore) {
375
381
  params.store = false;
376
382
  }
@@ -610,6 +616,7 @@ function convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool
610
616
  name: tool.name,
611
617
  description: tool.description,
612
618
  parameters: tool.parameters as any, // TypeBox already generates JSON Schema
619
+ strict: false, // Disable strict mode to allow optional parameters without null unions
613
620
  },
614
621
  }));
615
622
  }
@@ -654,6 +661,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
654
661
  supportsStore: !isNonStandard,
655
662
  supportsDeveloperRole: !isNonStandard,
656
663
  supportsReasoningEffort: !isGrok,
664
+ supportsUsageInStreaming: true,
657
665
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
658
666
  requiresToolResultName: isMistral,
659
667
  requiresAssistantAfterToolResult: false, // Mistral no longer requires this as of Dec 2024
@@ -674,6 +682,7 @@ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
674
682
  supportsStore: model.compat.supportsStore ?? detected.supportsStore,
675
683
  supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
676
684
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
685
+ supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
677
686
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
678
687
  requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
679
688
  requiresAssistantAfterToolResult:
@@ -544,7 +544,7 @@ function convertTools(tools: Tool[]): OpenAITool[] {
544
544
  name: tool.name,
545
545
  description: tool.description,
546
546
  parameters: tool.parameters as any, // TypeBox already generates JSON Schema
547
- strict: null,
547
+ strict: false,
548
548
  }));
549
549
  }
550
550
 
package/src/stream.ts CHANGED
@@ -26,13 +26,18 @@ import type {
26
26
  ThinkingLevel,
27
27
  } from "./types";
28
28
 
29
- const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
30
-
31
29
  let cachedVertexAdcCredentialsExists: boolean | null = null;
32
30
 
33
31
  function hasVertexAdcCredentials(): boolean {
34
32
  if (cachedVertexAdcCredentialsExists === null) {
35
- cachedVertexAdcCredentialsExists = existsSync(VERTEX_ADC_CREDENTIALS_PATH);
33
+ const gacPath = process.env.GOOGLE_APPLICATION_CREDENTIALS;
34
+ if (gacPath) {
35
+ cachedVertexAdcCredentialsExists = existsSync(gacPath);
36
+ } else {
37
+ cachedVertexAdcCredentialsExists = existsSync(
38
+ join(homedir(), ".config", "gcloud", "application_default_credentials.json"),
39
+ );
40
+ }
36
41
  }
37
42
  return cachedVertexAdcCredentialsExists;
38
43
  }
package/src/types.ts CHANGED
@@ -208,6 +208,8 @@ export interface OpenAICompat {
208
208
  supportsDeveloperRole?: boolean;
209
209
  /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
210
210
  supportsReasoningEffort?: boolean;
211
+ /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
212
+ supportsUsageInStreaming?: boolean;
211
213
  /** Which field to use for max tokens. Default: auto-detected from URL. */
212
214
  maxTokensField?: "max_completion_tokens" | "max_tokens";
213
215
  /** Whether tool results require the `name` field. Default: auto-detected from URL. */
@@ -12,6 +12,30 @@ const TOKEN_URL = "https://console.anthropic.com/v1/oauth/token";
12
12
  const REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback";
13
13
  const SCOPES = "org:create_api_key user:profile user:inference";
14
14
 
15
+ function parseAuthCode(input: string): { code: string; state?: string } {
16
+ const trimmed = input.trim();
17
+ if (!trimmed) return { code: "" };
18
+
19
+ try {
20
+ const url = new URL(trimmed);
21
+ const code = url.searchParams.get("code") ?? "";
22
+ const state = url.searchParams.get("state") ?? undefined;
23
+ if (code) return { code, state };
24
+ } catch {
25
+ // Ignore invalid URL parsing and fall back to manual parsing.
26
+ }
27
+
28
+ if (trimmed.includes("code=")) {
29
+ const params = new URLSearchParams(trimmed.replace(/^[?#]/, ""));
30
+ const code = params.get("code") ?? "";
31
+ const state = params.get("state") ?? undefined;
32
+ if (code) return { code, state };
33
+ }
34
+
35
+ const [code, state] = trimmed.split("#");
36
+ return { code, state };
37
+ }
38
+
15
39
  /**
16
40
  * Login with Anthropic OAuth (device code flow)
17
41
  *
@@ -43,9 +67,7 @@ export async function loginAnthropic(
43
67
 
44
68
  // Wait for user to paste authorization code (format: code#state)
45
69
  const authCode = await onPromptCode();
46
- const splits = authCode.split("#");
47
- const code = splits[0];
48
- const state = splits[1];
70
+ const { code, state } = parseAuthCode(authCode);
49
71
 
50
72
  // Exchange code for tokens
51
73
  const tokenResponse = await fetch(TOKEN_URL, {
@@ -56,8 +78,8 @@ export async function loginAnthropic(
56
78
  body: JSON.stringify({
57
79
  grant_type: "authorization_code",
58
80
  client_id: CLIENT_ID,
59
- code: code,
60
- state: state,
81
+ code,
82
+ ...(state ? { state } : {}),
61
83
  redirect_uri: REDIRECT_URI,
62
84
  code_verifier: verifier,
63
85
  }),
@@ -111,7 +133,7 @@ export async function refreshAnthropicToken(refreshToken: string): Promise<OAuth
111
133
  };
112
134
 
113
135
  return {
114
- refresh: data.refresh_token,
136
+ refresh: data.refresh_token || refreshToken,
115
137
  access: data.access_token,
116
138
  expires: Date.now() + data.expires_in * 1000 - 5 * 60 * 1000,
117
139
  };
@@ -122,13 +122,28 @@ interface LoadCodeAssistPayload {
122
122
  allowedTiers?: Array<{ id?: string; isDefault?: boolean }>;
123
123
  }
124
124
 
125
- interface OnboardUserPayload {
125
+ /**
126
+ * Long-running operation response from onboardUser
127
+ */
128
+ interface LongRunningOperationResponse {
129
+ name?: string;
126
130
  done?: boolean;
127
131
  response?: {
128
132
  cloudaicompanionProject?: { id?: string };
129
133
  };
130
134
  }
131
135
 
136
+ // Tier IDs as used by the Cloud Code API
137
+ const TIER_FREE = "free-tier";
138
+ const TIER_LEGACY = "legacy-tier";
139
+ const TIER_STANDARD = "standard-tier";
140
+
141
+ interface GoogleRpcErrorResponse {
142
+ error?: {
143
+ details?: Array<{ reason?: string }>;
144
+ };
145
+ }
146
+
132
147
  /**
133
148
  * Wait helper for onboarding retries
134
149
  */
@@ -137,18 +152,62 @@ function wait(ms: number): Promise<void> {
137
152
  }
138
153
 
139
154
  /**
140
- * Get default tier ID from allowed tiers
155
+ * Get the default tier from the allowed tiers, falling back to the legacy tier when the list is empty or no tier is marked as default
141
156
  */
142
- function getDefaultTierId(allowedTiers?: Array<{ id?: string; isDefault?: boolean }>): string | undefined {
143
- if (!allowedTiers || allowedTiers.length === 0) return undefined;
157
+ function getDefaultTier(allowedTiers?: Array<{ id?: string; isDefault?: boolean }>): { id?: string } {
158
+ if (!allowedTiers || allowedTiers.length === 0) return { id: TIER_LEGACY };
144
159
  const defaultTier = allowedTiers.find((t) => t.isDefault);
145
- return defaultTier?.id ?? allowedTiers[0]?.id;
160
+ return defaultTier ?? { id: TIER_LEGACY };
161
+ }
162
+
163
+ function isVpcScAffectedUser(payload: unknown): boolean {
164
+ if (!payload || typeof payload !== "object") return false;
165
+ if (!("error" in payload)) return false;
166
+ const error = (payload as GoogleRpcErrorResponse).error;
167
+ if (!error?.details || !Array.isArray(error.details)) return false;
168
+ return error.details.some((detail) => detail.reason === "SECURITY_POLICY_VIOLATED");
169
+ }
170
+
171
+ /**
172
+ * Poll a long-running operation until completion
173
+ */
174
+ async function pollOperation(
175
+ operationName: string,
176
+ headers: Record<string, string>,
177
+ onProgress?: (message: string) => void,
178
+ ): Promise<LongRunningOperationResponse> {
179
+ let attempt = 0;
180
+ while (true) {
181
+ if (attempt > 0) {
182
+ onProgress?.(`Waiting for project provisioning (attempt ${attempt + 1})...`);
183
+ await wait(5000);
184
+ }
185
+
186
+ const response = await fetch(`${CODE_ASSIST_ENDPOINT}/v1internal/${operationName}`, {
187
+ method: "GET",
188
+ headers,
189
+ });
190
+
191
+ if (!response.ok) {
192
+ throw new Error(`Failed to poll operation: ${response.status} ${response.statusText}`);
193
+ }
194
+
195
+ const data = (await response.json()) as LongRunningOperationResponse;
196
+ if (data.done) {
197
+ return data;
198
+ }
199
+
200
+ attempt += 1;
201
+ }
146
202
  }
147
203
 
148
204
  /**
149
205
  * Discover or provision a Google Cloud project for the user
150
206
  */
151
207
  async function discoverProject(accessToken: string, onProgress?: (message: string) => void): Promise<string> {
208
+ // Check for user-provided project ID via environment variable
209
+ const envProjectId = process.env.GOOGLE_CLOUD_PROJECT || process.env.GOOGLE_CLOUD_PROJECT_ID;
210
+
152
211
  const headers = {
153
212
  Authorization: `Bearer ${accessToken}`,
154
213
  "Content-Type": "application/json",
@@ -162,62 +221,114 @@ async function discoverProject(accessToken: string, onProgress?: (message: strin
162
221
  method: "POST",
163
222
  headers,
164
223
  body: JSON.stringify({
224
+ cloudaicompanionProject: envProjectId,
165
225
  metadata: {
166
226
  ideType: "IDE_UNSPECIFIED",
167
227
  platform: "PLATFORM_UNSPECIFIED",
168
228
  pluginType: "GEMINI",
229
+ duetProject: envProjectId,
169
230
  },
170
231
  }),
171
232
  });
172
233
 
173
- if (loadResponse.ok) {
174
- const data = (await loadResponse.json()) as LoadCodeAssistPayload;
234
+ let data: LoadCodeAssistPayload;
175
235
 
176
- // If we have an existing project, use it
236
+ if (!loadResponse.ok) {
237
+ let errorPayload: unknown;
238
+ try {
239
+ errorPayload = await loadResponse.clone().json();
240
+ } catch {
241
+ errorPayload = undefined;
242
+ }
243
+
244
+ if (isVpcScAffectedUser(errorPayload)) {
245
+ data = { currentTier: { id: TIER_STANDARD } };
246
+ } else {
247
+ const errorText = await loadResponse.text();
248
+ throw new Error(`loadCodeAssist failed: ${loadResponse.status} ${loadResponse.statusText}: ${errorText}`);
249
+ }
250
+ } else {
251
+ data = (await loadResponse.json()) as LoadCodeAssistPayload;
252
+ }
253
+
254
+ // If user already has a current tier and project, use it
255
+ if (data.currentTier) {
177
256
  if (data.cloudaicompanionProject) {
178
257
  return data.cloudaicompanionProject;
179
258
  }
259
+ // User has a tier but no managed project - they need to provide one via env var
260
+ if (envProjectId) {
261
+ return envProjectId;
262
+ }
263
+ throw new Error(
264
+ "This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID environment variable. " +
265
+ "See https://goo.gle/gemini-cli-auth-docs#workspace-gca",
266
+ );
267
+ }
180
268
 
181
- // Otherwise, try to onboard with the FREE tier
182
- const tierId = getDefaultTierId(data.allowedTiers) ?? "FREE";
183
-
184
- onProgress?.("Provisioning Cloud Code Assist project (this may take a moment)...");
185
-
186
- // Onboard with retries (the API may take time to provision)
187
- for (let attempt = 0; attempt < 10; attempt++) {
188
- const onboardResponse = await fetch(`${CODE_ASSIST_ENDPOINT}/v1internal:onboardUser`, {
189
- method: "POST",
190
- headers,
191
- body: JSON.stringify({
192
- tierId,
193
- metadata: {
194
- ideType: "IDE_UNSPECIFIED",
195
- platform: "PLATFORM_UNSPECIFIED",
196
- pluginType: "GEMINI",
197
- },
198
- }),
199
- });
269
+ // User needs to be onboarded - get the default tier
270
+ const tier = getDefaultTier(data.allowedTiers);
271
+ const tierId = tier?.id ?? TIER_FREE;
200
272
 
201
- if (onboardResponse.ok) {
202
- const onboardData = (await onboardResponse.json()) as OnboardUserPayload;
203
- const projectId = onboardData.response?.cloudaicompanionProject?.id;
273
+ if (tierId !== TIER_FREE && !envProjectId) {
274
+ throw new Error(
275
+ "This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID environment variable. " +
276
+ "See https://goo.gle/gemini-cli-auth-docs#workspace-gca",
277
+ );
278
+ }
204
279
 
205
- if (onboardData.done && projectId) {
206
- return projectId;
207
- }
208
- }
280
+ onProgress?.("Provisioning Cloud Code Assist project (this may take a moment)...");
281
+
282
+ // Build onboard request - for free tier, don't include project ID (Google provisions one)
283
+ // For other tiers, include the user's project ID if available
284
+ const onboardBody: Record<string, unknown> = {
285
+ tierId,
286
+ metadata: {
287
+ ideType: "IDE_UNSPECIFIED",
288
+ platform: "PLATFORM_UNSPECIFIED",
289
+ pluginType: "GEMINI",
290
+ },
291
+ };
209
292
 
210
- // Wait before retrying
211
- if (attempt < 9) {
212
- onProgress?.(`Waiting for project provisioning (attempt ${attempt + 2}/10)...`);
213
- await wait(3000);
214
- }
215
- }
293
+ if (tierId !== TIER_FREE && envProjectId) {
294
+ onboardBody.cloudaicompanionProject = envProjectId;
295
+ (onboardBody.metadata as Record<string, unknown>).duetProject = envProjectId;
296
+ }
297
+
298
+ // Start onboarding - this returns a long-running operation
299
+ const onboardResponse = await fetch(`${CODE_ASSIST_ENDPOINT}/v1internal:onboardUser`, {
300
+ method: "POST",
301
+ headers,
302
+ body: JSON.stringify(onboardBody),
303
+ });
304
+
305
+ if (!onboardResponse.ok) {
306
+ const errorText = await onboardResponse.text();
307
+ throw new Error(`onboardUser failed: ${onboardResponse.status} ${onboardResponse.statusText}: ${errorText}`);
308
+ }
309
+
310
+ let lroData = (await onboardResponse.json()) as LongRunningOperationResponse;
311
+
312
+ // If the operation isn't done yet, poll until completion
313
+ if (!lroData.done && lroData.name) {
314
+ lroData = await pollOperation(lroData.name, headers, onProgress);
315
+ }
316
+
317
+ // Try to get project ID from the response
318
+ const projectId = lroData.response?.cloudaicompanionProject?.id;
319
+ if (projectId) {
320
+ return projectId;
321
+ }
322
+
323
+ // If no project ID from onboarding, fall back to env var
324
+ if (envProjectId) {
325
+ return envProjectId;
216
326
  }
217
327
 
218
328
  throw new Error(
219
329
  "Could not discover or provision a Google Cloud project. " +
220
- "Please ensure you have access to Google Cloud Code Assist (Gemini CLI).",
330
+ "Try setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID environment variable. " +
331
+ "See https://goo.gle/gemini-cli-auth-docs#workspace-gca",
221
332
  );
222
333
  }
223
334