@oh-my-pi/pi-ai 3.20.0 → 3.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import type {
6
6
  ResponseInputText,
7
7
  ResponseOutputMessage,
8
8
  ResponseReasoningItem,
9
- } from "openai/resources/responses/responses.js";
9
+ } from "openai/resources/responses/responses";
10
10
  import { calculateCost } from "../models";
11
11
  import { getEnvApiKey } from "../stream";
12
12
  import type {
@@ -33,12 +33,9 @@ import {
33
33
  URL_PATHS,
34
34
  } from "./openai-codex/constants";
35
35
  import { getCodexInstructions } from "./openai-codex/prompts/codex";
36
- import {
37
- type CodexRequestOptions,
38
- normalizeModel,
39
- type RequestBody,
40
- transformRequestBody,
41
- } from "./openai-codex/request-transformer";
36
+ import { buildCodexPiBridge } from "./openai-codex/prompts/pi-codex-bridge";
37
+ import { buildCodexSystemPrompt } from "./openai-codex/prompts/system-prompt";
38
+ import { type CodexRequestOptions, type RequestBody, transformRequestBody } from "./openai-codex/request-transformer";
42
39
  import { parseCodexError, parseCodexSseStream } from "./openai-codex/response-handler";
43
40
  import { transformMessages } from "./transorm-messages";
44
41
 
@@ -94,6 +91,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
94
91
  model: model.id,
95
92
  input: messages,
96
93
  stream: true,
94
+ prompt_cache_key: options?.sessionId,
97
95
  };
98
96
 
99
97
  if (options?.maxTokens) {
@@ -108,8 +106,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
108
106
  params.tools = convertTools(context.tools);
109
107
  }
110
108
 
111
- const normalizedModel = normalizeModel(params.model);
112
- const codexInstructions = await getCodexInstructions(normalizedModel);
109
+ const codexInstructions = await getCodexInstructions(params.model);
110
+ const bridgeText = buildCodexPiBridge(context.tools);
111
+ const systemPrompt = buildCodexSystemPrompt({
112
+ codexInstructions,
113
+ bridgeText,
114
+ userSystemPrompt: context.systemPrompt,
115
+ });
116
+
117
+ params.instructions = systemPrompt.instructions;
113
118
 
114
119
  const codexOptions: CodexRequestOptions = {
115
120
  reasoningEffort: options?.reasoningEffort,
@@ -118,17 +123,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
118
123
  include: options?.include,
119
124
  };
120
125
 
121
- const transformedBody = await transformRequestBody(
122
- params,
123
- codexInstructions,
124
- codexOptions,
125
- options?.codexMode ?? true,
126
- );
126
+ const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
127
127
 
128
- const headers = createCodexHeaders(model.headers, accountId, apiKey, transformedBody.prompt_cache_key);
128
+ const reasoningEffort = transformedBody.reasoning?.effort ?? null;
129
+ const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
129
130
  logCodexDebug("codex request", {
130
131
  url,
131
132
  model: params.model,
133
+ reasoningEffort,
132
134
  headers: redactHeaders(headers),
133
135
  });
134
136
 
@@ -340,10 +342,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
340
342
  }
341
343
  } else if (eventType === "error") {
342
344
  const code = (rawEvent as { code?: string }).code || "";
343
- const message = (rawEvent as { message?: string }).message || "Unknown error";
344
- throw new Error(code ? `Error Code ${code}: ${message}` : message);
345
+ const message = (rawEvent as { message?: string }).message || "";
346
+ throw new Error(formatCodexErrorEvent(rawEvent, code, message));
345
347
  } else if (eventType === "response.failed") {
346
- throw new Error("Unknown error");
348
+ throw new Error(formatCodexFailure(rawEvent) ?? "Codex response failed");
347
349
  }
348
350
  }
349
351
 
@@ -352,7 +354,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
352
354
  }
353
355
 
354
356
  if (output.stopReason === "aborted" || output.stopReason === "error") {
355
- throw new Error("An unknown error occurred");
357
+ throw new Error("Codex response failed");
356
358
  }
357
359
 
358
360
  stream.push({ type: "done", reason: output.stopReason, message: output });
@@ -406,11 +408,11 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
406
408
 
407
409
  function redactHeaders(headers: Headers): Record<string, string> {
408
410
  const redacted: Record<string, string> = {};
409
- headers.forEach((value, key) => {
411
+ for (const [key, value] of headers.entries()) {
410
412
  const lower = key.toLowerCase();
411
413
  if (lower === "authorization") {
412
414
  redacted[key] = "Bearer [redacted]";
413
- return;
415
+ continue;
414
416
  }
415
417
  if (
416
418
  lower.includes("account") ||
@@ -419,10 +421,10 @@ function redactHeaders(headers: Headers): Record<string, string> {
419
421
  lower === "cookie"
420
422
  ) {
421
423
  redacted[key] = "[redacted]";
422
- return;
424
+ continue;
423
425
  }
424
426
  redacted[key] = value;
425
- });
427
+ }
426
428
  return redacted;
427
429
  }
428
430
 
@@ -617,3 +619,68 @@ function mapStopReason(status: string | undefined): StopReason {
617
619
  return "stop";
618
620
  }
619
621
  }
622
+
623
+ function asRecord(value: unknown): Record<string, unknown> | null {
624
+ if (value && typeof value === "object") {
625
+ return value as Record<string, unknown>;
626
+ }
627
+ return null;
628
+ }
629
+
630
+ function getString(value: unknown): string | undefined {
631
+ return typeof value === "string" ? value : undefined;
632
+ }
633
+
634
+ function truncate(text: string, limit: number): string {
635
+ if (text.length <= limit) return text;
636
+ return `${text.slice(0, limit)}...[truncated ${text.length - limit}]`;
637
+ }
638
+
639
+ function formatCodexFailure(rawEvent: Record<string, unknown>): string | null {
640
+ const response = asRecord(rawEvent.response);
641
+ const error = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);
642
+
643
+ const message = getString(error?.message) ?? getString(rawEvent.message) ?? getString(response?.message);
644
+ const code = getString(error?.code) ?? getString(error?.type) ?? getString(rawEvent.code);
645
+ const status = getString(response?.status) ?? getString(rawEvent.status);
646
+
647
+ const meta: string[] = [];
648
+ if (code) meta.push(`code=${code}`);
649
+ if (status) meta.push(`status=${status}`);
650
+
651
+ if (message) {
652
+ const metaText = meta.length ? ` (${meta.join(", ")})` : "";
653
+ return `Codex response failed: ${message}${metaText}`;
654
+ }
655
+
656
+ if (meta.length) {
657
+ return `Codex response failed (${meta.join(", ")})`;
658
+ }
659
+
660
+ try {
661
+ return `Codex response failed: ${truncate(JSON.stringify(rawEvent), 800)}`;
662
+ } catch {
663
+ return "Codex response failed";
664
+ }
665
+ }
666
+
667
+ function formatCodexErrorEvent(rawEvent: Record<string, unknown>, code: string, message: string): string {
668
+ const detail = formatCodexFailure(rawEvent);
669
+ if (detail) {
670
+ return detail.replace("response failed", "error event");
671
+ }
672
+
673
+ const meta: string[] = [];
674
+ if (code) meta.push(`code=${code}`);
675
+ if (message) meta.push(`message=${message}`);
676
+
677
+ if (meta.length > 0) {
678
+ return `Codex error event (${meta.join(", ")})`;
679
+ }
680
+
681
+ try {
682
+ return `Codex error event: ${truncate(JSON.stringify(rawEvent), 800)}`;
683
+ } catch {
684
+ return "Codex error event";
685
+ }
686
+ }
@@ -7,7 +7,7 @@ import type {
7
7
  ChatCompletionContentPartText,
8
8
  ChatCompletionMessageParam,
9
9
  ChatCompletionToolMessageParam,
10
- } from "openai/resources/chat/completions.js";
10
+ } from "openai/resources/chat/completions";
11
11
  import { calculateCost } from "../models";
12
12
  import { getEnvApiKey } from "../stream";
13
13
  import type {
@@ -196,34 +196,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
196
196
 
197
197
  // Some endpoints return reasoning in reasoning_content (llama.cpp),
198
198
  // or reasoning (other openai compatible endpoints)
199
+ // Use the first non-empty reasoning field to avoid duplication
200
+ // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
199
201
  const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
202
+ let foundReasoningField: string | null = null;
200
203
  for (const field of reasoningFields) {
201
204
  if (
202
205
  (choice.delta as any)[field] !== null &&
203
206
  (choice.delta as any)[field] !== undefined &&
204
207
  (choice.delta as any)[field].length > 0
205
208
  ) {
206
- if (!currentBlock || currentBlock.type !== "thinking") {
207
- finishCurrentBlock(currentBlock);
208
- currentBlock = {
209
- type: "thinking",
210
- thinking: "",
211
- thinkingSignature: field,
212
- };
213
- output.content.push(currentBlock);
214
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
209
+ if (!foundReasoningField) {
210
+ foundReasoningField = field;
211
+ break;
215
212
  }
213
+ }
214
+ }
216
215
 
217
- if (currentBlock.type === "thinking") {
218
- const delta = (choice.delta as any)[field];
219
- currentBlock.thinking += delta;
220
- stream.push({
221
- type: "thinking_delta",
222
- contentIndex: blockIndex(),
223
- delta,
224
- partial: output,
225
- });
226
- }
216
+ if (foundReasoningField) {
217
+ if (!currentBlock || currentBlock.type !== "thinking") {
218
+ finishCurrentBlock(currentBlock);
219
+ currentBlock = {
220
+ type: "thinking",
221
+ thinking: "",
222
+ thinkingSignature: foundReasoningField,
223
+ };
224
+ output.content.push(currentBlock);
225
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
226
+ }
227
+
228
+ if (currentBlock.type === "thinking") {
229
+ const delta = (choice.delta as any)[foundReasoningField];
230
+ currentBlock.thinking += delta;
231
+ stream.push({
232
+ type: "thinking_delta",
233
+ contentIndex: blockIndex(),
234
+ delta,
235
+ partial: output,
236
+ });
227
237
  }
228
238
  }
229
239
 
@@ -480,10 +490,8 @@ function convertMessages(
480
490
  const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
481
491
  if (nonEmptyThinkingBlocks.length > 0) {
482
492
  if (compat.requiresThinkingAsText) {
483
- // Convert thinking blocks to text with <thinking> delimiters
484
- const thinkingText = nonEmptyThinkingBlocks
485
- .map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
486
- .join("\n");
493
+ // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
494
+ const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
487
495
  const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
488
496
  if (textContent) {
489
497
  textContent.unshift({ type: "text", text: thinkingText });
@@ -633,8 +641,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
633
641
  baseUrl.includes("cerebras.ai") ||
634
642
  baseUrl.includes("api.x.ai") ||
635
643
  baseUrl.includes("mistral.ai") ||
636
- baseUrl.includes("chutes.ai") ||
637
- baseUrl.includes("localhost");
644
+ baseUrl.includes("chutes.ai");
638
645
 
639
646
  const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
640
647
 
@@ -9,7 +9,7 @@ import type {
9
9
  ResponseInputText,
10
10
  ResponseOutputMessage,
11
11
  ResponseReasoningItem,
12
- } from "openai/resources/responses/responses.js";
12
+ } from "openai/resources/responses/responses";
13
13
  import { calculateCost } from "../models";
14
14
  import { getEnvApiKey } from "../stream";
15
15
  import type {
@@ -45,12 +45,13 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
45
45
  assistantMsg.api !== model.api;
46
46
 
47
47
  // Transform message from different provider/model
48
- const transformedContent = assistantMsg.content.map((block) => {
48
+ const transformedContent = assistantMsg.content.flatMap((block) => {
49
49
  if (block.type === "thinking") {
50
- // Convert thinking block to text block with <thinking> tags
50
+ // Skip empty thinking blocks, convert others to plain text
51
+ if (!block.thinking || block.thinking.trim() === "") return [];
51
52
  return {
52
53
  type: "text" as const,
53
- text: `<thinking>\n${block.thinking}\n</thinking>`,
54
+ text: block.thinking,
54
55
  };
55
56
  }
56
57
  // Normalize tool call IDs for github-copilot cross-API switches
package/src/stream.ts CHANGED
@@ -21,8 +21,9 @@ import type {
21
21
  KnownProvider,
22
22
  Model,
23
23
  OptionsForApi,
24
- ReasoningEffort,
25
24
  SimpleStreamOptions,
25
+ ThinkingBudgets,
26
+ ThinkingLevel,
26
27
  } from "./types";
27
28
 
28
29
  const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
@@ -64,7 +65,6 @@ export function getEnvApiKey(provider: any): string | undefined {
64
65
  if (hasCredentials && hasProject && hasLocation) {
65
66
  return "<authenticated>";
66
67
  }
67
- return undefined;
68
68
  }
69
69
 
70
70
  const envMap: Record<string, string> = {
@@ -76,6 +76,7 @@ export function getEnvApiKey(provider: any): string | undefined {
76
76
  openrouter: "OPENROUTER_API_KEY",
77
77
  zai: "ZAI_API_KEY",
78
78
  mistral: "MISTRAL_API_KEY",
79
+ opencode: "OPENCODE_API_KEY",
79
80
  };
80
81
 
81
82
  const envVar = envMap[provider];
@@ -178,10 +179,11 @@ function mapOptionsForApi<TApi extends Api>(
178
179
  maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
179
180
  signal: options?.signal,
180
181
  apiKey: apiKey || options?.apiKey,
182
+ sessionId: options?.sessionId,
181
183
  };
182
184
 
183
185
  // Helper to clamp xhigh to high for providers that don't support it
184
- const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
186
+ const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
185
187
 
186
188
  switch (model.api) {
187
189
  case "anthropic-messages": {
@@ -192,15 +194,17 @@ function mapOptionsForApi<TApi extends Api>(
192
194
 
193
195
  // Claude requires max_tokens > thinking.budget_tokens
194
196
  // So we need to ensure maxTokens accounts for both thinking and output
195
- const anthropicBudgets = {
197
+ const defaultBudgets: ThinkingBudgets = {
196
198
  minimal: 1024,
197
199
  low: 2048,
198
200
  medium: 8192,
199
201
  high: 16384,
200
202
  };
203
+ const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
201
204
 
202
205
  const minOutputTokens = 1024;
203
- let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
206
+ const level = clampReasoning(options.reasoning)!;
207
+ let thinkingBudget = budgets[level]!;
204
208
  // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
205
209
  const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
206
210
 
@@ -261,7 +265,7 @@ function mapOptionsForApi<TApi extends Api>(
261
265
  ...base,
262
266
  thinking: {
263
267
  enabled: true,
264
- budgetTokens: getGoogleBudget(googleModel, effort),
268
+ budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
265
269
  },
266
270
  } satisfies GoogleOptions;
267
271
  }
@@ -287,15 +291,16 @@ function mapOptionsForApi<TApi extends Api>(
287
291
  // Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
288
292
  // Claude requires max_tokens > thinking.budget_tokens
289
293
  // So we need to ensure maxTokens accounts for both thinking and output
290
- const budgets: Record<ClampedReasoningEffort, number> = {
294
+ const defaultBudgets: ThinkingBudgets = {
291
295
  minimal: 1024,
292
296
  low: 2048,
293
297
  medium: 8192,
294
298
  high: 16384,
295
299
  };
300
+ const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
296
301
 
297
302
  const minOutputTokens = 1024;
298
- let thinkingBudget = budgets[effort];
303
+ let thinkingBudget = budgets[effort]!;
299
304
  // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
300
305
  const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
301
306
 
@@ -316,22 +321,20 @@ function mapOptionsForApi<TApi extends Api>(
316
321
 
317
322
  case "google-vertex": {
318
323
  // Explicitly disable thinking when reasoning is not specified
319
- // This is needed because Gemini has "dynamic thinking" enabled by default
320
324
  if (!options?.reasoning) {
321
325
  return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
322
326
  }
323
327
 
324
- const googleModel = model as Model<"google-vertex">;
328
+ const vertexModel = model as Model<"google-vertex">;
325
329
  const effort = clampReasoning(options.reasoning)!;
330
+ const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
326
331
 
327
- // Gemini 3 models use thinkingLevel exclusively instead of thinkingBudget.
328
- // https://ai.google.dev/gemini-api/docs/thinking#set-budget
329
- if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
332
+ if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
330
333
  return {
331
334
  ...base,
332
335
  thinking: {
333
336
  enabled: true,
334
- level: getGemini3ThinkingLevel(effort, googleModel),
337
+ level: getGemini3ThinkingLevel(effort, geminiModel),
335
338
  },
336
339
  } satisfies GoogleVertexOptions;
337
340
  }
@@ -340,7 +343,7 @@ function mapOptionsForApi<TApi extends Api>(
340
343
  ...base,
341
344
  thinking: {
342
345
  enabled: true,
343
- budgetTokens: getGoogleBudget(googleModel, effort),
346
+ budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
344
347
  },
345
348
  } satisfies GoogleVertexOptions;
346
349
  }
@@ -353,21 +356,21 @@ function mapOptionsForApi<TApi extends Api>(
353
356
  }
354
357
  }
355
358
 
356
- type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
359
+ type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
357
360
 
358
- function isGemini3ProModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
361
+ function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
359
362
  // Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
360
363
  return model.id.includes("3-pro");
361
364
  }
362
365
 
363
- function isGemini3FlashModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
366
+ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
364
367
  // Covers gemini-3-flash, gemini-3-flash-preview, and possible other prefixed ids in the future
365
368
  return model.id.includes("3-flash");
366
369
  }
367
370
 
368
371
  function getGemini3ThinkingLevel(
369
- effort: ClampedReasoningEffort,
370
- model: Model<"google-generative-ai"> | Model<"google-vertex">,
372
+ effort: ClampedThinkingLevel,
373
+ model: Model<"google-generative-ai">,
371
374
  ): GoogleThinkingLevel {
372
375
  if (isGemini3ProModel(model)) {
373
376
  // Gemini 3 Pro only supports LOW/HIGH (for now)
@@ -393,7 +396,7 @@ function getGemini3ThinkingLevel(
393
396
  }
394
397
  }
395
398
 
396
- function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
399
+ function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
397
400
  if (modelId.includes("3-pro")) {
398
401
  // Gemini 3 Pro only supports LOW/HIGH (for now)
399
402
  switch (effort) {
@@ -419,12 +422,18 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
419
422
  }
420
423
 
421
424
  function getGoogleBudget(
422
- model: Model<"google-generative-ai"> | Model<"google-vertex">,
423
- effort: ClampedReasoningEffort,
425
+ model: Model<"google-generative-ai">,
426
+ effort: ClampedThinkingLevel,
427
+ customBudgets?: ThinkingBudgets,
424
428
  ): number {
429
+ // Custom budgets take precedence if provided for this level
430
+ if (customBudgets?.[effort] !== undefined) {
431
+ return customBudgets[effort]!;
432
+ }
433
+
425
434
  // See https://ai.google.dev/gemini-api/docs/thinking#set-budget
426
435
  if (model.id.includes("2.5-pro")) {
427
- const budgets: Record<ClampedReasoningEffort, number> = {
436
+ const budgets: Record<ClampedThinkingLevel, number> = {
428
437
  minimal: 128,
429
438
  low: 2048,
430
439
  medium: 8192,
@@ -435,7 +444,7 @@ function getGoogleBudget(
435
444
 
436
445
  if (model.id.includes("2.5-flash")) {
437
446
  // Covers 2.5-flash-lite as well
438
- const budgets: Record<ClampedReasoningEffort, number> = {
447
+ const budgets: Record<ClampedThinkingLevel, number> = {
439
448
  minimal: 128,
440
449
  low: 2048,
441
450
  medium: 8192,
package/src/types.ts CHANGED
@@ -54,10 +54,19 @@ export type KnownProvider =
54
54
  | "cerebras"
55
55
  | "openrouter"
56
56
  | "zai"
57
- | "mistral";
57
+ | "mistral"
58
+ | "opencode";
58
59
  export type Provider = KnownProvider | string;
59
60
 
60
- export type ReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
61
+ export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
62
+
63
+ /** Token budgets for each thinking level (token-based providers only) */
64
+ export interface ThinkingBudgets {
65
+ minimal?: number;
66
+ low?: number;
67
+ medium?: number;
68
+ high?: number;
69
+ }
61
70
 
62
71
  // Base options all providers share
63
72
  export interface StreamOptions {
@@ -65,11 +74,19 @@ export interface StreamOptions {
65
74
  maxTokens?: number;
66
75
  signal?: AbortSignal;
67
76
  apiKey?: string;
77
+ /**
78
+ * Optional session identifier for providers that support session-based caching.
79
+ * Providers can use this to enable prompt caching, request routing, or other
80
+ * session-aware features. Ignored by providers that don't support it.
81
+ */
82
+ sessionId?: string;
68
83
  }
69
84
 
70
85
  // Unified options with reasoning passed to streamSimple() and completeSimple()
71
86
  export interface SimpleStreamOptions extends StreamOptions {
72
- reasoning?: ReasoningEffort;
87
+ reasoning?: ThinkingLevel;
88
+ /** Custom token budgets for thinking levels (token-based providers only) */
89
+ thinkingBudgets?: ThinkingBudgets;
73
90
  }
74
91
 
75
92
  // Generic StreamFunction with typed options
@@ -146,7 +163,7 @@ export interface ToolResultMessage<TDetails = any> {
146
163
  toolName: string;
147
164
  content: (TextContent | ImageContent)[]; // Supports text and images
148
165
  details?: TDetails;
149
- isError?: boolean;
166
+ isError: boolean;
150
167
  timestamp: number; // Unix timestamp in milliseconds
151
168
  }
152
169
 
@@ -136,17 +136,45 @@ async function startDeviceFlow(domain: string): Promise<DeviceCodeResponse> {
136
136
  };
137
137
  }
138
138
 
139
+ /**
140
+ * Sleep that can be interrupted by an AbortSignal
141
+ */
142
+ function abortableSleep(ms: number, signal?: AbortSignal): Promise<void> {
143
+ return new Promise((resolve, reject) => {
144
+ if (signal?.aborted) {
145
+ reject(new Error("Login cancelled"));
146
+ return;
147
+ }
148
+
149
+ const timeout = setTimeout(resolve, ms);
150
+
151
+ signal?.addEventListener(
152
+ "abort",
153
+ () => {
154
+ clearTimeout(timeout);
155
+ reject(new Error("Login cancelled"));
156
+ },
157
+ { once: true },
158
+ );
159
+ });
160
+ }
161
+
139
162
  async function pollForGitHubAccessToken(
140
163
  domain: string,
141
164
  deviceCode: string,
142
165
  intervalSeconds: number,
143
166
  expiresIn: number,
167
+ signal?: AbortSignal,
144
168
  ) {
145
169
  const urls = getUrls(domain);
146
170
  const deadline = Date.now() + expiresIn * 1000;
147
171
  let intervalMs = Math.max(1000, Math.floor(intervalSeconds * 1000));
148
172
 
149
173
  while (Date.now() < deadline) {
174
+ if (signal?.aborted) {
175
+ throw new Error("Login cancelled");
176
+ }
177
+
150
178
  const raw = await fetchJson(urls.accessTokenUrl, {
151
179
  method: "POST",
152
180
  headers: {
@@ -168,20 +196,20 @@ async function pollForGitHubAccessToken(
168
196
  if (raw && typeof raw === "object" && typeof (raw as DeviceTokenErrorResponse).error === "string") {
169
197
  const err = (raw as DeviceTokenErrorResponse).error;
170
198
  if (err === "authorization_pending") {
171
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
199
+ await abortableSleep(intervalMs, signal);
172
200
  continue;
173
201
  }
174
202
 
175
203
  if (err === "slow_down") {
176
204
  intervalMs += 5000;
177
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
205
+ await abortableSleep(intervalMs, signal);
178
206
  continue;
179
207
  }
180
208
 
181
209
  throw new Error(`Device flow failed: ${err}`);
182
210
  }
183
211
 
184
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
212
+ await abortableSleep(intervalMs, signal);
185
213
  }
186
214
 
187
215
  throw new Error("Device flow timed out");
@@ -274,11 +302,13 @@ async function enableAllGitHubCopilotModels(
274
302
  * @param options.onAuth - Callback with URL and optional instructions (user code)
275
303
  * @param options.onPrompt - Callback to prompt user for input
276
304
  * @param options.onProgress - Optional progress callback
305
+ * @param options.signal - Optional AbortSignal for cancellation
277
306
  */
278
307
  export async function loginGitHubCopilot(options: {
279
308
  onAuth: (url: string, instructions?: string) => void;
280
309
  onPrompt: (prompt: { message: string; placeholder?: string; allowEmpty?: boolean }) => Promise<string>;
281
310
  onProgress?: (message: string) => void;
311
+ signal?: AbortSignal;
282
312
  }): Promise<OAuthCredentials> {
283
313
  const input = await options.onPrompt({
284
314
  message: "GitHub Enterprise URL/domain (blank for github.com)",
@@ -286,6 +316,10 @@ export async function loginGitHubCopilot(options: {
286
316
  allowEmpty: true,
287
317
  });
288
318
 
319
+ if (options.signal?.aborted) {
320
+ throw new Error("Login cancelled");
321
+ }
322
+
289
323
  const trimmed = input.trim();
290
324
  const enterpriseDomain = normalizeDomain(input);
291
325
  if (trimmed && !enterpriseDomain) {
@@ -301,6 +335,7 @@ export async function loginGitHubCopilot(options: {
301
335
  device.device_code,
302
336
  device.interval,
303
337
  device.expires_in,
338
+ options.signal,
304
339
  );
305
340
  const credentials = await refreshGitHubCopilotToken(githubAccessToken, enterpriseDomain ?? undefined);
306
341