opencodekit 0.18.15 → 0.18.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,25 +17,25 @@ const CLIENT_ID = "Ov23li8tweQw6odWQebz";
17
17
 
18
18
  // Logger function that will be set by the plugin
19
19
  let log: (
20
- level: "debug" | "info" | "warn" | "error",
21
- message: string,
22
- extra?: Record<string, any>,
20
+ level: "debug" | "info" | "warn" | "error",
21
+ message: string,
22
+ extra?: Record<string, any>,
23
23
  ) => void = () => {};
24
24
 
25
25
  /**
26
26
  * Set the logger function from the plugin context
27
27
  */
28
28
  function setLogger(client: any) {
29
- log = (level, message, extra) => {
30
- client.app
31
- .log({
32
- service: "copilot-auth",
33
- level,
34
- message,
35
- extra,
36
- })
37
- .catch(() => {}); // Fire and forget, don't block on logging
38
- };
29
+ log = (level, message, extra) => {
30
+ client.app
31
+ .log({
32
+ service: "copilot-auth",
33
+ level,
34
+ message,
35
+ extra,
36
+ })
37
+ .catch(() => {}); // Fire and forget, don't block on logging
38
+ };
39
39
  }
40
40
 
41
41
  // Add a small safety buffer when polling to avoid hitting the server
@@ -43,64 +43,101 @@ function setLogger(client: any) {
43
43
  const OAUTH_POLLING_SAFETY_MARGIN_MS = 3000; // 3 seconds
44
44
 
45
45
  const HEADERS = {
46
- "User-Agent": "GitHubCopilotChat/0.35.0",
47
- "Editor-Version": "vscode/1.107.0",
48
- "Editor-Plugin-Version": "copilot-chat/0.35.0",
49
- "Copilot-Integration-Id": "vscode-chat",
46
+ "User-Agent": "GitHubCopilotChat/0.35.0",
47
+ "Editor-Version": "vscode/1.107.0",
48
+ "Editor-Plugin-Version": "copilot-chat/0.35.0",
49
+ "Copilot-Integration-Id": "vscode-chat",
50
50
  };
51
51
 
52
52
  const RESPONSES_API_ALTERNATE_INPUT_TYPES = [
53
- "file_search_call",
54
- "computer_call",
55
- "computer_call_output",
56
- "web_search_call",
57
- "function_call",
58
- "function_call_output",
59
- "image_generation_call",
60
- "code_interpreter_call",
61
- "local_shell_call",
62
- "local_shell_call_output",
63
- "mcp_list_tools",
64
- "mcp_approval_request",
65
- "mcp_approval_response",
66
- "mcp_call",
67
- "reasoning",
53
+ "file_search_call",
54
+ "computer_call",
55
+ "computer_call_output",
56
+ "web_search_call",
57
+ "function_call",
58
+ "function_call_output",
59
+ "image_generation_call",
60
+ "code_interpreter_call",
61
+ "local_shell_call",
62
+ "local_shell_call_output",
63
+ "mcp_list_tools",
64
+ "mcp_approval_request",
65
+ "mcp_approval_response",
66
+ "mcp_call",
67
+ "reasoning",
68
68
  ];
69
69
 
70
70
  function normalizeDomain(url: string): string {
71
- return url.replace(/^https?:\/\//, "").replace(/\/$/, "");
71
+ return url.replace(/^https?:\/\//, "").replace(/\/$/, "");
72
72
  }
73
73
 
74
74
  function getUrls(domain: string) {
75
- return {
76
- DEVICE_CODE_URL: `https://${domain}/login/device/code`,
77
- ACCESS_TOKEN_URL: `https://${domain}/login/oauth/access_token`,
78
- };
75
+ return {
76
+ DEVICE_CODE_URL: `https://${domain}/login/device/code`,
77
+ ACCESS_TOKEN_URL: `https://${domain}/login/oauth/access_token`,
78
+ };
79
79
  }
80
80
 
81
81
  const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
82
82
 
83
83
  // Rate limit handling configuration
84
84
  const RATE_LIMIT_CONFIG = {
85
- maxRetries: 3,
86
- baseDelayMs: 2000, // Start with 2 seconds
87
- maxDelayMs: 60000, // Cap at 60 seconds
88
- defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
89
- maxFallbacks: 4, // Max model fallback switches per request
85
+ maxRetries: 3,
86
+ baseDelayMs: 2000, // Start with 2 seconds
87
+ maxDelayMs: 60000, // Cap at 60 seconds
88
+ defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
89
+ maxFallbacks: 4, // Max model fallback switches per request
90
+ };
91
+
92
+ // Local request shaping to smooth bursts before they hit Copilot limits
93
+ const REQUEST_SHAPING_CONFIG = {
94
+ tokensPerSecond: 1,
95
+ burstCapacity: 2,
96
+ maxQueueDelayMs: 15000,
97
+ };
98
+
99
+ const CIRCUIT_BREAKER_CONFIG = {
100
+ maxInlineWaitMs: 30000,
101
+ maxRecoveryCycles: 3,
90
102
  };
91
103
 
92
104
  // Per-model rate limit state (in-memory, resets on restart)
93
105
  interface RateLimitEntry {
94
- rateLimitedUntil: number; // Unix timestamp (ms)
106
+ rateLimitedUntil: number; // Unix timestamp (ms)
95
107
  }
96
108
  const rateLimitState = new Map<string, RateLimitEntry>();
109
+ const familyCircuitBreakerState = new Map<string, number>();
110
+
111
+ interface TokenBucketState {
112
+ tokens: number;
113
+ lastRefillAt: number;
114
+ }
115
+ const modelTokenBuckets = new Map<string, TokenBucketState>();
116
+ const modelQueueTail = new Map<string, Promise<void>>();
97
117
 
98
118
  // Model fallback chains: same-family alternatives when a model is rate-limited
99
119
  const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
100
- // Claude family
101
- "claude-opus-4.6": ["claude-opus-4.5", "claude-sonnet-4.6", "gpt-5.3-codex"],
102
- "claude-opus-4.5": ["claude-sonnet-4.6", "gpt-5.3-codex"],
103
- "claude-sonnet-4.6": ["gpt-5.3-codex"],
120
+ // Claude family
121
+ "claude-opus-4.6": [
122
+ "claude-opus-4.5",
123
+ "claude-sonnet-4.6",
124
+ "claude-sonnet-4.5",
125
+ ],
126
+ "claude-opus-4.5": [
127
+ "claude-opus-4.6",
128
+ "claude-sonnet-4.5",
129
+ "claude-sonnet-4.6",
130
+ ],
131
+ "claude-sonnet-4.6": [
132
+ "claude-sonnet-4.5",
133
+ "claude-opus-4.6",
134
+ "claude-opus-4.5",
135
+ ],
136
+ "claude-sonnet-4.5": [
137
+ "claude-sonnet-4.6",
138
+ "claude-opus-4.5",
139
+ "claude-opus-4.6",
140
+ ],
104
141
  };
105
142
 
106
143
  /**
@@ -108,65 +145,208 @@ const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
108
145
  * Returns cooldown in milliseconds, or null if header is missing/unparseable.
109
146
  */
110
147
  function parseRetryAfter(response: Response): number | null {
111
- const header = response.headers.get("retry-after");
112
- if (!header) return null;
113
- // Try as seconds first (most common)
114
- const seconds = parseInt(header, 10);
115
- if (!isNaN(seconds) && seconds > 0) return seconds * 1000;
116
- // Try as HTTP date
117
- const date = Date.parse(header);
118
- if (!isNaN(date)) return Math.max(0, date - Date.now());
119
- return null;
148
+ const header = response.headers.get("retry-after");
149
+ if (!header) return null;
150
+ // Try as seconds first (most common)
151
+ const seconds = parseInt(header, 10);
152
+ if (!isNaN(seconds) && seconds > 0) return seconds * 1000;
153
+ // Try as HTTP date
154
+ const date = Date.parse(header);
155
+ if (!isNaN(date)) return Math.max(0, date - Date.now());
156
+ return null;
120
157
  }
121
158
 
122
159
  function isModelRateLimited(model: string): boolean {
123
- const entry = rateLimitState.get(model);
124
- if (!entry) return false;
125
- if (Date.now() >= entry.rateLimitedUntil) {
126
- rateLimitState.delete(model);
127
- return false;
128
- }
129
- return true;
160
+ const entry = rateLimitState.get(model);
161
+ if (!entry) return false;
162
+ if (Date.now() >= entry.rateLimitedUntil) {
163
+ rateLimitState.delete(model);
164
+ return false;
165
+ }
166
+ return true;
167
+ }
168
+
169
/**
 * Milliseconds left on `model`'s rate-limit cooldown.
 *
 * @returns The remaining time, or null when the model has no entry or its
 *   entry has expired (expired entries are deleted).
 */
function getRateLimitRemainingMs(model: string): number | null {
  const cooldown = rateLimitState.get(model);
  if (cooldown === undefined) return null;

  const msLeft = cooldown.rateLimitedUntil - Date.now();
  if (msLeft <= 0) {
    // Cooldown is over: forget the entry.
    rateLimitState.delete(model);
    return null;
  }
  return msLeft;
}
179
+
180
/**
 * List `model` together with every model in its fallback chain, without
 * duplicates. `model` itself always comes first.
 */
function getModelFamily(model: string): string[] {
  const members = new Set<string>([model]);
  for (const fallback of MODEL_FALLBACK_CHAINS[model] || []) {
    members.add(fallback);
  }
  return Array.from(members);
}
187
+
188
/**
 * Key identifying a model family in `familyCircuitBreakerState`: the family's
 * member ids, sorted and joined with "|", so the key does not depend on the
 * order in which members are listed.
 */
function getFamilyCircuitKey(model: string): string {
  const members = getModelFamily(model);
  members.sort();
  return members.join("|");
}
191
+
192
/**
 * Milliseconds until the circuit breaker for `model`'s family closes.
 *
 * @returns 0 when no breaker is recorded or it has expired (an expired
 *   record is deleted), otherwise the remaining time.
 */
function getFamilyCircuitRemainingMs(model: string): number {
  const circuitKey = getFamilyCircuitKey(model);
  const openUntil = familyCircuitBreakerState.get(circuitKey);
  if (!openUntil) return 0;

  const msLeft = openUntil - Date.now();
  if (msLeft > 0) return msLeft;

  familyCircuitBreakerState.delete(circuitKey);
  return 0;
}
203
+
204
/**
 * Open the circuit breaker for `model`'s family.
 *
 * @param model Any member of the family.
 * @param cooldownMs Requested open duration; capped at
 *   `RATE_LIMIT_CONFIG.maxDelayMs`.
 */
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
  const circuitKey = getFamilyCircuitKey(model);
  const cappedCooldownMs = Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs);
  const openUntil = Date.now() + cappedCooldownMs;
  familyCircuitBreakerState.set(circuitKey, openUntil);
}
211
+
212
/**
 * Longest remaining rate-limit cooldown, in milliseconds, across `model` and
 * its fallback chain. Members without an active cooldown count as 0.
 */
function getFamilyMaxCooldownRemainingMs(model: string): number {
  return getModelFamily(model).reduce((longest, candidate) => {
    const remaining = getRateLimitRemainingMs(candidate) ?? 0;
    return remaining > longest ? remaining : longest;
  }, 0);
}
220
+
221
/**
 * True when `model` and every model in its fallback chain are currently
 * rate-limited, i.e. there is nothing left in the family to switch to.
 */
function isEntireModelFamilyCoolingDown(model: string): boolean {
  const family = getModelFamily(model);
  if (family.length === 0) return false;
  // Stops at the first member that is not rate-limited.
  return !family.some((candidate) => !isModelRateLimited(candidate));
}
228
+
229
/**
 * Render a wait time as a short human-readable string for error messages:
 * `"45s"`, `"2m"`, or `"1m 30s"`.
 *
 * The input is normalised first so the output is always well-formed:
 * fractional values are rounded up to whole seconds, and negative or
 * non-finite values are treated as 0.
 *
 * @param seconds Wait time in seconds.
 */
function formatRetryAfter(seconds: number): string {
  const total = Number.isFinite(seconds) ? Math.max(0, Math.ceil(seconds)) : 0;
  if (total < 60) return `${total}s`;
  const mins = Math.floor(total / 60);
  const secs = total % 60;
  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
}
235
+
236
/**
 * Local request shaping for one model: wait for earlier requests to the same
 * model, then take a token from that model's token bucket.
 *
 * Requests for a model are chained through `modelQueueTail`, so they pass
 * through here one at a time in arrival order. A request that cannot reach
 * the head of the queue within `REQUEST_SHAPING_CONFIG.maxQueueDelayMs` is
 * rejected. Once at the head, the bucket is refilled for the time elapsed
 * since its last refill (capped at `burstCapacity`); if less than one token
 * is available the call sleeps until one would be.
 *
 * Resolves immediately for an empty model id.
 *
 * @throws Error with a "[Copilot] Local request queue saturated" message when
 *   the queue wait or the token wait would exceed `maxQueueDelayMs`.
 */
async function shapeRequestForModel(model: string): Promise<void> {
  if (!model) return;

  const { tokensPerSecond, burstCapacity, maxQueueDelayMs } =
    REQUEST_SHAPING_CONFIG;

  // Join the queue: our tail settles only after everything ahead of us has
  // settled AND our own gate has been opened (in the finally block below).
  const ahead = modelQueueTail.get(model) ?? Promise.resolve();
  let openGate: (() => void) | undefined;
  const gate = new Promise<void>((resolve) => {
    openGate = resolve;
  });
  const ownTail = ahead.then(() => gate);
  modelQueueTail.set(model, ownTail);

  let giveUpTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Wait for our turn, but not longer than the configured queue delay.
    const giveUp = new Promise<void>((_, reject) => {
      giveUpTimer = setTimeout(() => {
        reject(
          new Error(
            `[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(maxQueueDelayMs / 1000))}.`,
          ),
        );
      }, maxQueueDelayMs);
    });
    await Promise.race([ahead, giveUp]);

    // Refill the bucket for the time that has passed. The bucket object is
    // updated in place, so an existing entry in the map sees these changes.
    const now = Date.now();
    let bucket = modelTokenBuckets.get(model);
    if (bucket === undefined) {
      bucket = { tokens: burstCapacity, lastRefillAt: now };
    }
    const sinceRefillMs = Math.max(0, now - bucket.lastRefillAt);
    const refilled = bucket.tokens + (sinceRefillMs / 1000) * tokensPerSecond;
    bucket.tokens = Math.min(burstCapacity, refilled);
    bucket.lastRefillAt = now;

    if (bucket.tokens < 1) {
      // Not enough for one request: wait until the missing fraction refills.
      const missing = 1 - bucket.tokens;
      const waitMs = Math.ceil((missing / tokensPerSecond) * 1000);
      if (waitMs > maxQueueDelayMs) {
        throw new Error(
          `[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(waitMs / 1000))}.`,
        );
      }
      log("info", `Local request shaping wait for ${model}`, {
        wait_ms: waitMs,
      });
      await sleep(waitMs);
      // The token earned during the wait is spent by this request.
      bucket.tokens = 0;
      bucket.lastRefillAt = Date.now();
    } else {
      bucket.tokens -= 1;
    }

    modelTokenBuckets.set(model, bucket);
  } finally {
    if (giveUpTimer) clearTimeout(giveUpTimer);
    // Let the next queued request proceed, whether we succeeded or failed.
    openGate?.();
    // If nobody queued behind us, drop the map entry.
    if (modelQueueTail.get(model) === ownTail) {
      modelQueueTail.delete(model);
    }
  }
}
131
306
 
132
307
  function markModelRateLimited(model: string, cooldownMs: number): void {
133
- rateLimitState.set(model, {
134
- rateLimitedUntil: Date.now() + cooldownMs,
135
- });
136
- log(
137
- "info",
138
- `Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
139
- );
308
+ rateLimitState.set(model, {
309
+ rateLimitedUntil: Date.now() + cooldownMs,
310
+ });
311
+ log(
312
+ "info",
313
+ `Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
314
+ );
140
315
  }
141
316
 
142
317
  /**
143
318
  * Find the next available fallback model in the same family.
144
319
  * Skips models that are themselves rate-limited.
145
320
  */
146
- function getNextFallbackModel(model: string): string | null {
147
- const chain = MODEL_FALLBACK_CHAINS[model];
148
- if (!chain) return null;
149
- for (const fallback of chain) {
150
- if (!isModelRateLimited(fallback)) return fallback;
151
- }
152
- return null;
321
+ function getNextFallbackModel(
322
+ model: string,
323
+ attemptedModels: Set<string>,
324
+ ): string | null {
325
+ const chain = MODEL_FALLBACK_CHAINS[model];
326
+ if (!chain) return null;
327
+ for (const fallback of chain) {
328
+ if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
329
+ return fallback;
330
+ }
331
+ }
332
+ return null;
153
333
  }
154
334
 
155
335
  /**
156
336
  * Swap the model field in a fetch RequestInit body.
157
337
  */
158
338
  function swapModelInBody(
159
- init: RequestInit | undefined,
160
- newModel: string,
339
+ init: RequestInit | undefined,
340
+ newModel: string,
161
341
  ): RequestInit | undefined {
162
- if (!init?.body || typeof init.body !== "string") return init;
163
- try {
164
- const body = JSON.parse(init.body);
165
- body.model = newModel;
166
- return { ...init, body: JSON.stringify(body) };
167
- } catch {
168
- return init;
169
- }
342
+ if (!init?.body || typeof init.body !== "string") return init;
343
+ try {
344
+ const body = JSON.parse(init.body);
345
+ body.model = newModel;
346
+ return { ...init, body: JSON.stringify(body) };
347
+ } catch {
348
+ return init;
349
+ }
170
350
  }
171
351
 
172
352
  // Maximum length for item IDs in the OpenAI Responses API
@@ -178,17 +358,17 @@ const MAX_RESPONSE_API_ID_LENGTH = 64;
178
358
  * See: https://github.com/vercel/ai/issues/5171
179
359
  */
180
360
  function sanitizeResponseId(id: string): string {
181
- if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
182
- // Use a simple hash: take first 8 chars + hash of full string for uniqueness
183
- // Format: "h_" + first 8 chars + "_" + base36 hash (up to ~50 chars total)
184
- let hash = 0;
185
- for (let i = 0; i < id.length; i++) {
186
- hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
187
- }
188
- const hashStr = Math.abs(hash).toString(36);
189
- const prefix = id.slice(0, 8);
190
- // Ensure total length <= 64: "h_" (2) + prefix (8) + "_" (1) + hash
191
- return `h_${prefix}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
361
+ if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
362
+ // Use a simple hash: take first 8 chars + hash of full string for uniqueness
363
+ // Format: "h_" + first 8 chars + "_" + base36 hash (up to ~50 chars total)
364
+ let hash = 0;
365
+ for (let i = 0; i < id.length; i++) {
366
+ hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
367
+ }
368
+ const hashStr = Math.abs(hash).toString(36);
369
+ const prefix = id.slice(0, 8);
370
+ // Ensure total length <= 64: "h_" (2) + prefix (8) + "_" (1) + hash
371
+ return `h_${prefix}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
192
372
  }
193
373
 
194
374
  /**
@@ -196,632 +376,764 @@ function sanitizeResponseId(id: string): string {
196
376
  * Recursively checks `id` and `call_id` fields on each input item.
197
377
  */
198
378
  function sanitizeResponseInputIds(input: any[]): any[] {
199
- return input.map((item: any) => {
200
- if (!item || typeof item !== "object") return item;
201
- const sanitized = { ...item };
202
- if (
203
- typeof sanitized.id === "string" &&
204
- sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH
205
- ) {
206
- sanitized.id = sanitizeResponseId(sanitized.id);
207
- }
208
- if (
209
- typeof sanitized.call_id === "string" &&
210
- sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH
211
- ) {
212
- sanitized.call_id = sanitizeResponseId(sanitized.call_id);
213
- }
214
- return sanitized;
215
- });
379
+ return input.map((item: any) => {
380
+ if (!item || typeof item !== "object") return item;
381
+ const sanitized = { ...item };
382
+ if (
383
+ typeof sanitized.id === "string" &&
384
+ sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH
385
+ ) {
386
+ sanitized.id = sanitizeResponseId(sanitized.id);
387
+ }
388
+ if (
389
+ typeof sanitized.call_id === "string" &&
390
+ sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH
391
+ ) {
392
+ sanitized.call_id = sanitizeResponseId(sanitized.call_id);
393
+ }
394
+ return sanitized;
395
+ });
216
396
  }
217
397
 
218
398
  /**
219
399
  * Retries: 2s, 4s, 8s (with jitter)
220
400
  */
221
401
  function calculateRetryDelay(attempt: number): number {
222
- const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * Math.pow(2, attempt);
223
- const jitter = Math.random() * 1000; // Add 0-1s random jitter
224
- const delay = Math.min(
225
- exponentialDelay + jitter,
226
- RATE_LIMIT_CONFIG.maxDelayMs,
227
- );
228
- return Math.round(delay);
402
+ const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
403
+ const jitter = Math.random() * 1000; // Add 0-1s random jitter
404
+ const delay = Math.min(
405
+ exponentialDelay + jitter,
406
+ RATE_LIMIT_CONFIG.maxDelayMs,
407
+ );
408
+ return Math.round(delay);
229
409
  }
230
410
 
231
411
  export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
232
- // Initialize logger with the SDK client
233
- setLogger(sdk);
234
-
235
- return {
236
- auth: {
237
- provider: "github-copilot",
238
- loader: async (getAuth, provider) => {
239
- const info = await getAuth();
240
- if (!info || info.type !== "oauth") return {};
241
-
242
- // Enterprise URL support for baseURL
243
- const enterpriseUrl = (info as any).enterpriseUrl;
244
- const baseURL = enterpriseUrl
245
- ? `https://copilot-api.${normalizeDomain(enterpriseUrl)}`
246
- : undefined;
247
-
248
- if (provider && provider.models) {
249
- for (const [_modelId, model] of Object.entries(provider.models)) {
250
- model.cost = {
251
- input: 0,
252
- output: 0,
253
- cache: {
254
- read: 0,
255
- write: 0,
256
- },
257
- };
258
-
259
- // All models use the standard github-copilot SDK
260
- // Reasoning support for Claude models is handled via:
261
- // 1. The fetch wrapper adds thinking_budget to request body
262
- // 2. The fetch wrapper strips invalid thinking blocks from messages
263
- model.api.npm = "@ai-sdk/github-copilot";
264
- }
265
- }
266
-
267
- return {
268
- baseURL,
269
- apiKey: "",
270
- async fetch(input, init) {
271
- const info = await getAuth();
272
- if (info.type !== "oauth") return fetch(input, init);
273
-
274
- let isAgentCall = false;
275
- let isVisionRequest = false;
276
- let modifiedBody: any = undefined;
277
- let isClaudeModel = false;
278
-
279
- try {
280
- const body =
281
- typeof init?.body === "string"
282
- ? JSON.parse(init.body)
283
- : init?.body;
284
-
285
- const url = input.toString();
286
-
287
- // Check if this is a Claude model request
288
- const modelId = body?.model || "";
289
- isClaudeModel = modelId.toLowerCase().includes("claude");
290
-
291
- // Completions API
292
- if (body?.messages && url.includes("completions")) {
293
- // Keep local logic: detect if any message is assistant/tool
294
- isAgentCall = body.messages.some((msg: any) =>
295
- ["tool", "assistant"].includes(msg.role),
296
- );
297
- isVisionRequest = body.messages.some(
298
- (msg: any) =>
299
- Array.isArray(msg.content) &&
300
- msg.content.some((part: any) => part.type === "image_url"),
301
- );
302
-
303
- // For Claude models, add thinking_budget to enable reasoning
304
- // The Copilot API accepts this parameter and returns reasoning_text/reasoning_opaque
305
- if (isClaudeModel) {
306
- // Use configured thinking_budget from model options, or default to 10000
307
- const thinkingBudget = body.thinking_budget || 10000;
308
-
309
- // Fix for "Invalid signature in thinking block" error:
310
- // The Copilot API uses reasoning_text/reasoning_opaque format for thinking
311
- // When these are passed back without proper signature, it causes errors
312
- // Solution: Ensure reasoning_opaque is present when reasoning_text exists,
313
- // or remove reasoning content entirely if signature is invalid/missing
314
- const cleanedMessages = body.messages.map(
315
- (msg: any, idx: number) => {
316
- if (msg.role !== "assistant") return msg;
317
-
318
- // Log message structure for debugging
319
- log("debug", `Processing assistant message ${idx}`, {
320
- has_reasoning_text: !!msg.reasoning_text,
321
- has_reasoning_opaque: !!msg.reasoning_opaque,
322
- content_type: typeof msg.content,
323
- content_is_array: Array.isArray(msg.content),
324
- });
325
-
326
- // If message has reasoning_text but no/invalid reasoning_opaque, remove reasoning
327
- if (msg.reasoning_text && !msg.reasoning_opaque) {
328
- log(
329
- "warn",
330
- `Removing reasoning_text without reasoning_opaque from message ${idx}`,
331
- );
332
- const { reasoning_text: _unused, ...cleanedMsg } = msg;
333
- return cleanedMsg;
334
- }
335
-
336
- // If content is an array, check for thinking blocks
337
- if (Array.isArray(msg.content)) {
338
- const hasThinkingBlock = msg.content.some(
339
- (part: any) => part.type === "thinking",
340
- );
341
- if (hasThinkingBlock) {
342
- log(
343
- "debug",
344
- `Message ${idx} has thinking blocks in content array`,
345
- );
346
- // Filter out thinking blocks without signatures
347
- const cleanedContent = msg.content.filter(
348
- (part: any) => {
349
- if (part.type === "thinking") {
350
- if (!part.signature) {
351
- log(
352
- "warn",
353
- `Removing thinking block without signature`,
354
- );
355
- return false;
356
- }
357
- }
358
- return true;
359
- },
360
- );
361
- return {
362
- ...msg,
363
- content:
364
- cleanedContent.length > 0 ? cleanedContent : null,
365
- };
366
- }
367
- }
368
-
369
- return msg;
370
- },
371
- );
372
-
373
- modifiedBody = {
374
- ...body,
375
- messages: cleanedMessages,
376
- thinking_budget: thinkingBudget,
377
- };
378
- log("info", `Adding thinking_budget for Claude model`, {
379
- model: modelId,
380
- thinking_budget: thinkingBudget,
381
- });
382
- }
383
-
384
- // For GPT models (o1, gpt-5, etc.), add reasoning parameter
385
- const isGptModel =
386
- modelId.toLowerCase().includes("gpt") ||
387
- modelId.toLowerCase().includes("o1") ||
388
- modelId.toLowerCase().includes("o3") ||
389
- modelId.toLowerCase().includes("o4");
390
-
391
- if (isGptModel && !isClaudeModel) {
392
- // Get reasoning effort from body options or default to "medium"
393
- const reasoningEffort =
394
- body.reasoning?.effort ||
395
- body.reasoningEffort ||
396
- body.reasoning_effort ||
397
- "medium";
398
-
399
- modifiedBody = {
400
- ...(modifiedBody || body),
401
- reasoning: {
402
- effort: reasoningEffort,
403
- },
404
- };
405
-
406
- // Also pass through other reasoning options if present
407
- if (body.reasoningSummary || body.reasoning?.summary) {
408
- modifiedBody.reasoning.summary =
409
- body.reasoningSummary || body.reasoning?.summary;
410
- }
411
-
412
- log("info", `Adding reasoning for GPT model`, {
413
- model: modelId,
414
- reasoning_effort: reasoningEffort,
415
- });
416
- }
417
- }
418
-
419
- // Responses API
420
- if (body?.input) {
421
- // Sanitize long IDs from Copilot backend (can be 400+ chars)
422
- // OpenAI Responses API enforces a 64-char max on item IDs
423
- const sanitizedInput = sanitizeResponseInputIds(body.input);
424
- const inputWasSanitized =
425
- sanitizedInput !== body.input &&
426
- JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
427
-
428
- if (inputWasSanitized) {
429
- log("info", "Sanitized long IDs in Responses API input", {
430
- original_count: body.input.filter(
431
- (item: any) =>
432
- (typeof item?.id === "string" &&
433
- item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
434
- (typeof item?.call_id === "string" &&
435
- item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
436
- ).length,
437
- });
438
- modifiedBody = {
439
- ...(modifiedBody || body),
440
- input: sanitizedInput,
441
- };
442
- }
443
-
444
- isAgentCall = (sanitizedInput || body.input).some(
445
- (item: any) =>
446
- item?.role === "assistant" ||
447
- (item?.type &&
448
- RESPONSES_API_ALTERNATE_INPUT_TYPES.includes(item.type)),
449
- );
450
-
451
- isVisionRequest = body.input.some(
452
- (item: any) =>
453
- Array.isArray(item?.content) &&
454
- item.content.some(
455
- (part: any) => part.type === "input_image",
456
- ),
457
- );
458
- }
459
-
460
- // Messages API (Anthropic style)
461
- if (body?.messages && !url.includes("completions")) {
462
- isAgentCall = body.messages.some((msg: any) =>
463
- ["tool", "assistant"].includes(msg.role),
464
- );
465
- isVisionRequest = body.messages.some(
466
- (item: any) =>
467
- Array.isArray(item?.content) &&
468
- item.content.some(
469
- (part: any) =>
470
- part?.type === "image" ||
471
- (part?.type === "tool_result" &&
472
- Array.isArray(part?.content) &&
473
- part.content.some(
474
- (nested: any) => nested?.type === "image",
475
- )),
476
- ),
477
- );
478
- }
479
- } catch {}
480
-
481
- const headers: Record<string, string> = {
482
- "x-initiator": isAgentCall ? "agent" : "user",
483
- ...(init?.headers as Record<string, string>),
484
- ...HEADERS,
485
- Authorization: `Bearer ${info.refresh}`,
486
- "Openai-Intent": "conversation-edits",
487
- };
488
-
489
- if (isVisionRequest) {
490
- headers["Copilot-Vision-Request"] = "true";
491
- }
492
-
493
- // Official only deletes lowercase "authorization"
494
- delete headers["x-api-key"];
495
- delete headers["authorization"];
496
-
497
- // Prepare the final init object with potentially modified body
498
- const finalInit = {
499
- ...init,
500
- headers,
501
- ...(modifiedBody ? { body: JSON.stringify(modifiedBody) } : {}),
502
- };
503
-
504
- // Extract model from request body for rate limit tracking
505
- let currentModel = "";
506
- try {
507
- const bodyObj =
508
- typeof finalInit.body === "string"
509
- ? JSON.parse(finalInit.body)
510
- : finalInit.body;
511
- currentModel = bodyObj?.model || "";
512
- } catch {}
513
-
514
- // Pre-flight: if current model is already known rate-limited, switch to fallback
515
- let activeFinalInit: RequestInit = finalInit;
516
- if (currentModel && isModelRateLimited(currentModel)) {
517
- const fallback = getNextFallbackModel(currentModel);
518
- if (fallback) {
519
- log(
520
- "info",
521
- `Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
522
- );
523
- activeFinalInit =
524
- swapModelInBody(finalInit, fallback) || finalInit;
525
- currentModel = fallback;
526
- }
527
- }
528
-
529
- // Retry logic with model fallback and exponential backoff for rate limiting
530
- let lastError: Error | undefined;
531
- let fallbacksUsed = 0;
532
- let attempt = 0;
533
-
534
- while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
535
- try {
536
- const response = await fetch(input, activeFinalInit);
537
-
538
- if (response.status === 429) {
539
- // Parse Retry-After header for server-suggested cooldown
540
- const retryAfterMs = parseRetryAfter(response);
541
- const cooldownMs =
542
- retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
543
-
544
- // Mark this model as rate-limited
545
- if (currentModel) {
546
- markModelRateLimited(currentModel, cooldownMs);
547
- }
548
-
549
- // Try fallback model (doesn't count against retry budget)
550
- if (
551
- currentModel &&
552
- fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
553
- ) {
554
- const fallback = getNextFallbackModel(currentModel);
555
- if (fallback) {
556
- log(
557
- "warn",
558
- `Rate limited on ${currentModel}, switching to ${fallback}`,
559
- {
560
- retry_after_ms: retryAfterMs,
561
- cooldown_ms: cooldownMs,
562
- fallbacks_used: fallbacksUsed + 1,
563
- },
564
- );
565
- activeFinalInit =
566
- swapModelInBody(activeFinalInit, fallback) ||
567
- activeFinalInit;
568
- currentModel = fallback;
569
- fallbacksUsed++;
570
- continue; // Retry immediately with new model, no delay
571
- }
572
- }
573
-
574
- // No fallback available use exponential backoff on same model
575
- if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
576
- const delay =
577
- retryAfterMs != null
578
- ? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
579
- : calculateRetryDelay(attempt);
580
- log(
581
- "warn",
582
- `Rate limited (429), no fallback available, waiting ${delay}ms`,
583
- {
584
- delay_ms: delay,
585
- attempt: attempt + 1,
586
- max_retries: RATE_LIMIT_CONFIG.maxRetries,
587
- fallbacks_exhausted: true,
588
- },
589
- );
590
- await sleep(delay);
591
- attempt++;
592
- continue;
593
- }
594
-
595
- // Exhausted retries and fallbacks
596
- throw new Error(
597
- `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
598
- );
599
- }
600
-
601
- // Response transformation is handled by the custom SDK at
602
- // .opencode/plugin/sdk/copilot/
603
- return response;
604
- } catch (error) {
605
- lastError = error as Error;
606
-
607
- // Network errors might be transient, retry
608
- if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
609
- const delay = calculateRetryDelay(attempt);
610
- log("warn", `Request failed, retrying`, {
611
- delay_ms: delay,
612
- attempt: attempt + 1,
613
- max_retries: RATE_LIMIT_CONFIG.maxRetries,
614
- error: lastError.message,
615
- });
616
- await sleep(delay);
617
- attempt++;
618
- continue;
619
- }
620
- throw error;
621
- }
622
- }
623
-
624
- // Exhausted all retries
625
- if (lastError) {
626
- throw new Error(
627
- `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
628
- );
629
- }
630
- throw new Error(
631
- `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
632
- );
633
- },
634
- };
635
- },
636
- methods: [
637
- {
638
- type: "oauth",
639
- label: "Login with GitHub Copilot",
640
- prompts: [
641
- {
642
- type: "select",
643
- key: "deploymentType",
644
- message: "Select GitHub deployment type",
645
- options: [
646
- {
647
- label: "GitHub.com",
648
- value: "github.com",
649
- hint: "Public",
650
- },
651
- {
652
- label: "GitHub Enterprise",
653
- value: "enterprise",
654
- hint: "Data residency or self-hosted",
655
- },
656
- ],
657
- },
658
- {
659
- type: "text",
660
- key: "enterpriseUrl",
661
- message: "Enter your GitHub Enterprise URL or domain",
662
- placeholder: "company.ghe.com or https://company.ghe.com",
663
- condition: (inputs: any) =>
664
- inputs.deploymentType === "enterprise",
665
- validate: (value: string) => {
666
- if (!value) return "URL or domain is required";
667
- try {
668
- const url = value.includes("://")
669
- ? new URL(value)
670
- : new URL(`https://${value}`);
671
- if (!url.hostname)
672
- return "Please enter a valid URL or domain";
673
- return undefined;
674
- } catch {
675
- return "Please enter a valid URL (e.g., company.ghe.com or https://company.ghe.com)";
676
- }
677
- },
678
- },
679
- ],
680
- async authorize(inputs: any = {}) {
681
- const deploymentType = inputs.deploymentType || "github.com";
682
-
683
- let domain = "github.com";
684
- let actualProvider = "github-copilot";
685
-
686
- if (deploymentType === "enterprise") {
687
- const enterpriseUrl = inputs.enterpriseUrl;
688
- domain = normalizeDomain(enterpriseUrl);
689
- actualProvider = "github-copilot-enterprise";
690
- }
691
-
692
- const urls = getUrls(domain);
693
-
694
- const deviceResponse = await fetch(urls.DEVICE_CODE_URL, {
695
- method: "POST",
696
- headers: {
697
- Accept: "application/json",
698
- "Content-Type": "application/json",
699
- "User-Agent": "GitHubCopilotChat/0.35.0",
700
- },
701
- body: JSON.stringify({
702
- client_id: CLIENT_ID,
703
- scope: "read:user",
704
- }),
705
- });
706
-
707
- if (!deviceResponse.ok) {
708
- throw new Error("Failed to initiate device authorization");
709
- }
710
-
711
- const deviceData = await deviceResponse.json();
712
-
713
- return {
714
- url: deviceData.verification_uri,
715
- instructions: `Enter code: ${deviceData.user_code}`,
716
- method: "auto",
717
- callback: async () => {
718
- while (true) {
719
- const response = await fetch(urls.ACCESS_TOKEN_URL, {
720
- method: "POST",
721
- headers: {
722
- Accept: "application/json",
723
- "Content-Type": "application/json",
724
- "User-Agent": "GitHubCopilotChat/0.35.0",
725
- },
726
- body: JSON.stringify({
727
- client_id: CLIENT_ID,
728
- device_code: deviceData.device_code,
729
- grant_type:
730
- "urn:ietf:params:oauth:grant-type:device_code",
731
- }),
732
- });
733
-
734
- if (!response.ok) return { type: "failed" };
735
-
736
- const data = await response.json();
737
-
738
- if (data.access_token) {
739
- const result: {
740
- type: "success";
741
- refresh: string;
742
- access: string;
743
- expires: number;
744
- provider?: string;
745
- enterpriseUrl?: string;
746
- } = {
747
- type: "success",
748
- refresh: data.access_token,
749
- access: data.access_token,
750
- expires: 0,
751
- };
752
-
753
- if (actualProvider === "github-copilot-enterprise") {
754
- result.provider = "github-copilot-enterprise";
755
- result.enterpriseUrl = domain;
756
- }
757
-
758
- return result;
759
- }
760
-
761
- if (data.error === "authorization_pending") {
762
- await sleep(
763
- deviceData.interval * 1000 +
764
- OAUTH_POLLING_SAFETY_MARGIN_MS,
765
- );
766
- continue;
767
- }
768
-
769
- if (data.error === "slow_down") {
770
- // Based on the RFC spec, we must add 5 seconds to our current polling interval.
771
- let newInterval = (deviceData.interval + 5) * 1000;
772
-
773
- if (
774
- data.interval &&
775
- typeof data.interval === "number" &&
776
- data.interval > 0
777
- ) {
778
- newInterval = data.interval * 1000;
779
- }
780
-
781
- await sleep(newInterval + OAUTH_POLLING_SAFETY_MARGIN_MS);
782
- continue;
783
- }
784
-
785
- if (data.error) return { type: "failed" };
786
-
787
- await sleep(
788
- deviceData.interval * 1000 + OAUTH_POLLING_SAFETY_MARGIN_MS,
789
- );
790
- continue;
791
- }
792
- },
793
- };
794
- },
795
- },
796
- ],
797
- },
798
- // Hook to add custom headers for Claude reasoning support
799
- "chat.headers": async (input: any, output: any) => {
800
- // Only apply to GitHub Copilot provider
801
- if (!input.model?.providerID?.includes("github-copilot")) return;
802
-
803
- // Add Anthropic beta header for interleaved thinking (extended reasoning)
804
- // This is required for Claude models to return thinking blocks
805
- if (input.model?.api?.npm === "@ai-sdk/anthropic") {
806
- output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
807
- }
808
-
809
- // Mark subagent sessions as agent-initiated (matching standard Copilot tools)
810
- try {
811
- const session = await sdk.session
812
- .get({
813
- path: {
814
- id: input.sessionID,
815
- },
816
- throwOnError: true,
817
- })
818
- .catch(() => undefined);
819
- if (session?.data?.parentID) {
820
- output.headers["x-initiator"] = "agent";
821
- }
822
- } catch {
823
- // Ignore errors from session lookup
824
- }
825
- },
826
- };
412
+ // Initialize logger with the SDK client
413
+ setLogger(sdk);
414
+
415
+ return {
416
+ auth: {
417
+ provider: "github-copilot",
418
+ loader: async (getAuth, provider) => {
419
+ const info = await getAuth();
420
+ if (!info || info.type !== "oauth") return {};
421
+
422
+ // Enterprise URL support for baseURL
423
+ const enterpriseUrl = (info as any).enterpriseUrl;
424
+ const baseURL = enterpriseUrl
425
+ ? `https://copilot-api.${normalizeDomain(enterpriseUrl)}`
426
+ : undefined;
427
+
428
+ if (provider && provider.models) {
429
+ for (const [_modelId, model] of Object.entries(provider.models)) {
430
+ model.cost = {
431
+ input: 0,
432
+ output: 0,
433
+ cache: {
434
+ read: 0,
435
+ write: 0,
436
+ },
437
+ };
438
+
439
+ // All models use the standard github-copilot SDK
440
+ // Reasoning support for Claude models is handled via:
441
+ // 1. The fetch wrapper adds thinking_budget to request body
442
+ // 2. The fetch wrapper strips invalid thinking blocks from messages
443
+ model.api.npm = "@ai-sdk/github-copilot";
444
+ }
445
+ }
446
+
447
+ return {
448
+ baseURL,
449
+ apiKey: "",
450
+ async fetch(input, init) {
451
+ const info = await getAuth();
452
+ if (info.type !== "oauth") return fetch(input, init);
453
+
454
+ let isAgentCall = false;
455
+ let isVisionRequest = false;
456
+ let modifiedBody: any;
457
+ let isClaudeModel = false;
458
+
459
+ try {
460
+ const body =
461
+ typeof init?.body === "string"
462
+ ? JSON.parse(init.body)
463
+ : init?.body;
464
+
465
+ const url = input.toString();
466
+
467
+ // Check if this is a Claude model request
468
+ const modelId = body?.model || "";
469
+ isClaudeModel = modelId.toLowerCase().includes("claude");
470
+
471
+ // Completions API
472
+ if (body?.messages && url.includes("completions")) {
473
+ // Keep local logic: detect if any message is assistant/tool
474
+ isAgentCall = body.messages.some((msg: any) =>
475
+ ["tool", "assistant"].includes(msg.role),
476
+ );
477
+ isVisionRequest = body.messages.some(
478
+ (msg: any) =>
479
+ Array.isArray(msg.content) &&
480
+ msg.content.some((part: any) => part.type === "image_url"),
481
+ );
482
+
483
+ // For Claude models, add thinking_budget to enable reasoning
484
+ // The Copilot API accepts this parameter and returns reasoning_text/reasoning_opaque
485
+ if (isClaudeModel) {
486
+ // Use configured thinking_budget from model options, or default to 10000
487
+ const thinkingBudget = body.thinking_budget || 10000;
488
+
489
+ // Fix for "Invalid signature in thinking block" error:
490
+ // The Copilot API uses reasoning_text/reasoning_opaque format for thinking
491
+ // When these are passed back without proper signature, it causes errors
492
+ // Solution: Ensure reasoning_opaque is present when reasoning_text exists,
493
+ // or remove reasoning content entirely if signature is invalid/missing
494
+ const cleanedMessages = body.messages.map(
495
+ (msg: any, idx: number) => {
496
+ if (msg.role !== "assistant") return msg;
497
+
498
+ // Log message structure for debugging
499
+ log("debug", `Processing assistant message ${idx}`, {
500
+ has_reasoning_text: !!msg.reasoning_text,
501
+ has_reasoning_opaque: !!msg.reasoning_opaque,
502
+ content_type: typeof msg.content,
503
+ content_is_array: Array.isArray(msg.content),
504
+ });
505
+
506
+ // If message has reasoning_text but no/invalid reasoning_opaque, remove reasoning
507
+ if (msg.reasoning_text && !msg.reasoning_opaque) {
508
+ log(
509
+ "warn",
510
+ `Removing reasoning_text without reasoning_opaque from message ${idx}`,
511
+ );
512
+ const { reasoning_text: _unused, ...cleanedMsg } = msg;
513
+ return cleanedMsg;
514
+ }
515
+
516
+ // If content is an array, check for thinking blocks
517
+ if (Array.isArray(msg.content)) {
518
+ const hasThinkingBlock = msg.content.some(
519
+ (part: any) => part.type === "thinking",
520
+ );
521
+ if (hasThinkingBlock) {
522
+ log(
523
+ "debug",
524
+ `Message ${idx} has thinking blocks in content array`,
525
+ );
526
+ // Filter out thinking blocks without signatures
527
+ const cleanedContent = msg.content.filter(
528
+ (part: any) => {
529
+ if (part.type === "thinking") {
530
+ if (!part.signature) {
531
+ log(
532
+ "warn",
533
+ `Removing thinking block without signature`,
534
+ );
535
+ return false;
536
+ }
537
+ }
538
+ return true;
539
+ },
540
+ );
541
+ return {
542
+ ...msg,
543
+ content:
544
+ cleanedContent.length > 0 ? cleanedContent : null,
545
+ };
546
+ }
547
+ }
548
+
549
+ return msg;
550
+ },
551
+ );
552
+
553
+ modifiedBody = {
554
+ ...body,
555
+ messages: cleanedMessages,
556
+ thinking_budget: thinkingBudget,
557
+ };
558
+ log("info", `Adding thinking_budget for Claude model`, {
559
+ model: modelId,
560
+ thinking_budget: thinkingBudget,
561
+ });
562
+ }
563
+
564
+ // For GPT models (o1, gpt-5, etc.), add reasoning parameter
565
+ const isGptModel =
566
+ modelId.toLowerCase().includes("gpt") ||
567
+ modelId.toLowerCase().includes("o1") ||
568
+ modelId.toLowerCase().includes("o3") ||
569
+ modelId.toLowerCase().includes("o4");
570
+
571
+ if (isGptModel && !isClaudeModel) {
572
+ // Get reasoning effort from body options or default to "medium"
573
+ const reasoningEffort =
574
+ body.reasoning?.effort ||
575
+ body.reasoningEffort ||
576
+ body.reasoning_effort ||
577
+ "medium";
578
+
579
+ modifiedBody = {
580
+ ...(modifiedBody || body),
581
+ reasoning: {
582
+ effort: reasoningEffort,
583
+ },
584
+ };
585
+
586
+ // Also pass through other reasoning options if present
587
+ if (body.reasoningSummary || body.reasoning?.summary) {
588
+ modifiedBody.reasoning.summary =
589
+ body.reasoningSummary || body.reasoning?.summary;
590
+ }
591
+
592
+ log("info", `Adding reasoning for GPT model`, {
593
+ model: modelId,
594
+ reasoning_effort: reasoningEffort,
595
+ });
596
+ }
597
+ }
598
+
599
+ // Responses API
600
+ if (body?.input) {
601
+ // Sanitize long IDs from Copilot backend (can be 400+ chars)
602
+ // OpenAI Responses API enforces a 64-char max on item IDs
603
+ const sanitizedInput = sanitizeResponseInputIds(body.input);
604
+ const inputWasSanitized =
605
+ sanitizedInput !== body.input &&
606
+ JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
607
+
608
+ if (inputWasSanitized) {
609
+ log("info", "Sanitized long IDs in Responses API input", {
610
+ original_count: body.input.filter(
611
+ (item: any) =>
612
+ (typeof item?.id === "string" &&
613
+ item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
614
+ (typeof item?.call_id === "string" &&
615
+ item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
616
+ ).length,
617
+ });
618
+ modifiedBody = {
619
+ ...(modifiedBody || body),
620
+ input: sanitizedInput,
621
+ };
622
+ }
623
+
624
+ isAgentCall = (sanitizedInput || body.input).some(
625
+ (item: any) =>
626
+ item?.role === "assistant" ||
627
+ (item?.type &&
628
+ RESPONSES_API_ALTERNATE_INPUT_TYPES.includes(item.type)),
629
+ );
630
+
631
+ isVisionRequest = body.input.some(
632
+ (item: any) =>
633
+ Array.isArray(item?.content) &&
634
+ item.content.some(
635
+ (part: any) => part.type === "input_image",
636
+ ),
637
+ );
638
+ }
639
+
640
+ // Messages API (Anthropic style)
641
+ if (body?.messages && !url.includes("completions")) {
642
+ isAgentCall = body.messages.some((msg: any) =>
643
+ ["tool", "assistant"].includes(msg.role),
644
+ );
645
+ isVisionRequest = body.messages.some(
646
+ (item: any) =>
647
+ Array.isArray(item?.content) &&
648
+ item.content.some(
649
+ (part: any) =>
650
+ part?.type === "image" ||
651
+ (part?.type === "tool_result" &&
652
+ Array.isArray(part?.content) &&
653
+ part.content.some(
654
+ (nested: any) => nested?.type === "image",
655
+ )),
656
+ ),
657
+ );
658
+ }
659
+ } catch {}
660
+
661
+ const headers: Record<string, string> = {
662
+ "x-initiator": isAgentCall ? "agent" : "user",
663
+ ...(init?.headers as Record<string, string>),
664
+ ...HEADERS,
665
+ Authorization: `Bearer ${info.refresh}`,
666
+ "Openai-Intent": "conversation-edits",
667
+ };
668
+
669
+ if (isVisionRequest) {
670
+ headers["Copilot-Vision-Request"] = "true";
671
+ }
672
+
673
+ // Official only deletes lowercase "authorization"
674
+ delete headers["x-api-key"];
675
+ delete headers["authorization"];
676
+
677
+ // Prepare the final init object with potentially modified body
678
+ const finalInit = {
679
+ ...init,
680
+ headers,
681
+ ...(modifiedBody ? { body: JSON.stringify(modifiedBody) } : {}),
682
+ };
683
+
684
+ // Extract model from request body for rate limit tracking
685
+ let currentModel = "";
686
+ try {
687
+ const bodyObj =
688
+ typeof finalInit.body === "string"
689
+ ? JSON.parse(finalInit.body)
690
+ : finalInit.body;
691
+ currentModel = bodyObj?.model || "";
692
+ } catch {}
693
+
694
+ // Pre-flight: if current model is already known rate-limited, switch to fallback
695
+ let activeFinalInit: RequestInit = finalInit;
696
+ const attemptedModels = new Set<string>();
697
+ if (currentModel) attemptedModels.add(currentModel);
698
+ const requestedModel = currentModel;
699
+ if (currentModel) {
700
+ const circuitRemainingMs =
701
+ getFamilyCircuitRemainingMs(currentModel);
702
+ if (circuitRemainingMs > 0) {
703
+ if (
704
+ circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
705
+ ) {
706
+ log(
707
+ "info",
708
+ `Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
709
+ );
710
+ await sleep(circuitRemainingMs);
711
+ } else {
712
+ throw new Error(
713
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
714
+ );
715
+ }
716
+ }
717
+ }
718
+ if (currentModel && isModelRateLimited(currentModel)) {
719
+ const fallback = getNextFallbackModel(
720
+ currentModel,
721
+ attemptedModels,
722
+ );
723
+ if (fallback) {
724
+ log(
725
+ "info",
726
+ `Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
727
+ );
728
+ activeFinalInit =
729
+ swapModelInBody(finalInit, fallback) || finalInit;
730
+ currentModel = fallback;
731
+ attemptedModels.add(fallback);
732
+ } else {
733
+ const familyCooldownMs =
734
+ getFamilyMaxCooldownRemainingMs(currentModel);
735
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
736
+ if (
737
+ familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
738
+ ) {
739
+ log(
740
+ "info",
741
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
742
+ );
743
+ await sleep(familyCooldownMs);
744
+ attemptedModels.clear();
745
+ if (currentModel) attemptedModels.add(currentModel);
746
+ } else {
747
+ throw new Error(
748
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
749
+ );
750
+ }
751
+ }
752
+ }
753
+
754
+ // Retry logic with model fallback and exponential backoff for rate limiting
755
+ let lastError: Error | undefined;
756
+ let fallbacksUsed = 0;
757
+ let attempt = 0;
758
+ let recoveryCyclesUsed = 0;
759
+
760
+ while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
761
+ try {
762
+ if (currentModel) {
763
+ await shapeRequestForModel(currentModel);
764
+ }
765
+ const response = await fetch(input, activeFinalInit);
766
+
767
+ if (response.status === 429) {
768
+ try {
769
+ await response.body?.cancel();
770
+ } catch {}
771
+
772
+ // Parse Retry-After header for server-suggested cooldown
773
+ const retryAfterMs = parseRetryAfter(response);
774
+ const cooldownMs =
775
+ retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
776
+
777
+ // Mark this model as rate-limited
778
+ if (currentModel) {
779
+ markModelRateLimited(currentModel, cooldownMs);
780
+ }
781
+
782
+ // Try fallback model (doesn't count against retry budget)
783
+ if (
784
+ currentModel &&
785
+ fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
786
+ ) {
787
+ const fallback = getNextFallbackModel(
788
+ currentModel,
789
+ attemptedModels,
790
+ );
791
+ if (fallback) {
792
+ log(
793
+ "warn",
794
+ `Rate limited on ${currentModel}, switching to ${fallback}`,
795
+ {
796
+ retry_after_ms: retryAfterMs,
797
+ cooldown_ms: cooldownMs,
798
+ fallbacks_used: fallbacksUsed + 1,
799
+ },
800
+ );
801
+ activeFinalInit =
802
+ swapModelInBody(activeFinalInit, fallback) ||
803
+ activeFinalInit;
804
+ currentModel = fallback;
805
+ attemptedModels.add(fallback);
806
+ fallbacksUsed++;
807
+ continue; // Retry immediately with new model, no delay
808
+ }
809
+ }
810
+
811
+ // No fallback available — use exponential backoff on same model
812
+ if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
813
+ if (
814
+ currentModel &&
815
+ isEntireModelFamilyCoolingDown(currentModel)
816
+ ) {
817
+ const familyCooldownMs =
818
+ getFamilyMaxCooldownRemainingMs(currentModel);
819
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
820
+ if (
821
+ familyCooldownMs <=
822
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
823
+ ) {
824
+ log(
825
+ "info",
826
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
827
+ );
828
+ await sleep(familyCooldownMs);
829
+ attemptedModels.clear();
830
+ if (currentModel) attemptedModels.add(currentModel);
831
+ attempt++;
832
+ continue;
833
+ }
834
+ throw new Error(
835
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
836
+ );
837
+ }
838
+
839
+ const modelCooldownMs = currentModel
840
+ ? getRateLimitRemainingMs(currentModel)
841
+ : null;
842
+ const delay = Math.min(
843
+ modelCooldownMs ??
844
+ retryAfterMs ??
845
+ calculateRetryDelay(attempt),
846
+ RATE_LIMIT_CONFIG.maxDelayMs,
847
+ );
848
+ log(
849
+ "warn",
850
+ `Rate limited (429), no fallback available, waiting ${delay}ms`,
851
+ {
852
+ delay_ms: delay,
853
+ attempt: attempt + 1,
854
+ max_retries: RATE_LIMIT_CONFIG.maxRetries,
855
+ fallbacks_exhausted: true,
856
+ },
857
+ );
858
+ await sleep(delay);
859
+ attemptedModels.clear();
860
+ if (currentModel) attemptedModels.add(currentModel);
861
+ attempt++;
862
+ continue;
863
+ }
864
+
865
+ // Exhausted retries and fallbacks
866
+ if (currentModel) {
867
+ const familyCooldownMs =
868
+ getFamilyMaxCooldownRemainingMs(currentModel);
869
+ const recoveryDelayMs =
870
+ familyCooldownMs > 0
871
+ ? Math.min(
872
+ familyCooldownMs,
873
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
874
+ )
875
+ : calculateRetryDelay(0);
876
+ if (
877
+ recoveryDelayMs > 0 &&
878
+ recoveryCyclesUsed <
879
+ CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
880
+ ) {
881
+ recoveryCyclesUsed++;
882
+ log(
883
+ "info",
884
+ `Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
885
+ );
886
+ await sleep(recoveryDelayMs);
887
+ attempt = 0;
888
+ fallbacksUsed = 0;
889
+ if (requestedModel) {
890
+ currentModel = requestedModel;
891
+ activeFinalInit =
892
+ swapModelInBody(finalInit, requestedModel) ||
893
+ finalInit;
894
+ }
895
+ attemptedModels.clear();
896
+ if (currentModel) attemptedModels.add(currentModel);
897
+ continue;
898
+ }
899
+ }
900
+ throw new Error(
901
+ `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
902
+ );
903
+ }
904
+
905
+ // Response transformation is handled by the custom SDK at
906
+ // .opencode/plugin/sdk/copilot/
907
+ return response;
908
+ } catch (error) {
909
+ lastError = error as Error;
910
+
911
+ if (
912
+ lastError.message.includes(
913
+ "All fallback models cooling down",
914
+ ) ||
915
+ lastError.message.includes("Local request queue saturated")
916
+ ) {
917
+ throw lastError;
918
+ }
919
+
920
+ // Network errors might be transient, retry
921
+ if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
922
+ const delay = calculateRetryDelay(attempt);
923
+ log("warn", `Request failed, retrying`, {
924
+ delay_ms: delay,
925
+ attempt: attempt + 1,
926
+ max_retries: RATE_LIMIT_CONFIG.maxRetries,
927
+ error: lastError.message,
928
+ });
929
+ await sleep(delay);
930
+ attempt++;
931
+ continue;
932
+ }
933
+ throw error;
934
+ }
935
+ }
936
+
937
+ // Exhausted all retries
938
+ if (lastError) {
939
+ throw new Error(
940
+ `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
941
+ );
942
+ }
943
+ throw new Error(
944
+ `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
945
+ );
946
+ },
947
+ };
948
+ },
949
+ methods: [
950
+ {
951
+ type: "oauth",
952
+ label: "Login with GitHub Copilot",
953
+ prompts: [
954
+ {
955
+ type: "select",
956
+ key: "deploymentType",
957
+ message: "Select GitHub deployment type",
958
+ options: [
959
+ {
960
+ label: "GitHub.com",
961
+ value: "github.com",
962
+ hint: "Public",
963
+ },
964
+ {
965
+ label: "GitHub Enterprise",
966
+ value: "enterprise",
967
+ hint: "Data residency or self-hosted",
968
+ },
969
+ ],
970
+ },
971
+ {
972
+ type: "text",
973
+ key: "enterpriseUrl",
974
+ message: "Enter your GitHub Enterprise URL or domain",
975
+ placeholder: "company.ghe.com or https://company.ghe.com",
976
+ condition: (inputs: any) =>
977
+ inputs.deploymentType === "enterprise",
978
+ validate: (value: string) => {
979
+ if (!value) return "URL or domain is required";
980
+ try {
981
+ const url = value.includes("://")
982
+ ? new URL(value)
983
+ : new URL(`https://${value}`);
984
+ if (!url.hostname)
985
+ return "Please enter a valid URL or domain";
986
+ return undefined;
987
+ } catch {
988
+ return "Please enter a valid URL (e.g., company.ghe.com or https://company.ghe.com)";
989
+ }
990
+ },
991
+ },
992
+ ],
993
+ async authorize(inputs: any = {}) {
994
+ const deploymentType = inputs.deploymentType || "github.com";
995
+
996
+ let domain = "github.com";
997
+ let actualProvider = "github-copilot";
998
+
999
+ if (deploymentType === "enterprise") {
1000
+ const enterpriseUrl = inputs.enterpriseUrl;
1001
+ domain = normalizeDomain(enterpriseUrl);
1002
+ actualProvider = "github-copilot-enterprise";
1003
+ }
1004
+
1005
+ const urls = getUrls(domain);
1006
+
1007
+ const deviceResponse = await fetch(urls.DEVICE_CODE_URL, {
1008
+ method: "POST",
1009
+ headers: {
1010
+ Accept: "application/json",
1011
+ "Content-Type": "application/json",
1012
+ "User-Agent": "GitHubCopilotChat/0.35.0",
1013
+ },
1014
+ body: JSON.stringify({
1015
+ client_id: CLIENT_ID,
1016
+ scope: "read:user",
1017
+ }),
1018
+ });
1019
+
1020
+ if (!deviceResponse.ok) {
1021
+ throw new Error("Failed to initiate device authorization");
1022
+ }
1023
+
1024
+ const deviceData = await deviceResponse.json();
1025
+
1026
+ return {
1027
+ url: deviceData.verification_uri,
1028
+ instructions: `Enter code: ${deviceData.user_code}`,
1029
+ method: "auto",
1030
+ callback: async () => {
1031
+ while (true) {
1032
+ const response = await fetch(urls.ACCESS_TOKEN_URL, {
1033
+ method: "POST",
1034
+ headers: {
1035
+ Accept: "application/json",
1036
+ "Content-Type": "application/json",
1037
+ "User-Agent": "GitHubCopilotChat/0.35.0",
1038
+ },
1039
+ body: JSON.stringify({
1040
+ client_id: CLIENT_ID,
1041
+ device_code: deviceData.device_code,
1042
+ grant_type:
1043
+ "urn:ietf:params:oauth:grant-type:device_code",
1044
+ }),
1045
+ });
1046
+
1047
+ if (!response.ok) return { type: "failed" };
1048
+
1049
+ const data = await response.json();
1050
+
1051
+ if (data.access_token) {
1052
+ const result: {
1053
+ type: "success";
1054
+ refresh: string;
1055
+ access: string;
1056
+ expires: number;
1057
+ provider?: string;
1058
+ enterpriseUrl?: string;
1059
+ } = {
1060
+ type: "success",
1061
+ refresh: data.access_token,
1062
+ access: data.access_token,
1063
+ expires: 0,
1064
+ };
1065
+
1066
+ if (actualProvider === "github-copilot-enterprise") {
1067
+ result.provider = "github-copilot-enterprise";
1068
+ result.enterpriseUrl = domain;
1069
+ }
1070
+
1071
+ return result;
1072
+ }
1073
+
1074
+ if (data.error === "authorization_pending") {
1075
+ await sleep(
1076
+ deviceData.interval * 1000 +
1077
+ OAUTH_POLLING_SAFETY_MARGIN_MS,
1078
+ );
1079
+ continue;
1080
+ }
1081
+
1082
+ if (data.error === "slow_down") {
1083
+ // Based on the RFC spec, we must add 5 seconds to our current polling interval.
1084
+ let newInterval = (deviceData.interval + 5) * 1000;
1085
+
1086
+ if (
1087
+ data.interval &&
1088
+ typeof data.interval === "number" &&
1089
+ data.interval > 0
1090
+ ) {
1091
+ newInterval = data.interval * 1000;
1092
+ }
1093
+
1094
+ await sleep(newInterval + OAUTH_POLLING_SAFETY_MARGIN_MS);
1095
+ continue;
1096
+ }
1097
+
1098
+ if (data.error) return { type: "failed" };
1099
+
1100
+ await sleep(
1101
+ deviceData.interval * 1000 + OAUTH_POLLING_SAFETY_MARGIN_MS,
1102
+ );
1103
+ }
1104
+ },
1105
+ };
1106
+ },
1107
+ },
1108
+ ],
1109
+ },
1110
+ // Hook to add custom headers for Claude reasoning support
1111
+ "chat.headers": async (input: any, output: any) => {
1112
+ // Only apply to GitHub Copilot provider
1113
+ if (!input.model?.providerID?.includes("github-copilot")) return;
1114
+
1115
+ // Add Anthropic beta header for interleaved thinking (extended reasoning)
1116
+ // This is required for Claude models to return thinking blocks
1117
+ if (input.model?.api?.npm === "@ai-sdk/anthropic") {
1118
+ output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
1119
+ }
1120
+
1121
+ // Mark subagent sessions as agent-initiated (matching standard Copilot tools)
1122
+ try {
1123
+ const session = await sdk.session
1124
+ .get({
1125
+ path: {
1126
+ id: input.sessionID,
1127
+ },
1128
+ throwOnError: true,
1129
+ })
1130
+ .catch(() => undefined);
1131
+ if (session?.data?.parentID) {
1132
+ output.headers["x-initiator"] = "agent";
1133
+ }
1134
+ } catch {
1135
+ // Ignore errors from session lookup
1136
+ }
1137
+ },
1138
+ };
827
1139
  };