npm - opencodekit - Versions diffs - 0.18.15 → 0.18.17 - Mend

opencodekit 0.18.15 → 0.18.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.js +1 -1
package/dist/template/.opencode/memory.db +0 -0
package/dist/template/.opencode/memory.db-shm +0 -0
package/dist/template/.opencode/memory.db-wal +0 -0
package/dist/template/.opencode/plugin/copilot-auth.ts +1029 -717
package/package.json +1 -1

package/dist/template/.opencode/plugin/copilot-auth.ts CHANGED

@@ -17,25 +17,25 @@ const CLIENT_ID = "Ov23li8tweQw6odWQebz";
 // Logger function that will be set by the plugin
 let log: (
-  level: "debug" | "info" | "warn" | "error",
-  message: string,
-  extra?: Record<string, any>,
+	level: "debug" | "info" | "warn" | "error",
+	message: string,
+	extra?: Record<string, any>,
 ) => void = () => {};
 /**
  * Set the logger function from the plugin context
  */
 function setLogger(client: any) {
-  log = (level, message, extra) => {
-    client.app
-      .log({
-        service: "copilot-auth",
-        level,
-        message,
-        extra,
-      })
-      .catch(() => {}); // Fire and forget, don't block on logging
-  };
+	log = (level, message, extra) => {
+		client.app
+			.log({
+				service: "copilot-auth",
+				level,
+				message,
+				extra,
+			})
+			.catch(() => {}); // Fire and forget, don't block on logging
+	};
 }
 // Add a small safety buffer when polling to avoid hitting the server
@@ -43,64 +43,101 @@ function setLogger(client: any) {
 const OAUTH_POLLING_SAFETY_MARGIN_MS = 3000; // 3 seconds
 const HEADERS = {
-  "User-Agent": "GitHubCopilotChat/0.35.0",
-  "Editor-Version": "vscode/1.107.0",
-  "Editor-Plugin-Version": "copilot-chat/0.35.0",
-  "Copilot-Integration-Id": "vscode-chat",
+	"User-Agent": "GitHubCopilotChat/0.35.0",
+	"Editor-Version": "vscode/1.107.0",
+	"Editor-Plugin-Version": "copilot-chat/0.35.0",
+	"Copilot-Integration-Id": "vscode-chat",
 };
 const RESPONSES_API_ALTERNATE_INPUT_TYPES = [
-  "file_search_call",
-  "computer_call",
-  "computer_call_output",
-  "web_search_call",
-  "function_call",
-  "function_call_output",
-  "image_generation_call",
-  "code_interpreter_call",
-  "local_shell_call",
-  "local_shell_call_output",
-  "mcp_list_tools",
-  "mcp_approval_request",
-  "mcp_approval_response",
-  "mcp_call",
-  "reasoning",
+	"file_search_call",
+	"computer_call",
+	"computer_call_output",
+	"web_search_call",
+	"function_call",
+	"function_call_output",
+	"image_generation_call",
+	"code_interpreter_call",
+	"local_shell_call",
+	"local_shell_call_output",
+	"mcp_list_tools",
+	"mcp_approval_request",
+	"mcp_approval_response",
+	"mcp_call",
+	"reasoning",
 ];
 function normalizeDomain(url: string): string {
-  return url.replace(/^https?:\/\//, "").replace(/\/$/, "");
+	return url.replace(/^https?:\/\//, "").replace(/\/$/, "");
 }
 function getUrls(domain: string) {
-  return {
-    DEVICE_CODE_URL: `https://${domain}/login/device/code`,
-    ACCESS_TOKEN_URL: `https://${domain}/login/oauth/access_token`,
-  };
+	return {
+		DEVICE_CODE_URL: `https://${domain}/login/device/code`,
+		ACCESS_TOKEN_URL: `https://${domain}/login/oauth/access_token`,
+	};
 }
 const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
 // Rate limit handling configuration
 const RATE_LIMIT_CONFIG = {
-  maxRetries: 3,
-  baseDelayMs: 2000, // Start with 2 seconds
-  maxDelayMs: 60000, // Cap at 60 seconds
-  defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
-  maxFallbacks: 4, // Max model fallback switches per request
+	maxRetries: 3,
+	baseDelayMs: 2000, // Start with 2 seconds
+	maxDelayMs: 60000, // Cap at 60 seconds
+	defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
+	maxFallbacks: 4, // Max model fallback switches per request
+};
+// Local request shaping to smooth bursts before they hit Copilot limits
+const REQUEST_SHAPING_CONFIG = {
+	tokensPerSecond: 1,
+	burstCapacity: 2,
+	maxQueueDelayMs: 15000,
+};
+const CIRCUIT_BREAKER_CONFIG = {
+	maxInlineWaitMs: 30000,
+	maxRecoveryCycles: 3,
 };
 // Per-model rate limit state (in-memory, resets on restart)
 interface RateLimitEntry {
-  rateLimitedUntil: number; // Unix timestamp (ms)
+	rateLimitedUntil: number; // Unix timestamp (ms)
 }
 const rateLimitState = new Map<string, RateLimitEntry>();
+const familyCircuitBreakerState = new Map<string, number>();
+interface TokenBucketState {
+	tokens: number;
+	lastRefillAt: number;
+}
+const modelTokenBuckets = new Map<string, TokenBucketState>();
+const modelQueueTail = new Map<string, Promise<void>>();
 // Model fallback chains: same-family alternatives when a model is rate-limited
 const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
-  // Claude family
-  "claude-opus-4.6": ["claude-opus-4.5", "claude-sonnet-4.6", "gpt-5.3-codex"],
-  "claude-opus-4.5": ["claude-sonnet-4.6", "gpt-5.3-codex"],
-  "claude-sonnet-4.6": ["gpt-5.3-codex"],
+	// Claude family
+	"claude-opus-4.6": [
+		"claude-opus-4.5",
+		"claude-sonnet-4.6",
+		"claude-sonnet-4.5",
+	],
+	"claude-opus-4.5": [
+		"claude-opus-4.6",
+		"claude-sonnet-4.5",
+		"claude-sonnet-4.6",
+	],
+	"claude-sonnet-4.6": [
+		"claude-sonnet-4.5",
+		"claude-opus-4.6",
+		"claude-opus-4.5",
+	],
+	"claude-sonnet-4.5": [
+		"claude-sonnet-4.6",
+		"claude-opus-4.5",
+		"claude-opus-4.6",
+	],
 };
 /**
@@ -108,65 +145,208 @@ const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
  * Returns cooldown in milliseconds, or null if header is missing/unparseable.
  */
 function parseRetryAfter(response: Response): number | null {
-  const header = response.headers.get("retry-after");
-  if (!header) return null;
-  // Try as seconds first (most common)
-  const seconds = parseInt(header, 10);
-  if (!isNaN(seconds) && seconds > 0) return seconds * 1000;
-  // Try as HTTP date
-  const date = Date.parse(header);
-  if (!isNaN(date)) return Math.max(0, date - Date.now());
-  return null;
+	const header = response.headers.get("retry-after");
+	if (!header) return null;
+	// Try as seconds first (most common)
+	const seconds = parseInt(header, 10);
+	if (!isNaN(seconds) && seconds > 0) return seconds * 1000;
+	// Try as HTTP date
+	const date = Date.parse(header);
+	if (!isNaN(date)) return Math.max(0, date - Date.now());
+	return null;
 }
 function isModelRateLimited(model: string): boolean {
-  const entry = rateLimitState.get(model);
-  if (!entry) return false;
-  if (Date.now() >= entry.rateLimitedUntil) {
-    rateLimitState.delete(model);
-    return false;
-  }
-  return true;
+	const entry = rateLimitState.get(model);
+	if (!entry) return false;
+	if (Date.now() >= entry.rateLimitedUntil) {
+		rateLimitState.delete(model);
+		return false;
+	}
+	return true;
+}
+function getRateLimitRemainingMs(model: string): number | null {
+	const entry = rateLimitState.get(model);
+	if (!entry) return null;
+	const remaining = entry.rateLimitedUntil - Date.now();
+	if (remaining <= 0) {
+		rateLimitState.delete(model);
+		return null;
+	}
+	return remaining;
+}
+function getModelFamily(model: string): string[] {
+	const family = new Set<string>([
+		model,
+		...(MODEL_FALLBACK_CHAINS[model] || []),
+	]);
+	return [...family];
+}
+function getFamilyCircuitKey(model: string): string {
+	return getModelFamily(model).sort().join("|");
+}
+function getFamilyCircuitRemainingMs(model: string): number {
+	const key = getFamilyCircuitKey(model);
+	const until = familyCircuitBreakerState.get(key);
+	if (!until) return 0;
+	const remaining = until - Date.now();
+	if (remaining <= 0) {
+		familyCircuitBreakerState.delete(key);
+		return 0;
+	}
+	return remaining;
+}
+function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
+	const key = getFamilyCircuitKey(model);
+	familyCircuitBreakerState.set(
+		key,
+		Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
+	);
+}
+function getFamilyMaxCooldownRemainingMs(model: string): number {
+	let maxRemaining = 0;
+	for (const candidate of getModelFamily(model)) {
+		const remaining = getRateLimitRemainingMs(candidate) ?? 0;
+		if (remaining > maxRemaining) maxRemaining = remaining;
+	}
+	return maxRemaining;
+}
+function isEntireModelFamilyCoolingDown(model: string): boolean {
+	const family = getModelFamily(model);
+	return (
+		family.length > 0 &&
+		family.every((candidate) => isModelRateLimited(candidate))
+	);
+}
+function formatRetryAfter(seconds: number): string {
+	if (seconds < 60) return `${seconds}s`;
+	const mins = Math.floor(seconds / 60);
+	const secs = seconds % 60;
+	return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
+}
+async function shapeRequestForModel(model: string): Promise<void> {
+	if (!model) return;
+	const previousTail = modelQueueTail.get(model) ?? Promise.resolve();
+	let releaseQueue: (() => void) | undefined;
+	const currentGate = new Promise<void>((resolve) => {
+		releaseQueue = resolve;
+	});
+	const currentTail = previousTail.then(() => currentGate);
+	modelQueueTail.set(model, currentTail);
+	let queueTimeout: ReturnType<typeof setTimeout> | undefined;
+	try {
+		await Promise.race([
+			previousTail,
+			new Promise<void>((_, reject) => {
+				queueTimeout = setTimeout(() => {
+					reject(
+						new Error(
+							`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(REQUEST_SHAPING_CONFIG.maxQueueDelayMs / 1000))}.`,
+						),
+					);
+				}, REQUEST_SHAPING_CONFIG.maxQueueDelayMs);
+			}),
+		]);
+		const now = Date.now();
+		const bucket = modelTokenBuckets.get(model) ?? {
+			tokens: REQUEST_SHAPING_CONFIG.burstCapacity,
+			lastRefillAt: now,
+		};
+		const elapsedMs = Math.max(0, now - bucket.lastRefillAt);
+		const refillTokens =
+			(elapsedMs / 1000) * REQUEST_SHAPING_CONFIG.tokensPerSecond;
+		bucket.tokens = Math.min(
+			REQUEST_SHAPING_CONFIG.burstCapacity,
+			bucket.tokens + refillTokens,
+		);
+		bucket.lastRefillAt = now;
+		if (bucket.tokens < 1) {
+			const deficit = 1 - bucket.tokens;
+			const waitMs = Math.ceil(
+				(deficit / REQUEST_SHAPING_CONFIG.tokensPerSecond) * 1000,
+			);
+			if (waitMs > REQUEST_SHAPING_CONFIG.maxQueueDelayMs) {
+				throw new Error(
+					`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(waitMs / 1000))}.`,
+				);
+			}
+			log("info", `Local request shaping wait for ${model}`, {
+				wait_ms: waitMs,
+			});
+			await sleep(waitMs);
+			bucket.tokens = 0;
+			bucket.lastRefillAt = Date.now();
+		} else {
+			bucket.tokens -= 1;
+		}
+		modelTokenBuckets.set(model, bucket);
+	} finally {
+		if (queueTimeout) clearTimeout(queueTimeout);
+		releaseQueue?.();
+		if (modelQueueTail.get(model) === currentTail) {
+			modelQueueTail.delete(model);
+		}
+	}
 }
 function markModelRateLimited(model: string, cooldownMs: number): void {
-  rateLimitState.set(model, {
-    rateLimitedUntil: Date.now() + cooldownMs,
-  });
-  log(
-    "info",
-    `Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
-  );
+	rateLimitState.set(model, {
+		rateLimitedUntil: Date.now() + cooldownMs,
+	});
+	log(
+		"info",
+		`Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
+	);
 }
 /**
  * Find the next available fallback model in the same family.
  * Skips models that are themselves rate-limited.
  */
-function getNextFallbackModel(model: string): string | null {
-  const chain = MODEL_FALLBACK_CHAINS[model];
-  if (!chain) return null;
-  for (const fallback of chain) {
-    if (!isModelRateLimited(fallback)) return fallback;
-  }
-  return null;
+function getNextFallbackModel(
+	model: string,
+	attemptedModels: Set<string>,
+): string | null {
+	const chain = MODEL_FALLBACK_CHAINS[model];
+	if (!chain) return null;
+	for (const fallback of chain) {
+		if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
+			return fallback;
+		}
+	}
+	return null;
 }
 /**
  * Swap the model field in a fetch RequestInit body.
  */
 function swapModelInBody(
-  init: RequestInit | undefined,
-  newModel: string,
+	init: RequestInit | undefined,
+	newModel: string,
 ): RequestInit | undefined {
-  if (!init?.body || typeof init.body !== "string") return init;
-  try {
-    const body = JSON.parse(init.body);
-    body.model = newModel;
-    return { ...init, body: JSON.stringify(body) };
-  } catch {
-    return init;
-  }
+	if (!init?.body || typeof init.body !== "string") return init;
+	try {
+		const body = JSON.parse(init.body);
+		body.model = newModel;
+		return { ...init, body: JSON.stringify(body) };
+	} catch {
+		return init;
+	}
 }
 // Maximum length for item IDs in the OpenAI Responses API
@@ -178,17 +358,17 @@ const MAX_RESPONSE_API_ID_LENGTH = 64;
  * See: https://github.com/vercel/ai/issues/5171
  */
 function sanitizeResponseId(id: string): string {
-  if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
-  // Use a simple hash: take first 8 chars + hash of full string for uniqueness
-  // Format: "h_" + first 8 chars + "_" + base36 hash (up to ~50 chars total)
-  let hash = 0;
-  for (let i = 0; i < id.length; i++) {
-    hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
-  }
-  const hashStr = Math.abs(hash).toString(36);
-  const prefix = id.slice(0, 8);
-  // Ensure total length <= 64: "h_" (2) + prefix (8) + "_" (1) + hash
-  return `h_${prefix}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
+	if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
+	// Use a simple hash: take first 8 chars + hash of full string for uniqueness
+	// Format: "h_" + first 8 chars + "_" + base36 hash (up to ~50 chars total)
+	let hash = 0;
+	for (let i = 0; i < id.length; i++) {
+		hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
+	}
+	const hashStr = Math.abs(hash).toString(36);
+	const prefix = id.slice(0, 8);
+	// Ensure total length <= 64: "h_" (2) + prefix (8) + "_" (1) + hash
+	return `h_${prefix}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
 }
 /**
@@ -196,632 +376,764 @@ function sanitizeResponseId(id: string): string {
  * Recursively checks `id` and `call_id` fields on each input item.
  */
 function sanitizeResponseInputIds(input: any[]): any[] {
-  return input.map((item: any) => {
-    if (!item || typeof item !== "object") return item;
-    const sanitized = { ...item };
-    if (
-      typeof sanitized.id === "string" &&
-      sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH
-    ) {
-      sanitized.id = sanitizeResponseId(sanitized.id);
-    }
-    if (
-      typeof sanitized.call_id === "string" &&
-      sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH
-    ) {
-      sanitized.call_id = sanitizeResponseId(sanitized.call_id);
-    }
-    return sanitized;
-  });
+	return input.map((item: any) => {
+		if (!item || typeof item !== "object") return item;
+		const sanitized = { ...item };
+		if (
+			typeof sanitized.id === "string" &&
+			sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH
+		) {
+			sanitized.id = sanitizeResponseId(sanitized.id);
+		}
+		if (
+			typeof sanitized.call_id === "string" &&
+			sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH
+		) {
+			sanitized.call_id = sanitizeResponseId(sanitized.call_id);
+		}
+		return sanitized;
+	});
 }
 /**
  * Retries: 2s, 4s, 8s (with jitter)
  */
 function calculateRetryDelay(attempt: number): number {
-  const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * Math.pow(2, attempt);
-  const jitter = Math.random() * 1000; // Add 0-1s random jitter
-  const delay = Math.min(
-    exponentialDelay + jitter,
-    RATE_LIMIT_CONFIG.maxDelayMs,
-  );
-  return Math.round(delay);
+	const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
+	const jitter = Math.random() * 1000; // Add 0-1s random jitter
+	const delay = Math.min(
+		exponentialDelay + jitter,
+		RATE_LIMIT_CONFIG.maxDelayMs,
+	);
+	return Math.round(delay);
 }
 export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
-  // Initialize logger with the SDK client
-  setLogger(sdk);
-  return {
-    auth: {
-      provider: "github-copilot",
-      loader: async (getAuth, provider) => {
-        const info = await getAuth();
-        if (!info || info.type !== "oauth") return {};
-        // Enterprise URL support for baseURL
-        const enterpriseUrl = (info as any).enterpriseUrl;
-        const baseURL = enterpriseUrl
-          ? `https://copilot-api.${normalizeDomain(enterpriseUrl)}`
-          : undefined;
-        if (provider && provider.models) {
-          for (const [_modelId, model] of Object.entries(provider.models)) {
-            model.cost = {
-              input: 0,
-              output: 0,
-              cache: {
-                read: 0,
-                write: 0,
-              },
-            };
-            // All models use the standard github-copilot SDK
-            // Reasoning support for Claude models is handled via:
-            // 1. The fetch wrapper adds thinking_budget to request body
-            // 2. The fetch wrapper strips invalid thinking blocks from messages
-            model.api.npm = "@ai-sdk/github-copilot";
-          }
-        }
-        return {
-          baseURL,
-          apiKey: "",
-          async fetch(input, init) {
-            const info = await getAuth();
-            if (info.type !== "oauth") return fetch(input, init);
-            let isAgentCall = false;
-            let isVisionRequest = false;
-            let modifiedBody: any = undefined;
-            let isClaudeModel = false;
-            try {
-              const body =
-                typeof init?.body === "string"
-                  ? JSON.parse(init.body)
-                  : init?.body;
-              const url = input.toString();
-              // Check if this is a Claude model request
-              const modelId = body?.model || "";
-              isClaudeModel = modelId.toLowerCase().includes("claude");
-              // Completions API
-              if (body?.messages && url.includes("completions")) {
-                // Keep local logic: detect if any message is assistant/tool
-                isAgentCall = body.messages.some((msg: any) =>
-                  ["tool", "assistant"].includes(msg.role),
-                );
-                isVisionRequest = body.messages.some(
-                  (msg: any) =>
-                    Array.isArray(msg.content) &&
-                    msg.content.some((part: any) => part.type === "image_url"),
-                );
-                // For Claude models, add thinking_budget to enable reasoning
-                // The Copilot API accepts this parameter and returns reasoning_text/reasoning_opaque
-                if (isClaudeModel) {
-                  // Use configured thinking_budget from model options, or default to 10000
-                  const thinkingBudget = body.thinking_budget || 10000;
-                  // Fix for "Invalid signature in thinking block" error:
-                  // The Copilot API uses reasoning_text/reasoning_opaque format for thinking
-                  // When these are passed back without proper signature, it causes errors
-                  // Solution: Ensure reasoning_opaque is present when reasoning_text exists,
-                  // or remove reasoning content entirely if signature is invalid/missing
-                  const cleanedMessages = body.messages.map(
-                    (msg: any, idx: number) => {
-                      if (msg.role !== "assistant") return msg;
-                      // Log message structure for debugging
-                      log("debug", `Processing assistant message ${idx}`, {
-                        has_reasoning_text: !!msg.reasoning_text,
-                        has_reasoning_opaque: !!msg.reasoning_opaque,
-                        content_type: typeof msg.content,
-                        content_is_array: Array.isArray(msg.content),
-                      });
-                      // If message has reasoning_text but no/invalid reasoning_opaque, remove reasoning
-                      if (msg.reasoning_text && !msg.reasoning_opaque) {
-                        log(
-                          "warn",
-                          `Removing reasoning_text without reasoning_opaque from message ${idx}`,
-                        );
-                        const { reasoning_text: _unused, ...cleanedMsg } = msg;
-                        return cleanedMsg;
-                      }
-                      // If content is an array, check for thinking blocks
-                      if (Array.isArray(msg.content)) {
-                        const hasThinkingBlock = msg.content.some(
-                          (part: any) => part.type === "thinking",
-                        );
-                        if (hasThinkingBlock) {
-                          log(
-                            "debug",
-                            `Message ${idx} has thinking blocks in content array`,
-                          );
-                          // Filter out thinking blocks without signatures
-                          const cleanedContent = msg.content.filter(
-                            (part: any) => {
-                              if (part.type === "thinking") {
-                                if (!part.signature) {
-                                  log(
-                                    "warn",
-                                    `Removing thinking block without signature`,
-                                  );
-                                  return false;
-                                }
-                              }
-                              return true;
-                            },
-                          );
-                          return {
-                            ...msg,
-                            content:
-                              cleanedContent.length > 0 ? cleanedContent : null,
-                          };
-                        }
-                      }
-                      return msg;
-                    },
-                  );
-                  modifiedBody = {
-                    ...body,
-                    messages: cleanedMessages,
-                    thinking_budget: thinkingBudget,
-                  };
-                  log("info", `Adding thinking_budget for Claude model`, {
-                    model: modelId,
-                    thinking_budget: thinkingBudget,
-                  });
-                }
-                // For GPT models (o1, gpt-5, etc.), add reasoning parameter
-                const isGptModel =
-                  modelId.toLowerCase().includes("gpt") ||
-                  modelId.toLowerCase().includes("o1") ||
-                  modelId.toLowerCase().includes("o3") ||
-                  modelId.toLowerCase().includes("o4");
-                if (isGptModel && !isClaudeModel) {
-                  // Get reasoning effort from body options or default to "medium"
-                  const reasoningEffort =
-                    body.reasoning?.effort ||
-                    body.reasoningEffort ||
-                    body.reasoning_effort ||
-                    "medium";
-                  modifiedBody = {
-                    ...(modifiedBody || body),
-                    reasoning: {
-                      effort: reasoningEffort,
-                    },
-                  };
-                  // Also pass through other reasoning options if present
-                  if (body.reasoningSummary || body.reasoning?.summary) {
-                    modifiedBody.reasoning.summary =
-                      body.reasoningSummary || body.reasoning?.summary;
-                  }
-                  log("info", `Adding reasoning for GPT model`, {
-                    model: modelId,
-                    reasoning_effort: reasoningEffort,
-                  });
-                }
-              }
-              // Responses API
-              if (body?.input) {
-                // Sanitize long IDs from Copilot backend (can be 400+ chars)
-                // OpenAI Responses API enforces a 64-char max on item IDs
-                const sanitizedInput = sanitizeResponseInputIds(body.input);
-                const inputWasSanitized =
-                  sanitizedInput !== body.input &&
-                  JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
-                if (inputWasSanitized) {
-                  log("info", "Sanitized long IDs in Responses API input", {
-                    original_count: body.input.filter(
-                      (item: any) =>
-                        (typeof item?.id === "string" &&
-                          item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
-                        (typeof item?.call_id === "string" &&
-                          item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
-                    ).length,
-                  });
-                  modifiedBody = {
-                    ...(modifiedBody || body),
-                    input: sanitizedInput,
-                  };
-                }
-                isAgentCall = (sanitizedInput || body.input).some(
-                  (item: any) =>
-                    item?.role === "assistant" ||
-                    (item?.type &&
-                      RESPONSES_API_ALTERNATE_INPUT_TYPES.includes(item.type)),
-                );
-                isVisionRequest = body.input.some(
-                  (item: any) =>
-                    Array.isArray(item?.content) &&
-                    item.content.some(
-                      (part: any) => part.type === "input_image",
-                    ),
-                );
-              }
-              // Messages API (Anthropic style)
-              if (body?.messages && !url.includes("completions")) {
-                isAgentCall = body.messages.some((msg: any) =>
-                  ["tool", "assistant"].includes(msg.role),
-                );
-                isVisionRequest = body.messages.some(
-                  (item: any) =>
-                    Array.isArray(item?.content) &&
-                    item.content.some(
-                      (part: any) =>
-                        part?.type === "image" ||
-                        (part?.type === "tool_result" &&
-                          Array.isArray(part?.content) &&
-                          part.content.some(
-                            (nested: any) => nested?.type === "image",
-                          )),
-                    ),
-                );
-              }
-            } catch {}
-            const headers: Record<string, string> = {
-              "x-initiator": isAgentCall ? "agent" : "user",
-              ...(init?.headers as Record<string, string>),
-              ...HEADERS,
-              Authorization: `Bearer ${info.refresh}`,
-              "Openai-Intent": "conversation-edits",
-            };
-            if (isVisionRequest) {
-              headers["Copilot-Vision-Request"] = "true";
-            }
-            // Official only deletes lowercase "authorization"
-            delete headers["x-api-key"];
-            delete headers["authorization"];
-            // Prepare the final init object with potentially modified body
-            const finalInit = {
-              ...init,
-              headers,
-              ...(modifiedBody ? { body: JSON.stringify(modifiedBody) } : {}),
-            };
-            // Extract model from request body for rate limit tracking
-            let currentModel = "";
-            try {
-              const bodyObj =
-                typeof finalInit.body === "string"
-                  ? JSON.parse(finalInit.body)
-                  : finalInit.body;
-              currentModel = bodyObj?.model || "";
-            } catch {}
-            // Pre-flight: if current model is already known rate-limited, switch to fallback
-            let activeFinalInit: RequestInit = finalInit;
-            if (currentModel && isModelRateLimited(currentModel)) {
-              const fallback = getNextFallbackModel(currentModel);
-              if (fallback) {
-                log(
-                  "info",
-                  `Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
-                );
-                activeFinalInit =
-                  swapModelInBody(finalInit, fallback) || finalInit;
-                currentModel = fallback;
-              }
-            }
-            // Retry logic with model fallback and exponential backoff for rate limiting
-            let lastError: Error | undefined;
-            let fallbacksUsed = 0;
-            let attempt = 0;
-            while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
-              try {
-                const response = await fetch(input, activeFinalInit);
-                if (response.status === 429) {
-                  // Parse Retry-After header for server-suggested cooldown
-                  const retryAfterMs = parseRetryAfter(response);
-                  const cooldownMs =
-                    retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
-                  // Mark this model as rate-limited
-                  if (currentModel) {
-                    markModelRateLimited(currentModel, cooldownMs);
-                  }
-                  // Try fallback model (doesn't count against retry budget)
-                  if (
-                    currentModel &&
-                    fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
-                  ) {
-                    const fallback = getNextFallbackModel(currentModel);
-                    if (fallback) {
-                      log(
-                        "warn",
-                        `Rate limited on ${currentModel}, switching to ${fallback}`,
-                        {
-                          retry_after_ms: retryAfterMs,
-                          cooldown_ms: cooldownMs,
-                          fallbacks_used: fallbacksUsed + 1,
-                        },
-                      );
-                      activeFinalInit =
-                        swapModelInBody(activeFinalInit, fallback) ||
-                        activeFinalInit;
-                      currentModel = fallback;
-                      fallbacksUsed++;
-                      continue; // Retry immediately with new model, no delay
-                    }
-                  }
-                  // No fallback available — use exponential backoff on same model
-                  if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
-                    const delay =
-                      retryAfterMs != null
-                        ? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
-                        : calculateRetryDelay(attempt);
-                    log(
-                      "warn",
-                      `Rate limited (429), no fallback available, waiting ${delay}ms`,
-                      {
-                        delay_ms: delay,
-                        attempt: attempt + 1,
-                        max_retries: RATE_LIMIT_CONFIG.maxRetries,
-                        fallbacks_exhausted: true,
-                      },
-                    );
-                    await sleep(delay);
-                    attempt++;
-                    continue;
-                  }
-                  // Exhausted retries and fallbacks
-                  throw new Error(
-                    `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
-                  );
-                }
-                // Response transformation is handled by the custom SDK at
-                // .opencode/plugin/sdk/copilot/
-                return response;
-              } catch (error) {
-                lastError = error as Error;
-                // Network errors might be transient, retry
-                if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
-                  const delay = calculateRetryDelay(attempt);
-                  log("warn", `Request failed, retrying`, {
-                    delay_ms: delay,
-                    attempt: attempt + 1,
-                    max_retries: RATE_LIMIT_CONFIG.maxRetries,
-                    error: lastError.message,
-                  });
-                  await sleep(delay);
-                  attempt++;
-                  continue;
-                }
-                throw error;
-              }
-            }
-            // Exhausted all retries
-            if (lastError) {
-              throw new Error(
-                `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
-              );
-            }
-            throw new Error(
-              `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
-            );
-          },
-        };
-      },
-      methods: [
-        {
-          type: "oauth",
-          label: "Login with GitHub Copilot",
-          prompts: [
-            {
-              type: "select",
-              key: "deploymentType",
-              message: "Select GitHub deployment type",
-              options: [
-                {
-                  label: "GitHub.com",
-                  value: "github.com",
-                  hint: "Public",
-                },
-                {
-                  label: "GitHub Enterprise",
-                  value: "enterprise",
-                  hint: "Data residency or self-hosted",
-                },
-              ],
-            },
-            {
-              type: "text",
-              key: "enterpriseUrl",
-              message: "Enter your GitHub Enterprise URL or domain",
-              placeholder: "company.ghe.com or https://company.ghe.com",
-              condition: (inputs: any) =>
-                inputs.deploymentType === "enterprise",
-              validate: (value: string) => {
-                if (!value) return "URL or domain is required";
-                try {
-                  const url = value.includes("://")
-                    ? new URL(value)
-                    : new URL(`https://${value}`);
-                  if (!url.hostname)
-                    return "Please enter a valid URL or domain";
-                  return undefined;
-                } catch {
-                  return "Please enter a valid URL (e.g., company.ghe.com or https://company.ghe.com)";
-                }
-              },
-            },
-          ],
-          async authorize(inputs: any = {}) {
-            const deploymentType = inputs.deploymentType || "github.com";
-            let domain = "github.com";
-            let actualProvider = "github-copilot";
-            if (deploymentType === "enterprise") {
-              const enterpriseUrl = inputs.enterpriseUrl;
-              domain = normalizeDomain(enterpriseUrl);
-              actualProvider = "github-copilot-enterprise";
-            }
-            const urls = getUrls(domain);
-            const deviceResponse = await fetch(urls.DEVICE_CODE_URL, {
-              method: "POST",
-              headers: {
-                Accept: "application/json",
-                "Content-Type": "application/json",
-                "User-Agent": "GitHubCopilotChat/0.35.0",
-              },
-              body: JSON.stringify({
-                client_id: CLIENT_ID,
-                scope: "read:user",
-              }),
-            });
-            if (!deviceResponse.ok) {
-              throw new Error("Failed to initiate device authorization");
-            }
-            const deviceData = await deviceResponse.json();
-            return {
-              url: deviceData.verification_uri,
-              instructions: `Enter code: ${deviceData.user_code}`,
-              method: "auto",
-              callback: async () => {
-                while (true) {
-                  const response = await fetch(urls.ACCESS_TOKEN_URL, {
-                    method: "POST",
-                    headers: {
-                      Accept: "application/json",
-                      "Content-Type": "application/json",
-                      "User-Agent": "GitHubCopilotChat/0.35.0",
-                    },
-                    body: JSON.stringify({
-                      client_id: CLIENT_ID,
-                      device_code: deviceData.device_code,
-                      grant_type:
-                        "urn:ietf:params:oauth:grant-type:device_code",
-                    }),
-                  });
-                  if (!response.ok) return { type: "failed" };
-                  const data = await response.json();
-                  if (data.access_token) {
-                    const result: {
-                      type: "success";
-                      refresh: string;
-                      access: string;
-                      expires: number;
-                      provider?: string;
-                      enterpriseUrl?: string;
-                    } = {
-                      type: "success",
-                      refresh: data.access_token,
-                      access: data.access_token,
-                      expires: 0,
-                    };
-                    if (actualProvider === "github-copilot-enterprise") {
-                      result.provider = "github-copilot-enterprise";
-                      result.enterpriseUrl = domain;
-                    }
-                    return result;
-                  }
-                  if (data.error === "authorization_pending") {
-                    await sleep(
-                      deviceData.interval * 1000 +
-                        OAUTH_POLLING_SAFETY_MARGIN_MS,
-                    );
-                    continue;
-                  }
-                  if (data.error === "slow_down") {
-                    // Based on the RFC spec, we must add 5 seconds to our current polling interval.
-                    let newInterval = (deviceData.interval + 5) * 1000;
-                    if (
-                      data.interval &&
-                      typeof data.interval === "number" &&
-                      data.interval > 0
-                    ) {
-                      newInterval = data.interval * 1000;
-                    }
-                    await sleep(newInterval + OAUTH_POLLING_SAFETY_MARGIN_MS);
-                    continue;
-                  }
-                  if (data.error) return { type: "failed" };
-                  await sleep(
-                    deviceData.interval * 1000 + OAUTH_POLLING_SAFETY_MARGIN_MS,
-                  );
-                  continue;
-                }
-              },
-            };
-          },
-        },
-      ],
-    },
-    // Hook to add custom headers for Claude reasoning support
-    "chat.headers": async (input: any, output: any) => {
-      // Only apply to GitHub Copilot provider
-      if (!input.model?.providerID?.includes("github-copilot")) return;
-      // Add Anthropic beta header for interleaved thinking (extended reasoning)
-      // This is required for Claude models to return thinking blocks
-      if (input.model?.api?.npm === "@ai-sdk/anthropic") {
-        output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
-      }
-      // Mark subagent sessions as agent-initiated (matching standard Copilot tools)
-      try {
-        const session = await sdk.session
-          .get({
-            path: {
-              id: input.sessionID,
-            },
-            throwOnError: true,
-          })
-          .catch(() => undefined);
-        if (session?.data?.parentID) {
-          output.headers["x-initiator"] = "agent";
-        }
-      } catch {
-        // Ignore errors from session lookup
-      }
-    },
-  };
+	// Initialize logger with the SDK client
+	setLogger(sdk);
+	return {
+		auth: {
+			provider: "github-copilot",
+			/**
+			 * Builds the provider options for GitHub Copilot from the stored auth.
+			 *
+			 * Returns `{}` when no OAuth credentials are stored. Otherwise it zeroes the
+			 * cost of every model, points every model at "@ai-sdk/github-copilot", and
+			 * returns `baseURL`, an empty `apiKey` and a custom `fetch` that:
+			 *  1. inspects/rewrites the JSON body (reasoning options, ID sanitizing),
+			 *  2. sets the Copilot headers and bearer token,
+			 *  3. retries 429 responses with model fallback, cooldown waits and backoff.
+			 */
+			loader: async (getAuth, provider) => {
+				const info = await getAuth();
+				if (!info || info.type !== "oauth") return {};
+				// Enterprise URL support for baseURL
+				const enterpriseUrl = (info as any).enterpriseUrl;
+				const baseURL = enterpriseUrl
+					? `https://copilot-api.${normalizeDomain(enterpriseUrl)}`
+					: undefined;
+				if (provider && provider.models) {
+					for (const [_modelId, model] of Object.entries(provider.models)) {
+						model.cost = {
+							input: 0,
+							output: 0,
+							cache: {
+								read: 0,
+								write: 0,
+							},
+						};
+						// All models use the standard github-copilot SDK
+						// Reasoning support for Claude models is handled via:
+						// 1. The fetch wrapper adds thinking_budget to request body
+						// 2. The fetch wrapper strips invalid thinking blocks from messages
+						model.api.npm = "@ai-sdk/github-copilot";
+					}
+				}
+				return {
+					baseURL,
+					apiKey: "",
+					/**
+					 * Fetch wrapper used for every provider request.
+					 *
+					 * Falls back to the plain global `fetch` when no OAuth auth is available.
+					 * Throws when all fallback models are cooling down for longer than the
+					 * inline-wait limit, or when the retry budget is exhausted.
+					 */
+					async fetch(input, init) {
+						const info = await getAuth();
+						// Auth is re-read on every request and may be missing by now. Apply
+						// the same `!info` guard as the loader so that `info.type` cannot
+						// throw a TypeError here.
+						if (!info || info.type !== "oauth") return fetch(input, init);
+						let isAgentCall = false;
+						let isVisionRequest = false;
+						let modifiedBody: any;
+						let isClaudeModel = false;
+						// Body inspection is best effort: any failure in this try block is
+						// swallowed and the request proceeds with whatever was set so far.
+						try {
+							const body =
+								typeof init?.body === "string"
+									? JSON.parse(init.body)
+									: init?.body;
+							const url = input.toString();
+							// Check if this is a Claude model request
+							const modelId = body?.model || "";
+							isClaudeModel = modelId.toLowerCase().includes("claude");
+							// Completions API
+							if (body?.messages && url.includes("completions")) {
+								// Keep local logic: detect if any message is assistant/tool
+								isAgentCall = body.messages.some((msg: any) =>
+									["tool", "assistant"].includes(msg.role),
+								);
+								isVisionRequest = body.messages.some(
+									(msg: any) =>
+										Array.isArray(msg.content) &&
+										msg.content.some((part: any) => part.type === "image_url"),
+								);
+								// For Claude models, add thinking_budget to enable reasoning
+								// The Copilot API accepts this parameter and returns reasoning_text/reasoning_opaque
+								if (isClaudeModel) {
+									// Use configured thinking_budget from model options, or default to 10000
+									const thinkingBudget = body.thinking_budget || 10000;
+									// Fix for "Invalid signature in thinking block" error:
+									// The Copilot API uses reasoning_text/reasoning_opaque format for thinking
+									// When these are passed back without proper signature, it causes errors
+									// Solution: Ensure reasoning_opaque is present when reasoning_text exists,
+									// or remove reasoning content entirely if signature is invalid/missing
+									const cleanedMessages = body.messages.map(
+										(msg: any, idx: number) => {
+											// Only assistant messages are rewritten.
+											if (msg.role !== "assistant") return msg;
+											// Log message structure for debugging
+											log("debug", `Processing assistant message ${idx}`, {
+												has_reasoning_text: !!msg.reasoning_text,
+												has_reasoning_opaque: !!msg.reasoning_opaque,
+												content_type: typeof msg.content,
+												content_is_array: Array.isArray(msg.content),
+											});
+											// If message has reasoning_text but no reasoning_opaque, drop reasoning_text
+											if (msg.reasoning_text && !msg.reasoning_opaque) {
+												log(
+													"warn",
+													`Removing reasoning_text without reasoning_opaque from message ${idx}`,
+												);
+												const { reasoning_text: _unused, ...cleanedMsg } = msg;
+												return cleanedMsg;
+											}
+											// If content is an array, check for thinking blocks
+											if (Array.isArray(msg.content)) {
+												const hasThinkingBlock = msg.content.some(
+													(part: any) => part.type === "thinking",
+												);
+												if (hasThinkingBlock) {
+													log(
+														"debug",
+														`Message ${idx} has thinking blocks in content array`,
+													);
+													// Filter out thinking blocks without signatures
+													const cleanedContent = msg.content.filter(
+														(part: any) => {
+															if (part.type === "thinking") {
+																if (!part.signature) {
+																	log(
+																		"warn",
+																		`Removing thinking block without signature`,
+																	);
+																	return false;
+																}
+															}
+															return true;
+														},
+													);
+													// Content becomes null when every part was removed.
+													return {
+														...msg,
+														content:
+															cleanedContent.length > 0 ? cleanedContent : null,
+													};
+												}
+											}
+											return msg;
+										},
+									);
+									modifiedBody = {
+										...body,
+										messages: cleanedMessages,
+										thinking_budget: thinkingBudget,
+									};
+									log("info", `Adding thinking_budget for Claude model`, {
+										model: modelId,
+										thinking_budget: thinkingBudget,
+									});
+								}
+								// For GPT models (o1, gpt-5, etc.), add reasoning parameter
+								// NOTE(review): this is a substring match on the model id.
+								const isGptModel =
+									modelId.toLowerCase().includes("gpt") ||
+									modelId.toLowerCase().includes("o1") ||
+									modelId.toLowerCase().includes("o3") ||
+									modelId.toLowerCase().includes("o4");
+								if (isGptModel && !isClaudeModel) {
+									// Get reasoning effort from body options or default to "medium"
+									const reasoningEffort =
+										body.reasoning?.effort ||
+										body.reasoningEffort ||
+										body.reasoning_effort ||
+										"medium";
+									modifiedBody = {
+										...(modifiedBody || body),
+										reasoning: {
+											effort: reasoningEffort,
+										},
+									};
+									// Also pass through other reasoning options if present
+									if (body.reasoningSummary || body.reasoning?.summary) {
+										modifiedBody.reasoning.summary =
+											body.reasoningSummary || body.reasoning?.summary;
+									}
+									log("info", `Adding reasoning for GPT model`, {
+										model: modelId,
+										reasoning_effort: reasoningEffort,
+									});
+								}
+							}
+							// Responses API
+							if (body?.input) {
+								// Sanitize long IDs from Copilot backend (can be 400+ chars)
+								// OpenAI Responses API enforces a 64-char max on item IDs
+								const sanitizedInput = sanitizeResponseInputIds(body.input);
+								// Changed only if the helper returned a different object AND
+								// its serialized form differs from the original input.
+								const inputWasSanitized =
+									sanitizedInput !== body.input &&
+									JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
+								if (inputWasSanitized) {
+									log("info", "Sanitized long IDs in Responses API input", {
+										original_count: body.input.filter(
+											(item: any) =>
+												(typeof item?.id === "string" &&
+													item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
+												(typeof item?.call_id === "string" &&
+													item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
+										).length,
+									});
+									modifiedBody = {
+										...(modifiedBody || body),
+										input: sanitizedInput,
+									};
+								}
+								isAgentCall = (sanitizedInput || body.input).some(
+									(item: any) =>
+										item?.role === "assistant" ||
+										(item?.type &&
+											RESPONSES_API_ALTERNATE_INPUT_TYPES.includes(item.type)),
+								);
+								isVisionRequest = body.input.some(
+									(item: any) =>
+										Array.isArray(item?.content) &&
+										item.content.some(
+											(part: any) => part.type === "input_image",
+										),
+								);
+							}
+							// Messages API (Anthropic style)
+							if (body?.messages && !url.includes("completions")) {
+								isAgentCall = body.messages.some((msg: any) =>
+									["tool", "assistant"].includes(msg.role),
+								);
+								// Images count whether top-level or nested in a tool_result part.
+								isVisionRequest = body.messages.some(
+									(item: any) =>
+										Array.isArray(item?.content) &&
+										item.content.some(
+											(part: any) =>
+												part?.type === "image" ||
+												(part?.type === "tool_result" &&
+													Array.isArray(part?.content) &&
+													part.content.some(
+														(nested: any) => nested?.type === "image",
+													)),
+										),
+								);
+							}
+						} catch {}
+						// Spread order matters: caller headers may override the computed
+						// "x-initiator", while HEADERS, Authorization and Openai-Intent
+						// override anything the caller supplied.
+						const headers: Record<string, string> = {
+							"x-initiator": isAgentCall ? "agent" : "user",
+							...(init?.headers as Record<string, string>),
+							...HEADERS,
+							Authorization: `Bearer ${info.refresh}`,
+							"Openai-Intent": "conversation-edits",
+						};
+						if (isVisionRequest) {
+							headers["Copilot-Vision-Request"] = "true";
+						}
+						// Official only deletes lowercase "authorization"
+						delete headers["x-api-key"];
+						delete headers["authorization"];
+						// Prepare the final init object with potentially modified body
+						const finalInit = {
+							...init,
+							headers,
+							...(modifiedBody ? { body: JSON.stringify(modifiedBody) } : {}),
+						};
+						// Extract model from request body for rate limit tracking
+						let currentModel = "";
+						try {
+							const bodyObj =
+								typeof finalInit.body === "string"
+									? JSON.parse(finalInit.body)
+									: finalInit.body;
+							currentModel = bodyObj?.model || "";
+						} catch {}
+						// Pre-flight: if current model is already known rate-limited, switch to fallback
+						let activeFinalInit: RequestInit = finalInit;
+						// Models already tried, passed to getNextFallbackModel
+						// (presumably to exclude them — confirm against the helper).
+						const attemptedModels = new Set<string>();
+						if (currentModel) attemptedModels.add(currentModel);
+						// Remembered so a recovery cycle can return to the model the caller asked for.
+						const requestedModel = currentModel;
+						if (currentModel) {
+							const circuitRemainingMs =
+								getFamilyCircuitRemainingMs(currentModel);
+							if (circuitRemainingMs > 0) {
+								// Short waits are slept inline; longer ones fail fast.
+								if (
+									circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+								) {
+									log(
+										"info",
+										`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
+									);
+									await sleep(circuitRemainingMs);
+								} else {
+									throw new Error(
+										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
+									);
+								}
+							}
+						}
+						if (currentModel && isModelRateLimited(currentModel)) {
+							const fallback = getNextFallbackModel(
+								currentModel,
+								attemptedModels,
+							);
+							if (fallback) {
+								log(
+									"info",
+									`Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
+								);
+								activeFinalInit =
+									swapModelInBody(finalInit, fallback) || finalInit;
+								currentModel = fallback;
+								attemptedModels.add(fallback);
+							} else {
+								// No fallback left: open the family circuit, then wait or fail fast.
+								const familyCooldownMs =
+									getFamilyMaxCooldownRemainingMs(currentModel);
+								openFamilyCircuitBreaker(currentModel, familyCooldownMs);
+								if (
+									familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+								) {
+									log(
+										"info",
+										`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
+									);
+									await sleep(familyCooldownMs);
+									attemptedModels.clear();
+									if (currentModel) attemptedModels.add(currentModel);
+								} else {
+									throw new Error(
+										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
+									);
+								}
+							}
+						}
+						// Retry logic with model fallback and exponential backoff for rate limiting
+						let lastError: Error | undefined;
+						let fallbacksUsed = 0;
+						let attempt = 0;
+						let recoveryCyclesUsed = 0;
+						while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
+							try {
+								if (currentModel) {
+									await shapeRequestForModel(currentModel);
+								}
+								const response = await fetch(input, activeFinalInit);
+								if (response.status === 429) {
+									// Discard the unread 429 body; cancel failures are ignored.
+									try {
+										await response.body?.cancel();
+									} catch {}
+									// Parse Retry-After header for server-suggested cooldown
+									const retryAfterMs = parseRetryAfter(response);
+									const cooldownMs =
+										retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
+									// Mark this model as rate-limited
+									if (currentModel) {
+										markModelRateLimited(currentModel, cooldownMs);
+									}
+									// Try fallback model (doesn't count against retry budget)
+									if (
+										currentModel &&
+										fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
+									) {
+										const fallback = getNextFallbackModel(
+											currentModel,
+											attemptedModels,
+										);
+										if (fallback) {
+											log(
+												"warn",
+												`Rate limited on ${currentModel}, switching to ${fallback}`,
+												{
+													retry_after_ms: retryAfterMs,
+													cooldown_ms: cooldownMs,
+													fallbacks_used: fallbacksUsed + 1,
+												},
+											);
+											activeFinalInit =
+												swapModelInBody(activeFinalInit, fallback) ||
+												activeFinalInit;
+											currentModel = fallback;
+											attemptedModels.add(fallback);
+											fallbacksUsed++;
+											continue; // Retry immediately with new model, no delay
+										}
+									}
+									// No fallback available — use exponential backoff on same model
+									if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
+										if (
+											currentModel &&
+											isEntireModelFamilyCoolingDown(currentModel)
+										) {
+											const familyCooldownMs =
+												getFamilyMaxCooldownRemainingMs(currentModel);
+											openFamilyCircuitBreaker(currentModel, familyCooldownMs);
+											if (
+												familyCooldownMs <=
+												CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+											) {
+												log(
+													"info",
+													`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
+												);
+												await sleep(familyCooldownMs);
+												attemptedModels.clear();
+												if (currentModel) attemptedModels.add(currentModel);
+												attempt++;
+												continue;
+											}
+											// The catch below recognises this message and rethrows without retrying.
+											throw new Error(
+												`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
+											);
+										}
+										const modelCooldownMs = currentModel
+											? getRateLimitRemainingMs(currentModel)
+											: null;
+										// Delay preference: tracked cooldown, then Retry-After,
+										// then computed backoff; capped at maxDelayMs.
+										const delay = Math.min(
+											modelCooldownMs ??
+												retryAfterMs ??
+												calculateRetryDelay(attempt),
+											RATE_LIMIT_CONFIG.maxDelayMs,
+										);
+										log(
+											"warn",
+											`Rate limited (429), no fallback available, waiting ${delay}ms`,
+											{
+												delay_ms: delay,
+												attempt: attempt + 1,
+												max_retries: RATE_LIMIT_CONFIG.maxRetries,
+												fallbacks_exhausted: true,
+											},
+										);
+										await sleep(delay);
+										attemptedModels.clear();
+										if (currentModel) attemptedModels.add(currentModel);
+										attempt++;
+										continue;
+									}
+									// Exhausted retries and fallbacks
+									if (currentModel) {
+										const familyCooldownMs =
+											getFamilyMaxCooldownRemainingMs(currentModel);
+										const recoveryDelayMs =
+											familyCooldownMs > 0
+												? Math.min(
+														familyCooldownMs,
+														CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
+													)
+												: calculateRetryDelay(0);
+										if (
+											recoveryDelayMs > 0 &&
+											recoveryCyclesUsed <
+												CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
+										) {
+											recoveryCyclesUsed++;
+											log(
+												"info",
+												`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
+											);
+											await sleep(recoveryDelayMs);
+											// Recovery cycle: reset both budgets and go back to the requested model.
+											attempt = 0;
+											fallbacksUsed = 0;
+											if (requestedModel) {
+												currentModel = requestedModel;
+												activeFinalInit =
+													swapModelInBody(finalInit, requestedModel) ||
+													finalInit;
+											}
+											attemptedModels.clear();
+											if (currentModel) attemptedModels.add(currentModel);
+											continue;
+										}
+									}
+									throw new Error(
+										`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
+									);
+								}
+								// Response transformation is handled by the custom SDK at
+								// .opencode/plugin/sdk/copilot/
+								return response;
+							} catch (error) {
+								lastError = error as Error;
+								// Errors with these messages are rethrown immediately, never retried.
+								if (
+									lastError.message.includes(
+										"All fallback models cooling down",
+									) ||
+									lastError.message.includes("Local request queue saturated")
+								) {
+									throw lastError;
+								}
+								// Network errors might be transient, retry
+								if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
+									const delay = calculateRetryDelay(attempt);
+									log("warn", `Request failed, retrying`, {
+										delay_ms: delay,
+										attempt: attempt + 1,
+										max_retries: RATE_LIMIT_CONFIG.maxRetries,
+										error: lastError.message,
+									});
+									await sleep(delay);
+									attempt++;
+									continue;
+								}
+								throw error;
+							}
+						}
+						// Exhausted all retries
+						if (lastError) {
+							throw new Error(
+								`[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
+							);
+						}
+						throw new Error(
+							`[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
+						);
+					},
+				};
+			},
+			methods: [
+				{
+					type: "oauth",
+					label: "Login with GitHub Copilot",
+					prompts: [
+						{
+							type: "select",
+							key: "deploymentType",
+							message: "Select GitHub deployment type",
+							options: [
+								{
+									label: "GitHub.com",
+									value: "github.com",
+									hint: "Public",
+								},
+								{
+									label: "GitHub Enterprise",
+									value: "enterprise",
+									hint: "Data residency or self-hosted",
+								},
+							],
+						},
+						{
+							type: "text",
+							key: "enterpriseUrl",
+							message: "Enter your GitHub Enterprise URL or domain",
+							placeholder: "company.ghe.com or https://company.ghe.com",
+							condition: (inputs: any) =>
+								inputs.deploymentType === "enterprise",
+							// Checks the enterprise URL prompt. Returns an error message when the
+							// input is empty or unparsable, and undefined when it is acceptable.
+							validate: (value: string) => {
+								if (!value) {
+									return "URL or domain is required";
+								}
+								// A bare domain is parsed as if it had an https:// scheme.
+								const candidate = value.includes("://")
+									? value
+									: `https://${value}`;
+								let parsed: URL;
+								try {
+									parsed = new URL(candidate);
+								} catch {
+									return "Please enter a valid URL (e.g., company.ghe.com or https://company.ghe.com)";
+								}
+								return parsed.hostname
+									? undefined
+									: "Please enter a valid URL or domain";
+							},
+						},
+					],
+					/**
+					 * Starts the OAuth device-authorization flow against github.com or a
+					 * GitHub Enterprise domain.
+					 *
+					 * @param inputs Prompt answers. Reads `deploymentType` (defaults to
+					 *   "github.com") and, for "enterprise", `enterpriseUrl`.
+					 * @returns The verification URL, the user-code instructions, and a
+					 *   `callback` that polls the token endpoint until it yields a token
+					 *   (`type: "success"`) or a terminal error (`type: "failed"`).
+					 * @throws Error when the device-code request returns a non-OK status.
+					 */
+					async authorize(inputs: any = {}) {
+						const deploymentType = inputs.deploymentType || "github.com";
+						let domain = "github.com";
+						let actualProvider = "github-copilot";
+						if (deploymentType === "enterprise") {
+							const enterpriseUrl = inputs.enterpriseUrl;
+							domain = normalizeDomain(enterpriseUrl);
+							actualProvider = "github-copilot-enterprise";
+						}
+						const urls = getUrls(domain);
+						const deviceResponse = await fetch(urls.DEVICE_CODE_URL, {
+							method: "POST",
+							headers: {
+								Accept: "application/json",
+								"Content-Type": "application/json",
+								"User-Agent": "GitHubCopilotChat/0.35.0",
+							},
+							body: JSON.stringify({
+								client_id: CLIENT_ID,
+								scope: "read:user",
+							}),
+						});
+						if (!deviceResponse.ok) {
+							throw new Error("Failed to initiate device authorization");
+						}
+						const deviceData = await deviceResponse.json();
+						// Current polling interval in seconds. RFC 8628 section 3.2 says to
+						// use 5 when the server gives none; this also stops a missing value
+						// from becoming NaN and an effectively zero delay.
+						let pollIntervalSeconds =
+							typeof deviceData.interval === "number" && deviceData.interval > 0
+								? deviceData.interval
+								: 5;
+						return {
+							url: deviceData.verification_uri,
+							instructions: `Enter code: ${deviceData.user_code}`,
+							method: "auto",
+							callback: async () => {
+								while (true) {
+									const response = await fetch(urls.ACCESS_TOKEN_URL, {
+										method: "POST",
+										headers: {
+											Accept: "application/json",
+											"Content-Type": "application/json",
+											"User-Agent": "GitHubCopilotChat/0.35.0",
+										},
+										body: JSON.stringify({
+											client_id: CLIENT_ID,
+											device_code: deviceData.device_code,
+											grant_type:
+												"urn:ietf:params:oauth:grant-type:device_code",
+										}),
+									});
+									if (!response.ok) return { type: "failed" };
+									const data = await response.json();
+									if (data.access_token) {
+										// The same token is stored as both refresh and access, with expires 0.
+										const result: {
+											type: "success";
+											refresh: string;
+											access: string;
+											expires: number;
+											provider?: string;
+											enterpriseUrl?: string;
+										} = {
+											type: "success",
+											refresh: data.access_token,
+											access: data.access_token,
+											expires: 0,
+										};
+										if (actualProvider === "github-copilot-enterprise") {
+											result.provider = "github-copilot-enterprise";
+											result.enterpriseUrl = domain;
+										}
+										return result;
+									}
+									if (data.error === "slow_down") {
+										// RFC 8628 section 3.5: the interval must grow by 5 seconds for
+										// this and ALL subsequent requests, so the new value is kept.
+										// A positive interval supplied by the server takes precedence.
+										pollIntervalSeconds =
+											typeof data.interval === "number" && data.interval > 0
+												? data.interval
+												: pollIntervalSeconds + 5;
+									} else if (
+										data.error &&
+										data.error !== "authorization_pending"
+									) {
+										// Any other error is terminal.
+										return { type: "failed" };
+									}
+									// Reached for authorization_pending, slow_down, and responses
+									// with neither a token nor an error: wait, then poll again.
+									await sleep(
+										pollIntervalSeconds * 1000 + OAUTH_POLLING_SAFETY_MARGIN_MS,
+									);
+								}
+							},
+						};
+					},
+				},
+			],
+		},
+		// Hook to add custom headers for Claude reasoning support
+		"chat.headers": async (input: any, output: any) => {
+			// Requests for any provider other than GitHub Copilot are left untouched.
+			const providerID = input.model?.providerID;
+			if (!providerID?.includes("github-copilot")) return;
+			// Models served through the Anthropic SDK get the interleaved-thinking
+			// beta header (extended reasoning / thinking blocks).
+			const usesAnthropicSdk = input.model?.api?.npm === "@ai-sdk/anthropic";
+			if (usesAnthropicSdk) {
+				output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
+			}
+			// A session with a parentID is treated as a subagent session and is
+			// marked as agent-initiated (matching standard Copilot tools).
+			try {
+				const session = await sdk.session
+					.get({ path: { id: input.sessionID }, throwOnError: true })
+					.catch(() => undefined);
+				const parentID = session?.data?.parentID;
+				if (parentID) {
+					output.headers["x-initiator"] = "agent";
+				}
+			} catch {
+				// Best effort: a failed session lookup leaves the headers as they are.
+			}
+		},
+	};
 };