copilot-api-plus 1.4.10 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -1,10 +1,10 @@
1
1
  #!/usr/bin/env node
2
- import { GITHUB_BASE_URL, GITHUB_CLIENT_ID, HTTPError, PATHS, accountManager, cacheModels, cacheVSCodeVersion, copilotBaseUrl, copilotHeaders, ensurePaths, findModel, forwardError, getAccountDispatcher, getCopilotUsage, initProxyFromEnv, isAccountProxied, isNullish, isProxyActive, notifyStreamEnd, notifyStreamStart, resetAccountConnections, resetConnections, rootCause, sleep, standardHeaders, state } from "./account-manager-DktL5osZ.js";
3
- import { clearGithubToken, getDeviceCode, pollAccessToken, refreshCopilotToken, setupCopilotToken, setupGitHubToken, stopCopilotTokenRefresh } from "./token-B8FDrdsQ.js";
2
+ import { GITHUB_BASE_URL, GITHUB_CLIENT_ID, HTTPError, PATHS, accountManager, cacheModels, cacheVSCodeVersion, copilotBaseUrl, copilotHeaders, ensurePaths, findModel, forwardError, getAccountDispatcher, getCopilotUsage, initProxyFromEnv, isAccountProxied, isNullish, isProxyActive, notifyStreamEnd, notifyStreamStart, resetAccountConnections, resetConnections, rootCause, sleep, standardHeaders, state } from "./account-manager-2psqVsSO.js";
3
+ import { clearGithubToken, getDeviceCode, pollAccessToken, refreshCopilotToken, setupCopilotToken, setupGitHubToken, stopCopilotTokenRefresh } from "./token-DokbvagK.js";
4
4
  import { createRequire } from "node:module";
5
5
  import { defineCommand, runMain } from "citty";
6
6
  import consola from "consola";
7
- import { timingSafeEqual } from "node:crypto";
7
+ import { randomUUID, timingSafeEqual } from "node:crypto";
8
8
  import fs from "node:fs/promises";
9
9
  import os from "node:os";
10
10
  import path from "node:path";
@@ -1392,6 +1392,7 @@ accountRoutes.post("/", async (c) => {
1392
1392
  const body = await c.req.json();
1393
1393
  if (!body.githubToken || !body.label) return c.json({ error: "githubToken and label are required" }, 400);
1394
1394
  const account = await accountManager.addAccount(body.githubToken, body.label, body.accountType);
1395
+ accountManager.refreshGithubRateLimit(account);
1395
1396
  if (body.proxy) {
1396
1397
  try {
1397
1398
  const proxyUrl = new URL(body.proxy);
@@ -1479,6 +1480,7 @@ accountRoutes.post("/:id/refresh", async (c) => {
1479
1480
  if (!account) return c.json({ error: "Account not found" }, 404);
1480
1481
  await accountManager.refreshAccountToken(account);
1481
1482
  await accountManager.refreshAccountUsage(account);
1483
+ await accountManager.refreshGithubRateLimit(account);
1482
1484
  return c.json({ account: sanitiseAccount(account) });
1483
1485
  } catch (error) {
1484
1486
  consola.warn(`Error refreshing account: ${rootCause(error)}`);
@@ -1486,6 +1488,32 @@ accountRoutes.post("/:id/refresh", async (c) => {
1486
1488
  return c.json({ error: "Failed to refresh account" }, 500);
1487
1489
  }
1488
1490
  });
1491
+ accountRoutes.post("/:id/refresh-limits", async (c) => {
1492
+ try {
1493
+ const id = c.req.param("id");
1494
+ const account = accountManager.getAccountById(id);
1495
+ if (!account) return c.json({ error: "Account not found" }, 404);
1496
+ await accountManager.refreshGithubRateLimit(account);
1497
+ return c.json({ account: sanitiseAccount(account) });
1498
+ } catch (error) {
1499
+ consola.warn(`Error refreshing rate limits: ${rootCause(error)}`);
1500
+ consola.debug("Error refreshing rate limits:", error);
1501
+ return c.json({ error: "Failed to refresh rate limits" }, 500);
1502
+ }
1503
+ });
1504
+ accountRoutes.post("/:id/clear-session-limit", (c) => {
1505
+ try {
1506
+ const id = c.req.param("id");
1507
+ const account = accountManager.getAccountById(id);
1508
+ if (!account) return c.json({ error: "Account not found" }, 404);
1509
+ accountManager.clearCopilotSessionLimit(id);
1510
+ return c.json({ account: sanitiseAccount(account) });
1511
+ } catch (error) {
1512
+ consola.warn(`Error clearing session limit: ${rootCause(error)}`);
1513
+ consola.debug("Error clearing session limit:", error);
1514
+ return c.json({ error: "Failed to clear session limit" }, 500);
1515
+ }
1516
+ });
1489
1517
  accountRoutes.post("/auth/start", async (c) => {
1490
1518
  try {
1491
1519
  if (cachedDeviceCode && Date.now() < cachedDeviceCodeExpiresAt) {
@@ -1579,6 +1607,7 @@ accountRoutes.post("/auth/poll", async (c) => {
1579
1607
  clearAuthFlowState();
1580
1608
  const accountLabel = label || `Account ${accountManager.accountCount + 1}`;
1581
1609
  const account = await accountManager.addAccount(json.access_token, accountLabel, account_type || "individual");
1610
+ accountManager.refreshGithubRateLimit(account);
1582
1611
  return c.json({
1583
1612
  status: "complete",
1584
1613
  account: sanitiseAccount(account)
@@ -1899,6 +1928,7 @@ function injectIntoAnthropicPayload(payload) {
1899
1928
  function injectIntoOpenAIPayload(payload) {
1900
1929
  if (!ENABLED) return payload;
1901
1930
  const msgs = payload.messages;
1931
+ if (!Array.isArray(msgs)) return payload;
1902
1932
  for (const m of msgs) {
1903
1933
  if (m.role !== "system" && m.role !== "developer") continue;
1904
1934
  if (typeof m.content === "string" && alreadyInjected(m.content)) return payload;
@@ -2041,6 +2071,7 @@ function stripSystemReminders(payload) {
2041
2071
  * nothing changed.
2042
2072
  */
2043
2073
  function stripOpenAIReminders(payload) {
2074
+ if (!Array.isArray(payload.messages)) return payload;
2044
2075
  let changed = false;
2045
2076
  const newMessages = payload.messages.map((m) => {
2046
2077
  if (m.content === null) return m;
@@ -2304,6 +2335,396 @@ function overrideMessageStartEventModel(rawData, requestedModel) {
2304
2335
  }
2305
2336
  }
2306
2337
 
2338
+ //#endregion
2339
+ //#region src/services/copilot/responses-translator.ts
2340
+ function partsToInputContent(parts, role) {
2341
+ return parts.map((part) => {
2342
+ if (part.type === "text") return role === "assistant" ? {
2343
+ type: "output_text",
2344
+ text: part.text
2345
+ } : {
2346
+ type: "input_text",
2347
+ text: part.text
2348
+ };
2349
+ return {
2350
+ type: "input_image",
2351
+ image_url: part.image_url.url,
2352
+ detail: part.image_url.detail
2353
+ };
2354
+ });
2355
+ }
2356
+ function stringToInputContent(text, role) {
2357
+ return role === "assistant" ? [{
2358
+ type: "output_text",
2359
+ text
2360
+ }] : [{
2361
+ type: "input_text",
2362
+ text
2363
+ }];
2364
+ }
2365
+ function messageContent(message) {
2366
+ if (message.content === null) return [];
2367
+ if (typeof message.content === "string") return stringToInputContent(message.content, message.role);
2368
+ return partsToInputContent(message.content, message.role);
2369
+ }
2370
+ function translateAssistantWithToolCalls(message) {
2371
+ const items = [];
2372
+ const content = messageContent(message);
2373
+ if (content.length > 0) items.push({
2374
+ type: "message",
2375
+ role: "assistant",
2376
+ content
2377
+ });
2378
+ for (const call of message.tool_calls ?? []) items.push({
2379
+ type: "function_call",
2380
+ call_id: call.id,
2381
+ name: call.function.name,
2382
+ arguments: call.function.arguments
2383
+ });
2384
+ return items;
2385
+ }
2386
+ function translateMessage(message) {
2387
+ if (message.role === "tool") {
2388
+ const text = typeof message.content === "string" ? message.content : messageContent(message).map((c) => c.type === "input_text" || c.type === "output_text" ? c.text : "").join("");
2389
+ return [{
2390
+ type: "function_call_output",
2391
+ call_id: message.tool_call_id ?? "",
2392
+ output: text
2393
+ }];
2394
+ }
2395
+ if (message.role === "assistant" && message.tool_calls?.length) return translateAssistantWithToolCalls(message);
2396
+ return [{
2397
+ type: "message",
2398
+ role: message.role,
2399
+ content: messageContent(message)
2400
+ }];
2401
+ }
2402
+ function translateTool(tool) {
2403
+ return {
2404
+ type: "function",
2405
+ name: tool.function.name,
2406
+ description: tool.function.description,
2407
+ parameters: tool.function.parameters
2408
+ };
2409
+ }
2410
+ function translateToolChoice(choice) {
2411
+ if (!choice) return void 0;
2412
+ if (typeof choice === "string") return choice;
2413
+ return {
2414
+ type: "function",
2415
+ name: choice.function.name
2416
+ };
2417
+ }
2418
+ function translateReasoning(effort) {
2419
+ if (!effort) return void 0;
2420
+ if (effort === "max") return { effort: "high" };
2421
+ return { effort };
2422
+ }
2423
+ function chatToResponsesPayload(payload) {
2424
+ let instructions;
2425
+ const remainingMessages = [];
2426
+ let sawNonSystem = false;
2427
+ for (const msg of payload.messages) {
2428
+ if (msg.role === "system" && !sawNonSystem) {
2429
+ const text = typeof msg.content === "string" ? msg.content : messageContent(msg).map((c) => c.type === "input_text" || c.type === "output_text" ? c.text : "").join("\n");
2430
+ instructions = instructions ? `${instructions}\n\n${text}` : text;
2431
+ continue;
2432
+ }
2433
+ sawNonSystem = true;
2434
+ remainingMessages.push(msg);
2435
+ }
2436
+ const input = remainingMessages.flatMap((m) => translateMessage(m));
2437
+ const maxOutput = payload.max_completion_tokens ?? payload.max_tokens ?? void 0;
2438
+ return {
2439
+ model: payload.model,
2440
+ input,
2441
+ instructions,
2442
+ tools: payload.tools?.map((t) => translateTool(t)),
2443
+ tool_choice: translateToolChoice(payload.tool_choice),
2444
+ reasoning: translateReasoning(payload.reasoning_effort),
2445
+ max_output_tokens: maxOutput ?? void 0,
2446
+ temperature: payload.temperature ?? void 0,
2447
+ top_p: payload.top_p ?? void 0,
2448
+ parallel_tool_calls: void 0,
2449
+ stream: payload.stream ?? void 0
2450
+ };
2451
+ }
2452
+ function mapUsage(usage) {
2453
+ if (!usage) return void 0;
2454
+ return {
2455
+ prompt_tokens: usage.input_tokens ?? 0,
2456
+ completion_tokens: usage.output_tokens ?? 0,
2457
+ total_tokens: usage.total_tokens ?? (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
2458
+ ...usage.input_tokens_details?.cached_tokens !== void 0 && { prompt_tokens_details: { cached_tokens: usage.input_tokens_details.cached_tokens } }
2459
+ };
2460
+ }
2461
+ function extractAssistantText(output) {
2462
+ let text = "";
2463
+ for (const item of output) {
2464
+ if (item.type !== "message") continue;
2465
+ for (const part of item.content) text += part.text;
2466
+ }
2467
+ return text;
2468
+ }
2469
+ function extractToolCalls(output) {
2470
+ const calls = [];
2471
+ for (const item of output) if (item.type === "function_call") calls.push({
2472
+ id: item.call_id,
2473
+ type: "function",
2474
+ function: {
2475
+ name: item.name,
2476
+ arguments: item.arguments
2477
+ }
2478
+ });
2479
+ return calls;
2480
+ }
2481
+ function responsesToChatResponse(resp, requestedModel) {
2482
+ const text = extractAssistantText(resp.output);
2483
+ const toolCalls = extractToolCalls(resp.output);
2484
+ const finishReason = toolCalls.length > 0 ? "tool_calls" : "stop";
2485
+ return {
2486
+ id: resp.id,
2487
+ object: "chat.completion",
2488
+ created: resp.created_at ?? Math.floor(Date.now() / 1e3),
2489
+ model: requestedModel,
2490
+ choices: [{
2491
+ index: 0,
2492
+ message: {
2493
+ role: "assistant",
2494
+ content: text || null,
2495
+ ...toolCalls.length > 0 && { tool_calls: toolCalls }
2496
+ },
2497
+ logprobs: null,
2498
+ finish_reason: finishReason
2499
+ }],
2500
+ usage: mapUsage(resp.usage)
2501
+ };
2502
+ }
2503
+ function makeChunk(s, choice) {
2504
+ return { data: JSON.stringify({
2505
+ id: s.responseId,
2506
+ object: "chat.completion.chunk",
2507
+ created: s.created,
2508
+ model: s.requestedModel,
2509
+ choices: [choice]
2510
+ }) };
2511
+ }
2512
+ function ensureRoleChunk(s) {
2513
+ if (s.roleEmitted) return null;
2514
+ s.roleEmitted = true;
2515
+ return makeChunk(s, {
2516
+ index: 0,
2517
+ delta: {
2518
+ role: "assistant",
2519
+ content: ""
2520
+ },
2521
+ finish_reason: null,
2522
+ logprobs: null
2523
+ });
2524
+ }
2525
+ function getToolIndex(s, key) {
2526
+ let idx = s.toolIndexById.get(key);
2527
+ if (idx === void 0) {
2528
+ idx = s.nextToolIndex++;
2529
+ s.toolIndexById.set(key, idx);
2530
+ }
2531
+ return idx;
2532
+ }
2533
+ function* handleTextDelta(s, delta) {
2534
+ const roleChunk = ensureRoleChunk(s);
2535
+ if (roleChunk) yield roleChunk;
2536
+ yield makeChunk(s, {
2537
+ index: 0,
2538
+ delta: { content: delta },
2539
+ finish_reason: null,
2540
+ logprobs: null
2541
+ });
2542
+ }
2543
+ function* handleFunctionCallAdded(s, item) {
2544
+ s.hasToolCalls = true;
2545
+ const key = item.call_id || item.id || "";
2546
+ const idx = getToolIndex(s, key);
2547
+ const roleChunk = ensureRoleChunk(s);
2548
+ if (roleChunk) yield roleChunk;
2549
+ yield makeChunk(s, {
2550
+ index: 0,
2551
+ delta: { tool_calls: [{
2552
+ index: idx,
2553
+ id: item.call_id,
2554
+ type: "function",
2555
+ function: {
2556
+ name: item.name,
2557
+ arguments: ""
2558
+ }
2559
+ }] },
2560
+ finish_reason: null,
2561
+ logprobs: null
2562
+ });
2563
+ }
2564
+ function* handleArgumentsDelta(s, itemId, delta) {
2565
+ const idx = getToolIndex(s, itemId);
2566
+ yield makeChunk(s, {
2567
+ index: 0,
2568
+ delta: { tool_calls: [{
2569
+ index: idx,
2570
+ function: { arguments: delta }
2571
+ }] },
2572
+ finish_reason: null,
2573
+ logprobs: null
2574
+ });
2575
+ }
2576
+ function buildUsageChunk(s, usage) {
2577
+ if (!usage) return null;
2578
+ const chunk = {
2579
+ id: s.responseId,
2580
+ object: "chat.completion.chunk",
2581
+ created: s.created,
2582
+ model: s.requestedModel,
2583
+ choices: [],
2584
+ usage: {
2585
+ prompt_tokens: usage.prompt_tokens,
2586
+ completion_tokens: usage.completion_tokens,
2587
+ total_tokens: usage.total_tokens,
2588
+ ...usage.prompt_tokens_details && { prompt_tokens_details: usage.prompt_tokens_details }
2589
+ }
2590
+ };
2591
+ return { data: JSON.stringify(chunk) };
2592
+ }
2593
+ function* handleCompleted(s, response) {
2594
+ const finishReason = s.hasToolCalls ? "tool_calls" : "stop";
2595
+ yield makeChunk(s, {
2596
+ index: 0,
2597
+ delta: {},
2598
+ finish_reason: finishReason,
2599
+ logprobs: null
2600
+ });
2601
+ const usageChunk = buildUsageChunk(s, mapUsage(response.usage));
2602
+ if (usageChunk) yield usageChunk;
2603
+ yield { data: "[DONE]" };
2604
+ }
2605
+ function parseEvent(data) {
2606
+ try {
2607
+ return JSON.parse(data);
2608
+ } catch {
2609
+ return null;
2610
+ }
2611
+ }
2612
+ function handleTerminalEvent(event) {
2613
+ const message = event.message ?? event.response?.error?.message ?? "Responses API error";
2614
+ throw new Error(message);
2615
+ }
2616
+ /**
2617
+ * Dispatch a single Responses-API event to the right handler.
2618
+ * Returns generator of chunks and a boolean (true = stream complete).
2619
+ */
2620
+ function* dispatchEvent(s, event) {
2621
+ switch (event.type) {
2622
+ case "response.output_text.delta":
2623
+ if (event.delta) yield* handleTextDelta(s, event.delta);
2624
+ return false;
2625
+ case "response.output_item.added":
2626
+ if (event.item?.type === "function_call") yield* handleFunctionCallAdded(s, event.item);
2627
+ return false;
2628
+ case "response.function_call_arguments.delta":
2629
+ if (event.delta !== void 0) yield* handleArgumentsDelta(s, event.item_id ?? "", event.delta);
2630
+ return false;
2631
+ case "response.completed":
2632
+ if (event.response) {
2633
+ yield* handleCompleted(s, event.response);
2634
+ return true;
2635
+ }
2636
+ return false;
2637
+ case "response.failed":
2638
+ case "response.error": handleTerminalEvent(event);
2639
+ default: return false;
2640
+ }
2641
+ }
2642
+ /**
2643
+ * Translate a Responses-API SSE stream into Chat Completions SSE messages.
2644
+ *
2645
+ * Yields `{ data: <stringified chat-completion-chunk> }` objects so the
2646
+ * route handler can feed them straight into `stream.writeSSE()` — same
2647
+ * shape as the existing `events()` output for `/chat/completions`.
2648
+ */
2649
+ async function* responsesStreamToChatChunks(source, requestedModel) {
2650
+ const s = {
2651
+ responseId: `chatcmpl-${randomUUID().replaceAll("-", "")}`,
2652
+ created: Math.floor(Date.now() / 1e3),
2653
+ requestedModel,
2654
+ roleEmitted: false,
2655
+ hasToolCalls: false,
2656
+ toolIndexById: /* @__PURE__ */ new Map(),
2657
+ nextToolIndex: 0
2658
+ };
2659
+ for await (const sse of source) {
2660
+ if (!sse.data || sse.data === "[DONE]") continue;
2661
+ const event = parseEvent(sse.data);
2662
+ if (!event) continue;
2663
+ if (yield* dispatchEvent(s, event)) return;
2664
+ }
2665
+ }
2666
+
2667
+ //#endregion
2668
+ //#region src/services/copilot/create-responses.ts
2669
+ /**
2670
+ * Call Copilot's `/v1/responses` with a Chat Completions payload and
2671
+ * return either a Chat-style response or an SSE generator that yields
2672
+ * already-translated Chat Completion chunks (one per `data:` line).
2673
+ *
2674
+ * Currently only supports single-account mode. Multi-account routing
2675
+ * for Responses-only models can be added in a follow-up if needed.
2676
+ */
2677
+ async function createResponsesAsChat(payload) {
2678
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2679
+ const responsesPayload = chatToResponsesPayload(payload);
2680
+ const url = `${copilotBaseUrl(state)}/v1/responses`;
2681
+ const enableVision = responsesPayload.input.some((item) => item.type === "message" && item.content.some((c) => c.type === "input_image"));
2682
+ const isAgentCall = payload.messages.some((m) => ["assistant", "tool"].includes(m.role));
2683
+ const buildHeaders = () => ({
2684
+ ...copilotHeaders(state, enableVision),
2685
+ "X-Initiator": isAgentCall ? "agent" : "user"
2686
+ });
2687
+ const bodyString = JSON.stringify(responsesPayload);
2688
+ consola.debug("Sending request to Copilot (/v1/responses):", {
2689
+ model: responsesPayload.model,
2690
+ endpoint: url,
2691
+ stream: responsesPayload.stream
2692
+ });
2693
+ let response = await fetchWithRetry(url, () => ({
2694
+ method: "POST",
2695
+ headers: buildHeaders(),
2696
+ body: bodyString
2697
+ }));
2698
+ if (response.status === 401) {
2699
+ consola.warn("Copilot token expired, refreshing and retrying...");
2700
+ try {
2701
+ await refreshCopilotToken();
2702
+ response = await fetchWithTimeout$1(url, {
2703
+ method: "POST",
2704
+ headers: buildHeaders(),
2705
+ body: bodyString
2706
+ });
2707
+ } catch {}
2708
+ }
2709
+ if (!response.ok) {
2710
+ const errorBody = await response.text();
2711
+ consola.error("Failed /v1/responses request", {
2712
+ status: response.status,
2713
+ statusText: response.statusText,
2714
+ body: errorBody
2715
+ });
2716
+ throw new HTTPError(`Failed to call /v1/responses: ${response.status} ${errorBody}`, response);
2717
+ }
2718
+ if (payload.stream) {
2719
+ const sse = events(response);
2720
+ const translated = responsesStreamToChatChunks(sse, payload.model);
2721
+ translated.__accountInfo = { apiBaseUrl: copilotBaseUrl(state) };
2722
+ return translated;
2723
+ }
2724
+ const responsesResult = await response.json();
2725
+ return responsesToChatResponse(responsesResult, payload.model);
2726
+ }
2727
+
2307
2728
  //#endregion
2308
2729
  //#region src/services/copilot/create-chat-completions.ts
2309
2730
  /**
@@ -2520,6 +2941,15 @@ function logThinkingInjection(original, injected, resolvedModel) {
2520
2941
  else if (injected.reasoning_effort && injected.reasoning_effort !== original.reasoning_effort) consola.debug(`Thinking: injected reasoning_effort=${injected.reasoning_effort} for "${resolvedModel}"`);
2521
2942
  else if (reasoningUnsupportedModels.has(resolvedModel)) consola.debug(`Thinking: skipped — "${resolvedModel}" does not support reasoning`);
2522
2943
  }
2944
+ /**
2945
+ * Models known to require `/v1/responses` (and reject `/chat/completions`
2946
+ * with `unsupported_api_for_model`). Learned at runtime — once a model
2947
+ * hits the 400, all future requests for it skip the chat-completions
2948
+ * attempt and go straight to the Responses API.
2949
+ *
2950
+ * Cleared on process restart so Copilot routing changes self-heal.
2951
+ */
2952
+ const responsesApiOnlyModels = /* @__PURE__ */ new Set();
2523
2953
  const createChatCompletions = async (payload) => {
2524
2954
  const resolvedModel = modelRouter.resolveModel(payload.model);
2525
2955
  const routedPayload = resolvedModel !== payload.model ? {
@@ -2527,6 +2957,10 @@ const createChatCompletions = async (payload) => {
2527
2957
  model: resolvedModel
2528
2958
  } : payload;
2529
2959
  if (resolvedModel !== payload.model) consola.debug(`Model routed: ${payload.model} → ${resolvedModel}`);
2960
+ if (responsesApiOnlyModels.has(resolvedModel)) {
2961
+ consola.debug(`Model "${resolvedModel}" cached as Responses-only — using /v1/responses`);
2962
+ return createResponsesAsChat(routedPayload);
2963
+ }
2530
2964
  const thinkingPayload = injectThinking(routedPayload, resolvedModel);
2531
2965
  const wasInjected = thinkingPayload.reasoning_effort !== routedPayload.reasoning_effort || thinkingPayload.thinking_budget !== routedPayload.thinking_budget;
2532
2966
  logThinkingInjection(routedPayload, thinkingPayload, resolvedModel);
@@ -2542,6 +2976,11 @@ const createChatCompletions = async (payload) => {
2542
2976
  releaseSlot();
2543
2977
  return result;
2544
2978
  } catch (error) {
2979
+ const responsesRetry = handle400UnsupportedApiError(error, {
2980
+ resolvedModel,
2981
+ routedPayload
2982
+ }, releaseSlot);
2983
+ if (responsesRetry !== void 0) return responsesRetry;
2545
2984
  const maxTokensRetry = handle400MaxTokensError(error, {
2546
2985
  resolvedModel,
2547
2986
  routedPayload: thinkingPayload
@@ -2559,6 +2998,35 @@ const createChatCompletions = async (payload) => {
2559
2998
  }
2560
2999
  };
2561
3000
  /**
3001
+ * Handle Copilot's `unsupported_api_for_model` 400 — the model only
3002
+ * accepts /v1/responses, not /chat/completions (e.g. gpt-5.5). Mark the
3003
+ * model so future requests skip the failing attempt, then retry via the
3004
+ * Responses API translator.
3005
+ */
3006
+ function handle400UnsupportedApiError(error, ctx, releaseSlot) {
3007
+ if (!(error instanceof HTTPError) || error.response.status !== 400) return void 0;
3008
+ const errMsg = error.message;
3009
+ if (!errMsg.includes("unsupported_api_for_model") && !errMsg.includes("not accessible via the /chat/completions endpoint")) return void 0;
3010
+ responsesApiOnlyModels.add(ctx.resolvedModel);
3011
+ consola.debug(`Model "${ctx.resolvedModel}" requires /v1/responses — switching for future requests`);
3012
+ return (async () => {
3013
+ try {
3014
+ const result = await createResponsesAsChat(ctx.routedPayload);
3015
+ if (Symbol.asyncIterator in result) {
3016
+ const accountInfo = result.__accountInfo;
3017
+ const wrapped$1 = wrapGeneratorWithRelease(result, releaseSlot, accountInfo);
3018
+ wrapped$1.__accountInfo = accountInfo;
3019
+ return wrapped$1;
3020
+ }
3021
+ releaseSlot();
3022
+ return result;
3023
+ } catch (retryError) {
3024
+ releaseSlot();
3025
+ throw retryError;
3026
+ }
3027
+ })();
3028
+ }
3029
+ /**
2562
3030
  * Handle 400 errors caused by `max_tokens` being rejected — o-series and
2563
3031
  * GPT-5.x require `max_completion_tokens` instead. Learns at runtime:
2564
3032
  * adds the model to `maxCompletionTokensModels` and retries once with the
@@ -2757,6 +3225,29 @@ function isNonAccountError(errMsg) {
2757
3225
  * Returns the successful retry result, or null if the error was handled
2758
3226
  * without a successful retry.
2759
3227
  */
3228
+ /**
3229
+ * Handle a 429 from upstream: detect Copilot 5h Pro+ session-limit signature,
3230
+ * snapshot GitHub /rate_limit, and decide whether to mark the account or
3231
+ * propagate the error to the client (single-account guard).
3232
+ */
3233
+ async function handle429(error, account, hasOtherAccount) {
3234
+ let body;
3235
+ try {
3236
+ body = await error.response.clone().text();
3237
+ } catch {
3238
+ body = error.message || "";
3239
+ }
3240
+ const isCopilotSessionLimit = body.includes("user_global_rate_limited:pro_plus");
3241
+ if (isCopilotSessionLimit) accountManager.markCopilotSessionLimit(account.id, "user_global_rate_limited:pro_plus");
3242
+ accountManager.refreshGithubRateLimit(account);
3243
+ if (!hasOtherAccount) {
3244
+ consola.warn(`Account ${account.label}: 429 — only account, propagating to client without marking`);
3245
+ error.__nonAccountError = true;
3246
+ return null;
3247
+ }
3248
+ accountManager.markAccountStatus(account.id, "rate_limited", isCopilotSessionLimit ? "429 Copilot 5h session limit" : "429 Rate limited");
3249
+ return null;
3250
+ }
2760
3251
  async function handleMultiAccountHttpError(error, account, retryContext) {
2761
3252
  switch (error.response.status) {
2762
3253
  case 401:
@@ -2770,14 +3261,7 @@ async function handleMultiAccountHttpError(error, account, retryContext) {
2770
3261
  }
2771
3262
  accountManager.markAccountStatus(account.id, "banned", "403 Forbidden");
2772
3263
  return null;
2773
- case 429:
2774
- if (!retryContext.hasOtherAccount) {
2775
- consola.warn(`Account ${account.label}: 429 — only account, propagating to client without marking`);
2776
- error.__nonAccountError = true;
2777
- return null;
2778
- }
2779
- accountManager.markAccountStatus(account.id, "rate_limited", "429 Rate limited");
2780
- return null;
3264
+ case 429: return handle429(error, account, retryContext.hasOtherAccount);
2781
3265
  case 408:
2782
3266
  consola.warn(`Account ${account.label}: 408 request timeout (network issue, not rotating)`);
2783
3267
  error.__nonAccountError = true;
@@ -3502,12 +3986,12 @@ function getAnthropicThinkingBlocks(reasoningContent) {
3502
3986
  thinking: reasoningContent
3503
3987
  }];
3504
3988
  }
3505
- function getAnthropicTextBlocks(messageContent) {
3506
- if (typeof messageContent === "string") return [{
3989
+ function getAnthropicTextBlocks(messageContent$1) {
3990
+ if (typeof messageContent$1 === "string") return [{
3507
3991
  type: "text",
3508
- text: messageContent
3992
+ text: messageContent$1
3509
3993
  }];
3510
- if (Array.isArray(messageContent)) return messageContent.filter((part) => part.type === "text").map((part) => ({
3994
+ if (Array.isArray(messageContent$1)) return messageContent$1.filter((part) => part.type === "text").map((part) => ({
3511
3995
  type: "text",
3512
3996
  text: part.text
3513
3997
  }));
@@ -3790,7 +4274,7 @@ async function createWithMultiAccount(payload, options$1) {
3790
4274
  } catch (error) {
3791
4275
  lastError = error;
3792
4276
  if (error instanceof HTTPError) {
3793
- const action = handleAnthropicHttpError(error, account, triedAccountIds);
4277
+ const action = await handleAnthropicHttpError(error, account, triedAccountIds);
3794
4278
  if (action === "refresh401") return handleMultiAccount401(ctx, account);
3795
4279
  if (action === "throw") throw error;
3796
4280
  continue;
@@ -3822,11 +4306,21 @@ async function createWithMultiAccount(payload, options$1) {
3822
4306
  * would disable the proxy entirely, so 429 / 403 are propagated unchanged
3823
4307
  * to the client when no other account is available.
3824
4308
  */
3825
- function handleAnthropicHttpError(error, account, triedAccountIds) {
4309
+ async function handleAnthropicHttpError(error, account, triedAccountIds) {
3826
4310
  const status = error.response.status;
3827
4311
  if (status === 401) return "refresh401";
3828
4312
  if (status === 429 || status === 403) {
3829
4313
  const isRateLimit = status === 429;
4314
+ if (isRateLimit) {
4315
+ let body;
4316
+ try {
4317
+ body = await error.response.clone().text();
4318
+ } catch {
4319
+ body = error.message || "";
4320
+ }
4321
+ if (body.includes("user_global_rate_limited:pro_plus")) accountManager.markCopilotSessionLimit(account.id, "user_global_rate_limited:pro_plus");
4322
+ accountManager.refreshGithubRateLimit(account);
4323
+ }
3830
4324
  if (hasAnotherAnthropicAccountToTry(triedAccountIds)) {
3831
4325
  accountManager.markAccountStatus(account.id, isRateLimit ? "rate_limited" : "banned", isRateLimit ? "429 Rate limited" : "403 Forbidden");
3832
4326
  consola.warn(`Account ${account.label}: ${status} on /v1/messages, trying next account`);
@@ -4556,7 +5050,7 @@ async function validateGitHubToken(token) {
4556
5050
  state.githubToken = token;
4557
5051
  consola.info("Using provided GitHub token");
4558
5052
  try {
4559
- const { getGitHubUser } = await import("./get-user-Ct5NqLcM.js");
5053
+ const { getGitHubUser } = await import("./get-user-CZ4szDap.js");
4560
5054
  const user = await getGitHubUser();
4561
5055
  consola.info(`Logged in as ${user.login}`);
4562
5056
  } catch (error) {
@@ -4618,10 +5112,10 @@ async function runServer(options$1) {
4618
5112
  try {
4619
5113
  await setupCopilotToken();
4620
5114
  } catch (error) {
4621
- const { HTTPError: HTTPError$1 } = await import("./error-BaXXuCDb.js");
5115
+ const { HTTPError: HTTPError$1 } = await import("./error-C7zHD_5f.js");
4622
5116
  if (error instanceof HTTPError$1 && error.response.status === 401) {
4623
5117
  consola.error("Failed to get Copilot token - GitHub token may be invalid or Copilot access revoked");
4624
- const { clearGithubToken: clearGithubToken$1 } = await import("./token-DEcUuJp7.js");
5118
+ const { clearGithubToken: clearGithubToken$1 } = await import("./token-CRJg2Pnb.js");
4625
5119
  await clearGithubToken$1();
4626
5120
  consola.info("Please restart to re-authenticate");
4627
5121
  }