@nick3/copilot-api 1.4.5 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +14 -6
  2. package/dist/{account-DhQb2A6q.js → account-CipKmikF.js} +2 -2
  3. package/dist/{account-DhQb2A6q.js.map → account-CipKmikF.js.map} +1 -1
  4. package/dist/{accounts-manager-BsGuQhKM.js → accounts-manager-Cjrd_el_.js} +249 -83
  5. package/dist/accounts-manager-Cjrd_el_.js.map +1 -0
  6. package/dist/{accounts-registry-c7rs5Ed9.js → accounts-registry-CQYvRe65.js} +3 -3
  7. package/dist/{accounts-registry-c7rs5Ed9.js.map → accounts-registry-CQYvRe65.js.map} +1 -1
  8. package/dist/admin/assets/index-BFvCJZIK.js +57 -0
  9. package/dist/admin/assets/index-CsAeel_7.css +1 -0
  10. package/dist/admin/index.html +2 -2
  11. package/dist/{auth-BAEHgP-a.js → auth-1gAffrpI.js} +6 -6
  12. package/dist/{auth-BAEHgP-a.js.map → auth-1gAffrpI.js.map} +1 -1
  13. package/dist/{check-usage-Dbthad7V.js → check-usage-CsRu467P.js} +5 -5
  14. package/dist/{check-usage-Dbthad7V.js.map → check-usage-CsRu467P.js.map} +1 -1
  15. package/dist/{debug-hQJWwXtC.js → debug-BzR5ZQUk.js} +3 -3
  16. package/dist/{debug-hQJWwXtC.js.map → debug-BzR5ZQUk.js.map} +1 -1
  17. package/dist/{get-copilot-token-BySQCue6.js → get-copilot-token-BbpphnmV.js} +3 -3
  18. package/dist/{get-copilot-token-BySQCue6.js.map → get-copilot-token-BbpphnmV.js.map} +1 -1
  19. package/dist/main.js +6 -7
  20. package/dist/main.js.map +1 -1
  21. package/dist/{paths-DoT4SZ8f.js → paths-Cvzy-eLX.js} +2 -2
  22. package/dist/{paths-DoT4SZ8f.js.map → paths-Cvzy-eLX.js.map} +1 -1
  23. package/dist/{poll-access-token-CKc0_m42.js → poll-access-token-CGfLFzMq.js} +3 -3
  24. package/dist/{poll-access-token-CKc0_m42.js.map → poll-access-token-CGfLFzMq.js.map} +1 -1
  25. package/dist/{server-D_7gI9hx.js → server-DqwhClJ-.js} +187 -152
  26. package/dist/server-DqwhClJ-.js.map +1 -0
  27. package/dist/{start-DItwCYda.js → start-B1_Ols5Z.js} +16 -18
  28. package/dist/start-B1_Ols5Z.js.map +1 -0
  29. package/dist/{utils-BIK3ym34.js → utils-DY-jLXwO.js} +34 -10
  30. package/dist/utils-DY-jLXwO.js.map +1 -0
  31. package/package.json +3 -1
  32. package/dist/accounts-manager-BsGuQhKM.js.map +0 -1
  33. package/dist/admin/assets/index-Y2SvOXge.js +0 -57
  34. package/dist/admin/assets/index-geiCIixE.css +0 -1
  35. package/dist/server-D_7gI9hx.js.map +0 -1
  36. package/dist/start-DItwCYda.js.map +0 -1
  37. package/dist/utils-BIK3ym34.js.map +0 -1
package/README.md CHANGED
@@ -32,7 +32,7 @@ English | [中文](./README_CN.md)
32
32
  > [!IMPORTANT]
33
33
  > **Before using, please be aware of the following:**
34
34
  >
35
- > 1. **Claude Code model ID configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix, exceeding GitHub Copilot's context window limit too much may lead to being banned).
35
+ > 1. **Claude Code configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix; exceeding GitHub Copilot's context window limit by too much may lead to being banned). For an example Claude Code `settings.json`, see [Manual Configuration with `settings.json`](#manual-configuration-with-settingsjson).
36
36
  >
37
37
  > 2. **Recommend for Opencode:** When using with opencode, we recommend starting with the opencode OAuth app. This approach behaves identically to opencode's built-in GitHub Copilot provider with no Terms of Service risk:
38
38
  > ```sh
@@ -348,7 +348,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
348
348
  "gpt-5.4": "<built-in commentary prompt>"
349
349
  },
350
350
  "smallModel": "gpt-5-mini",
351
- "freeModelLoadBalancing": true,
351
+ "accountAffinity": true,
352
352
  "responsesApiContextManagementModels": [],
353
353
  "modelReasoningEfforts": {
354
354
  "gpt-5-mini": "low",
@@ -361,7 +361,8 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
361
361
  "compactUseSmallModel": true,
362
362
  "messageStartInputTokensFallback": false,
363
363
  "modelRefreshIntervalHours": 24,
364
- "useMessagesApi": true
364
+ "useMessagesApi": true,
365
+ "useResponsesApiWebSearch": true
365
366
  }
366
367
  ```
367
368
  - **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
@@ -377,7 +378,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
377
378
  - `topK` (optional): Default top_k value used when the request does not specify one.
378
379
  - **responsesApiContextManagementModels:** List of model IDs that should receive Responses API `context_management` compaction instructions. Use this when a model supports server-side context management and you want the proxy to keep only the latest compaction carrier on follow-up turns.
379
380
  - **smallModel:** Fallback model used for tool-less warmup messages, compact/background requests, and other short housekeeping turns (for example from Claude Code or OpenCode) to avoid spending premium requests; defaults to `gpt-5-mini`. If original names are blocked and this points to an aliased target, it resolves to the preferred alias.
380
- - **freeModelLoadBalancing:** Enable round-robin routing for free-model requests across multiple accounts. Defaults to `true`. Set to `false` to route free-model requests sequentially (same ordering strategy as premium models).
381
+ - **accountAffinity:** Enable sticky account routing based on session identity. When enabled, requests from the same session for the same model are routed to the account that last handled them successfully. Applies to both free and premium models. Defaults to `true`. Set to `false` to use sequential routing for all models.
381
382
  - **apiKey (deprecated):** Legacy single-key field kept for migration compatibility. Prefer `auth.apiKeys`. When `auth.apiKeys` is empty, the server falls back to `COPILOT_API_KEY` and then `apiKey`.
382
383
  - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
383
384
  - **modelAliases:** Map of `alias -> { target, allowOriginal? }` (legacy string values are still accepted). Alias keys are normalized (trim + lowercase) and must be non-empty; aliases cannot map to themselves (case-insensitive), and conflicting normalized aliases are rejected. `allowOriginal` overrides the global default per alias. If multiple aliases map to the same target, original names are allowed when any alias sets `allowOriginal: true` (allow-wins). Admin UI/API rejects blocked keys (`__proto__`, `constructor`, `prototype`). Aliases can be used in downstream requests.
@@ -388,6 +389,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
388
389
  - **messageStartInputTokensFallback:** When `true`, the Anthropic streaming translation layer estimates `message_start.input_tokens` when upstream stream events do not provide it. Defaults to `false`.
389
390
  - **modelRefreshIntervalHours:** Interval for refreshing account model lists in the background. Set to `0` to disable refresh. Defaults to `24`.
390
391
  - **useMessagesApi:** When `true` (default), Claude-family models that support Copilot's native `/v1/messages` endpoint may use the Messages API path. Set to `false` to skip the Messages API candidate and fall back to `/responses` (if supported) or `/chat/completions`.
392
+ - **useResponsesApiWebSearch:** When `true` (default), `/v1/responses` keeps tools with `type: "web_search"` and forwards them upstream. Set to `false` to strip them before the Copilot request is sent.
391
393
  - **anthropicApiKey:** Optional Anthropic API key used for accurate Claude token counting (see [Accurate Claude Token Counting](#accurate-claude-token-counting) below). Can also be set via the `ANTHROPIC_API_KEY` environment variable. If not set, token counting falls back to GPT tokenizer estimation.
392
394
 
393
395
  Edit this file to customize prompts or swap in your own fast model. If you edit it manually, restart the server (or call `GET /api/admin/config`) so the cached config is refreshed. Changes made through the Admin UI/API are validated, written to disk, and applied immediately; unknown keys are rejected.
@@ -607,7 +609,7 @@ OpenCode already has a direct GitHub Copilot provider. Use this section when you
607
609
  Start the proxy with the OpenCode OAuth app:
608
610
 
609
611
  ```sh
610
- COPILOT_API_OAUTH_APP=opencode npx @nick3/copilot-api@latest start
612
+ npx @nick3/copilot-api@latest --oauth-app=opencode start
611
613
  ```
612
614
 
613
615
  Then point OpenCode at the proxy with `@ai-sdk/anthropic`.
@@ -803,6 +805,12 @@ Here is an example `.claude/settings.json` file:
803
805
  }
804
806
  ```
805
807
 
808
+ - Replace `ANTHROPIC_MODEL`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, and `ANTHROPIC_DEFAULT_HAIKU_MODEL` according to your needs. We recommend `gpt-5-mini` for `ANTHROPIC_DEFAULT_HAIKU_MODEL`, because `gpt-5-mini` does not consume quota. `ANTHROPIC_DEFAULT_HAIKU_MODEL` is typically used for title generation, explore agents, etc.
809
+ - Setting `CLAUDE_CODE_ATTRIBUTION_HEADER` to `0` can prevent Claude Code from adding billing and version information to system prompts, thereby avoiding prompt cache invalidation.
810
+ - Turning off `CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION` can prevent quota from being consumed unnecessarily.
811
+ - If you want to disable Claude Code WebSearch, deny `WebSearch` in permissions or set `useResponsesApiWebSearch` to `false` in `config.json`. When enabled, `/v1/responses` can forward `web_search` tools upstream, but actual support still depends on the selected model and Copilot behavior.
812
+ - Please do not enable `ENABLE_TOOL_SEARCH`, because the current Claude Code uses the client tool search mode. In this mode, loading deferred tools requires an additional request each time and cache hit rates are affected, so it does not necessarily save tokens. Only server tool search mode can save tokens. This project also has compatibility issues with client tool search mode, which can cause errors when it is used.
813
+
806
814
  ### CLAUDE.md or AGENTS.md Recommended Content
807
815
 
808
816
  To add these reminders manually, include the following in `CLAUDE.md` for Claude Code, or `AGENTS.md` for opencode/codex:
@@ -895,5 +903,5 @@ bun run start
895
903
  - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
896
904
  - **Multi-account request routing**: Add multiple GitHub Copilot accounts using `auth add`.
897
905
  - **Premium models**: Accounts are tried in the order they were added. When an account's premium request quota (`remaining=0`) is exhausted (or insufficient for the selected model), the proxy automatically switches to the next eligible account.
898
- - **Free models**: By default, requests are distributed round-robin across all eligible accounts (including the temporary account created via `start --github-token ...`). Set `freeModelLoadBalancing=false` in `config.json` to disable this and route free-model requests sequentially.
906
+ - **Free models**: When `accountAffinity=true`, requests with the same affinity key and model stick to the account that last handled them successfully. Affinity misses fall back to the first available eligible account. Set `accountAffinity=false` in `config.json` to disable affinity and route all requests sequentially.
899
907
  - **Model classification**: Based on Copilot model metadata (`billing.is_premium` / `billing.multiplier`). Missing billing info or `billing.is_premium !== true` is treated as free.
@@ -13,5 +13,5 @@ function parseAccountType(value) {
13
13
  }
14
14
 
15
15
  //#endregion
16
- export { parseAccountType };
17
- //# sourceMappingURL=account-DhQb2A6q.js.map
16
+ export { parseAccountType as t };
17
+ //# sourceMappingURL=account-CipKmikF.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"account-DhQb2A6q.js","names":["ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType>"],"sources":["../src/lib/types/account.ts"],"sourcesContent":["import type { ModelsResponse } from \"~/services/copilot/get-models\"\n\n/**\n * Account type for GitHub Copilot subscription.\n */\nexport type AccountType = \"individual\" | \"business\" | \"enterprise\"\n\nexport const ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType> = [\n \"individual\",\n \"business\",\n \"enterprise\",\n]\n\nexport function isAccountType(value: unknown): value is AccountType {\n return (\n typeof value === \"string\"\n && (ACCOUNT_TYPE_VALUES as ReadonlyArray<string>).includes(value)\n )\n}\n\nexport function parseAccountType(value: unknown): AccountType {\n if (!isAccountType(value)) {\n throw new Error(\n `Invalid account type: ${String(value)}. Valid values: ${ACCOUNT_TYPE_VALUES.join(\n \", \",\n )}`,\n )\n }\n return value\n}\n\n/**\n * Metadata for a registered account, stored in the registry file.\n */\nexport interface AccountMeta {\n /** GitHub login (username) */\n id: string\n /** Account subscription type */\n accountType: AccountType\n /** Timestamp when the account was added */\n addedAt: number\n}\n\n/**\n * Registry file structure for storing account metadata.\n */\nexport interface AccountRegistry {\n /** Schema version for future migrations */\n version: 1\n /** Ordered list of accounts (order = priority) */\n accounts: Array<AccountMeta>\n}\n\n/**\n * Runtime state for an account, including tokens and quota information.\n */\nexport interface AccountRuntime extends AccountMeta {\n /** GitHub personal access token */\n githubToken: string\n /** Copilot API token (obtained from GitHub) */\n copilotToken?: string\n /** VS Code version for API headers */\n vsCodeVersion?: string\n /** Cached available models for this account */\n models?: ModelsResponse\n /** Timestamp of last models fetch */\n lastModelsFetch?: number\n /** Whether models refresh is in progress */\n 
isRefreshingModels?: boolean\n /** Promise for an in-flight models refresh */\n modelsRefreshPromise?: Promise<void>\n /** Total premium interactions quota entitlement */\n premiumEntitlement?: number\n /** Remaining premium interactions quota */\n premiumRemaining?: number\n /** Reserved premium interaction units for in-flight requests */\n premiumReserved?: number\n /** Internal reservation map for idempotent release */\n premiumReservations?: Map<symbol, number>\n /** Whether this account has unlimited quota */\n unlimited?: boolean\n /** Whether this account allows overage billing (enterprise feature) */\n overagePermitted?: boolean\n /** Timestamp of last quota fetch */\n lastQuotaFetch?: number\n /** Token refresh timer reference */\n refreshTimer?: ReturnType<typeof setInterval>\n /** Whether this account has failed (e.g., 401 error) */\n failed?: boolean\n /** Failure reason if failed */\n failureReason?: string\n /** Whether quota refresh is in progress (prevents concurrent refreshes) */\n isRefreshingQuota?: boolean\n /** Promise for an in-flight quota refresh (allows concurrent callers to await the same refresh) */\n quotaRefreshPromise?: Promise<void>\n}\n\n/**\n * Context required for making API calls on behalf of an account.\n * This is a subset of AccountRuntime used by service functions.\n */\nexport interface AccountContext {\n /** GitHub personal access token */\n githubToken: string\n /** Copilot API token */\n copilotToken?: string\n /** Account subscription type */\n accountType: AccountType\n /** VS Code version for API headers */\n vsCodeVersion?: string\n}\n"],"mappings":";AAOA,MAAaA,sBAAkD;CAC7D;CACA;CACA;CACD;AAED,SAAgB,cAAc,OAAsC;AAClE,QACE,OAAO,UAAU,YACb,oBAA8C,SAAS,MAAM;;AAIrE,SAAgB,iBAAiB,OAA6B;AAC5D,KAAI,CAAC,cAAc,MAAM,CACvB,OAAM,IAAI,MACR,yBAAyB,OAAO,MAAM,CAAC,kBAAkB,oBAAoB,KAC3E,KACD,GACF;AAEH,QAAO"}
1
+ {"version":3,"file":"account-CipKmikF.js","names":["ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType>"],"sources":["../src/lib/types/account.ts"],"sourcesContent":["import type { ModelsResponse } from \"~/services/copilot/get-models\"\n\n/**\n * Account type for GitHub Copilot subscription.\n */\nexport type AccountType = \"individual\" | \"business\" | \"enterprise\"\n\nexport const ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType> = [\n \"individual\",\n \"business\",\n \"enterprise\",\n]\n\nexport function isAccountType(value: unknown): value is AccountType {\n return (\n typeof value === \"string\"\n && (ACCOUNT_TYPE_VALUES as ReadonlyArray<string>).includes(value)\n )\n}\n\nexport function parseAccountType(value: unknown): AccountType {\n if (!isAccountType(value)) {\n throw new Error(\n `Invalid account type: ${String(value)}. Valid values: ${ACCOUNT_TYPE_VALUES.join(\n \", \",\n )}`,\n )\n }\n return value\n}\n\n/**\n * Metadata for a registered account, stored in the registry file.\n */\nexport interface AccountMeta {\n /** GitHub login (username) */\n id: string\n /** Account subscription type */\n accountType: AccountType\n /** Timestamp when the account was added */\n addedAt: number\n}\n\n/**\n * Registry file structure for storing account metadata.\n */\nexport interface AccountRegistry {\n /** Schema version for future migrations */\n version: 1\n /** Ordered list of accounts (order = priority) */\n accounts: Array<AccountMeta>\n}\n\n/**\n * Runtime state for an account, including tokens and quota information.\n */\nexport interface AccountRuntime extends AccountMeta {\n /** GitHub personal access token */\n githubToken: string\n /** Copilot API token (obtained from GitHub) */\n copilotToken?: string\n /** VS Code version for API headers */\n vsCodeVersion?: string\n /** Cached available models for this account */\n models?: ModelsResponse\n /** Timestamp of last models fetch */\n lastModelsFetch?: number\n /** Whether models refresh is in progress */\n 
isRefreshingModels?: boolean\n /** Promise for an in-flight models refresh */\n modelsRefreshPromise?: Promise<void>\n /** Total premium interactions quota entitlement */\n premiumEntitlement?: number\n /** Remaining premium interactions quota */\n premiumRemaining?: number\n /** Reserved premium interaction units for in-flight requests */\n premiumReserved?: number\n /** Internal reservation map for idempotent release */\n premiumReservations?: Map<symbol, number>\n /** Whether this account has unlimited quota */\n unlimited?: boolean\n /** Whether this account allows overage billing (enterprise feature) */\n overagePermitted?: boolean\n /** Timestamp of last quota fetch */\n lastQuotaFetch?: number\n /** Token refresh timer reference */\n refreshTimer?: ReturnType<typeof setInterval>\n /** Whether this account has failed (e.g., 401 error) */\n failed?: boolean\n /** Failure reason if failed */\n failureReason?: string\n /** Whether quota refresh is in progress (prevents concurrent refreshes) */\n isRefreshingQuota?: boolean\n /** Promise for an in-flight quota refresh (allows concurrent callers to await the same refresh) */\n quotaRefreshPromise?: Promise<void>\n}\n\n/**\n * Context required for making API calls on behalf of an account.\n * This is a subset of AccountRuntime used by service functions.\n */\nexport interface AccountContext {\n /** GitHub personal access token */\n githubToken: string\n /** Copilot API token */\n copilotToken?: string\n /** Account subscription type */\n accountType: AccountType\n /** VS Code version for API headers */\n vsCodeVersion?: string\n}\n"],"mappings":";AAOA,MAAaA,sBAAkD;CAC7D;CACA;CACA;CACD;AAED,SAAgB,cAAc,OAAsC;AAClE,QACE,OAAO,UAAU,YACb,oBAA8C,SAAS,MAAM;;AAIrE,SAAgB,iBAAiB,OAA6B;AAC5D,KAAI,CAAC,cAAc,MAAM,CACvB,OAAM,IAAI,MACR,yBAAyB,OAAO,MAAM,CAAC,kBAAkB,oBAAoB,KAC3E,KACD,GACF;AAEH,QAAO"}
@@ -1,7 +1,7 @@
1
- import { PATHS } from "./paths-DoT4SZ8f.js";
2
- import { addAccountToRegistry, hasLegacyToken, hasRegistry, listAccountsFromRegistry, loadAccountToken, readLegacyToken, saveAccountToken } from "./accounts-registry-c7rs5Ed9.js";
3
- import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-BIK3ym34.js";
4
- import { getCopilotToken } from "./get-copilot-token-BySQCue6.js";
1
+ import { t as PATHS } from "./paths-Cvzy-eLX.js";
2
+ import { a as loadAccountToken, i as listAccountsFromRegistry, l as saveAccountToken, n as hasLegacyToken, o as readLegacyToken, r as hasRegistry, t as addAccountToRegistry } from "./accounts-registry-CQYvRe65.js";
3
+ import { d as getModels, f as getGitHubUser, m as HTTPError, p as getCopilotUsage } from "./utils-DY-jLXwO.js";
4
+ import { t as getCopilotToken } from "./get-copilot-token-BbpphnmV.js";
5
5
  import consola from "consola";
6
6
  import fs from "node:fs";
7
7
 
@@ -42,7 +42,7 @@ const defaultConfig = {
42
42
  "gpt-5.4": gpt5CommentaryPrompt
43
43
  },
44
44
  smallModel: "gpt-5-mini",
45
- freeModelLoadBalancing: true,
45
+ accountAffinity: true,
46
46
  responsesApiContextManagementModels: [],
47
47
  modelReasoningEfforts: {
48
48
  "gpt-5-mini": "low",
@@ -55,7 +55,8 @@ const defaultConfig = {
55
55
  compactUseSmallModel: true,
56
56
  messageStartInputTokensFallback: false,
57
57
  modelRefreshIntervalHours: 24,
58
- useMessagesApi: true
58
+ useMessagesApi: true,
59
+ useResponsesApiWebSearch: true
59
60
  };
60
61
  let cachedConfig = null;
61
62
  function isPlainObject(value) {
@@ -128,8 +129,7 @@ function mergeDefaultConfig(config) {
128
129
  }
129
130
  function mergeDefaultAuth(config) {
130
131
  const authConfig = isPlainObject(config.auth) ? config.auth : void 0;
131
- const rawApiKeys = Array.isArray(authConfig?.apiKeys) ? authConfig.apiKeys : void 0;
132
- const nextAuth = { apiKeys: normalizeAuthApiKeys(rawApiKeys) };
132
+ const nextAuth = { apiKeys: normalizeAuthApiKeys(Array.isArray(authConfig?.apiKeys) ? authConfig.apiKeys : void 0) };
133
133
  if (authConfig && JSON.stringify(authConfig) === JSON.stringify(nextAuth)) return {
134
134
  mergedConfig: config,
135
135
  changed: false
@@ -142,15 +142,27 @@ function mergeDefaultAuth(config) {
142
142
  changed: true
143
143
  };
144
144
  }
145
- function mergeDefaultFreeModelLoadBalancing(config) {
146
- if (typeof config.freeModelLoadBalancing === "boolean") return {
145
+ function mergeDefaultAccountAffinity(config) {
146
+ const raw = config;
147
+ const hasOld = typeof raw.freeModelLoadBalancing === "boolean";
148
+ const hasNew = typeof config.accountAffinity === "boolean";
149
+ if (hasOld) {
150
+ const next = { ...config };
151
+ if (!hasNew) next.accountAffinity = raw.freeModelLoadBalancing;
152
+ delete next.freeModelLoadBalancing;
153
+ return {
154
+ mergedConfig: next,
155
+ changed: true
156
+ };
157
+ }
158
+ if (hasNew) return {
147
159
  mergedConfig: config,
148
160
  changed: false
149
161
  };
150
162
  return {
151
163
  mergedConfig: {
152
164
  ...config,
153
- freeModelLoadBalancing: defaultConfig.freeModelLoadBalancing ?? true
165
+ accountAffinity: defaultConfig.accountAffinity ?? true
154
166
  },
155
167
  changed: true
156
168
  };
@@ -181,11 +193,10 @@ function applyConfigMerges(config, mergeFns) {
181
193
  });
182
194
  }
183
195
  function mergeConfigWithDefaults() {
184
- const config = readConfigFromDisk();
185
- const { mergedConfig, changed } = applyConfigMerges(config, [
196
+ const { mergedConfig, changed } = applyConfigMerges(readConfigFromDisk(), [
186
197
  mergeDefaultAuth,
187
198
  mergeDefaultConfig,
188
- mergeDefaultFreeModelLoadBalancing,
199
+ mergeDefaultAccountAffinity,
189
200
  mergeDefaultModelRefreshInterval
190
201
  ]);
191
202
  if (changed) try {
@@ -271,8 +282,7 @@ function isOriginalModelNameAllowedForTarget(modelId) {
271
282
  return !getAliasTargetSet().has(normalized);
272
283
  }
273
284
  function getPreferredAliasForTarget(modelId) {
274
- const aliases = getModelAliases();
275
- return getAliasKeysForTarget(modelId, aliases)[0] ?? null;
285
+ return getAliasKeysForTarget(modelId, getModelAliases())[0] ?? null;
276
286
  }
277
287
  function getAliasKeysForTarget(target, aliases) {
278
288
  const normalizedTarget = target.toLowerCase();
@@ -304,12 +314,11 @@ function getSmallModel() {
304
314
  if (isOriginalModelNameAllowedForTarget(model)) return model;
305
315
  return getPreferredAliasForTarget(model) ?? model;
306
316
  }
307
- function isFreeModelLoadBalancingEnabled() {
308
- return getConfig().freeModelLoadBalancing ?? true;
317
+ function isAccountAffinityEnabled() {
318
+ return getConfig().accountAffinity ?? true;
309
319
  }
310
320
  function getModelRefreshIntervalHours() {
311
- const config = getConfig();
312
- return normalizeModelRefreshIntervalHours(config.modelRefreshIntervalHours) ?? defaultConfig.modelRefreshIntervalHours ?? 24;
321
+ return normalizeModelRefreshIntervalHours(getConfig().modelRefreshIntervalHours) ?? defaultConfig.modelRefreshIntervalHours ?? 24;
313
322
  }
314
323
  function getModelRefreshIntervalMs() {
315
324
  const hours = getModelRefreshIntervalHours();
@@ -372,6 +381,93 @@ function isMessagesApiEnabled() {
372
381
  function getAnthropicApiKey() {
373
382
  return getConfig().anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? void 0;
374
383
  }
/**
 * Report whether `web_search` tools are enabled for the Responses API.
 *
 * Reads `useResponsesApiWebSearch` from the current config; a missing
 * (null/undefined) value counts as enabled.
 *
 * @returns {boolean} The configured flag, or `true` when it is not set.
 */
function isResponsesApiWebSearchEnabled() {
  const configuredFlag = getConfig().useResponsesApiWebSearch;
  return configuredFlag ?? true;
}
/**
 * Read the Claude token multiplier from the current config.
 *
 * @returns {number} `claudeTokenMultiplier` from the config, or `1.15` when
 *   the value is null/undefined.
 */
function getClaudeTokenMultiplier() {
  const configuredMultiplier = getConfig().claudeTokenMultiplier;
  return configuredMultiplier ?? 1.15;
}
390
+
391
+ //#endregion
392
+ //#region src/lib/account-affinity.ts
const DEFAULT_MAX_ENTRIES = 1e4;
const DEFAULT_TTL_MS = 3600 * 1e3;
/**
 * In-memory LRU cache with TTL for account affinity mappings.
 *
 * Each entry maps a cache key to `{ accountId, expiresAt }`. Recency is
 * tracked through Map insertion order: an entry that is written with `set()`
 * or read successfully with `get()` is deleted and re-inserted, so it moves
 * to the "newest" end. When the cache is full, the key at the "oldest" end is
 * evicted first.
 */
var AccountAffinityCache = class {
  cache = /* @__PURE__ */ new Map();
  maxEntries;
  ttlMs;
  /**
   * @param {number} [maxEntries] Maximum number of entries kept before the
   *   oldest ones are evicted on insert.
   * @param {number} [ttlMs] Lifetime of an entry in milliseconds, counted
   *   from the last `set()` for that key.
   */
  constructor(maxEntries = DEFAULT_MAX_ENTRIES, ttlMs = DEFAULT_TTL_MS) {
    this.maxEntries = maxEntries;
    this.ttlMs = ttlMs;
  }
  /**
   * Look up the preferred account ID for a cache key.
   *
   * An expired entry is removed as a side effect. A live entry is moved to
   * the newest position so that keys that are still being read are not
   * evicted ahead of idle ones; its expiry time is left unchanged.
   *
   * @param {string} key Cache key.
   * @returns {string | undefined} The stored account ID, or undefined if the
   *   key is absent or expired.
   */
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) return;
    if (Date.now() >= entry.expiresAt) {
      this.cache.delete(key);
      return;
    }
    // Promote on read: re-inserting moves the key to the end of the Map's
    // iteration order, which is what eviction in set() treats as "newest".
    this.cache.delete(key);
    this.cache.set(key, entry);
    return entry.accountId;
  }
  /**
   * Record an account mapping. Refreshes the TTL and moves the entry to the
   * newest position, evicting the oldest entries first if the cache is full.
   *
   * @param {string} key Cache key.
   * @param {string} accountId Account ID to associate with the key.
   */
  set(key, accountId) {
    // Drop any existing entry first so an update does not count towards the
    // size check below and ends up at the newest position.
    this.cache.delete(key);
    while (this.cache.size >= this.maxEntries) {
      const oldest = this.cache.keys().next();
      // An empty cache has nothing to evict (reachable when maxEntries <= 0).
      if (oldest.done) break;
      this.cache.delete(oldest.value);
    }
    this.cache.set(key, {
      accountId,
      expiresAt: Date.now() + this.ttlMs
    });
  }
  /**
   * Remove a specific entry.
   *
   * @param {string} key Cache key.
   * @returns {boolean} True if an entry existed and was removed.
   */
  delete(key) {
    return this.cache.delete(key);
  }
  /** Remove all entries. */
  clear() {
    this.cache.clear();
  }
  /** Current number of entries (including potentially expired ones). */
  get size() {
    return this.cache.size;
  }
};
/**
 * Derive the affinity key from a request context.
 *
 * Reads `context.requestId`, trims surrounding whitespace, and treats a
 * missing or blank value as "no key".
 *
 * @param {{ requestId?: string }} context Request context.
 * @returns {string | undefined} The trimmed request ID, or undefined when it
 *   is absent or empty after trimming.
 */
function extractAffinityKey(context) {
  const trimmedId = context.requestId?.trim();
  if (trimmedId) return trimmedId;
  return undefined;
}
/**
 * Compose the full cache key from an affinity key and a model ID.
 *
 * Including the model ID keeps entries for different models separate, so the
 * same affinity key can map to a different account per model.
 *
 * @param {string} affinityKey Affinity key for the request.
 * @param {string} modelId Model identifier.
 * @returns {string} `affinityKey` and `modelId` joined by a colon.
 */
function buildAffinityCacheKey(affinityKey, modelId) {
  const separator = ":";
  return `${affinityKey}${separator}${modelId}`;
}
/**
 * Resolve an affinity account ID to a usable account object.
 *
 * Only the first account whose `id` matches is considered. It is returned
 * when its `failed` flag is falsy; otherwise the result is undefined.
 *
 * @param {string} accountId Account ID taken from the affinity cache.
 * @param {Array<{ id: string, failed?: boolean }>} accounts Accounts to search.
 * @returns {object | undefined} The matching, non-failed account, or
 *   undefined if there is no match or the match is marked failed.
 */
function isAffinityAccountUsable(accountId, accounts) {
  for (const candidate of accounts) {
    if (candidate.id !== accountId) continue;
    // Stop at the first ID match, even if it turns out to be failed.
    return candidate.failed ? undefined : candidate;
  }
  return undefined;
}
 
376
472
  //#endregion
377
473
  //#region src/lib/accounts-manager-auth.ts
@@ -486,8 +582,9 @@ var AccountsManager = class {
486
582
  accountOrder = [];
487
583
  temporaryAccount;
488
584
  vsCodeVersion;
489
- freeModelCursor = 0;
490
- freeModelLoadBalancingEnabled = true;
585
+ accountAffinityEnabled = true;
586
+ affinityCache = new AccountAffinityCache();
587
+ loadBalanceCursor = 0;
491
588
  quotaRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
492
589
  modelsRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
493
590
  tokenRefreshEnabledAccounts = /* @__PURE__ */ new WeakSet();
@@ -529,8 +626,9 @@ var AccountsManager = class {
529
626
  consola.info(`Loaded ${this.accounts.size} account(s)`);
530
627
  this.startRegistryWatcher();
531
628
  }
532
- setFreeModelLoadBalancingEnabled(enabled) {
533
- this.freeModelLoadBalancingEnabled = enabled;
629
+ setAccountAffinityEnabled(enabled) {
630
+ this.accountAffinityEnabled = enabled;
631
+ if (!enabled) this.affinityCache.clear();
534
632
  }
535
633
  setModelsRefreshIntervalMs(intervalMs) {
536
634
  this.modelsRefreshIntervalMs = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : 0;
@@ -545,8 +643,7 @@ var AccountsManager = class {
545
643
  async runTokenRefreshTick(account, snapshot, refreshInSeconds) {
546
644
  if (!this.shouldContinueTokenRefresh(account, snapshot)) return;
547
645
  try {
548
- const ctx = toAccountContextFromSnapshot(account, snapshot);
549
- const { token, refresh_in } = await getCopilotToken(ctx);
646
+ const { token, refresh_in } = await getCopilotToken(toAccountContextFromSnapshot(account, snapshot));
550
647
  if (!this.shouldContinueTokenRefresh(account, snapshot)) return;
551
648
  if (!applyTokenRefreshSuccessIfCurrent(account, snapshot, token)) return;
552
649
  consola.debug(`Refreshed token for account ${account.id}`);
@@ -570,13 +667,10 @@ var AccountsManager = class {
570
667
  async initializeAccount(account) {
571
668
  const snapshot = takeAuthSnapshot(account);
572
669
  try {
573
- const tokenCtx = toAccountContextFromSnapshot(account, snapshot);
574
- const { token, refresh_in } = await getCopilotToken(tokenCtx);
670
+ const { token, refresh_in } = await getCopilotToken(toAccountContextFromSnapshot(account, snapshot));
575
671
  if (!applyCopilotTokenIfCurrent(account, snapshot, token)) return;
576
672
  this.startTokenRefresh(account, refresh_in);
577
- const modelsCtx = toAccountContextFromSnapshot(account, snapshot, token);
578
- const models = await getModels(modelsCtx);
579
- if (!applyModelsIfCurrent(account, snapshot, models)) return;
673
+ if (!applyModelsIfCurrent(account, snapshot, await getModels(toAccountContextFromSnapshot(account, snapshot, token)))) return;
580
674
  account.lastModelsFetch = Date.now();
581
675
  await this.refreshQuota(account);
582
676
  consola.debug(`Account ${account.id} initialized`);
@@ -663,8 +757,7 @@ var AccountsManager = class {
663
757
  }
664
758
  const snapshot = takeAuthSnapshot(account);
665
759
  if (account.modelsRefreshPromise) {
666
- const existingSnapshot = this.modelsRefreshSnapshotByAccount.get(account);
667
- if (isSameAuthSnapshot(existingSnapshot, snapshot)) {
760
+ if (isSameAuthSnapshot(this.modelsRefreshSnapshotByAccount.get(account), snapshot)) {
668
761
  await account.modelsRefreshPromise;
669
762
  return;
670
763
  }
@@ -673,8 +766,7 @@ var AccountsManager = class {
673
766
  const ctx = toAccountContextFromSnapshot(account, snapshot, account.copilotToken);
674
767
  const promise = (async () => {
675
768
  try {
676
- const models = await getModels(ctx);
677
- if (applyModelsIfCurrent(account, snapshot, models)) account.lastModelsFetch = Date.now();
769
+ if (applyModelsIfCurrent(account, snapshot, await getModels(ctx))) account.lastModelsFetch = Date.now();
678
770
  } catch (error) {
679
771
  if (error instanceof HTTPError && error.response.status === 401) {
680
772
  applyUnauthorizedIfCurrent(account, snapshot, "Unauthorized (401)");
@@ -704,8 +796,7 @@ var AccountsManager = class {
704
796
  async refreshQuota(account) {
705
797
  const snapshot = takeAuthSnapshot(account);
706
798
  if (account.quotaRefreshPromise) {
707
- const existingSnapshot = this.quotaRefreshSnapshotByAccount.get(account);
708
- if (isSameAuthSnapshot(existingSnapshot, snapshot)) {
799
+ if (isSameAuthSnapshot(this.quotaRefreshSnapshotByAccount.get(account), snapshot)) {
709
800
  await account.quotaRefreshPromise;
710
801
  return;
711
802
  }
@@ -770,38 +861,6 @@ var AccountsManager = class {
770
861
  }
771
862
  return null;
772
863
  }
773
- selectFreeAccountForRequest(orderedAccounts, candidates) {
774
- const count = orderedAccounts.length;
775
- const start = this.freeModelCursor % count;
776
- let supportedCandidateFound = false;
777
- for (let i = 0; i < count; i++) {
778
- const idx = (start + i) % count;
779
- const account = orderedAccounts[idx];
780
- if (this.isAccountFailed(account)) continue;
781
- const supported = this.pickSupportedCandidate(account, candidates);
782
- if (!supported) continue;
783
- supportedCandidateFound = true;
784
- const { candidate, model } = supported;
785
- const costUnits = getCostUnits(model);
786
- if (costUnits > 0) continue;
787
- this.freeModelCursor = (idx + 1) % count;
788
- return {
789
- ok: true,
790
- account,
791
- selectedModel: model,
792
- endpoint: candidate.endpoint,
793
- costUnits
794
- };
795
- }
796
- if (!supportedCandidateFound) return {
797
- ok: false,
798
- reason: "MODEL_NOT_SUPPORTED"
799
- };
800
- return {
801
- ok: false,
802
- reason: "NO_QUOTA"
803
- };
804
- }
805
864
  async selectAccountForCandidates(orderedAccounts, candidates) {
806
865
  if (orderedAccounts.length === 0) return {
807
866
  ok: false,
@@ -816,16 +875,13 @@ var AccountsManager = class {
816
875
  supportedCandidateFound = true;
817
876
  const { candidate, model } = supported;
818
877
  const costUnits = getCostUnits(model);
819
- if (costUnits <= 0) {
820
- if (this.freeModelLoadBalancingEnabled) return this.selectFreeAccountForRequest(orderedAccounts, candidates);
821
- return {
822
- ok: true,
823
- account,
824
- selectedModel: model,
825
- endpoint: candidate.endpoint,
826
- costUnits
827
- };
828
- }
878
+ if (costUnits <= 0) return {
879
+ ok: true,
880
+ account,
881
+ selectedModel: model,
882
+ endpoint: candidate.endpoint,
883
+ costUnits
884
+ };
829
885
  if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
830
886
  if (this.isAccountFailed(account)) continue;
831
887
  if (account.unlimited) return {
@@ -865,12 +921,120 @@ var AccountsManager = class {
865
921
  };
866
922
  }
867
923
  /**
924
+ * Try to use a preferred (affinity) account for the request.
925
+ * Returns a successful selection if the account is usable; null otherwise.
926
+ */
927
+ async tryAffinityAccount(preferredAccountId, orderedAccounts, candidates) {
928
+ const account = isAffinityAccountUsable(preferredAccountId, orderedAccounts);
929
+ if (!account) return null;
930
+ const supported = this.pickSupportedCandidate(account, candidates) ?? this.pickAliasFallbackCandidate(account, candidates);
931
+ if (!supported) return null;
932
+ return this.validateAffinityQuota(account, supported);
933
+ }
934
+ /**
935
+ * Resolve model aliases and try to pick a supported candidate.
936
+ * Returns null if no alias differs or the account doesn't support the alias.
937
+ */
938
+ pickAliasFallbackCandidate(account, candidates) {
939
+ const aliasCandidates = candidates.map((candidate) => {
940
+ const modelId = resolveModelAlias(candidate.modelId);
941
+ if (modelId === candidate.modelId) return candidate;
942
+ return {
943
+ ...candidate,
944
+ modelId
945
+ };
946
+ });
947
+ if (!aliasCandidates.some((candidate, index) => candidate.modelId !== candidates[index].modelId)) return null;
948
+ return this.pickSupportedCandidate(account, aliasCandidates);
949
+ }
950
+ /**
951
+ * Validate quota for an affinity candidate. Free models pass immediately;
952
+ * premium models go through quota refresh / reservation.
953
+ */
954
+ async validateAffinityQuota(account, supported) {
955
+ const { candidate, model } = supported;
956
+ const costUnits = getCostUnits(model);
957
+ if (costUnits <= 0) return {
958
+ ok: true,
959
+ account,
960
+ selectedModel: model,
961
+ endpoint: candidate.endpoint,
962
+ costUnits
963
+ };
964
+ if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
965
+ if (this.isAccountFailed(account)) return null;
966
+ if (account.unlimited) return {
967
+ ok: true,
968
+ account,
969
+ selectedModel: model,
970
+ endpoint: candidate.endpoint,
971
+ costUnits
972
+ };
973
+ const effectiveRemaining = getEffectivePremiumRemaining(account);
974
+ if (effectiveRemaining !== void 0 && effectiveRemaining < costUnits) return null;
975
+ const reservation = reservePremiumUnits(account, costUnits);
976
+ return {
977
+ ok: true,
978
+ account,
979
+ selectedModel: model,
980
+ endpoint: candidate.endpoint,
981
+ costUnits,
982
+ reservation
983
+ };
984
+ }
985
+ /**
868
986
  * Select an available account for a specific request (model + endpoint).
987
+ * When account affinity is enabled, routes to the previously successful account
988
+ * for the same affinity key + model combination.
869
989
  * Uses reservation to avoid oversubscribing premium quota under concurrency.
870
990
  */
871
- async selectAccountForRequest(candidates) {
991
+ async selectAccountForRequest(candidates, affinityContext) {
872
992
  if (candidates.length === 0) throw new Error("selectAccountForRequest requires at least one candidate");
873
993
  const orderedAccounts = [...this.temporaryAccount ? [this.temporaryAccount] : [], ...this.accountOrder.map((id) => this.accounts.get(id)).filter((account) => account !== void 0)];
994
+ const affinityKey = this.accountAffinityEnabled && affinityContext ? extractAffinityKey(affinityContext) : void 0;
995
+ const modelKey = candidates[0].modelId;
996
+ const cacheKey = affinityKey ? buildAffinityCacheKey(affinityKey, modelKey) : void 0;
997
+ if (cacheKey) {
998
+ const preferredId = this.affinityCache.get(cacheKey);
999
+ if (preferredId) {
1000
+ const affinityResult = await this.tryAffinityAccount(preferredId, orderedAccounts, candidates);
1001
+ if (affinityResult) {
1002
+ affinityResult.affinityHit = true;
1003
+ affinityResult.affinityCacheKey = cacheKey;
1004
+ affinityResult.confirmAffinity = () => {
1005
+ if (!this.accountAffinityEnabled) return;
1006
+ this.affinityCache.set(cacheKey, affinityResult.account.id);
1007
+ };
1008
+ return affinityResult;
1009
+ }
1010
+ }
1011
+ }
1012
+ const accountsForSelection = this.accountAffinityEnabled && orderedAccounts.length > 1 ? this.rotateAccounts(orderedAccounts) : orderedAccounts;
1013
+ const result = await this.selectWithAliasFallback(accountsForSelection, candidates);
1014
+ if (result.ok) this.loadBalanceCursor++;
1015
+ if (result.ok && cacheKey) {
1016
+ const successResult = result;
1017
+ successResult.confirmAffinity = () => {
1018
+ if (!this.accountAffinityEnabled) return;
1019
+ this.affinityCache.set(cacheKey, successResult.account.id);
1020
+ };
1021
+ }
1022
+ return result;
1023
+ }
1024
+ /**
1025
+ * Rotate the accounts array by the current load-balance cursor for round-robin distribution.
1026
+ * This ensures cache-miss requests are spread across accounts instead of always hitting the first.
1027
+ */
1028
+ rotateAccounts(accounts) {
1029
+ const start = this.loadBalanceCursor % accounts.length;
1030
+ if (start === 0) return accounts;
1031
+ return [...accounts.slice(start), ...accounts.slice(0, start)];
1032
+ }
1033
+ /**
1034
+ * Normal account selection with alias fallback.
1035
+ * Extracted to keep selectAccountForRequest readable after adding affinity logic.
1036
+ */
1037
+ async selectWithAliasFallback(orderedAccounts, candidates) {
874
1038
  const primary = await this.selectAccountForCandidates(orderedAccounts, candidates);
875
1039
  if (primary.ok || primary.reason !== "MODEL_NOT_SUPPORTED") return primary;
876
1040
  const aliasCandidates = candidates.map((candidate) => {
@@ -1064,7 +1228,7 @@ var AccountsManager = class {
1064
1228
  for (const meta of newMetas) if (!currentIds.has(meta.id)) await this.addNewAccount(meta, added);
1065
1229
  await this.reinitializeUpdatedAccounts(newMetas, currentIds, updated);
1066
1230
  this.accountOrder = newMetas.map((m) => m.id).filter((id) => this.accounts.has(id));
1067
- this.freeModelCursor = 0;
1231
+ this.loadBalanceCursor = 0;
1068
1232
  this.logRegistryReloadChanges(added, removed, updated);
1069
1233
  } catch (error) {
1070
1234
  consola.error("Failed to reload registry:", error);
@@ -1169,6 +1333,8 @@ var AccountsManager = class {
1169
1333
  this.stopRegistryWatcher();
1170
1334
  this.stopAllTokenRefresh();
1171
1335
  this.stopModelsRefresh();
1336
+ this.affinityCache.clear();
1337
+ this.loadBalanceCursor = 0;
1172
1338
  this.accounts.clear();
1173
1339
  this.accountOrder = [];
1174
1340
  this.temporaryAccount = void 0;
@@ -1178,5 +1344,5 @@ var AccountsManager = class {
1178
1344
  const accountsManager = new AccountsManager();
1179
1345
 
1180
1346
  //#endregion
1181
- export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isForceAgentEnabled, isFreeModelLoadBalancingEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, mergeConfigWithDefaults, shouldCompactUseSmallModel };
1182
- //# sourceMappingURL=accounts-manager-BsGuQhKM.js.map
1347
+ export { isMessagesApiEnabled as _, getClaudeTokenMultiplier as a, mergeConfigWithDefaults as b, getModelAliases as c, getProviderConfig as d, getReasoningEffortForModel as f, isMessageStartInputTokensFallbackEnabled as g, isForceAgentEnabled as h, getAnthropicApiKey as i, getModelAliasesInfo as l, isAccountAffinityEnabled as m, PROVIDER_TYPE_ANTHROPIC as n, getConfig as o, getSmallModel as p, getAliasTargetSet as r, getExtraPromptForModel as s, accountsManager as t, getModelRefreshIntervalMs as u, isResponsesApiContextManagementModel as v, shouldCompactUseSmallModel as x, isResponsesApiWebSearchEnabled as y };
1348
+ //# sourceMappingURL=accounts-manager-Cjrd_el_.js.map