npm - @nick3/copilot-api - Versions diffs - 1.4.5 → 1.4.9 - Mend

@nick3/copilot-api 1.4.5 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md CHANGED Viewed

@@ -32,7 +32,7 @@ English | [中文](./README_CN.md)
 > [!IMPORTANT]
 > **Before using, please be aware of the following:**
 >
-> 1. **Claude Code model ID configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix, exceeding GitHub Copilot's context window limit too much may lead to being banned).
+> 1. **Claude Code configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix; exceeding GitHub Copilot's context window limit by too much may lead to being banned). For an example Claude Code `settings.json`, see [Manual Configuration with `settings.json`](#manual-configuration-with-settingsjson).
 >
 > 2. **Recommend for Opencode:** When using with opencode, we recommend starting with the opencode OAuth app. This approach behaves identically to opencode's built-in GitHub Copilot provider with no Terms of Service risk:
 >    ```sh
@@ -348,7 +348,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
       "gpt-5.4": "<built-in commentary prompt>"
     },
     "smallModel": "gpt-5-mini",
-    "freeModelLoadBalancing": true,
+    "accountAffinity": true,
     "responsesApiContextManagementModels": [],
     "modelReasoningEfforts": {
       "gpt-5-mini": "low",
@@ -361,7 +361,8 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
     "compactUseSmallModel": true,
     "messageStartInputTokensFallback": false,
     "modelRefreshIntervalHours": 24,
-    "useMessagesApi": true
+    "useMessagesApi": true,
+    "useResponsesApiWebSearch": true
   }
   ```
 - **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
@@ -377,7 +378,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
     - `topK` (optional): Default top_k value used when the request does not specify one.
 - **responsesApiContextManagementModels:** List of model IDs that should receive Responses API `context_management` compaction instructions. Use this when a model supports server-side context management and you want the proxy to keep only the latest compaction carrier on follow-up turns.
 - **smallModel:** Fallback model used for tool-less warmup messages, compact/background requests, and other short housekeeping turns (for example from Claude Code or OpenCode) to avoid spending premium requests; defaults to `gpt-5-mini`. If original names are blocked and this points to an aliased target, it resolves to the preferred alias.
-- **freeModelLoadBalancing:** Enable round-robin routing for free-model requests across multiple accounts. Defaults to `true`. Set to `false` to route free-model requests sequentially (same ordering strategy as premium models).
+- **accountAffinity:** Enable sticky account routing based on session identity. When enabled, requests from the same session for the same model are routed to the account that last handled them successfully. Applies to both free and premium models. Defaults to `true`. Set to `false` to use sequential routing for all models.
 - **apiKey (deprecated):** Legacy single-key field kept for migration compatibility. Prefer `auth.apiKeys`. When `auth.apiKeys` is empty, the server falls back to `COPILOT_API_KEY` and then `apiKey`.
 - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
 - **modelAliases:** Map of `alias -> { target, allowOriginal? }` (legacy string values are still accepted). Alias keys are normalized (trim + lowercase) and must be non-empty; aliases cannot map to themselves (case-insensitive), and conflicting normalized aliases are rejected. `allowOriginal` overrides the global default per alias. If multiple aliases map to the same target, original names are allowed when any alias sets `allowOriginal: true` (allow-wins). Admin UI/API rejects blocked keys (`__proto__`, `constructor`, `prototype`). Aliases can be used in downstream requests.
@@ -388,6 +389,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
 - **messageStartInputTokensFallback:** When `true`, the Anthropic streaming translation layer estimates `message_start.input_tokens` when upstream stream events do not provide it. Defaults to `false`.
 - **modelRefreshIntervalHours:** Interval for refreshing account model lists in the background. Set to `0` to disable refresh. Defaults to `24`.
 - **useMessagesApi:** When `true` (default), Claude-family models that support Copilot's native `/v1/messages` endpoint may use the Messages API path. Set to `false` to skip the Messages API candidate and fall back to `/responses` (if supported) or `/chat/completions`.
+- **useResponsesApiWebSearch:** When `true` (default), `/v1/responses` keeps tools with `type: "web_search"` and forwards them upstream. Set to `false` to strip them before the Copilot request is sent.
 - **anthropicApiKey:** Optional Anthropic API key used for accurate Claude token counting (see [Accurate Claude Token Counting](#accurate-claude-token-counting) below). Can also be set via the `ANTHROPIC_API_KEY` environment variable. If not set, token counting falls back to GPT tokenizer estimation.
 Edit this file to customize prompts or swap in your own fast model. If you edit it manually, restart the server (or call `GET /api/admin/config`) so the cached config is refreshed. Changes made through the Admin UI/API are validated, written to disk, and applied immediately; unknown keys are rejected.
@@ -607,7 +609,7 @@ OpenCode already has a direct GitHub Copilot provider. Use this section when you
 Start the proxy with the OpenCode OAuth app:
 ```sh
-COPILOT_API_OAUTH_APP=opencode npx @nick3/copilot-api@latest start
+npx @nick3/copilot-api@latest --oauth-app=opencode start
 ```
 Then point OpenCode at the proxy with `@ai-sdk/anthropic`.
@@ -803,6 +805,12 @@ Here is an example `.claude/settings.json` file:
 }
 ```
+- Replace `ANTHROPIC_MODEL`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, and `ANTHROPIC_DEFAULT_HAIKU_MODEL` according to your needs. It is recommended to use gpt-5-mini for ANTHROPIC_DEFAULT_HAIKU_MODEL, as gpt-5-mini does not consume quota. ANTHROPIC_DEFAULT_HAIKU_MODEL is typically used for title generation, explore agents, etc.
+- Setting `CLAUDE_CODE_ATTRIBUTION_HEADER` to `0` can prevent Claude Code from adding billing and version information to system prompts, thereby avoiding prompt cache invalidation.
+- Turning off CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION can prevent quota from being consumed unnecessarily.
+- If you want to disable Claude Code WebSearch, deny `WebSearch` in permissions or set `useResponsesApiWebSearch` to `false` in `config.json`. When enabled, `/v1/responses` can forward `web_search` tools upstream, but actual support still depends on the selected model and Copilot behavior.
+- Please do not enable `ENABLE_TOOL_SEARCH`, as the current Claude Code uses the client tool search mode. In this mode, loading deferred tools requires an additional request each time and lowers cache hit rates, so it does not necessarily save tokens. Only server tool search mode can save tokens. This project also has compatibility issues with client tool search mode, which can cause errors when it is used.
 ### CLAUDE.md or AGENTS.md Recommended Content
 To add these reminders manually, include the following in `CLAUDE.md` for Claude Code, or `AGENTS.md` for opencode/codex:
@@ -895,5 +903,5 @@ bun run start
 - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
 - **Multi-account request routing**: Add multiple GitHub Copilot accounts using `auth add`.
   - **Premium models**: Accounts are tried in the order they were added. When an account's premium request quota (`remaining=0`) is exhausted (or insufficient for the selected model), the proxy automatically switches to the next eligible account.
-  - **Free models**: By default, requests are distributed round-robin across all eligible accounts (including the temporary account created via `start --github-token ...`). Set `freeModelLoadBalancing=false` in `config.json` to disable this and route free-model requests sequentially.
+  - **Free models**: When `accountAffinity=true`, requests with the same affinity key and model stick to the account that last handled them successfully. Affinity misses fall back to the first available eligible account. Set `accountAffinity=false` in `config.json` to disable affinity and route all requests sequentially.
   - **Model classification**: Based on Copilot model metadata (`billing.is_premium` / `billing.multiplier`). Missing billing info or `billing.is_premium !== true` is treated as free.

package/dist/{account-DhQb2A6q.js → account-CipKmikF.js} RENAMED Viewed

@@ -13,5 +13,5 @@ function parseAccountType(value) {
 }
 //#endregion
-export { parseAccountType };
-//# sourceMappingURL=account-DhQb2A6q.js.map
+export { parseAccountType as t };
+//# sourceMappingURL=account-CipKmikF.js.map

package/dist/{account-DhQb2A6q.js.map → account-CipKmikF.js.map} RENAMED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"account-~~DhQb2A6q~~.js","names":["ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType>"],"sources":["../src/lib/types/account.ts"],"sourcesContent":["import type { ModelsResponse } from \"~/services/copilot/get-models\"\n\n/*\n Account type for GitHub Copilot subscription.\n /\nexport type AccountType = \"individual\" \| \"business\" \| \"enterprise\"\n\nexport const ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType> = [\n \"individual\",\n \"business\",\n \"enterprise\",\n]\n\nexport function isAccountType(value: unknown): value is AccountType {\n return (\n typeof value === \"string\"\n && (ACCOUNT_TYPE_VALUES as ReadonlyArray<string>).includes(value)\n )\n}\n\nexport function parseAccountType(value: unknown): AccountType {\n if (!isAccountType(value)) {\n throw new Error(\n `Invalid account type: ${String(value)}. Valid values: ${ACCOUNT_TYPE_VALUES.join(\n \", \",\n )}`,\n )\n }\n return value\n}\n\n/\n Metadata for a registered account, stored in the registry file.\n /\nexport interface AccountMeta {\n /* GitHub login (username) /\n id: string\n /* Account subscription type /\n accountType: AccountType\n /* Timestamp when the account was added /\n addedAt: number\n}\n\n/\n Registry file structure for storing account metadata.\n /\nexport interface AccountRegistry {\n /* Schema version for future migrations /\n version: 1\n /* Ordered list of accounts (order = priority) /\n accounts: Array<AccountMeta>\n}\n\n/\n Runtime state for an account, including tokens and quota information.\n /\nexport interface AccountRuntime extends AccountMeta {\n /* GitHub personal access token /\n githubToken: string\n /* Copilot API token (obtained from GitHub) /\n copilotToken?: string\n /* VS Code version for API headers /\n vsCodeVersion?: string\n /* Cached available models for this account /\n models?: ModelsResponse\n /* Timestamp of last models fetch /\n lastModelsFetch?: number\n /* Whether models refresh is in progress /\n isRefreshingModels?: boolean\n 
/* Promise for an in-flight models refresh /\n modelsRefreshPromise?: Promise<void>\n /* Total premium interactions quota entitlement /\n premiumEntitlement?: number\n /* Remaining premium interactions quota /\n premiumRemaining?: number\n /* Reserved premium interaction units for in-flight requests /\n premiumReserved?: number\n /* Internal reservation map for idempotent release /\n premiumReservations?: Map<symbol, number>\n /* Whether this account has unlimited quota /\n unlimited?: boolean\n /* Whether this account allows overage billing (enterprise feature) /\n overagePermitted?: boolean\n /* Timestamp of last quota fetch /\n lastQuotaFetch?: number\n /* Token refresh timer reference /\n refreshTimer?: ReturnType<typeof setInterval>\n /* Whether this account has failed (e.g., 401 error) /\n failed?: boolean\n /* Failure reason if failed /\n failureReason?: string\n /* Whether quota refresh is in progress (prevents concurrent refreshes) /\n isRefreshingQuota?: boolean\n /* Promise for an in-flight quota refresh (allows concurrent callers to await the same refresh) /\n quotaRefreshPromise?: Promise<void>\n}\n\n/\n Context required for making API calls on behalf of an account.\n * This is a subset of AccountRuntime used by service functions.\n /\nexport interface AccountContext {\n /* GitHub personal access token /\n githubToken: string\n /* Copilot API token /\n copilotToken?: string\n /* Account subscription type /\n accountType: AccountType\n /* VS Code version for API headers */\n vsCodeVersion?: string\n}\n"],"mappings":";AAOA,MAAaA,sBAAkD;CAC7D;CACA;CACA;CACD;AAED,SAAgB,cAAc,OAAsC;AAClE,QACE,OAAO,UAAU,YACb,oBAA8C,SAAS,MAAM;;AAIrE,SAAgB,iBAAiB,OAA6B;AAC5D,KAAI,CAAC,cAAc,MAAM,CACvB,OAAM,IAAI,MACR,yBAAyB,OAAO,MAAM,CAAC,kBAAkB,oBAAoB,KAC3E,KACD,GACF;AAEH,QAAO"}
1	+ {"version":3,"file":"account-CipKmikF.js","names":["ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType>"],"sources":["../src/lib/types/account.ts"],"sourcesContent":["import type { ModelsResponse } from \"~/services/copilot/get-models\"\n\n/*\n Account type for GitHub Copilot subscription.\n /\nexport type AccountType = \"individual\" \| \"business\" \| \"enterprise\"\n\nexport const ACCOUNT_TYPE_VALUES: ReadonlyArray<AccountType> = [\n \"individual\",\n \"business\",\n \"enterprise\",\n]\n\nexport function isAccountType(value: unknown): value is AccountType {\n return (\n typeof value === \"string\"\n && (ACCOUNT_TYPE_VALUES as ReadonlyArray<string>).includes(value)\n )\n}\n\nexport function parseAccountType(value: unknown): AccountType {\n if (!isAccountType(value)) {\n throw new Error(\n `Invalid account type: ${String(value)}. Valid values: ${ACCOUNT_TYPE_VALUES.join(\n \", \",\n )}`,\n )\n }\n return value\n}\n\n/\n Metadata for a registered account, stored in the registry file.\n /\nexport interface AccountMeta {\n /* GitHub login (username) /\n id: string\n /* Account subscription type /\n accountType: AccountType\n /* Timestamp when the account was added /\n addedAt: number\n}\n\n/\n Registry file structure for storing account metadata.\n /\nexport interface AccountRegistry {\n /* Schema version for future migrations /\n version: 1\n /* Ordered list of accounts (order = priority) /\n accounts: Array<AccountMeta>\n}\n\n/\n Runtime state for an account, including tokens and quota information.\n /\nexport interface AccountRuntime extends AccountMeta {\n /* GitHub personal access token /\n githubToken: string\n /* Copilot API token (obtained from GitHub) /\n copilotToken?: string\n /* VS Code version for API headers /\n vsCodeVersion?: string\n /* Cached available models for this account /\n models?: ModelsResponse\n /* Timestamp of last models fetch /\n lastModelsFetch?: number\n /* Whether models refresh is in progress /\n isRefreshingModels?: boolean\n /* 
Promise for an in-flight models refresh /\n modelsRefreshPromise?: Promise<void>\n /* Total premium interactions quota entitlement /\n premiumEntitlement?: number\n /* Remaining premium interactions quota /\n premiumRemaining?: number\n /* Reserved premium interaction units for in-flight requests /\n premiumReserved?: number\n /* Internal reservation map for idempotent release /\n premiumReservations?: Map<symbol, number>\n /* Whether this account has unlimited quota /\n unlimited?: boolean\n /* Whether this account allows overage billing (enterprise feature) /\n overagePermitted?: boolean\n /* Timestamp of last quota fetch /\n lastQuotaFetch?: number\n /* Token refresh timer reference /\n refreshTimer?: ReturnType<typeof setInterval>\n /* Whether this account has failed (e.g., 401 error) /\n failed?: boolean\n /* Failure reason if failed /\n failureReason?: string\n /* Whether quota refresh is in progress (prevents concurrent refreshes) /\n isRefreshingQuota?: boolean\n /* Promise for an in-flight quota refresh (allows concurrent callers to await the same refresh) /\n quotaRefreshPromise?: Promise<void>\n}\n\n/\n Context required for making API calls on behalf of an account.\n * This is a subset of AccountRuntime used by service functions.\n /\nexport interface AccountContext {\n /* GitHub personal access token /\n githubToken: string\n /* Copilot API token /\n copilotToken?: string\n /* Account subscription type /\n accountType: AccountType\n /* VS Code version for API headers */\n vsCodeVersion?: string\n}\n"],"mappings":";AAOA,MAAaA,sBAAkD;CAC7D;CACA;CACA;CACD;AAED,SAAgB,cAAc,OAAsC;AAClE,QACE,OAAO,UAAU,YACb,oBAA8C,SAAS,MAAM;;AAIrE,SAAgB,iBAAiB,OAA6B;AAC5D,KAAI,CAAC,cAAc,MAAM,CACvB,OAAM,IAAI,MACR,yBAAyB,OAAO,MAAM,CAAC,kBAAkB,oBAAoB,KAC3E,KACD,GACF;AAEH,QAAO"}

package/dist/{accounts-manager-BsGuQhKM.js → accounts-manager-Cjrd_el_.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { PATHS } from "./paths-DoT4SZ8f.js";
-import { addAccountToRegistry, hasLegacyToken, hasRegistry, listAccountsFromRegistry, loadAccountToken, readLegacyToken, saveAccountToken } from "./accounts-registry-c7rs5Ed9.js";
-import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-BIK3ym34.js";
-import { getCopilotToken } from "./get-copilot-token-BySQCue6.js";
+import { t as PATHS } from "./paths-Cvzy-eLX.js";
+import { a as loadAccountToken, i as listAccountsFromRegistry, l as saveAccountToken, n as hasLegacyToken, o as readLegacyToken, r as hasRegistry, t as addAccountToRegistry } from "./accounts-registry-CQYvRe65.js";
+import { d as getModels, f as getGitHubUser, m as HTTPError, p as getCopilotUsage } from "./utils-DY-jLXwO.js";
+import { t as getCopilotToken } from "./get-copilot-token-BbpphnmV.js";
 import consola from "consola";
 import fs from "node:fs";
@@ -42,7 +42,7 @@ const defaultConfig = {
 		"gpt-5.4": gpt5CommentaryPrompt
 	},
 	smallModel: "gpt-5-mini",
-	freeModelLoadBalancing: true,
+	accountAffinity: true,
 	responsesApiContextManagementModels: [],
 	modelReasoningEfforts: {
 		"gpt-5-mini": "low",
@@ -55,7 +55,8 @@ const defaultConfig = {
 	compactUseSmallModel: true,
 	messageStartInputTokensFallback: false,
 	modelRefreshIntervalHours: 24,
-	useMessagesApi: true
+	useMessagesApi: true,
+	useResponsesApiWebSearch: true
 };
 let cachedConfig = null;
 function isPlainObject(value) {
@@ -128,8 +129,7 @@ function mergeDefaultConfig(config) {
 }
 function mergeDefaultAuth(config) {
 	const authConfig = isPlainObject(config.auth) ? config.auth : void 0;
-	const rawApiKeys = Array.isArray(authConfig?.apiKeys) ? authConfig.apiKeys : void 0;
-	const nextAuth = { apiKeys: normalizeAuthApiKeys(rawApiKeys) };
+	const nextAuth = { apiKeys: normalizeAuthApiKeys(Array.isArray(authConfig?.apiKeys) ? authConfig.apiKeys : void 0) };
 	if (authConfig && JSON.stringify(authConfig) === JSON.stringify(nextAuth)) return {
 		mergedConfig: config,
 		changed: false
@@ -142,15 +142,27 @@ function mergeDefaultAuth(config) {
 		changed: true
 	};
 }
-function mergeDefaultFreeModelLoadBalancing(config) {
-	if (typeof config.freeModelLoadBalancing === "boolean") return {
+function mergeDefaultAccountAffinity(config) {
+	const raw = config;
+	const hasOld = typeof raw.freeModelLoadBalancing === "boolean";
+	const hasNew = typeof config.accountAffinity === "boolean";
+	if (hasOld) {
+		const next = { ...config };
+		if (!hasNew) next.accountAffinity = raw.freeModelLoadBalancing;
+		delete next.freeModelLoadBalancing;
+		return {
+			mergedConfig: next,
+			changed: true
+		};
+	}
+	if (hasNew) return {
 		mergedConfig: config,
 		changed: false
 	};
 	return {
 		mergedConfig: {
 			...config,
-			freeModelLoadBalancing: defaultConfig.freeModelLoadBalancing ?? true
+			accountAffinity: defaultConfig.accountAffinity ?? true
 		},
 		changed: true
 	};
@@ -181,11 +193,10 @@ function applyConfigMerges(config, mergeFns) {
 	});
 }
 function mergeConfigWithDefaults() {
-	const config = readConfigFromDisk();
-	const { mergedConfig, changed } = applyConfigMerges(config, [
+	const { mergedConfig, changed } = applyConfigMerges(readConfigFromDisk(), [
 		mergeDefaultAuth,
 		mergeDefaultConfig,
-		mergeDefaultFreeModelLoadBalancing,
+		mergeDefaultAccountAffinity,
 		mergeDefaultModelRefreshInterval
 	]);
 	if (changed) try {
@@ -271,8 +282,7 @@ function isOriginalModelNameAllowedForTarget(modelId) {
 	return !getAliasTargetSet().has(normalized);
 }
 function getPreferredAliasForTarget(modelId) {
-	const aliases = getModelAliases();
-	return getAliasKeysForTarget(modelId, aliases)[0] ?? null;
+	return getAliasKeysForTarget(modelId, getModelAliases())[0] ?? null;
 }
 function getAliasKeysForTarget(target, aliases) {
 	const normalizedTarget = target.toLowerCase();
@@ -304,12 +314,11 @@ function getSmallModel() {
 	if (isOriginalModelNameAllowedForTarget(model)) return model;
 	return getPreferredAliasForTarget(model) ?? model;
 }
-function isFreeModelLoadBalancingEnabled() {
-	return getConfig().freeModelLoadBalancing ?? true;
+function isAccountAffinityEnabled() {
+	return getConfig().accountAffinity ?? true;
 }
 function getModelRefreshIntervalHours() {
-	const config = getConfig();
-	return normalizeModelRefreshIntervalHours(config.modelRefreshIntervalHours) ?? defaultConfig.modelRefreshIntervalHours ?? 24;
+	return normalizeModelRefreshIntervalHours(getConfig().modelRefreshIntervalHours) ?? defaultConfig.modelRefreshIntervalHours ?? 24;
 }
 function getModelRefreshIntervalMs() {
 	const hours = getModelRefreshIntervalHours();
@@ -372,6 +381,93 @@ function isMessagesApiEnabled() {
 function getAnthropicApiKey() {
 	return getConfig().anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? void 0;
 }
+function isResponsesApiWebSearchEnabled() {
+	return getConfig().useResponsesApiWebSearch ?? true;
+}
+function getClaudeTokenMultiplier() {
+	return getConfig().claudeTokenMultiplier ?? 1.15;
+}
+//#endregion
+//#region src/lib/account-affinity.ts
+const DEFAULT_MAX_ENTRIES = 1e4;
+const DEFAULT_TTL_MS = 3600 * 1e3;
+/**
+* In-memory LRU cache with TTL for account affinity mappings.
+*
+* Uses Map insertion order for LRU eviction: accessed/updated entries are
+* deleted and re-inserted so they move to the "newest" end.
+*/
+var AccountAffinityCache = class {
+	cache = /* @__PURE__ */ new Map();
+	maxEntries;
+	ttlMs;
+	constructor(maxEntries = DEFAULT_MAX_ENTRIES, ttlMs = DEFAULT_TTL_MS) {
+		this.maxEntries = maxEntries;
+		this.ttlMs = ttlMs;
+	}
+	/** Look up the preferred account ID for a cache key. Returns undefined if not found or expired. */
+	get(key) {
+		const entry = this.cache.get(key);
+		if (!entry) return;
+		if (Date.now() >= entry.expiresAt) {
+			this.cache.delete(key);
+			return;
+		}
+		return entry.accountId;
+	}
+	/** Record a successful account mapping. Refreshes TTL and moves the entry to the newest position. */
+	set(key, accountId) {
+		this.cache.delete(key);
+		while (this.cache.size >= this.maxEntries) {
+			const oldest = this.cache.keys().next();
+			if (oldest.done) break;
+			this.cache.delete(oldest.value);
+		}
+		this.cache.set(key, {
+			accountId,
+			expiresAt: Date.now() + this.ttlMs
+		});
+	}
+	/** Remove a specific entry. */
+	delete(key) {
+		return this.cache.delete(key);
+	}
+	/** Remove all entries. */
+	clear() {
+		this.cache.clear();
+	}
+	/** Current number of entries (including potentially expired ones). */
+	get size() {
+		return this.cache.size;
+	}
+};
+/**
+* Extract the affinity key from the request context.
+* Uses the upstream request ID which is deterministic for the same user message.
+*/
+function extractAffinityKey(context) {
+	return context.requestId?.trim() || void 0;
+}
+/**
+* Build the full cache key by combining the affinity key with the model ID.
+* This prevents cross-model pollution (same session requesting different models
+* can be routed to different accounts).
+*/
+function buildAffinityCacheKey(affinityKey, modelId) {
+	return `${affinityKey}:${modelId}`;
+}
+/**
+* Check whether an account is a valid affinity candidate.
+* An account is valid if it is not failed and is present in the provided
+* runtime list.
+*/
+function isAffinityAccountUsable(accountId, accounts) {
+	const account = accounts.find((a) => a.id === accountId);
+	if (!account) return void 0;
+	if (account.failed) return void 0;
+	return account;
+}
 //#endregion
 //#region src/lib/accounts-manager-auth.ts
@@ -486,8 +582,9 @@ var AccountsManager = class {
 	accountOrder = [];
 	temporaryAccount;
 	vsCodeVersion;
-	freeModelCursor = 0;
-	freeModelLoadBalancingEnabled = true;
+	accountAffinityEnabled = true;
+	affinityCache = new AccountAffinityCache();
+	loadBalanceCursor = 0;
 	quotaRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
 	modelsRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
 	tokenRefreshEnabledAccounts = /* @__PURE__ */ new WeakSet();
@@ -529,8 +626,9 @@ var AccountsManager = class {
 		consola.info(`Loaded ${this.accounts.size} account(s)`);
 		this.startRegistryWatcher();
 	}
-	setFreeModelLoadBalancingEnabled(enabled) {
-		this.freeModelLoadBalancingEnabled = enabled;
+	setAccountAffinityEnabled(enabled) {
+		this.accountAffinityEnabled = enabled;
+		if (!enabled) this.affinityCache.clear();
 	}
 	setModelsRefreshIntervalMs(intervalMs) {
 		this.modelsRefreshIntervalMs = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : 0;
@@ -545,8 +643,7 @@ var AccountsManager = class {
 	async runTokenRefreshTick(account, snapshot, refreshInSeconds) {
 		if (!this.shouldContinueTokenRefresh(account, snapshot)) return;
 		try {
-			const ctx = toAccountContextFromSnapshot(account, snapshot);
-			const { token, refresh_in } = await getCopilotToken(ctx);
+			const { token, refresh_in } = await getCopilotToken(toAccountContextFromSnapshot(account, snapshot));
 			if (!this.shouldContinueTokenRefresh(account, snapshot)) return;
 			if (!applyTokenRefreshSuccessIfCurrent(account, snapshot, token)) return;
 			consola.debug(`Refreshed token for account ${account.id}`);
@@ -570,13 +667,10 @@ var AccountsManager = class {
 	async initializeAccount(account) {
 		const snapshot = takeAuthSnapshot(account);
 		try {
-			const tokenCtx = toAccountContextFromSnapshot(account, snapshot);
-			const { token, refresh_in } = await getCopilotToken(tokenCtx);
+			const { token, refresh_in } = await getCopilotToken(toAccountContextFromSnapshot(account, snapshot));
 			if (!applyCopilotTokenIfCurrent(account, snapshot, token)) return;
 			this.startTokenRefresh(account, refresh_in);
-			const modelsCtx = toAccountContextFromSnapshot(account, snapshot, token);
-			const models = await getModels(modelsCtx);
-			if (!applyModelsIfCurrent(account, snapshot, models)) return;
+			if (!applyModelsIfCurrent(account, snapshot, await getModels(toAccountContextFromSnapshot(account, snapshot, token)))) return;
 			account.lastModelsFetch = Date.now();
 			await this.refreshQuota(account);
 			consola.debug(`Account ${account.id} initialized`);
@@ -663,8 +757,7 @@ var AccountsManager = class {
 		}
 		const snapshot = takeAuthSnapshot(account);
 		if (account.modelsRefreshPromise) {
-			const existingSnapshot = this.modelsRefreshSnapshotByAccount.get(account);
-			if (isSameAuthSnapshot(existingSnapshot, snapshot)) {
+			if (isSameAuthSnapshot(this.modelsRefreshSnapshotByAccount.get(account), snapshot)) {
 				await account.modelsRefreshPromise;
 				return;
 			}
@@ -673,8 +766,7 @@ var AccountsManager = class {
 		const ctx = toAccountContextFromSnapshot(account, snapshot, account.copilotToken);
 		const promise = (async () => {
 			try {
-				const models = await getModels(ctx);
-				if (applyModelsIfCurrent(account, snapshot, models)) account.lastModelsFetch = Date.now();
+				if (applyModelsIfCurrent(account, snapshot, await getModels(ctx))) account.lastModelsFetch = Date.now();
 			} catch (error) {
 				if (error instanceof HTTPError && error.response.status === 401) {
 					applyUnauthorizedIfCurrent(account, snapshot, "Unauthorized (401)");
@@ -704,8 +796,7 @@ var AccountsManager = class {
 	async refreshQuota(account) {
 		const snapshot = takeAuthSnapshot(account);
 		if (account.quotaRefreshPromise) {
-			const existingSnapshot = this.quotaRefreshSnapshotByAccount.get(account);
-			if (isSameAuthSnapshot(existingSnapshot, snapshot)) {
+			if (isSameAuthSnapshot(this.quotaRefreshSnapshotByAccount.get(account), snapshot)) {
 				await account.quotaRefreshPromise;
 				return;
 			}
@@ -770,38 +861,6 @@ var AccountsManager = class {
 		}
 		return null;
 	}
-	selectFreeAccountForRequest(orderedAccounts, candidates) {
-		const count = orderedAccounts.length;
-		const start = this.freeModelCursor % count;
-		let supportedCandidateFound = false;
-		for (let i = 0; i < count; i++) {
-			const idx = (start + i) % count;
-			const account = orderedAccounts[idx];
-			if (this.isAccountFailed(account)) continue;
-			const supported = this.pickSupportedCandidate(account, candidates);
-			if (!supported) continue;
-			supportedCandidateFound = true;
-			const { candidate, model } = supported;
-			const costUnits = getCostUnits(model);
-			if (costUnits > 0) continue;
-			this.freeModelCursor = (idx + 1) % count;
-			return {
-				ok: true,
-				account,
-				selectedModel: model,
-				endpoint: candidate.endpoint,
-				costUnits
-			};
-		}
-		if (!supportedCandidateFound) return {
-			ok: false,
-			reason: "MODEL_NOT_SUPPORTED"
-		};
-		return {
-			ok: false,
-			reason: "NO_QUOTA"
-		};
-	}
 	async selectAccountForCandidates(orderedAccounts, candidates) {
 		if (orderedAccounts.length === 0) return {
 			ok: false,
@@ -816,16 +875,13 @@ var AccountsManager = class {
 			supportedCandidateFound = true;
 			const { candidate, model } = supported;
 			const costUnits = getCostUnits(model);
-			if (costUnits <= 0) {
-				if (this.freeModelLoadBalancingEnabled) return this.selectFreeAccountForRequest(orderedAccounts, candidates);
-				return {
-					ok: true,
-					account,
-					selectedModel: model,
-					endpoint: candidate.endpoint,
-					costUnits
-				};
-			}
+			if (costUnits <= 0) return {
+				ok: true,
+				account,
+				selectedModel: model,
+				endpoint: candidate.endpoint,
+				costUnits
+			};
 			if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
 			if (this.isAccountFailed(account)) continue;
 			if (account.unlimited) return {
@@ -865,12 +921,120 @@ var AccountsManager = class {
 		};
 	}
 	/**
+	* Try to serve the request from the preferred (affinity) account, falling back to alias-resolved candidates when the direct ones do not match.
+	* Resolves to the selection produced by validateAffinityQuota, or to null when the account is not usable, supports no candidate, or fails the quota check.
+	*/
+	async tryAffinityAccount(preferredAccountId, orderedAccounts, candidates) {
+		const account = isAffinityAccountUsable(preferredAccountId, orderedAccounts); // helper is defined outside this view; a falsy result means "do not use affinity"
+		if (!account) return null;
+		const supported = this.pickSupportedCandidate(account, candidates) ?? this.pickAliasFallbackCandidate(account, candidates); // alias lookup runs only when the direct lookup is null/undefined
+		if (!supported) return null;
+		return this.validateAffinityQuota(account, supported);
+	}
+	/**
+	* Re-run candidate selection with every candidate's modelId passed through resolveModelAlias (all other candidate fields are copied unchanged).
+	* Returns null when no alias changes any modelId; otherwise returns whatever pickSupportedCandidate yields for the aliased list.
+	*/
+	pickAliasFallbackCandidate(account, candidates) {
+		const aliasCandidates = candidates.map((candidate) => {
+			const modelId = resolveModelAlias(candidate.modelId);
+			if (modelId === candidate.modelId) return candidate; // alias left the id unchanged: reuse the original object
+			return {
+				...candidate,
+				modelId
+			};
+		});
+		if (!aliasCandidates.some((candidate, index) => candidate.modelId !== candidates[index].modelId)) return null; // no modelId was rewritten, so there is nothing new to try
+		return this.pickSupportedCandidate(account, aliasCandidates);
+	}
+	/**
+	* Validate quota for an affinity candidate. Models whose cost is <= 0 units pass immediately, without any refresh or failure check.
+	* Other models refresh an expired quota cache, then resolve to null (failed account, or known remaining units below the cost), to a plain selection for unlimited accounts, or to a selection carrying a reservation.
+	*/
+	async validateAffinityQuota(account, supported) {
+		const { candidate, model } = supported;
+		const costUnits = getCostUnits(model);
+		if (costUnits <= 0) return {
+			ok: true,
+			account,
+			selectedModel: model,
+			endpoint: candidate.endpoint,
+			costUnits
+		};
+		if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account); // unlimited accounts never trigger a refresh here
+		if (this.isAccountFailed(account)) return null; // checked after the refresh; presumably refreshQuota can mark the account failed — TODO confirm
+		if (account.unlimited) return {
+			ok: true,
+			account,
+			selectedModel: model,
+			endpoint: candidate.endpoint,
+			costUnits
+		};
+		const effectiveRemaining = getEffectivePremiumRemaining(account);
+		if (effectiveRemaining !== void 0 && effectiveRemaining < costUnits) return null; // an undefined remaining value is not treated as exhausted
+		const reservation = reservePremiumUnits(account, costUnits); // handed back to the caller; this method does not release it
+		return {
+			ok: true,
+			account,
+			selectedModel: model,
+			endpoint: candidate.endpoint,
+			costUnits,
+			reservation
+		};
+	}
+	/**
 	* Select an available account for a specific request (model + endpoint).
+	* When account affinity is enabled and affinityContext yields a key, the account cached for that key + candidates[0].modelId is tried first via tryAffinityAccount.
+	* Otherwise, or when that account cannot serve the request, selection goes through selectWithAliasFallback; successful results gain a confirmAffinity() callback that writes the cache. Throws if candidates is empty.
 	* Uses reservation to avoid oversubscribing premium quota under concurrency.
 	*/
-	async selectAccountForRequest(candidates) {
+	async selectAccountForRequest(candidates, affinityContext) {
 		if (candidates.length === 0) throw new Error("selectAccountForRequest requires at least one candidate");
 		const orderedAccounts = [...this.temporaryAccount ? [this.temporaryAccount] : [], ...this.accountOrder.map((id) => this.accounts.get(id)).filter((account) => account !== void 0)];
+		const affinityKey = this.accountAffinityEnabled && affinityContext ? extractAffinityKey(affinityContext) : void 0; // stays undefined when affinity is disabled or no context is passed
+		const modelKey = candidates[0].modelId; // only the first candidate's modelId contributes to the cache key
+		const cacheKey = affinityKey ? buildAffinityCacheKey(affinityKey, modelKey) : void 0;
+		if (cacheKey) {
+			const preferredId = this.affinityCache.get(cacheKey);
+			if (preferredId) {
+				const affinityResult = await this.tryAffinityAccount(preferredId, orderedAccounts, candidates);
+				if (affinityResult) {
+					affinityResult.affinityHit = true;
+					affinityResult.affinityCacheKey = cacheKey;
+					affinityResult.confirmAffinity = () => { // the cache is written only when the caller invokes this — presumably after the request succeeds
+						if (!this.accountAffinityEnabled) return; // flag is re-read at call time, so disabling affinity after selection suppresses the write
+						this.affinityCache.set(cacheKey, affinityResult.account.id);
+					};
+					return affinityResult; // affinity hits return here and do not advance loadBalanceCursor
+				}
+			}
+		}
+		const accountsForSelection = this.accountAffinityEnabled && orderedAccounts.length > 1 ? this.rotateAccounts(orderedAccounts) : orderedAccounts; // rotation applies only with affinity enabled and more than one account
+		const result = await this.selectWithAliasFallback(accountsForSelection, candidates);
+		if (result.ok) this.loadBalanceCursor++; // advanced on every successful non-affinity selection, even when rotation is not in use
+		if (result.ok && cacheKey) {
+			const successResult = result;
+			successResult.confirmAffinity = () => { // same deferred cache write as on the affinity-hit path
+				if (!this.accountAffinityEnabled) return;
+				this.affinityCache.set(cacheKey, successResult.account.id);
+			};
+		}
+		return result;
+	}
+	/**
+	* Rotate the accounts array by loadBalanceCursor modulo its length, so that successive selections start from a different account (round-robin).
+	* Returns the input array itself when the offset is 0 and a newly built array otherwise; the input array is never mutated.
+	*/
+	rotateAccounts(accounts) {
+		const start = this.loadBalanceCursor % accounts.length;
+		if (start === 0) return accounts; // no rotation needed: avoid allocating a copy
+		return [...accounts.slice(start), ...accounts.slice(0, start)]; // elements from `start` onward first, then the wrapped-around head
+	}
+	/**
+	* Normal account selection with alias fallback.
+	* Extracted to keep selectAccountForRequest readable after adding affinity logic.
+	*/
+	async selectWithAliasFallback(orderedAccounts, candidates) {
 		const primary = await this.selectAccountForCandidates(orderedAccounts, candidates);
 		if (primary.ok || primary.reason !== "MODEL_NOT_SUPPORTED") return primary;
 		const aliasCandidates = candidates.map((candidate) => {
@@ -1064,7 +1228,7 @@ var AccountsManager = class {
 			for (const meta of newMetas) if (!currentIds.has(meta.id)) await this.addNewAccount(meta, added);
 			await this.reinitializeUpdatedAccounts(newMetas, currentIds, updated);
 			this.accountOrder = newMetas.map((m) => m.id).filter((id) => this.accounts.has(id));
-			this.freeModelCursor = 0;
+			this.loadBalanceCursor = 0;
 			this.logRegistryReloadChanges(added, removed, updated);
 		} catch (error) {
 			consola.error("Failed to reload registry:", error);
@@ -1169,6 +1333,8 @@ var AccountsManager = class {
 		this.stopRegistryWatcher();
 		this.stopAllTokenRefresh();
 		this.stopModelsRefresh();
+		this.affinityCache.clear();
+		this.loadBalanceCursor = 0;
 		this.accounts.clear();
 		this.accountOrder = [];
 		this.temporaryAccount = void 0;
@@ -1178,5 +1344,5 @@ var AccountsManager = class {
 const accountsManager = new AccountsManager();
 //#endregion
-export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isForceAgentEnabled, isFreeModelLoadBalancingEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, mergeConfigWithDefaults, shouldCompactUseSmallModel };
-//# sourceMappingURL=accounts-manager-BsGuQhKM.js.map
+export { isMessagesApiEnabled as _, getClaudeTokenMultiplier as a, mergeConfigWithDefaults as b, getModelAliases as c, getProviderConfig as d, getReasoningEffortForModel as f, isMessageStartInputTokensFallbackEnabled as g, isForceAgentEnabled as h, getAnthropicApiKey as i, getModelAliasesInfo as l, isAccountAffinityEnabled as m, PROVIDER_TYPE_ANTHROPIC as n, getConfig as o, getSmallModel as p, getAliasTargetSet as r, getExtraPromptForModel as s, accountsManager as t, getModelRefreshIntervalMs as u, isResponsesApiContextManagementModel as v, shouldCompactUseSmallModel as x, isResponsesApiWebSearchEnabled as y };
+//# sourceMappingURL=accounts-manager-Cjrd_el_.js.map