@nick3/copilot-api 1.4.5 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/README.md +14 -6
  2. package/dist/{accounts-manager-BsGuQhKM.js → accounts-manager-B8Y2Y4ab.js} +241 -60
  3. package/dist/accounts-manager-B8Y2Y4ab.js.map +1 -0
  4. package/dist/admin/assets/index-BFvCJZIK.js +57 -0
  5. package/dist/admin/assets/index-CsAeel_7.css +1 -0
  6. package/dist/admin/index.html +2 -2
  7. package/dist/{auth-BAEHgP-a.js → auth-Bt0FGr8C.js} +3 -3
  8. package/dist/{auth-BAEHgP-a.js.map → auth-Bt0FGr8C.js.map} +1 -1
  9. package/dist/{check-usage-Dbthad7V.js → check-usage-By-sJOch.js} +4 -4
  10. package/dist/{check-usage-Dbthad7V.js.map → check-usage-By-sJOch.js.map} +1 -1
  11. package/dist/{get-copilot-token-BySQCue6.js → get-copilot-token-Q4FXtOE9.js} +2 -2
  12. package/dist/{get-copilot-token-BySQCue6.js.map → get-copilot-token-Q4FXtOE9.js.map} +1 -1
  13. package/dist/main.js +3 -3
  14. package/dist/{poll-access-token-CKc0_m42.js → poll-access-token-Cp_DfHGH.js} +2 -2
  15. package/dist/{poll-access-token-CKc0_m42.js.map → poll-access-token-Cp_DfHGH.js.map} +1 -1
  16. package/dist/{server-D_7gI9hx.js → server-Cln-GCLU.js} +136 -75
  17. package/dist/server-Cln-GCLU.js.map +1 -0
  18. package/dist/{start-DItwCYda.js → start-DH-QnZxL.js} +7 -7
  19. package/dist/start-DH-QnZxL.js.map +1 -0
  20. package/dist/{utils-BIK3ym34.js → utils-SV0FyqXk.js} +2 -2
  21. package/dist/utils-SV0FyqXk.js.map +1 -0
  22. package/package.json +1 -1
  23. package/dist/accounts-manager-BsGuQhKM.js.map +0 -1
  24. package/dist/admin/assets/index-Y2SvOXge.js +0 -57
  25. package/dist/admin/assets/index-geiCIixE.css +0 -1
  26. package/dist/server-D_7gI9hx.js.map +0 -1
  27. package/dist/start-DItwCYda.js.map +0 -1
  28. package/dist/utils-BIK3ym34.js.map +0 -1
package/README.md CHANGED
@@ -32,7 +32,7 @@ English | [中文](./README_CN.md)
32
32
  > [!IMPORTANT]
33
33
  > **Before using, please be aware of the following:**
34
34
  >
35
- > 1. **Claude Code model ID configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix, exceeding GitHub Copilot's context window limit too much may lead to being banned).
35
+ > 1. **Claude Code configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix; exceeding GitHub Copilot's context window limit by too much may lead to being banned). For an example Claude Code `settings.json`, see [Manual Configuration with `settings.json`](#manual-configuration-with-settingsjson).
36
36
  >
37
37
  > 2. **Recommend for Opencode:** When using with opencode, we recommend starting with the opencode OAuth app. This approach behaves identically to opencode's built-in GitHub Copilot provider with no Terms of Service risk:
38
38
  > ```sh
@@ -348,7 +348,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
348
348
  "gpt-5.4": "<built-in commentary prompt>"
349
349
  },
350
350
  "smallModel": "gpt-5-mini",
351
- "freeModelLoadBalancing": true,
351
+ "accountAffinity": true,
352
352
  "responsesApiContextManagementModels": [],
353
353
  "modelReasoningEfforts": {
354
354
  "gpt-5-mini": "low",
@@ -361,7 +361,8 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
361
361
  "compactUseSmallModel": true,
362
362
  "messageStartInputTokensFallback": false,
363
363
  "modelRefreshIntervalHours": 24,
364
- "useMessagesApi": true
364
+ "useMessagesApi": true,
365
+ "useResponsesApiWebSearch": true
365
366
  }
366
367
  ```
367
368
  - **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
@@ -377,7 +378,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
377
378
  - `topK` (optional): Default top_k value used when the request does not specify one.
378
379
  - **responsesApiContextManagementModels:** List of model IDs that should receive Responses API `context_management` compaction instructions. Use this when a model supports server-side context management and you want the proxy to keep only the latest compaction carrier on follow-up turns.
379
380
  - **smallModel:** Fallback model used for tool-less warmup messages, compact/background requests, and other short housekeeping turns (for example from Claude Code or OpenCode) to avoid spending premium requests; defaults to `gpt-5-mini`. If original names are blocked and this points to an aliased target, it resolves to the preferred alias.
380
- - **freeModelLoadBalancing:** Enable round-robin routing for free-model requests across multiple accounts. Defaults to `true`. Set to `false` to route free-model requests sequentially (same ordering strategy as premium models).
381
+ - **accountAffinity:** Enable sticky account routing based on session identity. When enabled, requests from the same session for the same model are routed to the account that last handled them successfully. Applies to both free and premium models. Defaults to `true`. Set to `false` to use sequential routing for all models.
381
382
  - **apiKey (deprecated):** Legacy single-key field kept for migration compatibility. Prefer `auth.apiKeys`. When `auth.apiKeys` is empty, the server falls back to `COPILOT_API_KEY` and then `apiKey`.
382
383
  - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
383
384
  - **modelAliases:** Map of `alias -> { target, allowOriginal? }` (legacy string values are still accepted). Alias keys are normalized (trim + lowercase) and must be non-empty; aliases cannot map to themselves (case-insensitive), and conflicting normalized aliases are rejected. `allowOriginal` overrides the global default per alias. If multiple aliases map to the same target, original names are allowed when any alias sets `allowOriginal: true` (allow-wins). Admin UI/API rejects blocked keys (`__proto__`, `constructor`, `prototype`). Aliases can be used in downstream requests.
@@ -388,6 +389,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
388
389
  - **messageStartInputTokensFallback:** When `true`, the Anthropic streaming translation layer estimates `message_start.input_tokens` when upstream stream events do not provide it. Defaults to `false`.
389
390
  - **modelRefreshIntervalHours:** Interval for refreshing account model lists in the background. Set to `0` to disable refresh. Defaults to `24`.
390
391
  - **useMessagesApi:** When `true` (default), Claude-family models that support Copilot's native `/v1/messages` endpoint may use the Messages API path. Set to `false` to skip the Messages API candidate and fall back to `/responses` (if supported) or `/chat/completions`.
392
+ - **useResponsesApiWebSearch:** When `true` (default), `/v1/responses` keeps tools with `type: "web_search"` and forwards them upstream. Set to `false` to strip them before the Copilot request is sent.
391
393
  - **anthropicApiKey:** Optional Anthropic API key used for accurate Claude token counting (see [Accurate Claude Token Counting](#accurate-claude-token-counting) below). Can also be set via the `ANTHROPIC_API_KEY` environment variable. If not set, token counting falls back to GPT tokenizer estimation.
392
394
 
393
395
  Edit this file to customize prompts or swap in your own fast model. If you edit it manually, restart the server (or call `GET /api/admin/config`) so the cached config is refreshed. Changes made through the Admin UI/API are validated, written to disk, and applied immediately; unknown keys are rejected.
@@ -607,7 +609,7 @@ OpenCode already has a direct GitHub Copilot provider. Use this section when you
607
609
  Start the proxy with the OpenCode OAuth app:
608
610
 
609
611
  ```sh
610
- COPILOT_API_OAUTH_APP=opencode npx @nick3/copilot-api@latest start
612
+ npx @nick3/copilot-api@latest --oauth-app=opencode start
611
613
  ```
612
614
 
613
615
  Then point OpenCode at the proxy with `@ai-sdk/anthropic`.
@@ -803,6 +805,12 @@ Here is an example `.claude/settings.json` file:
803
805
  }
804
806
  ```
805
807
 
808
+ - Replace `ANTHROPIC_MODEL`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, and `ANTHROPIC_DEFAULT_HAIKU_MODEL` according to your needs. It is recommended to use gpt-5-mini for ANTHROPIC_DEFAULT_HAIKU_MODEL, as gpt-5-mini does not consume quota. ANTHROPIC_DEFAULT_HAIKU_MODEL is typically used for title generation, explore agents, etc.
809
+ - Setting `CLAUDE_CODE_ATTRIBUTION_HEADER` to `0` prevents Claude Code from adding billing and version information to system prompts, thereby avoiding prompt cache invalidation.
810
+ - Turning off CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION can prevent quota from being consumed unnecessarily.
811
+ - If you want to disable Claude Code WebSearch, deny `WebSearch` in permissions or set `useResponsesApiWebSearch` to `false` in `config.json`. When enabled, `/v1/responses` can forward `web_search` tools upstream, but actual support still depends on the selected model and Copilot behavior.
812
+ - Please do not enable `ENABLE_TOOL_SEARCH`, as the current Claude Code uses the client tool search mode. In this mode, loading deferred tools requires an additional request each time and lowers cache hit rates, so it does not necessarily save tokens. Only server tool search mode can save tokens. This project currently has compatibility issues with client tool search mode, which can also cause errors.
813
+
806
814
  ### CLAUDE.md or AGENTS.md Recommended Content
807
815
 
808
816
  To add these reminders manually, include the following in `CLAUDE.md` for Claude Code, or `AGENTS.md` for opencode/codex:
@@ -895,5 +903,5 @@ bun run start
895
903
  - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
896
904
  - **Multi-account request routing**: Add multiple GitHub Copilot accounts using `auth add`.
897
905
  - **Premium models**: Accounts are tried in the order they were added. When an account's premium request quota (`remaining=0`) is exhausted (or insufficient for the selected model), the proxy automatically switches to the next eligible account.
898
- - **Free models**: By default, requests are distributed round-robin across all eligible accounts (including the temporary account created via `start --github-token ...`). Set `freeModelLoadBalancing=false` in `config.json` to disable this and route free-model requests sequentially.
906
+ - **Free models**: When `accountAffinity=true`, requests with the same affinity key and model stick to the account that last handled them successfully. Requests without an affinity hit start from a rotating position in the account list (round-robin) and use the first eligible account from there. Set `accountAffinity=false` in `config.json` to disable affinity and route all requests sequentially.
899
907
  - **Model classification**: Based on Copilot model metadata (`billing.is_premium` / `billing.multiplier`). Missing billing info or `billing.is_premium !== true` is treated as free.
@@ -1,7 +1,7 @@
1
1
  import { PATHS } from "./paths-DoT4SZ8f.js";
2
2
  import { addAccountToRegistry, hasLegacyToken, hasRegistry, listAccountsFromRegistry, loadAccountToken, readLegacyToken, saveAccountToken } from "./accounts-registry-c7rs5Ed9.js";
3
- import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-BIK3ym34.js";
4
- import { getCopilotToken } from "./get-copilot-token-BySQCue6.js";
3
+ import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-SV0FyqXk.js";
4
+ import { getCopilotToken } from "./get-copilot-token-Q4FXtOE9.js";
5
5
  import consola from "consola";
6
6
  import fs from "node:fs";
7
7
 
@@ -42,7 +42,7 @@ const defaultConfig = {
42
42
  "gpt-5.4": gpt5CommentaryPrompt
43
43
  },
44
44
  smallModel: "gpt-5-mini",
45
- freeModelLoadBalancing: true,
45
+ accountAffinity: true,
46
46
  responsesApiContextManagementModels: [],
47
47
  modelReasoningEfforts: {
48
48
  "gpt-5-mini": "low",
@@ -55,7 +55,8 @@ const defaultConfig = {
55
55
  compactUseSmallModel: true,
56
56
  messageStartInputTokensFallback: false,
57
57
  modelRefreshIntervalHours: 24,
58
- useMessagesApi: true
58
+ useMessagesApi: true,
59
+ useResponsesApiWebSearch: true
59
60
  };
60
61
  let cachedConfig = null;
61
62
  function isPlainObject(value) {
@@ -142,15 +143,27 @@ function mergeDefaultAuth(config) {
142
143
  changed: true
143
144
  };
144
145
  }
145
- function mergeDefaultFreeModelLoadBalancing(config) {
146
- if (typeof config.freeModelLoadBalancing === "boolean") return {
146
+ function mergeDefaultAccountAffinity(config) {
147
+ const raw = config;
148
+ const hasOld = typeof raw.freeModelLoadBalancing === "boolean";
149
+ const hasNew = typeof config.accountAffinity === "boolean";
150
+ if (hasOld) {
151
+ const next = { ...config };
152
+ if (!hasNew) next.accountAffinity = raw.freeModelLoadBalancing;
153
+ delete next.freeModelLoadBalancing;
154
+ return {
155
+ mergedConfig: next,
156
+ changed: true
157
+ };
158
+ }
159
+ if (hasNew) return {
147
160
  mergedConfig: config,
148
161
  changed: false
149
162
  };
150
163
  return {
151
164
  mergedConfig: {
152
165
  ...config,
153
- freeModelLoadBalancing: defaultConfig.freeModelLoadBalancing ?? true
166
+ accountAffinity: defaultConfig.accountAffinity ?? true
154
167
  },
155
168
  changed: true
156
169
  };
@@ -185,7 +198,7 @@ function mergeConfigWithDefaults() {
185
198
  const { mergedConfig, changed } = applyConfigMerges(config, [
186
199
  mergeDefaultAuth,
187
200
  mergeDefaultConfig,
188
- mergeDefaultFreeModelLoadBalancing,
201
+ mergeDefaultAccountAffinity,
189
202
  mergeDefaultModelRefreshInterval
190
203
  ]);
191
204
  if (changed) try {
@@ -304,8 +317,8 @@ function getSmallModel() {
304
317
  if (isOriginalModelNameAllowedForTarget(model)) return model;
305
318
  return getPreferredAliasForTarget(model) ?? model;
306
319
  }
307
- function isFreeModelLoadBalancingEnabled() {
308
- return getConfig().freeModelLoadBalancing ?? true;
320
+ function isAccountAffinityEnabled() {
321
+ return getConfig().accountAffinity ?? true;
309
322
  }
310
323
  function getModelRefreshIntervalHours() {
311
324
  const config = getConfig();
@@ -372,6 +385,97 @@ function isMessagesApiEnabled() {
372
385
  function getAnthropicApiKey() {
373
386
  return getConfig().anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? void 0;
374
387
  }
388
/**
 * Read the `useResponsesApiWebSearch` flag from the loaded config.
 *
 * @returns {boolean} The configured value, or `true` when the key is
 *   missing (null or undefined).
 */
function isResponsesApiWebSearchEnabled() {
	const { useResponsesApiWebSearch: configured } = getConfig();
	// Only a nullish value falls back to the default; an explicit `false` is honoured.
	return configured === null || configured === void 0 ? true : configured;
}
391
+
392
+ //#endregion
393
+ //#region src/lib/account-affinity.ts
394
const DEFAULT_MAX_ENTRIES = 1e4;
const DEFAULT_TTL_MS = 3600 * 1e3;
/**
 * In-memory LRU cache with TTL for account affinity mappings.
 *
 * Uses Map insertion order for LRU eviction: entries that are read via
 * `get()` or written via `set()` are deleted and re-inserted so they move to
 * the "newest" end, and eviction always removes from the "oldest" end.
 *
 * Reading an entry only changes its eviction position; the expiry time is
 * refreshed exclusively by `set()`.
 */
var AccountAffinityCache = class {
	cache = /* @__PURE__ */ new Map();
	maxEntries;
	ttlMs;
	/**
	 * @param {number} [maxEntries] Maximum number of entries kept before the oldest is evicted.
	 * @param {number} [ttlMs] Lifetime of an entry in milliseconds, counted from the last `set()`.
	 */
	constructor(maxEntries = DEFAULT_MAX_ENTRIES, ttlMs = DEFAULT_TTL_MS) {
		this.maxEntries = maxEntries;
		this.ttlMs = ttlMs;
	}
	/**
	 * Look up the preferred account ID for a cache key.
	 *
	 * @param {string} key Cache key.
	 * @returns {string | undefined} The stored account ID, or undefined if the
	 *   key is unknown or its entry has expired (expired entries are removed).
	 */
	get(key) {
		const entry = this.cache.get(key);
		if (!entry) return;
		if (Date.now() >= entry.expiresAt) {
			this.cache.delete(key);
			return;
		}
		// Promote the entry to the newest position so that recently read keys
		// are not the first ones evicted. The same entry object is re-inserted,
		// so `expiresAt` is left untouched.
		this.cache.delete(key);
		this.cache.set(key, entry);
		return entry.accountId;
	}
	/**
	 * Record a successful account mapping. Refreshes the TTL and moves the
	 * entry to the newest position, evicting the oldest entries if the cache
	 * is full.
	 *
	 * @param {string} key Cache key.
	 * @param {string} accountId Account ID to associate with the key.
	 */
	set(key, accountId) {
		// Delete first so an update of an existing key does not count against
		// the capacity check below and ends up at the newest position.
		this.cache.delete(key);
		while (this.cache.size >= this.maxEntries) {
			const oldest = this.cache.keys().next();
			// An empty map with maxEntries <= 0 would otherwise loop forever.
			if (oldest.done) break;
			this.cache.delete(oldest.value);
		}
		this.cache.set(key, {
			accountId,
			expiresAt: Date.now() + this.ttlMs
		});
	}
	/**
	 * Remove a specific entry.
	 *
	 * @param {string} key Cache key.
	 * @returns {boolean} True if an entry existed and was removed.
	 */
	delete(key) {
		return this.cache.delete(key);
	}
	/** Remove all entries. */
	clear() {
		this.cache.clear();
	}
	/** Current number of entries (including potentially expired ones). */
	get size() {
		return this.cache.size;
	}
};
446
/**
 * Extract the best available affinity key from the request context.
 * Priority: promptCacheKey > sessionId > safetyIdentifier.
 *
 * Each candidate is trimmed; empty or whitespace-only values are skipped.
 * Values that are not strings (for example a numeric identifier coming from
 * a request payload) are skipped as well instead of raising a TypeError.
 *
 * @param {{ promptCacheKey?: unknown, sessionId?: unknown, safetyIdentifier?: unknown }} context
 *   Request-derived identifiers.
 * @returns {string | undefined} The first usable trimmed identifier, or
 *   undefined when none is available.
 */
function extractAffinityKey(context) {
	for (const candidate of [
		context.promptCacheKey,
		context.sessionId,
		context.safetyIdentifier
	]) {
		if (typeof candidate !== "string") continue;
		const normalized = candidate.trim();
		if (normalized) return normalized;
	}
}
460
/**
 * Compose the cache key used for affinity lookups.
 *
 * The model ID is appended to the affinity key (separated by ":") so that
 * the same session asking for different models gets independent entries
 * and can be routed to different accounts.
 *
 * @param {string} affinityKey Identifier of the session/request group.
 * @param {string} modelId Requested model ID.
 * @returns {string} `<affinityKey>:<modelId>`
 */
function buildAffinityCacheKey(affinityKey, modelId) {
	return "".concat(affinityKey, ":", modelId);
}
468
/**
 * Resolve an affinity account ID against the provided runtime list.
 *
 * @param {string} accountId ID of the account remembered for this affinity key.
 * @param {Array<{ id: string, failed?: boolean }>} accounts Accounts to search.
 * @returns The matching account object when it is present in `accounts` and
 *   its `failed` flag is not set; otherwise undefined.
 */
function isAffinityAccountUsable(accountId, accounts) {
	const match = accounts.find((entry) => entry.id === accountId);
	return match && !match.failed ? match : void 0;
}
375
479
 
376
480
  //#endregion
377
481
  //#region src/lib/accounts-manager-auth.ts
@@ -486,8 +590,9 @@ var AccountsManager = class {
486
590
  accountOrder = [];
487
591
  temporaryAccount;
488
592
  vsCodeVersion;
489
- freeModelCursor = 0;
490
- freeModelLoadBalancingEnabled = true;
593
+ accountAffinityEnabled = true;
594
+ affinityCache = new AccountAffinityCache();
595
+ loadBalanceCursor = 0;
491
596
  quotaRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
492
597
  modelsRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
493
598
  tokenRefreshEnabledAccounts = /* @__PURE__ */ new WeakSet();
@@ -529,8 +634,9 @@ var AccountsManager = class {
529
634
  consola.info(`Loaded ${this.accounts.size} account(s)`);
530
635
  this.startRegistryWatcher();
531
636
  }
532
- setFreeModelLoadBalancingEnabled(enabled) {
533
- this.freeModelLoadBalancingEnabled = enabled;
637
+ setAccountAffinityEnabled(enabled) {
638
+ this.accountAffinityEnabled = enabled;
639
+ if (!enabled) this.affinityCache.clear();
534
640
  }
535
641
  setModelsRefreshIntervalMs(intervalMs) {
536
642
  this.modelsRefreshIntervalMs = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : 0;
@@ -770,38 +876,6 @@ var AccountsManager = class {
770
876
  }
771
877
  return null;
772
878
  }
773
- selectFreeAccountForRequest(orderedAccounts, candidates) {
774
- const count = orderedAccounts.length;
775
- const start = this.freeModelCursor % count;
776
- let supportedCandidateFound = false;
777
- for (let i = 0; i < count; i++) {
778
- const idx = (start + i) % count;
779
- const account = orderedAccounts[idx];
780
- if (this.isAccountFailed(account)) continue;
781
- const supported = this.pickSupportedCandidate(account, candidates);
782
- if (!supported) continue;
783
- supportedCandidateFound = true;
784
- const { candidate, model } = supported;
785
- const costUnits = getCostUnits(model);
786
- if (costUnits > 0) continue;
787
- this.freeModelCursor = (idx + 1) % count;
788
- return {
789
- ok: true,
790
- account,
791
- selectedModel: model,
792
- endpoint: candidate.endpoint,
793
- costUnits
794
- };
795
- }
796
- if (!supportedCandidateFound) return {
797
- ok: false,
798
- reason: "MODEL_NOT_SUPPORTED"
799
- };
800
- return {
801
- ok: false,
802
- reason: "NO_QUOTA"
803
- };
804
- }
805
879
  async selectAccountForCandidates(orderedAccounts, candidates) {
806
880
  if (orderedAccounts.length === 0) return {
807
881
  ok: false,
@@ -816,16 +890,13 @@ var AccountsManager = class {
816
890
  supportedCandidateFound = true;
817
891
  const { candidate, model } = supported;
818
892
  const costUnits = getCostUnits(model);
819
- if (costUnits <= 0) {
820
- if (this.freeModelLoadBalancingEnabled) return this.selectFreeAccountForRequest(orderedAccounts, candidates);
821
- return {
822
- ok: true,
823
- account,
824
- selectedModel: model,
825
- endpoint: candidate.endpoint,
826
- costUnits
827
- };
828
- }
893
+ if (costUnits <= 0) return {
894
+ ok: true,
895
+ account,
896
+ selectedModel: model,
897
+ endpoint: candidate.endpoint,
898
+ costUnits
899
+ };
829
900
  if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
830
901
  if (this.isAccountFailed(account)) continue;
831
902
  if (account.unlimited) return {
@@ -865,12 +936,120 @@ var AccountsManager = class {
865
936
  };
866
937
  }
867
938
  /**
939
+ * Try to use a preferred (affinity) account for the request.
940
+ * Returns a successful selection if the account is usable; null otherwise.
941
+ */
942
+ async tryAffinityAccount(preferredAccountId, orderedAccounts, candidates) {
943
+ const account = isAffinityAccountUsable(preferredAccountId, orderedAccounts);
944
+ if (!account) return null;
945
+ const supported = this.pickSupportedCandidate(account, candidates) ?? this.pickAliasFallbackCandidate(account, candidates);
946
+ if (!supported) return null;
947
+ return this.validateAffinityQuota(account, supported);
948
+ }
949
+ /**
950
+ * Resolve model aliases and try to pick a supported candidate.
951
+ * Returns null if no alias differs or the account doesn't support the alias.
952
+ */
953
+ pickAliasFallbackCandidate(account, candidates) {
954
+ const aliasCandidates = candidates.map((candidate) => {
955
+ const modelId = resolveModelAlias(candidate.modelId);
956
+ if (modelId === candidate.modelId) return candidate;
957
+ return {
958
+ ...candidate,
959
+ modelId
960
+ };
961
+ });
962
+ if (!aliasCandidates.some((candidate, index) => candidate.modelId !== candidates[index].modelId)) return null;
963
+ return this.pickSupportedCandidate(account, aliasCandidates);
964
+ }
965
+ /**
966
+ * Validate quota for an affinity candidate. Free models pass immediately;
967
+ * premium models go through quota refresh / reservation.
968
+ */
969
+ async validateAffinityQuota(account, supported) {
970
+ const { candidate, model } = supported;
971
+ const costUnits = getCostUnits(model);
972
+ if (costUnits <= 0) return {
973
+ ok: true,
974
+ account,
975
+ selectedModel: model,
976
+ endpoint: candidate.endpoint,
977
+ costUnits
978
+ };
979
+ if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
980
+ if (this.isAccountFailed(account)) return null;
981
+ if (account.unlimited) return {
982
+ ok: true,
983
+ account,
984
+ selectedModel: model,
985
+ endpoint: candidate.endpoint,
986
+ costUnits
987
+ };
988
+ const effectiveRemaining = getEffectivePremiumRemaining(account);
989
+ if (effectiveRemaining !== void 0 && effectiveRemaining < costUnits) return null;
990
+ const reservation = reservePremiumUnits(account, costUnits);
991
+ return {
992
+ ok: true,
993
+ account,
994
+ selectedModel: model,
995
+ endpoint: candidate.endpoint,
996
+ costUnits,
997
+ reservation
998
+ };
999
+ }
1000
+ /**
868
1001
  * Select an available account for a specific request (model + endpoint).
1002
+ * When account affinity is enabled, routes to the previously successful account
1003
+ * for the same affinity key + model combination.
869
1004
  * Uses reservation to avoid oversubscribing premium quota under concurrency.
870
1005
  */
871
- async selectAccountForRequest(candidates) {
1006
+ async selectAccountForRequest(candidates, affinityContext) {
872
1007
  if (candidates.length === 0) throw new Error("selectAccountForRequest requires at least one candidate");
873
1008
  const orderedAccounts = [...this.temporaryAccount ? [this.temporaryAccount] : [], ...this.accountOrder.map((id) => this.accounts.get(id)).filter((account) => account !== void 0)];
1009
+ const affinityKey = this.accountAffinityEnabled && affinityContext ? extractAffinityKey(affinityContext) : void 0;
1010
+ const modelKey = candidates[0].modelId;
1011
+ const cacheKey = affinityKey ? buildAffinityCacheKey(affinityKey, modelKey) : void 0;
1012
+ if (cacheKey) {
1013
+ const preferredId = this.affinityCache.get(cacheKey);
1014
+ if (preferredId) {
1015
+ const affinityResult = await this.tryAffinityAccount(preferredId, orderedAccounts, candidates);
1016
+ if (affinityResult) {
1017
+ affinityResult.affinityHit = true;
1018
+ affinityResult.affinityCacheKey = cacheKey;
1019
+ affinityResult.confirmAffinity = () => {
1020
+ if (!this.accountAffinityEnabled) return;
1021
+ this.affinityCache.set(cacheKey, affinityResult.account.id);
1022
+ };
1023
+ return affinityResult;
1024
+ }
1025
+ }
1026
+ }
1027
+ const accountsForSelection = this.accountAffinityEnabled && orderedAccounts.length > 1 ? this.rotateAccounts(orderedAccounts) : orderedAccounts;
1028
+ const result = await this.selectWithAliasFallback(accountsForSelection, candidates);
1029
+ if (result.ok) this.loadBalanceCursor++;
1030
+ if (result.ok && cacheKey) {
1031
+ const successResult = result;
1032
+ successResult.confirmAffinity = () => {
1033
+ if (!this.accountAffinityEnabled) return;
1034
+ this.affinityCache.set(cacheKey, successResult.account.id);
1035
+ };
1036
+ }
1037
+ return result;
1038
+ }
1039
+ /**
1040
+ * Rotate the accounts array by the current load-balance cursor for round-robin distribution.
1041
+ * This ensures cache-miss requests are spread across accounts instead of always hitting the first.
1042
+ */
1043
+ rotateAccounts(accounts) {
1044
+ const start = this.loadBalanceCursor % accounts.length;
1045
+ if (start === 0) return accounts;
1046
+ return [...accounts.slice(start), ...accounts.slice(0, start)];
1047
+ }
1048
+ /**
1049
+ * Normal account selection with alias fallback.
1050
+ * Extracted to keep selectAccountForRequest readable after adding affinity logic.
1051
+ */
1052
+ async selectWithAliasFallback(orderedAccounts, candidates) {
874
1053
  const primary = await this.selectAccountForCandidates(orderedAccounts, candidates);
875
1054
  if (primary.ok || primary.reason !== "MODEL_NOT_SUPPORTED") return primary;
876
1055
  const aliasCandidates = candidates.map((candidate) => {
@@ -1064,7 +1243,7 @@ var AccountsManager = class {
1064
1243
  for (const meta of newMetas) if (!currentIds.has(meta.id)) await this.addNewAccount(meta, added);
1065
1244
  await this.reinitializeUpdatedAccounts(newMetas, currentIds, updated);
1066
1245
  this.accountOrder = newMetas.map((m) => m.id).filter((id) => this.accounts.has(id));
1067
- this.freeModelCursor = 0;
1246
+ this.loadBalanceCursor = 0;
1068
1247
  this.logRegistryReloadChanges(added, removed, updated);
1069
1248
  } catch (error) {
1070
1249
  consola.error("Failed to reload registry:", error);
@@ -1169,6 +1348,8 @@ var AccountsManager = class {
1169
1348
  this.stopRegistryWatcher();
1170
1349
  this.stopAllTokenRefresh();
1171
1350
  this.stopModelsRefresh();
1351
+ this.affinityCache.clear();
1352
+ this.loadBalanceCursor = 0;
1172
1353
  this.accounts.clear();
1173
1354
  this.accountOrder = [];
1174
1355
  this.temporaryAccount = void 0;
@@ -1178,5 +1359,5 @@ var AccountsManager = class {
1178
1359
  const accountsManager = new AccountsManager();
1179
1360
 
1180
1361
  //#endregion
1181
- export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isForceAgentEnabled, isFreeModelLoadBalancingEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, mergeConfigWithDefaults, shouldCompactUseSmallModel };
1182
- //# sourceMappingURL=accounts-manager-BsGuQhKM.js.map
1362
+ export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isAccountAffinityEnabled, isForceAgentEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, isResponsesApiWebSearchEnabled, mergeConfigWithDefaults, shouldCompactUseSmallModel };
1363
+ //# sourceMappingURL=accounts-manager-B8Y2Y4ab.js.map