@nick3/copilot-api 1.4.5 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -6
- package/dist/{accounts-manager-BsGuQhKM.js → accounts-manager-B8Y2Y4ab.js} +241 -60
- package/dist/accounts-manager-B8Y2Y4ab.js.map +1 -0
- package/dist/admin/assets/index-BFvCJZIK.js +57 -0
- package/dist/admin/assets/index-CsAeel_7.css +1 -0
- package/dist/admin/index.html +2 -2
- package/dist/{auth-BAEHgP-a.js → auth-Bt0FGr8C.js} +3 -3
- package/dist/{auth-BAEHgP-a.js.map → auth-Bt0FGr8C.js.map} +1 -1
- package/dist/{check-usage-Dbthad7V.js → check-usage-By-sJOch.js} +4 -4
- package/dist/{check-usage-Dbthad7V.js.map → check-usage-By-sJOch.js.map} +1 -1
- package/dist/{get-copilot-token-BySQCue6.js → get-copilot-token-Q4FXtOE9.js} +2 -2
- package/dist/{get-copilot-token-BySQCue6.js.map → get-copilot-token-Q4FXtOE9.js.map} +1 -1
- package/dist/main.js +3 -3
- package/dist/{poll-access-token-CKc0_m42.js → poll-access-token-Cp_DfHGH.js} +2 -2
- package/dist/{poll-access-token-CKc0_m42.js.map → poll-access-token-Cp_DfHGH.js.map} +1 -1
- package/dist/{server-D_7gI9hx.js → server-Cln-GCLU.js} +136 -75
- package/dist/server-Cln-GCLU.js.map +1 -0
- package/dist/{start-DItwCYda.js → start-DH-QnZxL.js} +7 -7
- package/dist/start-DH-QnZxL.js.map +1 -0
- package/dist/{utils-BIK3ym34.js → utils-SV0FyqXk.js} +2 -2
- package/dist/utils-SV0FyqXk.js.map +1 -0
- package/package.json +1 -1
- package/dist/accounts-manager-BsGuQhKM.js.map +0 -1
- package/dist/admin/assets/index-Y2SvOXge.js +0 -57
- package/dist/admin/assets/index-geiCIixE.css +0 -1
- package/dist/server-D_7gI9hx.js.map +0 -1
- package/dist/start-DItwCYda.js.map +0 -1
- package/dist/utils-BIK3ym34.js.map +0 -1
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ English | [中文](./README_CN.md)
|
|
|
32
32
|
> [!IMPORTANT]
|
|
33
33
|
> **Before using, please be aware of the following:**
|
|
34
34
|
>
|
|
35
|
-
> 1. **Claude Code
|
|
35
|
+
> 1. **Claude Code configuration:** When using with Claude Code, please configure the model ID as `claude-opus-4-6` or `claude-opus-4.6` (without the `[1m]` suffix; exceeding GitHub Copilot's context window limit by too much may lead to your account being banned). For an example Claude Code `settings.json`, see [Manual Configuration with `settings.json`](#manual-configuration-with-settingsjson).
|
|
36
36
|
>
|
|
37
37
|
> 2. **Recommend for Opencode:** When using with opencode, we recommend starting with the opencode OAuth app. This approach behaves identically to opencode's built-in GitHub Copilot provider with no Terms of Service risk:
|
|
38
38
|
> ```sh
|
|
@@ -348,7 +348,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
|
|
|
348
348
|
"gpt-5.4": "<built-in commentary prompt>"
|
|
349
349
|
},
|
|
350
350
|
"smallModel": "gpt-5-mini",
|
|
351
|
-
"
|
|
351
|
+
"accountAffinity": true,
|
|
352
352
|
"responsesApiContextManagementModels": [],
|
|
353
353
|
"modelReasoningEfforts": {
|
|
354
354
|
"gpt-5-mini": "low",
|
|
@@ -361,7 +361,8 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
|
|
|
361
361
|
"compactUseSmallModel": true,
|
|
362
362
|
"messageStartInputTokensFallback": false,
|
|
363
363
|
"modelRefreshIntervalHours": 24,
|
|
364
|
-
"useMessagesApi": true
|
|
364
|
+
"useMessagesApi": true,
|
|
365
|
+
"useResponsesApiWebSearch": true
|
|
365
366
|
}
|
|
366
367
|
```
|
|
367
368
|
- **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
|
|
@@ -377,7 +378,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
|
|
|
377
378
|
- `topK` (optional): Default top_k value used when the request does not specify one.
|
|
378
379
|
- **responsesApiContextManagementModels:** List of model IDs that should receive Responses API `context_management` compaction instructions. Use this when a model supports server-side context management and you want the proxy to keep only the latest compaction carrier on follow-up turns.
|
|
379
380
|
- **smallModel:** Fallback model used for tool-less warmup messages, compact/background requests, and other short housekeeping turns (for example from Claude Code or OpenCode) to avoid spending premium requests; defaults to `gpt-5-mini`. If original names are blocked and this points to an aliased target, it resolves to the preferred alias.
|
|
380
|
-
- **
|
|
381
|
+
- **accountAffinity:** Enable sticky account routing based on session identity. When enabled, requests from the same session for the same model are routed to the account that last handled them successfully. Applies to both free and premium models. Defaults to `true`. Set to `false` to use sequential routing for all models.
|
|
381
382
|
- **apiKey (deprecated):** Legacy single-key field kept for migration compatibility. Prefer `auth.apiKeys`. When `auth.apiKeys` is empty, the server falls back to `COPILOT_API_KEY` and then `apiKey`.
|
|
382
383
|
- **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
|
|
383
384
|
- **modelAliases:** Map of `alias -> { target, allowOriginal? }` (legacy string values are still accepted). Alias keys are normalized (trim + lowercase) and must be non-empty; aliases cannot map to themselves (case-insensitive), and conflicting normalized aliases are rejected. `allowOriginal` overrides the global default per alias. If multiple aliases map to the same target, original names are allowed when any alias sets `allowOriginal: true` (allow-wins). Admin UI/API rejects blocked keys (`__proto__`, `constructor`, `prototype`). Aliases can be used in downstream requests.
|
|
@@ -388,6 +389,7 @@ The `<target>` can be either the account ID (GitHub username) or a 1-based index
|
|
|
388
389
|
- **messageStartInputTokensFallback:** When `true`, the Anthropic streaming translation layer estimates `message_start.input_tokens` when upstream stream events do not provide it. Defaults to `false`.
|
|
389
390
|
- **modelRefreshIntervalHours:** Interval for refreshing account model lists in the background. Set to `0` to disable refresh. Defaults to `24`.
|
|
390
391
|
- **useMessagesApi:** When `true` (default), Claude-family models that support Copilot's native `/v1/messages` endpoint may use the Messages API path. Set to `false` to skip the Messages API candidate and fall back to `/responses` (if supported) or `/chat/completions`.
|
|
392
|
+
- **useResponsesApiWebSearch:** When `true` (default), `/v1/responses` keeps tools with `type: "web_search"` and forwards them upstream. Set to `false` to strip them before the Copilot request is sent.
|
|
391
393
|
- **anthropicApiKey:** Optional Anthropic API key used for accurate Claude token counting (see [Accurate Claude Token Counting](#accurate-claude-token-counting) below). Can also be set via the `ANTHROPIC_API_KEY` environment variable. If not set, token counting falls back to GPT tokenizer estimation.
|
|
392
394
|
|
|
393
395
|
Edit this file to customize prompts or swap in your own fast model. If you edit it manually, restart the server (or call `GET /api/admin/config`) so the cached config is refreshed. Changes made through the Admin UI/API are validated, written to disk, and applied immediately; unknown keys are rejected.
|
|
@@ -607,7 +609,7 @@ OpenCode already has a direct GitHub Copilot provider. Use this section when you
|
|
|
607
609
|
Start the proxy with the OpenCode OAuth app:
|
|
608
610
|
|
|
609
611
|
```sh
|
|
610
|
-
|
|
612
|
+
npx @nick3/copilot-api@latest --oauth-app=opencode start
|
|
611
613
|
```
|
|
612
614
|
|
|
613
615
|
Then point OpenCode at the proxy with `@ai-sdk/anthropic`.
|
|
@@ -803,6 +805,12 @@ Here is an example `.claude/settings.json` file:
|
|
|
803
805
|
}
|
|
804
806
|
```
|
|
805
807
|
|
|
808
|
+
- Replace `ANTHROPIC_MODEL`, `ANTHROPIC_DEFAULT_OPUS_MODEL`, `ANTHROPIC_DEFAULT_SONNET_MODEL`, and `ANTHROPIC_DEFAULT_HAIKU_MODEL` according to your needs. It is recommended to use gpt-5-mini for ANTHROPIC_DEFAULT_HAIKU_MODEL, as gpt-5-mini does not consume quota. ANTHROPIC_DEFAULT_HAIKU_MODEL is typically used for title generation, explore agents, etc.
|
|
809
|
+
- Setting `CLAUDE_CODE_ATTRIBUTION_HEADER` to `0` can prevent Claude Code from adding billing and version information to system prompts, thereby avoiding prompt cache invalidation.
|
|
810
|
+
- Turning off CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION can prevent quota from being consumed unnecessarily.
|
|
811
|
+
- If you want to disable Claude Code WebSearch, deny `WebSearch` in permissions or set `useResponsesApiWebSearch` to `false` in `config.json`. When enabled, `/v1/responses` can forward `web_search` tools upstream, but actual support still depends on the selected model and Copilot behavior.
|
|
812
|
+
- Please do not enable `ENABLE_TOOL_SEARCH`. The current Claude Code uses the client tool search mode, in which loading deferred tools requires an additional request each time and cache hit rates are affected, so it does not necessarily save tokens; only server tool search mode can save tokens. This project also has compatibility issues with client tool search mode, which can cause errors when it is used.
|
|
813
|
+
|
|
806
814
|
### CLAUDE.md or AGENTS.md Recommended Content
|
|
807
815
|
|
|
808
816
|
To add these reminders manually, include the following in `CLAUDE.md` for Claude Code, or `AGENTS.md` for opencode/codex:
|
|
@@ -895,5 +903,5 @@ bun run start
|
|
|
895
903
|
- If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
|
|
896
904
|
- **Multi-account request routing**: Add multiple GitHub Copilot accounts using `auth add`.
|
|
897
905
|
- **Premium models**: Accounts are tried in the order they were added. When an account's premium request quota (`remaining=0`) is exhausted (or insufficient for the selected model), the proxy automatically switches to the next eligible account.
|
|
898
|
-
- **Free models**:
|
|
906
|
+
- **Free models**: When `accountAffinity=true`, requests with the same affinity key and model stick to the account that last handled them successfully. Affinity misses fall back to the first available eligible account. Set `accountAffinity=false` in `config.json` to disable affinity and route all requests sequentially.
|
|
899
907
|
- **Model classification**: Based on Copilot model metadata (`billing.is_premium` / `billing.multiplier`). Missing billing info or `billing.is_premium !== true` is treated as free.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { PATHS } from "./paths-DoT4SZ8f.js";
|
|
2
2
|
import { addAccountToRegistry, hasLegacyToken, hasRegistry, listAccountsFromRegistry, loadAccountToken, readLegacyToken, saveAccountToken } from "./accounts-registry-c7rs5Ed9.js";
|
|
3
|
-
import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-
|
|
4
|
-
import { getCopilotToken } from "./get-copilot-token-
|
|
3
|
+
import { HTTPError, getCopilotUsage, getGitHubUser, getModels } from "./utils-SV0FyqXk.js";
|
|
4
|
+
import { getCopilotToken } from "./get-copilot-token-Q4FXtOE9.js";
|
|
5
5
|
import consola from "consola";
|
|
6
6
|
import fs from "node:fs";
|
|
7
7
|
|
|
@@ -42,7 +42,7 @@ const defaultConfig = {
|
|
|
42
42
|
"gpt-5.4": gpt5CommentaryPrompt
|
|
43
43
|
},
|
|
44
44
|
smallModel: "gpt-5-mini",
|
|
45
|
-
|
|
45
|
+
accountAffinity: true,
|
|
46
46
|
responsesApiContextManagementModels: [],
|
|
47
47
|
modelReasoningEfforts: {
|
|
48
48
|
"gpt-5-mini": "low",
|
|
@@ -55,7 +55,8 @@ const defaultConfig = {
|
|
|
55
55
|
compactUseSmallModel: true,
|
|
56
56
|
messageStartInputTokensFallback: false,
|
|
57
57
|
modelRefreshIntervalHours: 24,
|
|
58
|
-
useMessagesApi: true
|
|
58
|
+
useMessagesApi: true,
|
|
59
|
+
useResponsesApiWebSearch: true
|
|
59
60
|
};
|
|
60
61
|
let cachedConfig = null;
|
|
61
62
|
function isPlainObject(value) {
|
|
@@ -142,15 +143,27 @@ function mergeDefaultAuth(config) {
|
|
|
142
143
|
changed: true
|
|
143
144
|
};
|
|
144
145
|
}
|
|
145
|
-
function
|
|
146
|
-
|
|
146
|
+
function mergeDefaultAccountAffinity(config) {
|
|
147
|
+
const raw = config;
|
|
148
|
+
const hasOld = typeof raw.freeModelLoadBalancing === "boolean";
|
|
149
|
+
const hasNew = typeof config.accountAffinity === "boolean";
|
|
150
|
+
if (hasOld) {
|
|
151
|
+
const next = { ...config };
|
|
152
|
+
if (!hasNew) next.accountAffinity = raw.freeModelLoadBalancing;
|
|
153
|
+
delete next.freeModelLoadBalancing;
|
|
154
|
+
return {
|
|
155
|
+
mergedConfig: next,
|
|
156
|
+
changed: true
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
if (hasNew) return {
|
|
147
160
|
mergedConfig: config,
|
|
148
161
|
changed: false
|
|
149
162
|
};
|
|
150
163
|
return {
|
|
151
164
|
mergedConfig: {
|
|
152
165
|
...config,
|
|
153
|
-
|
|
166
|
+
accountAffinity: defaultConfig.accountAffinity ?? true
|
|
154
167
|
},
|
|
155
168
|
changed: true
|
|
156
169
|
};
|
|
@@ -185,7 +198,7 @@ function mergeConfigWithDefaults() {
|
|
|
185
198
|
const { mergedConfig, changed } = applyConfigMerges(config, [
|
|
186
199
|
mergeDefaultAuth,
|
|
187
200
|
mergeDefaultConfig,
|
|
188
|
-
|
|
201
|
+
mergeDefaultAccountAffinity,
|
|
189
202
|
mergeDefaultModelRefreshInterval
|
|
190
203
|
]);
|
|
191
204
|
if (changed) try {
|
|
@@ -304,8 +317,8 @@ function getSmallModel() {
|
|
|
304
317
|
if (isOriginalModelNameAllowedForTarget(model)) return model;
|
|
305
318
|
return getPreferredAliasForTarget(model) ?? model;
|
|
306
319
|
}
|
|
307
|
-
function
|
|
308
|
-
return getConfig().
|
|
320
|
+
/**
 * Report whether account affinity is turned on in the current config.
 * A missing (null/undefined) `accountAffinity` value is treated as enabled.
 */
function isAccountAffinityEnabled() {
	const { accountAffinity } = getConfig();
	return accountAffinity ?? true;
}
|
|
310
323
|
function getModelRefreshIntervalHours() {
|
|
311
324
|
const config = getConfig();
|
|
@@ -372,6 +385,97 @@ function isMessagesApiEnabled() {
|
|
|
372
385
|
/**
 * Resolve the Anthropic API key.
 * The config value wins; otherwise the `ANTHROPIC_API_KEY` environment
 * variable is used; otherwise the result is undefined.
 */
function getAnthropicApiKey() {
	const fromConfig = getConfig().anthropicApiKey;
	if (fromConfig !== null && fromConfig !== void 0) return fromConfig;
	const fromEnv = process.env.ANTHROPIC_API_KEY;
	return fromEnv ?? void 0;
}
|
|
388
|
+
/**
 * Report whether the `useResponsesApiWebSearch` config flag is on.
 * A missing (null/undefined) value is treated as enabled.
 */
function isResponsesApiWebSearchEnabled() {
	const { useResponsesApiWebSearch } = getConfig();
	return useResponsesApiWebSearch ?? true;
}
|
|
391
|
+
|
|
392
|
+
//#endregion
|
|
393
|
+
//#region src/lib/account-affinity.ts
|
|
394
|
+
const DEFAULT_MAX_ENTRIES = 1e4;
const DEFAULT_TTL_MS = 3600 * 1e3;
/**
 * In-memory LRU cache with TTL for account affinity mappings.
 *
 * Uses Map insertion order for LRU eviction: accessed/updated entries are
 * deleted and re-inserted so they move to the "newest" end, and `set` evicts
 * from the "oldest" end once `maxEntries` is reached.
 */
var AccountAffinityCache = class {
	cache = /* @__PURE__ */ new Map();
	maxEntries;
	ttlMs;
	/**
	 * @param {number} [maxEntries] Maximum number of entries kept before the oldest is evicted.
	 * @param {number} [ttlMs] Lifetime of an entry in milliseconds, counted from its last `set`.
	 */
	constructor(maxEntries = DEFAULT_MAX_ENTRIES, ttlMs = DEFAULT_TTL_MS) {
		this.maxEntries = maxEntries;
		this.ttlMs = ttlMs;
	}
	/**
	 * Look up the preferred account ID for a cache key.
	 * Returns undefined if not found or expired (an expired entry is removed).
	 * A live hit is moved to the newest position; its expiry time is NOT extended.
	 */
	get(key) {
		const entry = this.cache.get(key);
		if (!entry) return;
		if (Date.now() >= entry.expiresAt) {
			this.cache.delete(key);
			return;
		}
		// Re-insert so a recently read key is not the first one evicted by set().
		this.cache.delete(key);
		this.cache.set(key, entry);
		return entry.accountId;
	}
	/** Record a successful account mapping. Refreshes TTL and moves the entry to the newest position. */
	set(key, accountId) {
		// Drop any existing entry first so the key neither counts toward the limit nor keeps its old position.
		this.cache.delete(key);
		while (this.cache.size >= this.maxEntries) {
			const oldest = this.cache.keys().next();
			// An empty map can still satisfy the loop condition when maxEntries <= 0.
			if (oldest.done) break;
			this.cache.delete(oldest.value);
		}
		this.cache.set(key, {
			accountId,
			expiresAt: Date.now() + this.ttlMs
		});
	}
	/** Remove a specific entry. Returns true if an entry existed. */
	delete(key) {
		return this.cache.delete(key);
	}
	/** Remove all entries. */
	clear() {
		this.cache.clear();
	}
	/** Current number of entries (including potentially expired ones). */
	get size() {
		return this.cache.size;
	}
};
|
|
446
|
+
/**
 * Extract the best available affinity key from the request context.
 * Priority: promptCacheKey > sessionId > safetyIdentifier.
 *
 * Each candidate is trimmed; the first one that is a non-empty string after
 * trimming is returned. Non-string values are skipped instead of throwing,
 * so a malformed field (e.g. a numeric key) falls through to the next
 * candidate.
 *
 * @param {{ promptCacheKey?: unknown, sessionId?: unknown, safetyIdentifier?: unknown }} context
 * @returns {string | undefined} The trimmed key, or undefined when no candidate qualifies.
 */
function extractAffinityKey(context) {
	for (const candidate of [
		context.promptCacheKey,
		context.sessionId,
		context.safetyIdentifier
	]) {
		if (typeof candidate !== "string") continue;
		const normalized = candidate.trim();
		if (normalized) return normalized;
	}
}
|
|
460
|
+
/**
 * Compose the affinity cache key as `<affinityKey>:<modelId>`.
 * Including the model ID keeps mappings for different models separate, so the
 * same session can map to a different account per model.
 */
function buildAffinityCacheKey(affinityKey, modelId) {
	const separator = ":";
	return `${affinityKey}${separator}${modelId}`;
}
|
|
468
|
+
/**
 * Resolve an affinity account ID against the provided runtime list.
 * Returns the first account whose `id` matches, unless that account has a
 * truthy `failed` flag; returns undefined when there is no match or the
 * match is failed.
 */
function isAffinityAccountUsable(accountId, accounts) {
	for (const candidate of accounts) {
		if (candidate.id !== accountId) continue;
		// Only the first match is considered, mirroring a "find first" lookup.
		return candidate.failed ? void 0 : candidate;
	}
	return void 0;
}
|
|
375
479
|
|
|
376
480
|
//#endregion
|
|
377
481
|
//#region src/lib/accounts-manager-auth.ts
|
|
@@ -486,8 +590,9 @@ var AccountsManager = class {
|
|
|
486
590
|
accountOrder = [];
|
|
487
591
|
temporaryAccount;
|
|
488
592
|
vsCodeVersion;
|
|
489
|
-
|
|
490
|
-
|
|
593
|
+
accountAffinityEnabled = true;
|
|
594
|
+
affinityCache = new AccountAffinityCache();
|
|
595
|
+
loadBalanceCursor = 0;
|
|
491
596
|
quotaRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
|
|
492
597
|
modelsRefreshSnapshotByAccount = /* @__PURE__ */ new WeakMap();
|
|
493
598
|
tokenRefreshEnabledAccounts = /* @__PURE__ */ new WeakSet();
|
|
@@ -529,8 +634,9 @@ var AccountsManager = class {
|
|
|
529
634
|
consola.info(`Loaded ${this.accounts.size} account(s)`);
|
|
530
635
|
this.startRegistryWatcher();
|
|
531
636
|
}
|
|
532
|
-
|
|
533
|
-
this.
|
|
637
|
+
setAccountAffinityEnabled(enabled) {
|
|
638
|
+
this.accountAffinityEnabled = enabled;
|
|
639
|
+
if (!enabled) this.affinityCache.clear();
|
|
534
640
|
}
|
|
535
641
|
setModelsRefreshIntervalMs(intervalMs) {
|
|
536
642
|
this.modelsRefreshIntervalMs = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : 0;
|
|
@@ -770,38 +876,6 @@ var AccountsManager = class {
|
|
|
770
876
|
}
|
|
771
877
|
return null;
|
|
772
878
|
}
|
|
773
|
-
selectFreeAccountForRequest(orderedAccounts, candidates) {
|
|
774
|
-
const count = orderedAccounts.length;
|
|
775
|
-
const start = this.freeModelCursor % count;
|
|
776
|
-
let supportedCandidateFound = false;
|
|
777
|
-
for (let i = 0; i < count; i++) {
|
|
778
|
-
const idx = (start + i) % count;
|
|
779
|
-
const account = orderedAccounts[idx];
|
|
780
|
-
if (this.isAccountFailed(account)) continue;
|
|
781
|
-
const supported = this.pickSupportedCandidate(account, candidates);
|
|
782
|
-
if (!supported) continue;
|
|
783
|
-
supportedCandidateFound = true;
|
|
784
|
-
const { candidate, model } = supported;
|
|
785
|
-
const costUnits = getCostUnits(model);
|
|
786
|
-
if (costUnits > 0) continue;
|
|
787
|
-
this.freeModelCursor = (idx + 1) % count;
|
|
788
|
-
return {
|
|
789
|
-
ok: true,
|
|
790
|
-
account,
|
|
791
|
-
selectedModel: model,
|
|
792
|
-
endpoint: candidate.endpoint,
|
|
793
|
-
costUnits
|
|
794
|
-
};
|
|
795
|
-
}
|
|
796
|
-
if (!supportedCandidateFound) return {
|
|
797
|
-
ok: false,
|
|
798
|
-
reason: "MODEL_NOT_SUPPORTED"
|
|
799
|
-
};
|
|
800
|
-
return {
|
|
801
|
-
ok: false,
|
|
802
|
-
reason: "NO_QUOTA"
|
|
803
|
-
};
|
|
804
|
-
}
|
|
805
879
|
async selectAccountForCandidates(orderedAccounts, candidates) {
|
|
806
880
|
if (orderedAccounts.length === 0) return {
|
|
807
881
|
ok: false,
|
|
@@ -816,16 +890,13 @@ var AccountsManager = class {
|
|
|
816
890
|
supportedCandidateFound = true;
|
|
817
891
|
const { candidate, model } = supported;
|
|
818
892
|
const costUnits = getCostUnits(model);
|
|
819
|
-
if (costUnits <= 0) {
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
costUnits
|
|
827
|
-
};
|
|
828
|
-
}
|
|
893
|
+
if (costUnits <= 0) return {
|
|
894
|
+
ok: true,
|
|
895
|
+
account,
|
|
896
|
+
selectedModel: model,
|
|
897
|
+
endpoint: candidate.endpoint,
|
|
898
|
+
costUnits
|
|
899
|
+
};
|
|
829
900
|
if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
|
|
830
901
|
if (this.isAccountFailed(account)) continue;
|
|
831
902
|
if (account.unlimited) return {
|
|
@@ -865,12 +936,120 @@ var AccountsManager = class {
|
|
|
865
936
|
};
|
|
866
937
|
}
|
|
867
938
|
/**
|
|
939
|
+
* Try to use a preferred (affinity) account for the request.
|
|
940
|
+
* Returns a successful selection if the account is usable; null otherwise.
|
|
941
|
+
*/
|
|
942
|
+
async tryAffinityAccount(preferredAccountId, orderedAccounts, candidates) {
|
|
943
|
+
const account = isAffinityAccountUsable(preferredAccountId, orderedAccounts);
|
|
944
|
+
if (!account) return null;
|
|
945
|
+
const supported = this.pickSupportedCandidate(account, candidates) ?? this.pickAliasFallbackCandidate(account, candidates);
|
|
946
|
+
if (!supported) return null;
|
|
947
|
+
return this.validateAffinityQuota(account, supported);
|
|
948
|
+
}
|
|
949
|
+
/**
|
|
950
|
+
* Resolve model aliases and try to pick a supported candidate.
|
|
951
|
+
* Returns null if no alias differs or the account doesn't support the alias.
|
|
952
|
+
*/
|
|
953
|
+
pickAliasFallbackCandidate(account, candidates) {
|
|
954
|
+
const aliasCandidates = candidates.map((candidate) => {
|
|
955
|
+
const modelId = resolveModelAlias(candidate.modelId);
|
|
956
|
+
if (modelId === candidate.modelId) return candidate;
|
|
957
|
+
return {
|
|
958
|
+
...candidate,
|
|
959
|
+
modelId
|
|
960
|
+
};
|
|
961
|
+
});
|
|
962
|
+
if (!aliasCandidates.some((candidate, index) => candidate.modelId !== candidates[index].modelId)) return null;
|
|
963
|
+
return this.pickSupportedCandidate(account, aliasCandidates);
|
|
964
|
+
}
|
|
965
|
+
/**
|
|
966
|
+
* Validate quota for an affinity candidate. Free models pass immediately;
|
|
967
|
+
* premium models go through quota refresh / reservation.
|
|
968
|
+
*/
|
|
969
|
+
async validateAffinityQuota(account, supported) {
|
|
970
|
+
const { candidate, model } = supported;
|
|
971
|
+
const costUnits = getCostUnits(model);
|
|
972
|
+
if (costUnits <= 0) return {
|
|
973
|
+
ok: true,
|
|
974
|
+
account,
|
|
975
|
+
selectedModel: model,
|
|
976
|
+
endpoint: candidate.endpoint,
|
|
977
|
+
costUnits
|
|
978
|
+
};
|
|
979
|
+
if (!account.unlimited && this.isQuotaCacheExpired(account)) await this.refreshQuota(account);
|
|
980
|
+
if (this.isAccountFailed(account)) return null;
|
|
981
|
+
if (account.unlimited) return {
|
|
982
|
+
ok: true,
|
|
983
|
+
account,
|
|
984
|
+
selectedModel: model,
|
|
985
|
+
endpoint: candidate.endpoint,
|
|
986
|
+
costUnits
|
|
987
|
+
};
|
|
988
|
+
const effectiveRemaining = getEffectivePremiumRemaining(account);
|
|
989
|
+
if (effectiveRemaining !== void 0 && effectiveRemaining < costUnits) return null;
|
|
990
|
+
const reservation = reservePremiumUnits(account, costUnits);
|
|
991
|
+
return {
|
|
992
|
+
ok: true,
|
|
993
|
+
account,
|
|
994
|
+
selectedModel: model,
|
|
995
|
+
endpoint: candidate.endpoint,
|
|
996
|
+
costUnits,
|
|
997
|
+
reservation
|
|
998
|
+
};
|
|
999
|
+
}
|
|
1000
|
+
/**
|
|
868
1001
|
* Select an available account for a specific request (model + endpoint).
|
|
1002
|
+
* When account affinity is enabled, routes to the previously successful account
|
|
1003
|
+
* for the same affinity key + model combination.
|
|
869
1004
|
* Uses reservation to avoid oversubscribing premium quota under concurrency.
|
|
870
1005
|
*/
|
|
871
|
-
async selectAccountForRequest(candidates) {
|
|
1006
|
+
async selectAccountForRequest(candidates, affinityContext) {
|
|
872
1007
|
if (candidates.length === 0) throw new Error("selectAccountForRequest requires at least one candidate");
|
|
873
1008
|
const orderedAccounts = [...this.temporaryAccount ? [this.temporaryAccount] : [], ...this.accountOrder.map((id) => this.accounts.get(id)).filter((account) => account !== void 0)];
|
|
1009
|
+
const affinityKey = this.accountAffinityEnabled && affinityContext ? extractAffinityKey(affinityContext) : void 0;
|
|
1010
|
+
const modelKey = candidates[0].modelId;
|
|
1011
|
+
const cacheKey = affinityKey ? buildAffinityCacheKey(affinityKey, modelKey) : void 0;
|
|
1012
|
+
if (cacheKey) {
|
|
1013
|
+
const preferredId = this.affinityCache.get(cacheKey);
|
|
1014
|
+
if (preferredId) {
|
|
1015
|
+
const affinityResult = await this.tryAffinityAccount(preferredId, orderedAccounts, candidates);
|
|
1016
|
+
if (affinityResult) {
|
|
1017
|
+
affinityResult.affinityHit = true;
|
|
1018
|
+
affinityResult.affinityCacheKey = cacheKey;
|
|
1019
|
+
affinityResult.confirmAffinity = () => {
|
|
1020
|
+
if (!this.accountAffinityEnabled) return;
|
|
1021
|
+
this.affinityCache.set(cacheKey, affinityResult.account.id);
|
|
1022
|
+
};
|
|
1023
|
+
return affinityResult;
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
const accountsForSelection = this.accountAffinityEnabled && orderedAccounts.length > 1 ? this.rotateAccounts(orderedAccounts) : orderedAccounts;
|
|
1028
|
+
const result = await this.selectWithAliasFallback(accountsForSelection, candidates);
|
|
1029
|
+
if (result.ok) this.loadBalanceCursor++;
|
|
1030
|
+
if (result.ok && cacheKey) {
|
|
1031
|
+
const successResult = result;
|
|
1032
|
+
successResult.confirmAffinity = () => {
|
|
1033
|
+
if (!this.accountAffinityEnabled) return;
|
|
1034
|
+
this.affinityCache.set(cacheKey, successResult.account.id);
|
|
1035
|
+
};
|
|
1036
|
+
}
|
|
1037
|
+
return result;
|
|
1038
|
+
}
|
|
1039
|
+
/**
|
|
1040
|
+
* Rotate the accounts array by the current load-balance cursor for round-robin distribution.
|
|
1041
|
+
* This ensures cache-miss requests are spread across accounts instead of always hitting the first.
|
|
1042
|
+
*/
|
|
1043
|
+
rotateAccounts(accounts) {
|
|
1044
|
+
const start = this.loadBalanceCursor % accounts.length;
|
|
1045
|
+
if (start === 0) return accounts;
|
|
1046
|
+
return [...accounts.slice(start), ...accounts.slice(0, start)];
|
|
1047
|
+
}
|
|
1048
|
+
/**
|
|
1049
|
+
* Normal account selection with alias fallback.
|
|
1050
|
+
* Extracted to keep selectAccountForRequest readable after adding affinity logic.
|
|
1051
|
+
*/
|
|
1052
|
+
async selectWithAliasFallback(orderedAccounts, candidates) {
|
|
874
1053
|
const primary = await this.selectAccountForCandidates(orderedAccounts, candidates);
|
|
875
1054
|
if (primary.ok || primary.reason !== "MODEL_NOT_SUPPORTED") return primary;
|
|
876
1055
|
const aliasCandidates = candidates.map((candidate) => {
|
|
@@ -1064,7 +1243,7 @@ var AccountsManager = class {
|
|
|
1064
1243
|
for (const meta of newMetas) if (!currentIds.has(meta.id)) await this.addNewAccount(meta, added);
|
|
1065
1244
|
await this.reinitializeUpdatedAccounts(newMetas, currentIds, updated);
|
|
1066
1245
|
this.accountOrder = newMetas.map((m) => m.id).filter((id) => this.accounts.has(id));
|
|
1067
|
-
this.
|
|
1246
|
+
this.loadBalanceCursor = 0;
|
|
1068
1247
|
this.logRegistryReloadChanges(added, removed, updated);
|
|
1069
1248
|
} catch (error) {
|
|
1070
1249
|
consola.error("Failed to reload registry:", error);
|
|
@@ -1169,6 +1348,8 @@ var AccountsManager = class {
|
|
|
1169
1348
|
this.stopRegistryWatcher();
|
|
1170
1349
|
this.stopAllTokenRefresh();
|
|
1171
1350
|
this.stopModelsRefresh();
|
|
1351
|
+
this.affinityCache.clear();
|
|
1352
|
+
this.loadBalanceCursor = 0;
|
|
1172
1353
|
this.accounts.clear();
|
|
1173
1354
|
this.accountOrder = [];
|
|
1174
1355
|
this.temporaryAccount = void 0;
|
|
@@ -1178,5 +1359,5 @@ var AccountsManager = class {
|
|
|
1178
1359
|
const accountsManager = new AccountsManager();
|
|
1179
1360
|
|
|
1180
1361
|
//#endregion
|
|
1181
|
-
export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel,
|
|
1182
|
-
//# sourceMappingURL=accounts-manager-
|
|
1362
|
+
export { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getAnthropicApiKey, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isAccountAffinityEnabled, isForceAgentEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, isResponsesApiWebSearchEnabled, mergeConfigWithDefaults, shouldCompactUseSmallModel };
|
|
1363
|
+
//# sourceMappingURL=accounts-manager-B8Y2Y4ab.js.map
|