opencodekit 0.18.16 → 0.18.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -89,11 +89,31 @@ const RATE_LIMIT_CONFIG = {
|
|
|
89
89
|
maxFallbacks: 4, // Max model fallback switches per request
|
|
90
90
|
};
|
|
91
91
|
|
|
92
|
+
/**
 * Settings for the local, per-model request shaper, which spaces out bursts
 * before they reach Copilot's own rate limits.
 */
const REQUEST_SHAPING_CONFIG = {
  /** Token-bucket refill rate: tokens added per elapsed second. */
  tokensPerSecond: 1,
  /** Bucket size; a new bucket starts full and refills never exceed this. */
  burstCapacity: 2,
  /**
   * Longest time (ms) a request may wait behind earlier requests for the same
   * model, and the largest token wait accepted, before it is rejected.
   */
  maxQueueDelayMs: 15 * 1000,
};
|
|
98
|
+
|
|
99
|
+
/** Settings for the model-family circuit breaker and recovery loop. */
const CIRCUIT_BREAKER_CONFIG = {
  /**
   * Longest cooldown (ms) the request path will sleep through in place;
   * longer cooldowns are reported to the caller as an error instead.
   */
  maxInlineWaitMs: 30 * 1000,
  /**
   * How many times one request may reset its retry/fallback budget after
   * exhausting it.
   */
  maxRecoveryCycles: 3,
};
|
|
103
|
+
|
|
92
104
|
/** Cooldown record for a single rate-limited model. */
interface RateLimitEntry {
  /** Unix timestamp (ms) at which the model's cooldown ends. */
  rateLimitedUntil: number;
}

/**
 * Active cooldowns keyed by model name.
 * Kept in memory only, so the state is lost when the process restarts.
 */
const rateLimitState: Map<string, RateLimitEntry> = new Map();

/**
 * Open family circuits, keyed by the string from getFamilyCircuitKey.
 * The value is the Unix timestamp (ms) until which the circuit stays open.
 */
const familyCircuitBreakerState: Map<string, number> = new Map();
|
|
110
|
+
|
|
111
|
+
/** Token-bucket bookkeeping for one model. */
interface TokenBucketState {
  /** Tokens currently available; may be fractional between refills. */
  tokens: number;
  /** Timestamp (ms, from Date.now()) of the most recent refill calculation. */
  lastRefillAt: number;
}

/** Token bucket per model name, created lazily by the request shaper. */
const modelTokenBuckets: Map<string, TokenBucketState> = new Map();

/**
 * Tail of the per-model wait queue: a promise that settles once every request
 * queued so far for that model has finished shaping.
 */
const modelQueueTail: Map<string, Promise<void>> = new Map();
|
|
97
117
|
|
|
98
118
|
// Model fallback chains: same-family alternatives when a model is rate-limited
|
|
99
119
|
const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
|
|
@@ -146,6 +166,144 @@ function isModelRateLimited(model: string): boolean {
|
|
|
146
166
|
return true;
|
|
147
167
|
}
|
|
148
168
|
|
|
169
|
+
/**
 * Reports how long a model's rate-limit cooldown still has to run.
 *
 * Side effect: an entry whose cooldown has already elapsed is removed from
 * rateLimitState.
 *
 * @param model Model name used as the key in rateLimitState.
 * @returns Milliseconds remaining, or null when the model has no entry or its
 *   cooldown has expired.
 */
function getRateLimitRemainingMs(model: string): number | null {
  const cooldown = rateLimitState.get(model);
  if (!cooldown) return null;

  const msLeft = cooldown.rateLimitedUntil - Date.now();
  const expired = msLeft <= 0;
  if (expired) rateLimitState.delete(model);
  return expired ? null : msLeft;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Lists the model together with every entry of its fallback chain.
 *
 * @param model Model name looked up in MODEL_FALLBACK_CHAINS.
 * @returns A new array, without duplicates, starting with `model` and followed
 *   by its fallbacks in chain order; just `[model]` when it has no chain.
 */
function getModelFamily(model: string): string[] {
  const members = new Set<string>();
  members.add(model);
  for (const alternative of MODEL_FALLBACK_CHAINS[model] || []) {
    members.add(alternative);
  }
  return Array.from(members);
}
|
|
187
|
+
|
|
188
|
+
/**
 * Builds the circuit-breaker map key for a model's family.
 *
 * Member names are sorted before joining, so two models whose families contain
 * the same names produce the same key.
 *
 * @param model Any model name.
 * @returns The sorted family member names joined with "|".
 */
function getFamilyCircuitKey(model: string): string {
  const members = getModelFamily(model);
  // getModelFamily returns a fresh array, so sorting it in place is safe.
  members.sort();
  return members.join("|");
}
|
|
191
|
+
|
|
192
|
+
/**
 * Reports how long the circuit breaker for a model's family stays open.
 *
 * Side effect: a circuit whose deadline has passed is removed from
 * familyCircuitBreakerState.
 *
 * @param model Any member of the family.
 * @returns Milliseconds until the circuit closes, or 0 when no circuit is open.
 */
function getFamilyCircuitRemainingMs(model: string): number {
  const circuitKey = getFamilyCircuitKey(model);
  const openUntil = familyCircuitBreakerState.get(circuitKey);
  if (!openUntil) return 0;

  const msLeft = openUntil - Date.now();
  const expired = msLeft <= 0;
  if (expired) familyCircuitBreakerState.delete(circuitKey);
  return expired ? 0 : msLeft;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Opens (or re-arms) the circuit breaker for a model's family.
 *
 * @param model Any member of the family.
 * @param cooldownMs Requested open duration in ms; capped at
 *   RATE_LIMIT_CONFIG.maxDelayMs.
 */
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
  const circuitKey = getFamilyCircuitKey(model);
  const cappedCooldownMs = Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs);
  const openUntil = Date.now() + cappedCooldownMs;
  familyCircuitBreakerState.set(circuitKey, openUntil);
}
|
|
211
|
+
|
|
212
|
+
/**
 * Finds the longest remaining rate-limit cooldown among a model's family.
 *
 * @param model Any member of the family.
 * @returns The largest remaining cooldown in ms across the family, or 0 when
 *   no member is currently cooling down.
 */
function getFamilyMaxCooldownRemainingMs(model: string): number {
  return getModelFamily(model).reduce((longest, member) => {
    // Members without an active cooldown count as 0.
    const msLeft = getRateLimitRemainingMs(member) ?? 0;
    return msLeft > longest ? msLeft : longest;
  }, 0);
}
|
|
220
|
+
|
|
221
|
+
/**
 * Tells whether every member of a model's family is currently rate-limited.
 *
 * @param model Any member of the family.
 * @returns true only when the family is non-empty and isModelRateLimited
 *   holds for each member; checking stops at the first member that is not.
 */
function isEntireModelFamilyCoolingDown(model: string): boolean {
  const members = getModelFamily(model);
  if (members.length === 0) return false;

  for (const member of members) {
    if (!isModelRateLimited(member)) return false;
  }
  return true;
}
|
|
228
|
+
|
|
229
|
+
/**
 * Formats a retry delay for display in error messages.
 *
 * The input is normalised to a whole, non-negative number of seconds first:
 * fractions are rounded up (so the hint never understates the wait), and
 * negative or non-finite values are treated as 0. Non-negative integer inputs
 * format exactly as before.
 *
 * @param seconds Delay in seconds.
 * @returns "<n>s" below one minute, "<m>m" for whole minutes, otherwise
 *   "<m>m <s>s".
 */
function formatRetryAfter(seconds: number): string {
  const totalSeconds = Number.isFinite(seconds)
    ? Math.max(0, Math.ceil(seconds))
    : 0;
  if (totalSeconds < 60) return `${totalSeconds}s`;

  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Holds the caller back until `model` is allowed to send another request.
 *
 * Callers for the same model are served one at a time in arrival order, and
 * each one takes a token from that model's bucket. When the bucket holds less
 * than one token, the caller sleeps until the missing fraction has refilled.
 *
 * @param model Model name; an empty value returns immediately without shaping.
 * @throws Error containing "Local request queue saturated" when the wait
 *   behind earlier callers exceeds REQUEST_SHAPING_CONFIG.maxQueueDelayMs, or
 *   when the computed token wait would exceed it.
 */
async function shapeRequestForModel(model: string): Promise<void> {
  if (!model) return;

  const shaping = REQUEST_SHAPING_CONFIG;

  // Join the queue. Our turn begins when everything ahead has settled, and the
  // next caller's turn begins once our gate is opened in `finally`.
  const aheadOfUs = modelQueueTail.get(model) ?? Promise.resolve();
  let openGate: (() => void) | undefined;
  const ourGate = new Promise<void>((resolve) => {
    openGate = resolve;
  });
  const ourTail = aheadOfUs.then(() => ourGate);
  modelQueueTail.set(model, ourTail);

  let queueTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Give up if the callers ahead of us do not finish in time.
    const queueDeadline = new Promise<void>((_, reject) => {
      queueTimer = setTimeout(() => {
        reject(
          new Error(
            `[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(shaping.maxQueueDelayMs / 1000))}.`,
          ),
        );
      }, shaping.maxQueueDelayMs);
    });
    await Promise.race([aheadOfUs, queueDeadline]);

    // Refill the bucket for the time elapsed since it was last touched.
    const now = Date.now();
    const bucket = modelTokenBuckets.get(model) ?? {
      tokens: shaping.burstCapacity,
      lastRefillAt: now,
    };
    const sinceRefillMs = Math.max(0, now - bucket.lastRefillAt);
    const earnedTokens = (sinceRefillMs / 1000) * shaping.tokensPerSecond;
    bucket.tokens = Math.min(
      shaping.burstCapacity,
      bucket.tokens + earnedTokens,
    );
    bucket.lastRefillAt = now;

    if (bucket.tokens < 1) {
      // Not enough for one request: wait for the missing fraction to refill.
      const missingTokens = 1 - bucket.tokens;
      const waitMs = Math.ceil(
        (missingTokens / shaping.tokensPerSecond) * 1000,
      );
      if (waitMs > shaping.maxQueueDelayMs) {
        throw new Error(
          `[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(waitMs / 1000))}.`,
        );
      }
      log("info", `Local request shaping wait for ${model}`, {
        wait_ms: waitMs,
      });
      await sleep(waitMs);
      // The token earned during the sleep is spent by this request.
      bucket.tokens = 0;
      bucket.lastRefillAt = Date.now();
    } else {
      bucket.tokens -= 1;
    }

    modelTokenBuckets.set(model, bucket);
  } finally {
    if (queueTimer) clearTimeout(queueTimer);
    // Always let the next caller through, even when we are failing.
    openGate?.();
    // Drop the map entry only if nobody queued up behind us.
    if (modelQueueTail.get(model) === ourTail) {
      modelQueueTail.delete(model);
    }
  }
}
|
|
306
|
+
|
|
149
307
|
function markModelRateLimited(model: string, cooldownMs: number): void {
|
|
150
308
|
rateLimitState.set(model, {
|
|
151
309
|
rateLimitedUntil: Date.now() + cooldownMs,
|
|
@@ -160,11 +318,16 @@ function markModelRateLimited(model: string, cooldownMs: number): void {
|
|
|
160
318
|
* Find the next available fallback model in the same family.
|
|
161
319
|
* Skips models that are themselves rate-limited.
|
|
162
320
|
*/
|
|
163
|
-
/**
 * Picks the next usable fallback for a model.
 *
 * Walks the model's fallback chain in order and returns the first entry that
 * is neither in `attemptedModels` nor currently rate-limited.
 *
 * @param model Model whose chain in MODEL_FALLBACK_CHAINS is consulted.
 * @param attemptedModels Models to skip; callers in this file add each model
 *   they have already switched to.
 * @returns The chosen fallback, or null when the model has no chain or no
 *   entry qualifies.
 */
function getNextFallbackModel(
  model: string,
  attemptedModels: Set<string>,
): string | null {
  const chain = MODEL_FALLBACK_CHAINS[model];
  if (!chain) return null;

  const usable = chain.find(
    (candidate) =>
      !attemptedModels.has(candidate) && !isModelRateLimited(candidate),
  );
  return usable ?? null;
}
|
|
@@ -530,8 +693,33 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
530
693
|
|
|
531
694
|
// Pre-flight: if current model is already known rate-limited, switch to fallback
|
|
532
695
|
let activeFinalInit: RequestInit = finalInit;
|
|
696
|
+
const attemptedModels = new Set<string>();
|
|
697
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
698
|
+
const requestedModel = currentModel;
|
|
699
|
+
if (currentModel) {
|
|
700
|
+
const circuitRemainingMs =
|
|
701
|
+
getFamilyCircuitRemainingMs(currentModel);
|
|
702
|
+
if (circuitRemainingMs > 0) {
|
|
703
|
+
if (
|
|
704
|
+
circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
705
|
+
) {
|
|
706
|
+
log(
|
|
707
|
+
"info",
|
|
708
|
+
`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
|
|
709
|
+
);
|
|
710
|
+
await sleep(circuitRemainingMs);
|
|
711
|
+
} else {
|
|
712
|
+
throw new Error(
|
|
713
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
|
|
714
|
+
);
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
}
|
|
533
718
|
if (currentModel && isModelRateLimited(currentModel)) {
|
|
534
|
-
const fallback = getNextFallbackModel(
|
|
719
|
+
const fallback = getNextFallbackModel(
|
|
720
|
+
currentModel,
|
|
721
|
+
attemptedModels,
|
|
722
|
+
);
|
|
535
723
|
if (fallback) {
|
|
536
724
|
log(
|
|
537
725
|
"info",
|
|
@@ -540,6 +728,26 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
540
728
|
activeFinalInit =
|
|
541
729
|
swapModelInBody(finalInit, fallback) || finalInit;
|
|
542
730
|
currentModel = fallback;
|
|
731
|
+
attemptedModels.add(fallback);
|
|
732
|
+
} else {
|
|
733
|
+
const familyCooldownMs =
|
|
734
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
735
|
+
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
736
|
+
if (
|
|
737
|
+
familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
738
|
+
) {
|
|
739
|
+
log(
|
|
740
|
+
"info",
|
|
741
|
+
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
|
|
742
|
+
);
|
|
743
|
+
await sleep(familyCooldownMs);
|
|
744
|
+
attemptedModels.clear();
|
|
745
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
746
|
+
} else {
|
|
747
|
+
throw new Error(
|
|
748
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
749
|
+
);
|
|
750
|
+
}
|
|
543
751
|
}
|
|
544
752
|
}
|
|
545
753
|
|
|
@@ -547,12 +755,20 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
547
755
|
let lastError: Error | undefined;
|
|
548
756
|
let fallbacksUsed = 0;
|
|
549
757
|
let attempt = 0;
|
|
758
|
+
let recoveryCyclesUsed = 0;
|
|
550
759
|
|
|
551
760
|
while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
|
|
552
761
|
try {
|
|
762
|
+
if (currentModel) {
|
|
763
|
+
await shapeRequestForModel(currentModel);
|
|
764
|
+
}
|
|
553
765
|
const response = await fetch(input, activeFinalInit);
|
|
554
766
|
|
|
555
767
|
if (response.status === 429) {
|
|
768
|
+
try {
|
|
769
|
+
await response.body?.cancel();
|
|
770
|
+
} catch {}
|
|
771
|
+
|
|
556
772
|
// Parse Retry-After header for server-suggested cooldown
|
|
557
773
|
const retryAfterMs = parseRetryAfter(response);
|
|
558
774
|
const cooldownMs =
|
|
@@ -568,7 +784,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
568
784
|
currentModel &&
|
|
569
785
|
fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
|
|
570
786
|
) {
|
|
571
|
-
const fallback = getNextFallbackModel(
|
|
787
|
+
const fallback = getNextFallbackModel(
|
|
788
|
+
currentModel,
|
|
789
|
+
attemptedModels,
|
|
790
|
+
);
|
|
572
791
|
if (fallback) {
|
|
573
792
|
log(
|
|
574
793
|
"warn",
|
|
@@ -583,6 +802,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
583
802
|
swapModelInBody(activeFinalInit, fallback) ||
|
|
584
803
|
activeFinalInit;
|
|
585
804
|
currentModel = fallback;
|
|
805
|
+
attemptedModels.add(fallback);
|
|
586
806
|
fallbacksUsed++;
|
|
587
807
|
continue; // Retry immediately with new model, no delay
|
|
588
808
|
}
|
|
@@ -590,10 +810,41 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
590
810
|
|
|
591
811
|
// No fallback available — use exponential backoff on same model
|
|
592
812
|
if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
813
|
+
if (
|
|
814
|
+
currentModel &&
|
|
815
|
+
isEntireModelFamilyCoolingDown(currentModel)
|
|
816
|
+
) {
|
|
817
|
+
const familyCooldownMs =
|
|
818
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
819
|
+
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
820
|
+
if (
|
|
821
|
+
familyCooldownMs <=
|
|
822
|
+
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
823
|
+
) {
|
|
824
|
+
log(
|
|
825
|
+
"info",
|
|
826
|
+
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
|
|
827
|
+
);
|
|
828
|
+
await sleep(familyCooldownMs);
|
|
829
|
+
attemptedModels.clear();
|
|
830
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
831
|
+
attempt++;
|
|
832
|
+
continue;
|
|
833
|
+
}
|
|
834
|
+
throw new Error(
|
|
835
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
836
|
+
);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
const modelCooldownMs = currentModel
|
|
840
|
+
? getRateLimitRemainingMs(currentModel)
|
|
841
|
+
: null;
|
|
842
|
+
const delay = Math.min(
|
|
843
|
+
modelCooldownMs ??
|
|
844
|
+
retryAfterMs ??
|
|
845
|
+
calculateRetryDelay(attempt),
|
|
846
|
+
RATE_LIMIT_CONFIG.maxDelayMs,
|
|
847
|
+
);
|
|
597
848
|
log(
|
|
598
849
|
"warn",
|
|
599
850
|
`Rate limited (429), no fallback available, waiting ${delay}ms`,
|
|
@@ -605,11 +856,47 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
605
856
|
},
|
|
606
857
|
);
|
|
607
858
|
await sleep(delay);
|
|
859
|
+
attemptedModels.clear();
|
|
860
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
608
861
|
attempt++;
|
|
609
862
|
continue;
|
|
610
863
|
}
|
|
611
864
|
|
|
612
865
|
// Exhausted retries and fallbacks
|
|
866
|
+
if (currentModel) {
|
|
867
|
+
const familyCooldownMs =
|
|
868
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
869
|
+
const recoveryDelayMs =
|
|
870
|
+
familyCooldownMs > 0
|
|
871
|
+
? Math.min(
|
|
872
|
+
familyCooldownMs,
|
|
873
|
+
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
|
|
874
|
+
)
|
|
875
|
+
: calculateRetryDelay(0);
|
|
876
|
+
if (
|
|
877
|
+
recoveryDelayMs > 0 &&
|
|
878
|
+
recoveryCyclesUsed <
|
|
879
|
+
CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
|
|
880
|
+
) {
|
|
881
|
+
recoveryCyclesUsed++;
|
|
882
|
+
log(
|
|
883
|
+
"info",
|
|
884
|
+
`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
|
|
885
|
+
);
|
|
886
|
+
await sleep(recoveryDelayMs);
|
|
887
|
+
attempt = 0;
|
|
888
|
+
fallbacksUsed = 0;
|
|
889
|
+
if (requestedModel) {
|
|
890
|
+
currentModel = requestedModel;
|
|
891
|
+
activeFinalInit =
|
|
892
|
+
swapModelInBody(finalInit, requestedModel) ||
|
|
893
|
+
finalInit;
|
|
894
|
+
}
|
|
895
|
+
attemptedModels.clear();
|
|
896
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
897
|
+
continue;
|
|
898
|
+
}
|
|
899
|
+
}
|
|
613
900
|
throw new Error(
|
|
614
901
|
`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
|
|
615
902
|
);
|
|
@@ -621,6 +908,15 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
621
908
|
} catch (error) {
|
|
622
909
|
lastError = error as Error;
|
|
623
910
|
|
|
911
|
+
if (
|
|
912
|
+
lastError.message.includes(
|
|
913
|
+
"All fallback models cooling down",
|
|
914
|
+
) ||
|
|
915
|
+
lastError.message.includes("Local request queue saturated")
|
|
916
|
+
) {
|
|
917
|
+
throw lastError;
|
|
918
|
+
}
|
|
919
|
+
|
|
624
920
|
// Network errors might be transient, retry
|
|
625
921
|
if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
|
|
626
922
|
const delay = calculateRetryDelay(attempt);
|