opencodekit 0.18.16 → 0.18.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
20
20
 
21
21
  //#endregion
22
22
  //#region package.json
23
- var version = "0.18.16";
23
+ var version = "0.18.17";
24
24
 
25
25
  //#endregion
26
26
  //#region src/utils/license.ts
Binary file
@@ -89,11 +89,31 @@ const RATE_LIMIT_CONFIG = {
89
89
  maxFallbacks: 4, // Max model fallback switches per request
90
90
  };
91
91
 
92
/**
 * Local request shaping: smooths bursts on the client before they hit
 * Copilot limits.
 */
const REQUEST_SHAPING_CONFIG = {
  // Tokens credited back to a model's bucket per second of elapsed time.
  tokensPerSecond: 1,
  // Ceiling on stored tokens; a brand-new bucket starts at this value.
  burstCapacity: 2,
  // Longest a request may wait locally (queue turn or token refill) before
  // it is rejected with a "queue saturated" error.
  maxQueueDelayMs: 15000,
};

/** Limits for waiting out a cooldown that covers a whole model family. */
const CIRCUIT_BREAKER_CONFIG = {
  // Cooldowns up to this long are slept through inline; longer ones throw.
  maxInlineWaitMs: 30000,
  // Cap on recovery cycles once the retry budget has been exhausted.
  maxRecoveryCycles: 3,
};
103
+
92
104
  // Per-model rate limit state (in-memory, resets on restart)
93
105
  interface RateLimitEntry {
94
106
  rateLimitedUntil: number; // Unix timestamp (ms)
95
107
  }
96
108
  const rateLimitState = new Map<string, RateLimitEntry>();
109
+ const familyCircuitBreakerState = new Map<string, number>();
110
+
111
+ interface TokenBucketState {
112
+ tokens: number;
113
+ lastRefillAt: number;
114
+ }
115
+ const modelTokenBuckets = new Map<string, TokenBucketState>();
116
+ const modelQueueTail = new Map<string, Promise<void>>();
97
117
 
98
118
  // Model fallback chains: same-family alternatives when a model is rate-limited
99
119
  const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
@@ -146,6 +166,144 @@ function isModelRateLimited(model: string): boolean {
146
166
  return true;
147
167
  }
148
168
 
169
/**
 * Milliseconds left on a model's rate-limit cooldown.
 *
 * An entry whose cooldown has already elapsed is removed from
 * `rateLimitState` as a side effect.
 *
 * @param model Model identifier to look up.
 * @returns Remaining cooldown in ms, or `null` when the model has no active
 *   cooldown.
 */
function getRateLimitRemainingMs(model: string): number | null {
  const cooldown = rateLimitState.get(model);
  if (!cooldown) return null;

  const msLeft = cooldown.rateLimitedUntil - Date.now();
  const expired = msLeft <= 0;
  if (expired) {
    // Drop stale entries so the map only holds live cooldowns.
    rateLimitState.delete(model);
  }
  return expired ? null : msLeft;
}
179
+
180
/**
 * List a model together with the fallbacks configured for it in
 * `MODEL_FALLBACK_CHAINS`.
 *
 * @param model Model identifier.
 * @returns De-duplicated members, with `model` first followed by its
 *   fallbacks in chain order.
 */
function getModelFamily(model: string): string[] {
  const members = new Set<string>();
  members.add(model);
  for (const fallback of MODEL_FALLBACK_CHAINS[model] || []) {
    members.add(fallback);
  }
  return Array.from(members);
}
187
+
188
/**
 * Build the circuit-breaker map key for a model's family: the family members
 * sorted and joined with "|".
 *
 * @param model Model identifier.
 * @returns The key string.
 */
function getFamilyCircuitKey(model: string): string {
  // getModelFamily returns a fresh array, so sorting in place is safe.
  const members = getModelFamily(model);
  members.sort();
  return members.join("|");
}
191
+
192
/**
 * Milliseconds until the circuit breaker for a model's family closes again.
 *
 * An expired breaker entry is removed from `familyCircuitBreakerState` as a
 * side effect.
 *
 * @param model Any member of the family.
 * @returns Remaining time in ms, or 0 when no breaker is open.
 */
function getFamilyCircuitRemainingMs(model: string): number {
  const circuitKey = getFamilyCircuitKey(model);
  const openUntil = familyCircuitBreakerState.get(circuitKey);

  if (openUntil) {
    const msLeft = openUntil - Date.now();
    if (msLeft > 0) return msLeft;
    // Breaker has lapsed; forget it.
    familyCircuitBreakerState.delete(circuitKey);
  }
  return 0;
}
203
+
204
/**
 * Open (or re-arm) the circuit breaker for a model's family.
 *
 * @param model Any member of the family.
 * @param cooldownMs Requested open duration in ms; capped at
 *   `RATE_LIMIT_CONFIG.maxDelayMs`.
 */
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
  const circuitKey = getFamilyCircuitKey(model);
  const cappedCooldownMs = Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs);
  const openUntil = Date.now() + cappedCooldownMs;
  familyCircuitBreakerState.set(circuitKey, openUntil);
}
211
+
212
/**
 * Longest remaining rate-limit cooldown across a model's family.
 *
 * @param model Any member of the family.
 * @returns The largest remaining cooldown in ms; 0 when no member is
 *   currently cooling down.
 */
function getFamilyMaxCooldownRemainingMs(model: string): number {
  return getModelFamily(model).reduce((longest, candidate) => {
    // Members without an active cooldown count as 0.
    const msLeft = getRateLimitRemainingMs(candidate) ?? 0;
    return msLeft > longest ? msLeft : longest;
  }, 0);
}
220
+
221
/**
 * Check whether every member of a model's family is currently rate-limited.
 *
 * @param model Any member of the family.
 * @returns `true` only when the family is non-empty and `isModelRateLimited`
 *   holds for each member.
 */
function isEntireModelFamilyCoolingDown(model: string): boolean {
  const members = getModelFamily(model);
  if (members.length === 0) return false;

  for (const candidate of members) {
    // One usable member is enough to rule out a family-wide cooldown.
    if (!isModelRateLimited(candidate)) return false;
  }
  return true;
}
228
+
229
/**
 * Render a retry delay as a short human-readable string such as "45s", "2m"
 * or "1m 30s".
 *
 * The input is normalised to whole seconds first: fractional values are
 * rounded up (so the hint never under-reports the wait), and negative or NaN
 * values are treated as 0. Non-negative integer input formats exactly as
 * before.
 *
 * @param seconds Delay in seconds.
 * @returns The formatted delay.
 */
function formatRetryAfter(seconds: number): string {
  // `|| 0` maps NaN (and -0) to 0 before clamping negatives.
  const totalSeconds = Math.max(0, Math.ceil(seconds) || 0);
  if (totalSeconds < 60) return `${totalSeconds}s`;

  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
}
235
+
236
/**
 * Delay the caller until `model` has local capacity for one more request.
 *
 * Requests for the same model are serialised through a promise chain kept in
 * `modelQueueTail`. Once it is this request's turn, one token is taken from
 * the model's bucket in `modelTokenBuckets`; when less than one token is
 * available the function sleeps for the refill time first. An empty `model`
 * is a no-op.
 *
 * @param model Model identifier used as the queue and bucket key.
 * @throws Error whose message contains "Local request queue saturated" when
 *   waiting for earlier requests reaches
 *   `REQUEST_SHAPING_CONFIG.maxQueueDelayMs`, or when the computed token wait
 *   would exceed it.
 */
async function shapeRequestForModel(model: string): Promise<void> {
  if (!model) return;

  // Enqueue: our tail settles only after everything ahead of us has settled
  // AND our own gate has been opened in the `finally` below.
  const ahead = modelQueueTail.get(model) ?? Promise.resolve();
  let openGate: (() => void) | undefined;
  const gate = new Promise<void>((resolve) => {
    openGate = resolve;
  });
  const ownTail = ahead.then(() => gate);
  modelQueueTail.set(model, ownTail);

  let saturationTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Wait for our turn, but give up once the queue wait hits the cap.
    const queueSaturated = new Promise<void>((_, reject) => {
      saturationTimer = setTimeout(() => {
        const retryIn = formatRetryAfter(
          Math.ceil(REQUEST_SHAPING_CONFIG.maxQueueDelayMs / 1000),
        );
        reject(
          new Error(
            `[Copilot] Local request queue saturated for ${model}. Retry in ${retryIn}.`,
          ),
        );
      }, REQUEST_SHAPING_CONFIG.maxQueueDelayMs);
    });
    await Promise.race([ahead, queueSaturated]);

    // Credit the bucket for the time elapsed since it was last updated.
    const refilledAt = Date.now();
    const bucket = modelTokenBuckets.get(model) ?? {
      tokens: REQUEST_SHAPING_CONFIG.burstCapacity,
      lastRefillAt: refilledAt,
    };
    const idleMs = Math.max(0, refilledAt - bucket.lastRefillAt);
    const earnedTokens =
      (idleMs / 1000) * REQUEST_SHAPING_CONFIG.tokensPerSecond;
    bucket.tokens = Math.min(
      REQUEST_SHAPING_CONFIG.burstCapacity,
      bucket.tokens + earnedTokens,
    );
    bucket.lastRefillAt = refilledAt;

    const mustWait = bucket.tokens < 1;
    if (!mustWait) {
      bucket.tokens -= 1;
    } else {
      // Time needed for the bucket to reach one full token.
      const shortfall = 1 - bucket.tokens;
      const waitMs = Math.ceil(
        (shortfall / REQUEST_SHAPING_CONFIG.tokensPerSecond) * 1000,
      );
      if (waitMs > REQUEST_SHAPING_CONFIG.maxQueueDelayMs) {
        const retryIn = formatRetryAfter(Math.ceil(waitMs / 1000));
        throw new Error(
          `[Copilot] Local request queue saturated for ${model}. Retry in ${retryIn}.`,
        );
      }
      log("info", `Local request shaping wait for ${model}`, {
        wait_ms: waitMs,
      });
      await sleep(waitMs);
      // The bucket is left empty after the wait and its refill clock restarts.
      bucket.tokens = 0;
      bucket.lastRefillAt = Date.now();
    }

    modelTokenBuckets.set(model, bucket);
  } finally {
    if (saturationTimer) clearTimeout(saturationTimer);
    // Always let the next queued request proceed, even when we are failing.
    openGate?.();
    // Only remove the map entry if nobody queued up behind us.
    if (modelQueueTail.get(model) === ownTail) {
      modelQueueTail.delete(model);
    }
  }
}
306
+
149
307
  function markModelRateLimited(model: string, cooldownMs: number): void {
150
308
  rateLimitState.set(model, {
151
309
  rateLimitedUntil: Date.now() + cooldownMs,
@@ -160,11 +318,16 @@ function markModelRateLimited(model: string, cooldownMs: number): void {
160
318
  * Find the next available fallback model in the same family.
161
319
  * Skips models that are themselves rate-limited.
162
320
  */
163
function getNextFallbackModel(
  model: string,
  // Models already tried for the current request; they are never offered
  // again. Defaults to an empty set so single-argument calls keep working,
  // and is typed read-only because this function only queries it.
  attemptedModels: ReadonlySet<string> = new Set<string>(),
): string | null {
  const chain = MODEL_FALLBACK_CHAINS[model];
  if (!chain) return null;

  // Chain order is preference order: return the first entry that is neither
  // already attempted nor currently rate-limited.
  for (const fallback of chain) {
    if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
      return fallback;
    }
  }
  return null;
}
@@ -530,8 +693,33 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
530
693
 
531
694
  // Pre-flight: if current model is already known rate-limited, switch to fallback
532
695
  let activeFinalInit: RequestInit = finalInit;
696
+ const attemptedModels = new Set<string>();
697
+ if (currentModel) attemptedModels.add(currentModel);
698
+ const requestedModel = currentModel;
699
+ if (currentModel) {
700
+ const circuitRemainingMs =
701
+ getFamilyCircuitRemainingMs(currentModel);
702
+ if (circuitRemainingMs > 0) {
703
+ if (
704
+ circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
705
+ ) {
706
+ log(
707
+ "info",
708
+ `Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
709
+ );
710
+ await sleep(circuitRemainingMs);
711
+ } else {
712
+ throw new Error(
713
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
714
+ );
715
+ }
716
+ }
717
+ }
533
718
  if (currentModel && isModelRateLimited(currentModel)) {
534
- const fallback = getNextFallbackModel(currentModel);
719
+ const fallback = getNextFallbackModel(
720
+ currentModel,
721
+ attemptedModels,
722
+ );
535
723
  if (fallback) {
536
724
  log(
537
725
  "info",
@@ -540,6 +728,26 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
540
728
  activeFinalInit =
541
729
  swapModelInBody(finalInit, fallback) || finalInit;
542
730
  currentModel = fallback;
731
+ attemptedModels.add(fallback);
732
+ } else {
733
+ const familyCooldownMs =
734
+ getFamilyMaxCooldownRemainingMs(currentModel);
735
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
736
+ if (
737
+ familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
738
+ ) {
739
+ log(
740
+ "info",
741
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
742
+ );
743
+ await sleep(familyCooldownMs);
744
+ attemptedModels.clear();
745
+ if (currentModel) attemptedModels.add(currentModel);
746
+ } else {
747
+ throw new Error(
748
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
749
+ );
750
+ }
543
751
  }
544
752
  }
545
753
 
@@ -547,12 +755,20 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
547
755
  let lastError: Error | undefined;
548
756
  let fallbacksUsed = 0;
549
757
  let attempt = 0;
758
+ let recoveryCyclesUsed = 0;
550
759
 
551
760
  while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
552
761
  try {
762
+ if (currentModel) {
763
+ await shapeRequestForModel(currentModel);
764
+ }
553
765
  const response = await fetch(input, activeFinalInit);
554
766
 
555
767
  if (response.status === 429) {
768
+ try {
769
+ await response.body?.cancel();
770
+ } catch {}
771
+
556
772
  // Parse Retry-After header for server-suggested cooldown
557
773
  const retryAfterMs = parseRetryAfter(response);
558
774
  const cooldownMs =
@@ -568,7 +784,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
568
784
  currentModel &&
569
785
  fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
570
786
  ) {
571
- const fallback = getNextFallbackModel(currentModel);
787
+ const fallback = getNextFallbackModel(
788
+ currentModel,
789
+ attemptedModels,
790
+ );
572
791
  if (fallback) {
573
792
  log(
574
793
  "warn",
@@ -583,6 +802,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
583
802
  swapModelInBody(activeFinalInit, fallback) ||
584
803
  activeFinalInit;
585
804
  currentModel = fallback;
805
+ attemptedModels.add(fallback);
586
806
  fallbacksUsed++;
587
807
  continue; // Retry immediately with new model, no delay
588
808
  }
@@ -590,10 +810,41 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
590
810
 
591
811
  // No fallback available — use exponential backoff on same model
592
812
  if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
593
- const delay =
594
- retryAfterMs != null
595
- ? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
596
- : calculateRetryDelay(attempt);
813
+ if (
814
+ currentModel &&
815
+ isEntireModelFamilyCoolingDown(currentModel)
816
+ ) {
817
+ const familyCooldownMs =
818
+ getFamilyMaxCooldownRemainingMs(currentModel);
819
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
820
+ if (
821
+ familyCooldownMs <=
822
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
823
+ ) {
824
+ log(
825
+ "info",
826
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
827
+ );
828
+ await sleep(familyCooldownMs);
829
+ attemptedModels.clear();
830
+ if (currentModel) attemptedModels.add(currentModel);
831
+ attempt++;
832
+ continue;
833
+ }
834
+ throw new Error(
835
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
836
+ );
837
+ }
838
+
839
+ const modelCooldownMs = currentModel
840
+ ? getRateLimitRemainingMs(currentModel)
841
+ : null;
842
+ const delay = Math.min(
843
+ modelCooldownMs ??
844
+ retryAfterMs ??
845
+ calculateRetryDelay(attempt),
846
+ RATE_LIMIT_CONFIG.maxDelayMs,
847
+ );
597
848
  log(
598
849
  "warn",
599
850
  `Rate limited (429), no fallback available, waiting ${delay}ms`,
@@ -605,11 +856,47 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
605
856
  },
606
857
  );
607
858
  await sleep(delay);
859
+ attemptedModels.clear();
860
+ if (currentModel) attemptedModels.add(currentModel);
608
861
  attempt++;
609
862
  continue;
610
863
  }
611
864
 
612
865
  // Exhausted retries and fallbacks
866
+ if (currentModel) {
867
+ const familyCooldownMs =
868
+ getFamilyMaxCooldownRemainingMs(currentModel);
869
+ const recoveryDelayMs =
870
+ familyCooldownMs > 0
871
+ ? Math.min(
872
+ familyCooldownMs,
873
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
874
+ )
875
+ : calculateRetryDelay(0);
876
+ if (
877
+ recoveryDelayMs > 0 &&
878
+ recoveryCyclesUsed <
879
+ CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
880
+ ) {
881
+ recoveryCyclesUsed++;
882
+ log(
883
+ "info",
884
+ `Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
885
+ );
886
+ await sleep(recoveryDelayMs);
887
+ attempt = 0;
888
+ fallbacksUsed = 0;
889
+ if (requestedModel) {
890
+ currentModel = requestedModel;
891
+ activeFinalInit =
892
+ swapModelInBody(finalInit, requestedModel) ||
893
+ finalInit;
894
+ }
895
+ attemptedModels.clear();
896
+ if (currentModel) attemptedModels.add(currentModel);
897
+ continue;
898
+ }
899
+ }
613
900
  throw new Error(
614
901
  `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
615
902
  );
@@ -621,6 +908,15 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
621
908
  } catch (error) {
622
909
  lastError = error as Error;
623
910
 
911
+ if (
912
+ lastError.message.includes(
913
+ "All fallback models cooling down",
914
+ ) ||
915
+ lastError.message.includes("Local request queue saturated")
916
+ ) {
917
+ throw lastError;
918
+ }
919
+
624
920
  // Network errors might be transient, retry
625
921
  if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
626
922
  const delay = calculateRetryDelay(attempt);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencodekit",
3
- "version": "0.18.16",
3
+ "version": "0.18.17",
4
4
  "description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
5
5
  "keywords": [
6
6
  "agents",