opencodekit 0.18.14 → 0.18.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,9 +84,108 @@ const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
84
84
  const RATE_LIMIT_CONFIG = {
85
85
  maxRetries: 3,
86
86
  baseDelayMs: 2000, // Start with 2 seconds
87
- maxDelayMs: 30000, // Cap at 30 seconds
87
+ maxDelayMs: 60000, // Cap at 60 seconds
88
+ defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
89
+ maxFallbacks: 4, // Max model fallback switches per request
88
90
  };
89
91
 
92
/**
 * Rate-limit bookkeeping for a single model.
 * State lives only in this process's memory, so it is lost on restart.
 */
interface RateLimitEntry {
  /** Epoch time in milliseconds until which the model is considered rate-limited. */
  rateLimitedUntil: number;
}

/** Model name -> current rate-limit entry. */
const rateLimitState = new Map<string, RateLimitEntry>();
97
+
98
/**
 * Ordered fallback candidates per model, consulted when the keyed model is
 * rate-limited. Earlier entries are preferred over later ones.
 */
const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
  // Claude family
  "claude-opus-4.6": ["claude-opus-4.5", "claude-sonnet-4.6", "claude-sonnet-4.5"],
  "claude-opus-4.5": ["claude-opus-4.6", "claude-sonnet-4.5", "claude-sonnet-4.6"],
  "claude-sonnet-4.6": ["claude-sonnet-4.5", "claude-opus-4.6", "claude-opus-4.5"],
  "claude-sonnet-4.5": ["claude-sonnet-4.6", "claude-opus-4.5", "claude-opus-4.6"],
};
122
+
123
/**
 * Parse the Retry-After header of a response (typically a 429).
 *
 * The header may carry either a delay in seconds or an HTTP date. The value
 * is validated strictly before being treated as seconds, so a date-like or
 * otherwise malformed string that merely starts with digits is never
 * misread as a delay.
 *
 * @param response Response whose headers are inspected.
 * @returns Cooldown in milliseconds (0 or more), or null when the header is
 *   missing, empty, or unparseable.
 */
function parseRetryAfter(response: Response): number | null {
  const header = response.headers.get("retry-after")?.trim();
  if (!header) return null;

  // Delay-seconds form. Fractions are tolerated and rounded up so we never
  // retry earlier than the server asked for.
  if (/^\d+(?:\.\d+)?$/.test(header)) {
    const delayMs = Math.ceil(Number(header) * 1000);
    // An absurdly long digit string overflows to Infinity; treat as unusable.
    return Number.isFinite(delayMs) ? delayMs : null;
  }

  // Date form: the cooldown is the time remaining until that instant,
  // clamped so a date in the past means "retry now".
  const retryAt = Date.parse(header);
  if (Number.isNaN(retryAt)) return null;
  return Math.max(0, retryAt - Date.now());
}
138
+
139
/**
 * Report whether a model is currently inside its recorded cooldown window.
 * An entry whose window has elapsed is removed from the state map as a
 * side effect of the check.
 */
function isModelRateLimited(model: string): boolean {
  const tracked = rateLimitState.get(model);
  if (!tracked) return false;

  const cooldownElapsed = Date.now() >= tracked.rateLimitedUntil;
  if (!cooldownElapsed) return true;

  // Window is over: drop the stale entry so the map does not retain it.
  rateLimitState.delete(model);
  return false;
}
148
+
149
/**
 * Record that a model is rate-limited for the next `cooldownMs` milliseconds
 * (replacing any existing entry for it) and log the cooldown in seconds.
 */
function markModelRateLimited(model: string, cooldownMs: number): void {
  const rateLimitedUntil = Date.now() + cooldownMs;
  rateLimitState.set(model, { rateLimitedUntil });

  const cooldownSeconds = Math.round(cooldownMs / 1000);
  log("info", `Marked ${model} as rate-limited for ${cooldownSeconds}s`);
}
158
+
159
/**
 * Pick the first model in the given model's fallback chain that is not
 * currently rate-limited.
 *
 * @returns The fallback model name, or null when the model has no chain or
 *   every candidate in it is rate-limited.
 */
function getNextFallbackModel(model: string): string | null {
  const candidates = MODEL_FALLBACK_CHAINS[model];
  if (!candidates) return null;

  // Candidates are checked in chain order; the search stops at the first hit.
  const available = candidates.find(
    (candidate) => !isModelRateLimited(candidate),
  );
  return available ?? null;
}
171
+
172
/**
 * Produce a copy of a fetch RequestInit whose JSON body has its `model`
 * field set to `newModel`.
 *
 * The input is returned unchanged when there is no body, the body is not a
 * non-empty string, or the body cannot be parsed and updated as JSON.
 */
function swapModelInBody(
  init: RequestInit | undefined,
  newModel: string,
): RequestInit | undefined {
  const rawBody = init?.body;
  if (typeof rawBody !== "string" || rawBody === "") return init;

  let payload: any;
  try {
    payload = JSON.parse(rawBody);
    payload.model = newModel;
  } catch {
    // Not JSON (or not something a model field can be set on): leave as-is.
    return init;
  }
  return { ...init, body: JSON.stringify(payload) };
}
188
+
90
189
  // Maximum length for item IDs in the OpenAI Responses API
91
190
  const MAX_RESPONSE_API_ID_LENGTH = 64;
92
191
  /**
@@ -117,10 +216,16 @@ function sanitizeResponseInputIds(input: any[]): any[] {
117
216
  return input.map((item: any) => {
118
217
  if (!item || typeof item !== "object") return item;
119
218
  const sanitized = { ...item };
120
- if (typeof sanitized.id === "string" && sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH) {
219
+ if (
220
+ typeof sanitized.id === "string" &&
221
+ sanitized.id.length > MAX_RESPONSE_API_ID_LENGTH
222
+ ) {
121
223
  sanitized.id = sanitizeResponseId(sanitized.id);
122
224
  }
123
- if (typeof sanitized.call_id === "string" && sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH) {
225
+ if (
226
+ typeof sanitized.call_id === "string" &&
227
+ sanitized.call_id.length > MAX_RESPONSE_API_ID_LENGTH
228
+ ) {
124
229
  sanitized.call_id = sanitizeResponseId(sanitized.call_id);
125
230
  }
126
231
  return sanitized;
@@ -131,7 +236,7 @@ function sanitizeResponseInputIds(input: any[]): any[] {
131
236
  * Retries: 2s, 4s, 8s (with jitter)
132
237
  */
133
238
  function calculateRetryDelay(attempt: number): number {
134
- const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * Math.pow(2, attempt);
239
+ const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
135
240
  const jitter = Math.random() * 1000; // Add 0-1s random jitter
136
241
  const delay = Math.min(
137
242
  exponentialDelay + jitter,
@@ -185,7 +290,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
185
290
 
186
291
  let isAgentCall = false;
187
292
  let isVisionRequest = false;
188
- let modifiedBody: any = undefined;
293
+ let modifiedBody: any;
189
294
  let isClaudeModel = false;
190
295
 
191
296
  try {
@@ -333,15 +438,18 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
333
438
  // Sanitize long IDs from Copilot backend (can be 400+ chars)
334
439
  // OpenAI Responses API enforces a 64-char max on item IDs
335
440
  const sanitizedInput = sanitizeResponseInputIds(body.input);
336
- const inputWasSanitized = sanitizedInput !== body.input &&
441
+ const inputWasSanitized =
442
+ sanitizedInput !== body.input &&
337
443
  JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
338
444
 
339
445
  if (inputWasSanitized) {
340
446
  log("info", "Sanitized long IDs in Responses API input", {
341
447
  original_count: body.input.filter(
342
448
  (item: any) =>
343
- (typeof item?.id === "string" && item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
344
- (typeof item?.call_id === "string" && item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
449
+ (typeof item?.id === "string" &&
450
+ item.id.length > MAX_RESPONSE_API_ID_LENGTH) ||
451
+ (typeof item?.call_id === "string" &&
452
+ item.call_id.length > MAX_RESPONSE_API_ID_LENGTH),
345
453
  ).length,
346
454
  });
347
455
  modifiedBody = {
@@ -410,34 +518,105 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
410
518
  ...(modifiedBody ? { body: JSON.stringify(modifiedBody) } : {}),
411
519
  };
412
520
 
413
- // Retry logic with exponential backoff for rate limiting
521
+ // Extract model from request body for rate limit tracking
522
+ let currentModel = "";
523
+ try {
524
+ const bodyObj =
525
+ typeof finalInit.body === "string"
526
+ ? JSON.parse(finalInit.body)
527
+ : finalInit.body;
528
+ currentModel = bodyObj?.model || "";
529
+ } catch {}
530
+
531
+ // Pre-flight: if current model is already known rate-limited, switch to fallback
532
+ let activeFinalInit: RequestInit = finalInit;
533
+ if (currentModel && isModelRateLimited(currentModel)) {
534
+ const fallback = getNextFallbackModel(currentModel);
535
+ if (fallback) {
536
+ log(
537
+ "info",
538
+ `Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
539
+ );
540
+ activeFinalInit =
541
+ swapModelInBody(finalInit, fallback) || finalInit;
542
+ currentModel = fallback;
543
+ }
544
+ }
545
+
546
+ // Retry logic with model fallback and exponential backoff for rate limiting
414
547
  let lastError: Error | undefined;
415
- for (
416
- let attempt = 0;
417
- attempt <= RATE_LIMIT_CONFIG.maxRetries;
418
- attempt++
419
- ) {
548
+ let fallbacksUsed = 0;
549
+ let attempt = 0;
550
+
551
+ while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
420
552
  try {
421
- const response = await fetch(input, finalInit);
553
+ const response = await fetch(input, activeFinalInit);
422
554
 
423
- // If we get a 429, retry with backoff
424
- if (
425
- response.status === 429 &&
426
- attempt < RATE_LIMIT_CONFIG.maxRetries
427
- ) {
428
- const delay = calculateRetryDelay(attempt);
429
- log("warn", `Rate limited (429), retrying`, {
430
- delay_ms: delay,
431
- attempt: attempt + 1,
432
- max_retries: RATE_LIMIT_CONFIG.maxRetries,
433
- });
434
- await sleep(delay);
435
- continue;
555
+ if (response.status === 429) {
556
+ // Parse Retry-After header for server-suggested cooldown
557
+ const retryAfterMs = parseRetryAfter(response);
558
+ const cooldownMs =
559
+ retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
560
+
561
+ // Mark this model as rate-limited
562
+ if (currentModel) {
563
+ markModelRateLimited(currentModel, cooldownMs);
564
+ }
565
+
566
+ // Try fallback model (doesn't count against retry budget)
567
+ if (
568
+ currentModel &&
569
+ fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
570
+ ) {
571
+ const fallback = getNextFallbackModel(currentModel);
572
+ if (fallback) {
573
+ log(
574
+ "warn",
575
+ `Rate limited on ${currentModel}, switching to ${fallback}`,
576
+ {
577
+ retry_after_ms: retryAfterMs,
578
+ cooldown_ms: cooldownMs,
579
+ fallbacks_used: fallbacksUsed + 1,
580
+ },
581
+ );
582
+ activeFinalInit =
583
+ swapModelInBody(activeFinalInit, fallback) ||
584
+ activeFinalInit;
585
+ currentModel = fallback;
586
+ fallbacksUsed++;
587
+ continue; // Retry immediately with new model, no delay
588
+ }
589
+ }
590
+
591
+ // No fallback available — use exponential backoff on same model
592
+ if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
593
+ const delay =
594
+ retryAfterMs != null
595
+ ? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
596
+ : calculateRetryDelay(attempt);
597
+ log(
598
+ "warn",
599
+ `Rate limited (429), no fallback available, waiting ${delay}ms`,
600
+ {
601
+ delay_ms: delay,
602
+ attempt: attempt + 1,
603
+ max_retries: RATE_LIMIT_CONFIG.maxRetries,
604
+ fallbacks_exhausted: true,
605
+ },
606
+ );
607
+ await sleep(delay);
608
+ attempt++;
609
+ continue;
610
+ }
611
+
612
+ // Exhausted retries and fallbacks
613
+ throw new Error(
614
+ `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
615
+ );
436
616
  }
437
617
 
438
- // Response transformation is now handled by the custom SDK at
439
- // .opencode/plugin/sdk/copilot/ which properly parses reasoning_text/reasoning_opaque
440
- // and converts them to AI SDK's reasoning content parts
618
+ // Response transformation is handled by the custom SDK at
619
+ // .opencode/plugin/sdk/copilot/
441
620
  return response;
442
621
  } catch (error) {
443
622
  lastError = error as Error;
@@ -452,20 +631,19 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
452
631
  error: lastError.message,
453
632
  });
454
633
  await sleep(delay);
634
+ attempt++;
455
635
  continue;
456
636
  }
457
637
  throw error;
458
638
  }
459
639
  }
460
640
 
461
- // If we've exhausted all retries, throw the last error
641
+ // Exhausted all retries
462
642
  if (lastError) {
463
643
  throw new Error(
464
644
  `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
465
645
  );
466
646
  }
467
-
468
- // This should not be reached, but just in case
469
647
  throw new Error(
470
648
  `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
471
649
  );
@@ -626,7 +804,6 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
626
804
  await sleep(
627
805
  deviceData.interval * 1000 + OAUTH_POLLING_SAFETY_MARGIN_MS,
628
806
  );
629
- continue;
630
807
  }
631
808
  },
632
809
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencodekit",
3
- "version": "0.18.14",
3
+ "version": "0.18.16",
4
4
  "description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
5
5
  "keywords": [
6
6
  "agents",