opencodekit 0.18.16 → 0.18.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
20
20
 
21
21
  //#endregion
22
22
  //#region package.json
23
- var version = "0.18.16";
23
+ var version = "0.18.18";
24
24
 
25
25
  //#endregion
26
26
  //#region src/utils/license.ts
Binary file
@@ -161,8 +161,8 @@
161
161
  "claude-haiku-4.5": {
162
162
  "attachment": true,
163
163
  "limit": {
164
- "context": 200000,
165
- "output": 64000
164
+ "context": 144000,
165
+ "output": 32000
166
166
  },
167
167
  "options": {
168
168
  "thinking_budget": 10000,
@@ -174,13 +174,13 @@
174
174
  "variants": {
175
175
  "high": {
176
176
  "options": {
177
- "thinking_budget": 8000,
177
+ "thinking_budget": 16000,
178
178
  "type": "enabled"
179
179
  }
180
180
  },
181
181
  "max": {
182
182
  "options": {
183
- "thinking_budget": 16000,
183
+ "thinking_budget": 32000,
184
184
  "type": "enabled"
185
185
  }
186
186
  }
@@ -189,8 +189,8 @@
189
189
  "claude-opus-4.5": {
190
190
  "attachment": true,
191
191
  "limit": {
192
- "context": 200000,
193
- "output": 64000
192
+ "context": 160000,
193
+ "output": 32000
194
194
  },
195
195
  "options": {
196
196
  "thinking_budget": 10000
@@ -201,12 +201,12 @@
201
201
  "variants": {
202
202
  "high": {
203
203
  "options": {
204
- "thinking_budget": 8000
204
+ "thinking_budget": 16000
205
205
  }
206
206
  },
207
207
  "max": {
208
208
  "options": {
209
- "thinking_budget": 16000
209
+ "thinking_budget": 32000
210
210
  }
211
211
  }
212
212
  }
@@ -214,7 +214,7 @@
214
214
  "claude-opus-4.6": {
215
215
  "attachment": true,
216
216
  "limit": {
217
- "context": 200000,
217
+ "context": 144000,
218
218
  "output": 64000
219
219
  },
220
220
  "options": {
@@ -259,21 +259,33 @@
259
259
  "claude-sonnet-4": {
260
260
  "attachment": true,
261
261
  "limit": {
262
- "context": 200000,
263
- "output": 64000
262
+ "context": 216000,
263
+ "output": 16000
264
264
  },
265
265
  "options": {
266
266
  "thinking_budget": 10000
267
267
  },
268
268
  "reasoning": true,
269
269
  "temperature": true,
270
- "tool_call": true
270
+ "tool_call": true,
271
+ "variants": {
272
+ "high": {
273
+ "options": {
274
+ "thinking_budget": 16000
275
+ }
276
+ },
277
+ "max": {
278
+ "options": {
279
+ "thinking_budget": 32000
280
+ }
281
+ }
282
+ }
271
283
  },
272
284
  "claude-sonnet-4.5": {
273
285
  "attachment": true,
274
286
  "limit": {
275
- "context": 200000,
276
- "output": 64000
287
+ "context": 144000,
288
+ "output": 32000
277
289
  },
278
290
  "options": {
279
291
  "thinking_budget": 10000
@@ -284,12 +296,12 @@
284
296
  "variants": {
285
297
  "high": {
286
298
  "options": {
287
- "thinking_budget": 8000
299
+ "thinking_budget": 16000
288
300
  }
289
301
  },
290
302
  "max": {
291
303
  "options": {
292
- "thinking_budget": 16000
304
+ "thinking_budget": 32000
293
305
  }
294
306
  }
295
307
  }
@@ -298,7 +310,7 @@
298
310
  "attachment": true,
299
311
  "limit": {
300
312
  "context": 200000,
301
- "output": 64000
313
+ "output": 32000
302
314
  },
303
315
  "options": {
304
316
  "thinking": {
@@ -89,11 +89,31 @@ const RATE_LIMIT_CONFIG = {
89
89
  maxFallbacks: 4, // Max model fallback switches per request
90
90
  };
91
91
 
92
/**
 * Local request shaping: smooths bursts on the client before they reach
 * Copilot's own rate limits. Consumed by the per-model token bucket.
 */
const REQUEST_SHAPING_CONFIG = {
  // Refill rate of each model's token bucket.
  tokensPerSecond: 1,
  // Bucket capacity, i.e. how many requests may go out back-to-back.
  burstCapacity: 2,
  // Upper bound (ms) on a single local wait before the request is rejected
  // as "queue saturated".
  maxQueueDelayMs: 15000,
};

/**
 * Limits for the family-wide circuit breaker that engages when a model and
 * all of its fallbacks are cooling down.
 */
const CIRCUIT_BREAKER_CONFIG = {
  // Cooldowns up to this long (ms) are waited out inline; longer ones are
  // surfaced to the caller as an error.
  maxInlineWaitMs: 30000,
  // How many times the retry/fallback budget may be reset for one request.
  maxRecoveryCycles: 3,
};
103
+
92
104
  // Per-model rate limit state (in-memory, resets on restart)
93
105
  interface RateLimitEntry {
94
106
  rateLimitedUntil: number; // Unix timestamp (ms)
95
107
  }
96
108
  const rateLimitState = new Map<string, RateLimitEntry>();
109
+ const familyCircuitBreakerState = new Map<string, number>();
110
+
111
+ interface TokenBucketState {
112
+ tokens: number;
113
+ lastRefillAt: number;
114
+ }
115
+ const modelTokenBuckets = new Map<string, TokenBucketState>();
116
+ const modelQueueTail = new Map<string, Promise<void>>();
97
117
 
98
118
  // Model fallback chains: same-family alternatives when a model is rate-limited
99
119
  const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
@@ -146,6 +166,144 @@ function isModelRateLimited(model: string): boolean {
146
166
  return true;
147
167
  }
148
168
 
169
/**
 * Reports how much of `model`'s rate-limit cooldown is left.
 *
 * @param model - Model id used as the key into `rateLimitState`.
 * @returns Remaining cooldown in milliseconds, or `null` when the model has
 *   no cooldown record or the recorded cooldown has already elapsed. An
 *   elapsed record is removed from `rateLimitState` as a side effect.
 */
function getRateLimitRemainingMs(model: string): number | null {
  const cooldown = rateLimitState.get(model);
  if (cooldown) {
    const msLeft = cooldown.rateLimitedUntil - Date.now();
    if (msLeft <= 0) {
      // Cooldown is over: evict the stale record.
      rateLimitState.delete(model);
      return null;
    }
    return msLeft;
  }
  return null;
}
179
+
180
/**
 * Lists `model` together with its configured fallbacks, without duplicates.
 *
 * @param model - Model id to look up in `MODEL_FALLBACK_CHAINS`.
 * @returns A fresh array with `model` first, followed by its fallback chain
 *   in chain order. A model without a chain yields a one-element array.
 */
function getModelFamily(model: string): string[] {
  const members = new Set<string>();
  members.add(model);
  const fallbacks = MODEL_FALLBACK_CHAINS[model] || [];
  for (const fallback of fallbacks) {
    members.add(fallback);
  }
  return Array.from(members);
}
187
+
188
/**
 * Builds the map key under which a model family's circuit-breaker state is
 * stored: the family members sorted and joined with "|".
 *
 * The key is derived from `model`'s own fallback chain, so two models map to
 * the same key only when their families contain the same set of ids.
 */
function getFamilyCircuitKey(model: string): string {
  const members = getModelFamily(model);
  // Sorting in place is safe: getModelFamily returns a fresh array.
  members.sort();
  return members.join("|");
}
191
+
192
/**
 * Reports how long the circuit breaker for `model`'s family stays open.
 *
 * @param model - Any model id; its family key selects the breaker.
 * @returns Milliseconds until the breaker closes, or `0` when no breaker is
 *   recorded or it has already expired. An expired entry is removed from
 *   `familyCircuitBreakerState` as a side effect.
 */
function getFamilyCircuitRemainingMs(model: string): number {
  const circuitKey = getFamilyCircuitKey(model);
  const openUntil = familyCircuitBreakerState.get(circuitKey);
  if (openUntil) {
    const msLeft = openUntil - Date.now();
    if (msLeft <= 0) {
      // Breaker has expired: drop it so later lookups take the fast path.
      familyCircuitBreakerState.delete(circuitKey);
      return 0;
    }
    return msLeft;
  }
  return 0;
}
203
+
204
/**
 * Opens (or re-arms) the circuit breaker for `model`'s family.
 *
 * @param model - Any model id; its family key selects the breaker.
 * @param cooldownMs - Requested open duration in milliseconds. It is capped
 *   at `RATE_LIMIT_CONFIG.maxDelayMs` before being stored.
 */
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
  const circuitKey = getFamilyCircuitKey(model);
  const openedAt = Date.now();
  const cappedCooldownMs = Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs);
  familyCircuitBreakerState.set(circuitKey, openedAt + cappedCooldownMs);
}
211
+
212
/**
 * Finds the longest remaining rate-limit cooldown across `model`'s family.
 *
 * @param model - Any model id; the model and its fallbacks are all checked.
 * @returns The largest remaining cooldown in milliseconds, or `0` when no
 *   family member is currently cooling down.
 */
function getFamilyMaxCooldownRemainingMs(model: string): number {
  return getModelFamily(model).reduce((longest, candidate) => {
    // Members without an active cooldown contribute 0.
    const msLeft = getRateLimitRemainingMs(candidate) ?? 0;
    return msLeft > longest ? msLeft : longest;
  }, 0);
}
220
+
221
/**
 * Tells whether `model` and every one of its fallbacks are rate-limited
 * right now, i.e. there is nothing left in the family to switch to.
 *
 * @param model - Any model id; the model and its fallbacks are all checked.
 * @returns `true` only when every family member is rate-limited.
 */
function isEntireModelFamilyCoolingDown(model: string): boolean {
  const members = getModelFamily(model);
  if (members.length === 0) {
    return false;
  }
  for (const candidate of members) {
    // Stop at the first usable member.
    if (!isModelRateLimited(candidate)) {
      return false;
    }
  }
  return true;
}
228
+
229
/**
 * Renders a retry delay as a short human-readable string for error messages.
 *
 * Values below one minute render as `"45s"`; longer values render as
 * `"2m 5s"`, or `"2m"` when there is no seconds remainder. Minutes are not
 * rolled up into hours.
 *
 * The input is normalised before formatting so the output is always a clean
 * whole-second hint: fractional values are rounded up (never under-reporting
 * the wait), and negative or non-finite values are treated as 0.
 *
 * @param seconds - Delay in seconds.
 * @returns The formatted delay, e.g. `"0s"`, `"59s"`, `"1m"`, `"1m 31s"`.
 */
function formatRetryAfter(seconds: number): string {
  const total = Number.isFinite(seconds)
    ? Math.max(0, Math.ceil(seconds))
    : 0;
  if (total < 60) return `${total}s`;
  const mins = Math.floor(total / 60);
  const secs = total % 60;
  return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
}
235
+
236
/**
 * Applies local request shaping for `model` before a request is sent.
 *
 * Callers for the same model are serialised through a promise chain kept in
 * `modelQueueTail`: each caller waits for the previous tail to settle, then
 * takes one token from the model's bucket in `modelTokenBuckets`, sleeping
 * for the shortfall when the bucket is empty.
 *
 * @param model - Model id to shape for; an empty id is a no-op.
 * @throws Error with "Local request queue saturated" when waiting for the
 *   previous caller exceeds `REQUEST_SHAPING_CONFIG.maxQueueDelayMs`, or when
 *   the computed token wait would exceed that same limit.
 */
async function shapeRequestForModel(model: string): Promise<void> {
  if (!model) {
    return;
  }

  // Builds the error raised whenever the local limiter gives up.
  const saturatedError = (retryInMs: number): Error =>
    new Error(
      `[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(retryInMs / 1000))}.`,
    );

  // Join the model's queue: our tail settles only after everything ahead of
  // us has settled AND our own gate has been opened (in `finally` below).
  const aheadOfUs = modelQueueTail.get(model) ?? Promise.resolve();
  let openGate: (() => void) | undefined;
  const ourGate = new Promise<void>((resolve) => {
    openGate = resolve;
  });
  const ourTail = aheadOfUs.then(() => ourGate);
  modelQueueTail.set(model, ourTail);

  let saturationTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Wait for our turn, but never longer than the configured queue delay.
    const saturation = new Promise<void>((_resolve, reject) => {
      saturationTimer = setTimeout(() => {
        reject(saturatedError(REQUEST_SHAPING_CONFIG.maxQueueDelayMs));
      }, REQUEST_SHAPING_CONFIG.maxQueueDelayMs);
    });
    await Promise.race([aheadOfUs, saturation]);

    const now = Date.now();
    // A model seen for the first time starts with a full bucket.
    const bucket = modelTokenBuckets.get(model) ?? {
      tokens: REQUEST_SHAPING_CONFIG.burstCapacity,
      lastRefillAt: now,
    };

    // Credit the tokens earned since the last refill, capped at capacity.
    const idleMs = Math.max(0, now - bucket.lastRefillAt);
    const earnedTokens =
      (idleMs / 1000) * REQUEST_SHAPING_CONFIG.tokensPerSecond;
    bucket.tokens = Math.min(
      REQUEST_SHAPING_CONFIG.burstCapacity,
      bucket.tokens + earnedTokens,
    );
    bucket.lastRefillAt = now;

    if (bucket.tokens < 1) {
      // Not enough for one request: wait until the shortfall has refilled.
      const shortfall = 1 - bucket.tokens;
      const waitMs = Math.ceil(
        (shortfall / REQUEST_SHAPING_CONFIG.tokensPerSecond) * 1000,
      );
      if (waitMs > REQUEST_SHAPING_CONFIG.maxQueueDelayMs) {
        throw saturatedError(waitMs);
      }
      log("info", `Local request shaping wait for ${model}`, {
        wait_ms: waitMs,
      });
      await sleep(waitMs);
      // The token earned during the wait is spent on this request.
      bucket.tokens = 0;
      bucket.lastRefillAt = Date.now();
    } else {
      bucket.tokens -= 1;
    }

    modelTokenBuckets.set(model, bucket);
  } finally {
    if (saturationTimer) clearTimeout(saturationTimer);
    // Always let the next queued caller proceed, even on failure.
    openGate?.();
    // Drop the map entry when nobody queued up behind us.
    if (modelQueueTail.get(model) === ourTail) {
      modelQueueTail.delete(model);
    }
  }
}
306
+
149
307
  function markModelRateLimited(model: string, cooldownMs: number): void {
150
308
  rateLimitState.set(model, {
151
309
  rateLimitedUntil: Date.now() + cooldownMs,
@@ -160,11 +318,16 @@ function markModelRateLimited(model: string, cooldownMs: number): void {
160
318
  * Find the next available fallback model in the same family.
161
319
  * Skips models that are themselves rate-limited or already in `attemptedModels`.
162
320
  */
163
function getNextFallbackModel(
  model: string,
  attemptedModels: Set<string>,
): string | null {
  const chain = MODEL_FALLBACK_CHAINS[model];
  if (!chain) {
    // No fallback chain is configured for this model.
    return null;
  }
  // First chain entry that has not been tried yet and is not cooling down.
  const next = chain.find(
    (candidate) =>
      !(attemptedModels.has(candidate) || isModelRateLimited(candidate)),
  );
  return next ?? null;
}
@@ -192,20 +355,29 @@ const MAX_RESPONSE_API_ID_LENGTH = 64;
192
355
  * Sanitize an ID to fit within the Responses API 64-char limit.
193
356
  * GitHub Copilot returns proprietary long IDs (400+ chars) that violate
194
357
  * the OpenAI spec. We hash them to a deterministic 64-char string.
358
+ * Preserves the original prefix (e.g., "fc_", "msg_", "call_") so that
359
+ * OpenAI's prefix validation passes.
195
360
  * See: https://github.com/vercel/ai/issues/5171
196
361
  */
197
362
  function sanitizeResponseId(id: string): string {
198
363
  if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
199
- // Use a simple hash: take first 8 chars + hash of full string for uniqueness
200
- // Format: "h_" + first 8 chars + "_" + base36 hash (up to ~50 chars total)
364
+ // Detect and preserve the original prefix (e.g., "fc_", "msg_", "call_", "resp_")
365
+ // The OpenAI Responses API validates that IDs start with specific prefixes
366
+ const prefixMatch = id.match(/^([a-z]+_)/);
367
+ const prefix = prefixMatch ? prefixMatch[1] : "";
368
+ // Hash the full ID for deterministic uniqueness
201
369
  let hash = 0;
202
370
  for (let i = 0; i < id.length; i++) {
203
371
  hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
204
372
  }
205
373
  const hashStr = Math.abs(hash).toString(36);
206
- const prefix = id.slice(0, 8);
207
- // Ensure total length <= 64: "h_" (2) + prefix (8) + "_" (1) + hash
208
- return `h_${prefix}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
374
+ // Take some chars from after the prefix for additional uniqueness
375
+ const afterPrefix = id.slice(prefix.length);
376
+ const maxMiddleLen =
377
+ MAX_RESPONSE_API_ID_LENGTH - prefix.length - hashStr.length - 1;
378
+ const middle = afterPrefix.slice(0, Math.max(0, maxMiddleLen));
379
+ // Format: prefix + middle + "_" + hash (ensure total <= 64)
380
+ return `${prefix}${middle}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
209
381
  }
210
382
 
211
383
  /**
@@ -350,7 +522,12 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
350
522
  return cleanedMsg;
351
523
  }
352
524
 
353
- // If content is an array, check for thinking blocks
525
+ // If content is an array, strip ALL thinking blocks.
526
+ // Reasoning is communicated via reasoning_text/reasoning_opaque
527
+ // fields, not via thinking blocks in the content array.
528
+ // Even thinking blocks WITH signatures can cause
529
+ // "Invalid signature in thinking block" errors when
530
+ // signatures are expired or from a different context.
354
531
  if (Array.isArray(msg.content)) {
355
532
  const hasThinkingBlock = msg.content.some(
356
533
  (part: any) => part.type === "thinking",
@@ -358,22 +535,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
358
535
  if (hasThinkingBlock) {
359
536
  log(
360
537
  "debug",
361
- `Message ${idx} has thinking blocks in content array`,
538
+ `Stripping all thinking blocks from message ${idx}`,
362
539
  );
363
- // Filter out thinking blocks without signatures
364
540
  const cleanedContent = msg.content.filter(
365
- (part: any) => {
366
- if (part.type === "thinking") {
367
- if (!part.signature) {
368
- log(
369
- "warn",
370
- `Removing thinking block without signature`,
371
- );
372
- return false;
373
- }
374
- }
375
- return true;
376
- },
541
+ (part: any) => part.type !== "thinking",
377
542
  );
378
543
  return {
379
544
  ...msg,
@@ -530,8 +695,33 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
530
695
 
531
696
  // Pre-flight: if current model is already known rate-limited, switch to fallback
532
697
  let activeFinalInit: RequestInit = finalInit;
698
+ const attemptedModels = new Set<string>();
699
+ if (currentModel) attemptedModels.add(currentModel);
700
+ const requestedModel = currentModel;
701
+ if (currentModel) {
702
+ const circuitRemainingMs =
703
+ getFamilyCircuitRemainingMs(currentModel);
704
+ if (circuitRemainingMs > 0) {
705
+ if (
706
+ circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
707
+ ) {
708
+ log(
709
+ "info",
710
+ `Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
711
+ );
712
+ await sleep(circuitRemainingMs);
713
+ } else {
714
+ throw new Error(
715
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
716
+ );
717
+ }
718
+ }
719
+ }
533
720
  if (currentModel && isModelRateLimited(currentModel)) {
534
- const fallback = getNextFallbackModel(currentModel);
721
+ const fallback = getNextFallbackModel(
722
+ currentModel,
723
+ attemptedModels,
724
+ );
535
725
  if (fallback) {
536
726
  log(
537
727
  "info",
@@ -540,6 +730,26 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
540
730
  activeFinalInit =
541
731
  swapModelInBody(finalInit, fallback) || finalInit;
542
732
  currentModel = fallback;
733
+ attemptedModels.add(fallback);
734
+ } else {
735
+ const familyCooldownMs =
736
+ getFamilyMaxCooldownRemainingMs(currentModel);
737
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
738
+ if (
739
+ familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
740
+ ) {
741
+ log(
742
+ "info",
743
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
744
+ );
745
+ await sleep(familyCooldownMs);
746
+ attemptedModels.clear();
747
+ if (currentModel) attemptedModels.add(currentModel);
748
+ } else {
749
+ throw new Error(
750
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
751
+ );
752
+ }
543
753
  }
544
754
  }
545
755
 
@@ -547,12 +757,20 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
547
757
  let lastError: Error | undefined;
548
758
  let fallbacksUsed = 0;
549
759
  let attempt = 0;
760
+ let recoveryCyclesUsed = 0;
550
761
 
551
762
  while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
552
763
  try {
764
+ if (currentModel) {
765
+ await shapeRequestForModel(currentModel);
766
+ }
553
767
  const response = await fetch(input, activeFinalInit);
554
768
 
555
769
  if (response.status === 429) {
770
+ try {
771
+ await response.body?.cancel();
772
+ } catch {}
773
+
556
774
  // Parse Retry-After header for server-suggested cooldown
557
775
  const retryAfterMs = parseRetryAfter(response);
558
776
  const cooldownMs =
@@ -568,7 +786,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
568
786
  currentModel &&
569
787
  fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
570
788
  ) {
571
- const fallback = getNextFallbackModel(currentModel);
789
+ const fallback = getNextFallbackModel(
790
+ currentModel,
791
+ attemptedModels,
792
+ );
572
793
  if (fallback) {
573
794
  log(
574
795
  "warn",
@@ -583,6 +804,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
583
804
  swapModelInBody(activeFinalInit, fallback) ||
584
805
  activeFinalInit;
585
806
  currentModel = fallback;
807
+ attemptedModels.add(fallback);
586
808
  fallbacksUsed++;
587
809
  continue; // Retry immediately with new model, no delay
588
810
  }
@@ -590,10 +812,41 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
590
812
 
591
813
  // No fallback available — use exponential backoff on same model
592
814
  if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
593
- const delay =
594
- retryAfterMs != null
595
- ? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
596
- : calculateRetryDelay(attempt);
815
+ if (
816
+ currentModel &&
817
+ isEntireModelFamilyCoolingDown(currentModel)
818
+ ) {
819
+ const familyCooldownMs =
820
+ getFamilyMaxCooldownRemainingMs(currentModel);
821
+ openFamilyCircuitBreaker(currentModel, familyCooldownMs);
822
+ if (
823
+ familyCooldownMs <=
824
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
825
+ ) {
826
+ log(
827
+ "info",
828
+ `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
829
+ );
830
+ await sleep(familyCooldownMs);
831
+ attemptedModels.clear();
832
+ if (currentModel) attemptedModels.add(currentModel);
833
+ attempt++;
834
+ continue;
835
+ }
836
+ throw new Error(
837
+ `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
838
+ );
839
+ }
840
+
841
+ const modelCooldownMs = currentModel
842
+ ? getRateLimitRemainingMs(currentModel)
843
+ : null;
844
+ const delay = Math.min(
845
+ modelCooldownMs ??
846
+ retryAfterMs ??
847
+ calculateRetryDelay(attempt),
848
+ RATE_LIMIT_CONFIG.maxDelayMs,
849
+ );
597
850
  log(
598
851
  "warn",
599
852
  `Rate limited (429), no fallback available, waiting ${delay}ms`,
@@ -605,11 +858,47 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
605
858
  },
606
859
  );
607
860
  await sleep(delay);
861
+ attemptedModels.clear();
862
+ if (currentModel) attemptedModels.add(currentModel);
608
863
  attempt++;
609
864
  continue;
610
865
  }
611
866
 
612
867
  // Exhausted retries and fallbacks
868
+ if (currentModel) {
869
+ const familyCooldownMs =
870
+ getFamilyMaxCooldownRemainingMs(currentModel);
871
+ const recoveryDelayMs =
872
+ familyCooldownMs > 0
873
+ ? Math.min(
874
+ familyCooldownMs,
875
+ CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
876
+ )
877
+ : calculateRetryDelay(0);
878
+ if (
879
+ recoveryDelayMs > 0 &&
880
+ recoveryCyclesUsed <
881
+ CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
882
+ ) {
883
+ recoveryCyclesUsed++;
884
+ log(
885
+ "info",
886
+ `Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
887
+ );
888
+ await sleep(recoveryDelayMs);
889
+ attempt = 0;
890
+ fallbacksUsed = 0;
891
+ if (requestedModel) {
892
+ currentModel = requestedModel;
893
+ activeFinalInit =
894
+ swapModelInBody(finalInit, requestedModel) ||
895
+ finalInit;
896
+ }
897
+ attemptedModels.clear();
898
+ if (currentModel) attemptedModels.add(currentModel);
899
+ continue;
900
+ }
901
+ }
613
902
  throw new Error(
614
903
  `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
615
904
  );
@@ -621,6 +910,15 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
621
910
  } catch (error) {
622
911
  lastError = error as Error;
623
912
 
913
+ if (
914
+ lastError.message.includes(
915
+ "All fallback models cooling down",
916
+ ) ||
917
+ lastError.message.includes("Local request queue saturated")
918
+ ) {
919
+ throw lastError;
920
+ }
921
+
624
922
  // Network errors might be transient, retry
625
923
  if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
626
924
  const delay = calculateRetryDelay(attempt);
@@ -90,24 +90,51 @@ export function createHooks(deps: HookDeps) {
90
90
  }
91
91
  }
92
92
 
93
- // --- Session error: show actual error details ---
93
+ // --- Session error: classify and guide ---
94
94
  if (event.type === "session.error") {
95
95
  const props = event.properties as Record<string, unknown> | undefined;
96
96
  const errorMsg = props?.error
97
- ? String(props.error).slice(0, 120)
97
+ ? String(props.error).slice(0, 200)
98
98
  : props?.message
99
- ? String(props.message).slice(0, 120)
99
+ ? String(props.message).slice(0, 200)
100
100
  : "Unknown error";
101
101
 
102
- // Classify: match the specific AI SDK error pattern
103
- const isTokenOverflow =
102
+ // Log the error message (already truncated to 200 chars above) for debugging
103
+ await log(`Session error: ${errorMsg}`, "warn");
104
+
105
+ // Classify error and provide specific guidance
106
+ let guidance: string;
107
+ if (
104
108
  /token.{0,20}(exceed|limit)/i.test(errorMsg) ||
105
- errorMsg.includes("context_length_exceeded");
106
- const guidance = isTokenOverflow
107
- ? "Context too large — use /compact or start a new session"
108
- : "Save important learnings with observation tool";
109
+ errorMsg.includes("context_length_exceeded")
110
+ ) {
111
+ guidance = "Context too large — use /compact or start a new session";
112
+ } else if (
113
+ /rate.?limit|429|too many requests/i.test(errorMsg)
114
+ ) {
115
+ guidance = "Rate limited — wait a moment and retry";
116
+ } else if (
117
+ /unauthorized|401|403|auth/i.test(errorMsg)
118
+ ) {
119
+ guidance = "Auth error — check API key or token";
120
+ } else if (
121
+ /timeout|ETIMEDOUT|ECONNRESET|network|fetch failed/i.test(errorMsg)
122
+ ) {
123
+ guidance = "Network error — check connection and retry";
124
+ } else if (
125
+ /invalid.*signature|thinking block/i.test(errorMsg)
126
+ ) {
127
+ guidance = "API format error — try starting a new session";
128
+ } else if (
129
+ /500|502|503|504|internal server|service unavailable/i.test(errorMsg)
130
+ ) {
131
+ guidance = "Server error — retry in a few seconds";
132
+ } else {
133
+ guidance = "Unexpected error — save work with observation tool if needed";
134
+ }
109
135
 
110
- await showToast("Session Error", `${guidance} (${errorMsg})`, "warning");
136
+ const short = errorMsg.length > 80 ? `${errorMsg.slice(0, 80)}…` : errorMsg;
137
+ await showToast("Session Error", `${guidance} (${short})`, "warning");
111
138
  }
112
139
  },
113
140
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencodekit",
3
- "version": "0.18.16",
3
+ "version": "0.18.18",
4
4
  "description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
5
5
  "keywords": [
6
6
  "agents",