opencodekit 0.18.16 → 0.18.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/dist/template/.opencode/memory.db +0 -0
- package/dist/template/.opencode/memory.db-shm +0 -0
- package/dist/template/.opencode/memory.db-wal +0 -0
- package/dist/template/.opencode/opencode.json +29 -17
- package/dist/template/.opencode/plugin/copilot-auth.ts +326 -28
- package/dist/template/.opencode/plugin/lib/memory-hooks.ts +37 -10
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -161,8 +161,8 @@
|
|
|
161
161
|
"claude-haiku-4.5": {
|
|
162
162
|
"attachment": true,
|
|
163
163
|
"limit": {
|
|
164
|
-
"context":
|
|
165
|
-
"output":
|
|
164
|
+
"context": 144000,
|
|
165
|
+
"output": 32000
|
|
166
166
|
},
|
|
167
167
|
"options": {
|
|
168
168
|
"thinking_budget": 10000,
|
|
@@ -174,13 +174,13 @@
|
|
|
174
174
|
"variants": {
|
|
175
175
|
"high": {
|
|
176
176
|
"options": {
|
|
177
|
-
"thinking_budget":
|
|
177
|
+
"thinking_budget": 16000,
|
|
178
178
|
"type": "enabled"
|
|
179
179
|
}
|
|
180
180
|
},
|
|
181
181
|
"max": {
|
|
182
182
|
"options": {
|
|
183
|
-
"thinking_budget":
|
|
183
|
+
"thinking_budget": 32000,
|
|
184
184
|
"type": "enabled"
|
|
185
185
|
}
|
|
186
186
|
}
|
|
@@ -189,8 +189,8 @@
|
|
|
189
189
|
"claude-opus-4.5": {
|
|
190
190
|
"attachment": true,
|
|
191
191
|
"limit": {
|
|
192
|
-
"context":
|
|
193
|
-
"output":
|
|
192
|
+
"context": 160000,
|
|
193
|
+
"output": 32000
|
|
194
194
|
},
|
|
195
195
|
"options": {
|
|
196
196
|
"thinking_budget": 10000
|
|
@@ -201,12 +201,12 @@
|
|
|
201
201
|
"variants": {
|
|
202
202
|
"high": {
|
|
203
203
|
"options": {
|
|
204
|
-
"thinking_budget":
|
|
204
|
+
"thinking_budget": 16000
|
|
205
205
|
}
|
|
206
206
|
},
|
|
207
207
|
"max": {
|
|
208
208
|
"options": {
|
|
209
|
-
"thinking_budget":
|
|
209
|
+
"thinking_budget": 32000
|
|
210
210
|
}
|
|
211
211
|
}
|
|
212
212
|
}
|
|
@@ -214,7 +214,7 @@
|
|
|
214
214
|
"claude-opus-4.6": {
|
|
215
215
|
"attachment": true,
|
|
216
216
|
"limit": {
|
|
217
|
-
"context":
|
|
217
|
+
"context": 144000,
|
|
218
218
|
"output": 64000
|
|
219
219
|
},
|
|
220
220
|
"options": {
|
|
@@ -259,21 +259,33 @@
|
|
|
259
259
|
"claude-sonnet-4": {
|
|
260
260
|
"attachment": true,
|
|
261
261
|
"limit": {
|
|
262
|
-
"context":
|
|
263
|
-
"output":
|
|
262
|
+
"context": 216000,
|
|
263
|
+
"output": 16000
|
|
264
264
|
},
|
|
265
265
|
"options": {
|
|
266
266
|
"thinking_budget": 10000
|
|
267
267
|
},
|
|
268
268
|
"reasoning": true,
|
|
269
269
|
"temperature": true,
|
|
270
|
-
"tool_call": true
|
|
270
|
+
"tool_call": true,
|
|
271
|
+
"variants": {
|
|
272
|
+
"high": {
|
|
273
|
+
"options": {
|
|
274
|
+
"thinking_budget": 16000
|
|
275
|
+
}
|
|
276
|
+
},
|
|
277
|
+
"max": {
|
|
278
|
+
"options": {
|
|
279
|
+
"thinking_budget": 32000
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
271
283
|
},
|
|
272
284
|
"claude-sonnet-4.5": {
|
|
273
285
|
"attachment": true,
|
|
274
286
|
"limit": {
|
|
275
|
-
"context":
|
|
276
|
-
"output":
|
|
287
|
+
"context": 144000,
|
|
288
|
+
"output": 32000
|
|
277
289
|
},
|
|
278
290
|
"options": {
|
|
279
291
|
"thinking_budget": 10000
|
|
@@ -284,12 +296,12 @@
|
|
|
284
296
|
"variants": {
|
|
285
297
|
"high": {
|
|
286
298
|
"options": {
|
|
287
|
-
"thinking_budget":
|
|
299
|
+
"thinking_budget": 16000
|
|
288
300
|
}
|
|
289
301
|
},
|
|
290
302
|
"max": {
|
|
291
303
|
"options": {
|
|
292
|
-
"thinking_budget":
|
|
304
|
+
"thinking_budget": 32000
|
|
293
305
|
}
|
|
294
306
|
}
|
|
295
307
|
}
|
|
@@ -298,7 +310,7 @@
|
|
|
298
310
|
"attachment": true,
|
|
299
311
|
"limit": {
|
|
300
312
|
"context": 200000,
|
|
301
|
-
"output":
|
|
313
|
+
"output": 32000
|
|
302
314
|
},
|
|
303
315
|
"options": {
|
|
304
316
|
"thinking": {
|
|
@@ -89,11 +89,31 @@ const RATE_LIMIT_CONFIG = {
|
|
|
89
89
|
maxFallbacks: 4, // Max model fallback switches per request
|
|
90
90
|
};
|
|
91
91
|
|
|
92
|
+
// Local request shaping to smooth bursts before they hit Copilot limits
|
|
93
|
+
const REQUEST_SHAPING_CONFIG = {
|
|
94
|
+
tokensPerSecond: 1,
|
|
95
|
+
burstCapacity: 2,
|
|
96
|
+
maxQueueDelayMs: 15000,
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
const CIRCUIT_BREAKER_CONFIG = {
|
|
100
|
+
maxInlineWaitMs: 30000,
|
|
101
|
+
maxRecoveryCycles: 3,
|
|
102
|
+
};
|
|
103
|
+
|
|
92
104
|
// Per-model rate limit state (in-memory, resets on restart)
|
|
93
105
|
interface RateLimitEntry {
|
|
94
106
|
rateLimitedUntil: number; // Unix timestamp (ms)
|
|
95
107
|
}
|
|
96
108
|
const rateLimitState = new Map<string, RateLimitEntry>();
|
|
109
|
+
const familyCircuitBreakerState = new Map<string, number>();
|
|
110
|
+
|
|
111
|
+
interface TokenBucketState {
|
|
112
|
+
tokens: number;
|
|
113
|
+
lastRefillAt: number;
|
|
114
|
+
}
|
|
115
|
+
const modelTokenBuckets = new Map<string, TokenBucketState>();
|
|
116
|
+
const modelQueueTail = new Map<string, Promise<void>>();
|
|
97
117
|
|
|
98
118
|
// Model fallback chains: same-family alternatives when a model is rate-limited
|
|
99
119
|
const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
|
|
@@ -146,6 +166,144 @@ function isModelRateLimited(model: string): boolean {
|
|
|
146
166
|
return true;
|
|
147
167
|
}
|
|
148
168
|
|
|
169
|
+
function getRateLimitRemainingMs(model: string): number | null {
|
|
170
|
+
const entry = rateLimitState.get(model);
|
|
171
|
+
if (!entry) return null;
|
|
172
|
+
const remaining = entry.rateLimitedUntil - Date.now();
|
|
173
|
+
if (remaining <= 0) {
|
|
174
|
+
rateLimitState.delete(model);
|
|
175
|
+
return null;
|
|
176
|
+
}
|
|
177
|
+
return remaining;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
function getModelFamily(model: string): string[] {
|
|
181
|
+
const family = new Set<string>([
|
|
182
|
+
model,
|
|
183
|
+
...(MODEL_FALLBACK_CHAINS[model] || []),
|
|
184
|
+
]);
|
|
185
|
+
return [...family];
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
function getFamilyCircuitKey(model: string): string {
|
|
189
|
+
return getModelFamily(model).sort().join("|");
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
function getFamilyCircuitRemainingMs(model: string): number {
|
|
193
|
+
const key = getFamilyCircuitKey(model);
|
|
194
|
+
const until = familyCircuitBreakerState.get(key);
|
|
195
|
+
if (!until) return 0;
|
|
196
|
+
const remaining = until - Date.now();
|
|
197
|
+
if (remaining <= 0) {
|
|
198
|
+
familyCircuitBreakerState.delete(key);
|
|
199
|
+
return 0;
|
|
200
|
+
}
|
|
201
|
+
return remaining;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
|
|
205
|
+
const key = getFamilyCircuitKey(model);
|
|
206
|
+
familyCircuitBreakerState.set(
|
|
207
|
+
key,
|
|
208
|
+
Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
|
|
209
|
+
);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
function getFamilyMaxCooldownRemainingMs(model: string): number {
|
|
213
|
+
let maxRemaining = 0;
|
|
214
|
+
for (const candidate of getModelFamily(model)) {
|
|
215
|
+
const remaining = getRateLimitRemainingMs(candidate) ?? 0;
|
|
216
|
+
if (remaining > maxRemaining) maxRemaining = remaining;
|
|
217
|
+
}
|
|
218
|
+
return maxRemaining;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
function isEntireModelFamilyCoolingDown(model: string): boolean {
|
|
222
|
+
const family = getModelFamily(model);
|
|
223
|
+
return (
|
|
224
|
+
family.length > 0 &&
|
|
225
|
+
family.every((candidate) => isModelRateLimited(candidate))
|
|
226
|
+
);
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
function formatRetryAfter(seconds: number): string {
|
|
230
|
+
if (seconds < 60) return `${seconds}s`;
|
|
231
|
+
const mins = Math.floor(seconds / 60);
|
|
232
|
+
const secs = seconds % 60;
|
|
233
|
+
return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
async function shapeRequestForModel(model: string): Promise<void> {
|
|
237
|
+
if (!model) return;
|
|
238
|
+
|
|
239
|
+
const previousTail = modelQueueTail.get(model) ?? Promise.resolve();
|
|
240
|
+
let releaseQueue: (() => void) | undefined;
|
|
241
|
+
const currentGate = new Promise<void>((resolve) => {
|
|
242
|
+
releaseQueue = resolve;
|
|
243
|
+
});
|
|
244
|
+
const currentTail = previousTail.then(() => currentGate);
|
|
245
|
+
modelQueueTail.set(model, currentTail);
|
|
246
|
+
|
|
247
|
+
let queueTimeout: ReturnType<typeof setTimeout> | undefined;
|
|
248
|
+
try {
|
|
249
|
+
await Promise.race([
|
|
250
|
+
previousTail,
|
|
251
|
+
new Promise<void>((_, reject) => {
|
|
252
|
+
queueTimeout = setTimeout(() => {
|
|
253
|
+
reject(
|
|
254
|
+
new Error(
|
|
255
|
+
`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(REQUEST_SHAPING_CONFIG.maxQueueDelayMs / 1000))}.`,
|
|
256
|
+
),
|
|
257
|
+
);
|
|
258
|
+
}, REQUEST_SHAPING_CONFIG.maxQueueDelayMs);
|
|
259
|
+
}),
|
|
260
|
+
]);
|
|
261
|
+
|
|
262
|
+
const now = Date.now();
|
|
263
|
+
const bucket = modelTokenBuckets.get(model) ?? {
|
|
264
|
+
tokens: REQUEST_SHAPING_CONFIG.burstCapacity,
|
|
265
|
+
lastRefillAt: now,
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
const elapsedMs = Math.max(0, now - bucket.lastRefillAt);
|
|
269
|
+
const refillTokens =
|
|
270
|
+
(elapsedMs / 1000) * REQUEST_SHAPING_CONFIG.tokensPerSecond;
|
|
271
|
+
bucket.tokens = Math.min(
|
|
272
|
+
REQUEST_SHAPING_CONFIG.burstCapacity,
|
|
273
|
+
bucket.tokens + refillTokens,
|
|
274
|
+
);
|
|
275
|
+
bucket.lastRefillAt = now;
|
|
276
|
+
|
|
277
|
+
if (bucket.tokens < 1) {
|
|
278
|
+
const deficit = 1 - bucket.tokens;
|
|
279
|
+
const waitMs = Math.ceil(
|
|
280
|
+
(deficit / REQUEST_SHAPING_CONFIG.tokensPerSecond) * 1000,
|
|
281
|
+
);
|
|
282
|
+
if (waitMs > REQUEST_SHAPING_CONFIG.maxQueueDelayMs) {
|
|
283
|
+
throw new Error(
|
|
284
|
+
`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(waitMs / 1000))}.`,
|
|
285
|
+
);
|
|
286
|
+
}
|
|
287
|
+
log("info", `Local request shaping wait for ${model}`, {
|
|
288
|
+
wait_ms: waitMs,
|
|
289
|
+
});
|
|
290
|
+
await sleep(waitMs);
|
|
291
|
+
bucket.tokens = 0;
|
|
292
|
+
bucket.lastRefillAt = Date.now();
|
|
293
|
+
} else {
|
|
294
|
+
bucket.tokens -= 1;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
modelTokenBuckets.set(model, bucket);
|
|
298
|
+
} finally {
|
|
299
|
+
if (queueTimeout) clearTimeout(queueTimeout);
|
|
300
|
+
releaseQueue?.();
|
|
301
|
+
if (modelQueueTail.get(model) === currentTail) {
|
|
302
|
+
modelQueueTail.delete(model);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
149
307
|
function markModelRateLimited(model: string, cooldownMs: number): void {
|
|
150
308
|
rateLimitState.set(model, {
|
|
151
309
|
rateLimitedUntil: Date.now() + cooldownMs,
|
|
@@ -160,11 +318,16 @@ function markModelRateLimited(model: string, cooldownMs: number): void {
|
|
|
160
318
|
* Find the next available fallback model in the same family.
|
|
161
319
|
* Skips models that are themselves rate-limited.
|
|
162
320
|
*/
|
|
163
|
-
function getNextFallbackModel(
|
|
321
|
+
function getNextFallbackModel(
|
|
322
|
+
model: string,
|
|
323
|
+
attemptedModels: Set<string>,
|
|
324
|
+
): string | null {
|
|
164
325
|
const chain = MODEL_FALLBACK_CHAINS[model];
|
|
165
326
|
if (!chain) return null;
|
|
166
327
|
for (const fallback of chain) {
|
|
167
|
-
if (!isModelRateLimited(fallback))
|
|
328
|
+
if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
|
|
329
|
+
return fallback;
|
|
330
|
+
}
|
|
168
331
|
}
|
|
169
332
|
return null;
|
|
170
333
|
}
|
|
@@ -192,20 +355,29 @@ const MAX_RESPONSE_API_ID_LENGTH = 64;
|
|
|
192
355
|
* Sanitize an ID to fit within the Responses API 64-char limit.
|
|
193
356
|
* GitHub Copilot returns proprietary long IDs (400+ chars) that violate
|
|
194
357
|
* the OpenAI spec. We hash them to a deterministic 64-char string.
|
|
358
|
+
* Preserves the original prefix (e.g., "fc_", "msg_", "call_") so that
|
|
359
|
+
* OpenAI's prefix validation passes.
|
|
195
360
|
* See: https://github.com/vercel/ai/issues/5171
|
|
196
361
|
*/
|
|
197
362
|
function sanitizeResponseId(id: string): string {
|
|
198
363
|
if (!id || id.length <= MAX_RESPONSE_API_ID_LENGTH) return id;
|
|
199
|
-
//
|
|
200
|
-
//
|
|
364
|
+
// Detect and preserve the original prefix (e.g., "fc_", "msg_", "call_", "resp_")
|
|
365
|
+
// The OpenAI Responses API validates that IDs start with specific prefixes
|
|
366
|
+
const prefixMatch = id.match(/^([a-z]+_)/);
|
|
367
|
+
const prefix = prefixMatch ? prefixMatch[1] : "";
|
|
368
|
+
// Hash the full ID for deterministic uniqueness
|
|
201
369
|
let hash = 0;
|
|
202
370
|
for (let i = 0; i < id.length; i++) {
|
|
203
371
|
hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
|
|
204
372
|
}
|
|
205
373
|
const hashStr = Math.abs(hash).toString(36);
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
374
|
+
// Take some chars from after the prefix for additional uniqueness
|
|
375
|
+
const afterPrefix = id.slice(prefix.length);
|
|
376
|
+
const maxMiddleLen =
|
|
377
|
+
MAX_RESPONSE_API_ID_LENGTH - prefix.length - hashStr.length - 1;
|
|
378
|
+
const middle = afterPrefix.slice(0, Math.max(0, maxMiddleLen));
|
|
379
|
+
// Format: prefix + middle + "_" + hash (ensure total <= 64)
|
|
380
|
+
return `${prefix}${middle}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
|
|
209
381
|
}
|
|
210
382
|
|
|
211
383
|
/**
|
|
@@ -350,7 +522,12 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
350
522
|
return cleanedMsg;
|
|
351
523
|
}
|
|
352
524
|
|
|
353
|
-
// If content is an array,
|
|
525
|
+
// If content is an array, strip ALL thinking blocks.
|
|
526
|
+
// Reasoning is communicated via reasoning_text/reasoning_opaque
|
|
527
|
+
// fields, not via thinking blocks in the content array.
|
|
528
|
+
// Even thinking blocks WITH signatures can cause
|
|
529
|
+
// "Invalid signature in thinking block" errors when
|
|
530
|
+
// signatures are expired or from a different context.
|
|
354
531
|
if (Array.isArray(msg.content)) {
|
|
355
532
|
const hasThinkingBlock = msg.content.some(
|
|
356
533
|
(part: any) => part.type === "thinking",
|
|
@@ -358,22 +535,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
358
535
|
if (hasThinkingBlock) {
|
|
359
536
|
log(
|
|
360
537
|
"debug",
|
|
361
|
-
`
|
|
538
|
+
`Stripping all thinking blocks from message ${idx}`,
|
|
362
539
|
);
|
|
363
|
-
// Filter out thinking blocks without signatures
|
|
364
540
|
const cleanedContent = msg.content.filter(
|
|
365
|
-
(part: any) =>
|
|
366
|
-
if (part.type === "thinking") {
|
|
367
|
-
if (!part.signature) {
|
|
368
|
-
log(
|
|
369
|
-
"warn",
|
|
370
|
-
`Removing thinking block without signature`,
|
|
371
|
-
);
|
|
372
|
-
return false;
|
|
373
|
-
}
|
|
374
|
-
}
|
|
375
|
-
return true;
|
|
376
|
-
},
|
|
541
|
+
(part: any) => part.type !== "thinking",
|
|
377
542
|
);
|
|
378
543
|
return {
|
|
379
544
|
...msg,
|
|
@@ -530,8 +695,33 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
530
695
|
|
|
531
696
|
// Pre-flight: if current model is already known rate-limited, switch to fallback
|
|
532
697
|
let activeFinalInit: RequestInit = finalInit;
|
|
698
|
+
const attemptedModels = new Set<string>();
|
|
699
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
700
|
+
const requestedModel = currentModel;
|
|
701
|
+
if (currentModel) {
|
|
702
|
+
const circuitRemainingMs =
|
|
703
|
+
getFamilyCircuitRemainingMs(currentModel);
|
|
704
|
+
if (circuitRemainingMs > 0) {
|
|
705
|
+
if (
|
|
706
|
+
circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
707
|
+
) {
|
|
708
|
+
log(
|
|
709
|
+
"info",
|
|
710
|
+
`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
|
|
711
|
+
);
|
|
712
|
+
await sleep(circuitRemainingMs);
|
|
713
|
+
} else {
|
|
714
|
+
throw new Error(
|
|
715
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
|
|
716
|
+
);
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
}
|
|
533
720
|
if (currentModel && isModelRateLimited(currentModel)) {
|
|
534
|
-
const fallback = getNextFallbackModel(
|
|
721
|
+
const fallback = getNextFallbackModel(
|
|
722
|
+
currentModel,
|
|
723
|
+
attemptedModels,
|
|
724
|
+
);
|
|
535
725
|
if (fallback) {
|
|
536
726
|
log(
|
|
537
727
|
"info",
|
|
@@ -540,6 +730,26 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
540
730
|
activeFinalInit =
|
|
541
731
|
swapModelInBody(finalInit, fallback) || finalInit;
|
|
542
732
|
currentModel = fallback;
|
|
733
|
+
attemptedModels.add(fallback);
|
|
734
|
+
} else {
|
|
735
|
+
const familyCooldownMs =
|
|
736
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
737
|
+
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
738
|
+
if (
|
|
739
|
+
familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
740
|
+
) {
|
|
741
|
+
log(
|
|
742
|
+
"info",
|
|
743
|
+
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
|
|
744
|
+
);
|
|
745
|
+
await sleep(familyCooldownMs);
|
|
746
|
+
attemptedModels.clear();
|
|
747
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
748
|
+
} else {
|
|
749
|
+
throw new Error(
|
|
750
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
751
|
+
);
|
|
752
|
+
}
|
|
543
753
|
}
|
|
544
754
|
}
|
|
545
755
|
|
|
@@ -547,12 +757,20 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
547
757
|
let lastError: Error | undefined;
|
|
548
758
|
let fallbacksUsed = 0;
|
|
549
759
|
let attempt = 0;
|
|
760
|
+
let recoveryCyclesUsed = 0;
|
|
550
761
|
|
|
551
762
|
while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
|
|
552
763
|
try {
|
|
764
|
+
if (currentModel) {
|
|
765
|
+
await shapeRequestForModel(currentModel);
|
|
766
|
+
}
|
|
553
767
|
const response = await fetch(input, activeFinalInit);
|
|
554
768
|
|
|
555
769
|
if (response.status === 429) {
|
|
770
|
+
try {
|
|
771
|
+
await response.body?.cancel();
|
|
772
|
+
} catch {}
|
|
773
|
+
|
|
556
774
|
// Parse Retry-After header for server-suggested cooldown
|
|
557
775
|
const retryAfterMs = parseRetryAfter(response);
|
|
558
776
|
const cooldownMs =
|
|
@@ -568,7 +786,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
568
786
|
currentModel &&
|
|
569
787
|
fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
|
|
570
788
|
) {
|
|
571
|
-
const fallback = getNextFallbackModel(
|
|
789
|
+
const fallback = getNextFallbackModel(
|
|
790
|
+
currentModel,
|
|
791
|
+
attemptedModels,
|
|
792
|
+
);
|
|
572
793
|
if (fallback) {
|
|
573
794
|
log(
|
|
574
795
|
"warn",
|
|
@@ -583,6 +804,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
583
804
|
swapModelInBody(activeFinalInit, fallback) ||
|
|
584
805
|
activeFinalInit;
|
|
585
806
|
currentModel = fallback;
|
|
807
|
+
attemptedModels.add(fallback);
|
|
586
808
|
fallbacksUsed++;
|
|
587
809
|
continue; // Retry immediately with new model, no delay
|
|
588
810
|
}
|
|
@@ -590,10 +812,41 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
590
812
|
|
|
591
813
|
// No fallback available — use exponential backoff on same model
|
|
592
814
|
if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
815
|
+
if (
|
|
816
|
+
currentModel &&
|
|
817
|
+
isEntireModelFamilyCoolingDown(currentModel)
|
|
818
|
+
) {
|
|
819
|
+
const familyCooldownMs =
|
|
820
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
821
|
+
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
822
|
+
if (
|
|
823
|
+
familyCooldownMs <=
|
|
824
|
+
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
825
|
+
) {
|
|
826
|
+
log(
|
|
827
|
+
"info",
|
|
828
|
+
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
|
|
829
|
+
);
|
|
830
|
+
await sleep(familyCooldownMs);
|
|
831
|
+
attemptedModels.clear();
|
|
832
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
833
|
+
attempt++;
|
|
834
|
+
continue;
|
|
835
|
+
}
|
|
836
|
+
throw new Error(
|
|
837
|
+
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
838
|
+
);
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
const modelCooldownMs = currentModel
|
|
842
|
+
? getRateLimitRemainingMs(currentModel)
|
|
843
|
+
: null;
|
|
844
|
+
const delay = Math.min(
|
|
845
|
+
modelCooldownMs ??
|
|
846
|
+
retryAfterMs ??
|
|
847
|
+
calculateRetryDelay(attempt),
|
|
848
|
+
RATE_LIMIT_CONFIG.maxDelayMs,
|
|
849
|
+
);
|
|
597
850
|
log(
|
|
598
851
|
"warn",
|
|
599
852
|
`Rate limited (429), no fallback available, waiting ${delay}ms`,
|
|
@@ -605,11 +858,47 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
605
858
|
},
|
|
606
859
|
);
|
|
607
860
|
await sleep(delay);
|
|
861
|
+
attemptedModels.clear();
|
|
862
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
608
863
|
attempt++;
|
|
609
864
|
continue;
|
|
610
865
|
}
|
|
611
866
|
|
|
612
867
|
// Exhausted retries and fallbacks
|
|
868
|
+
if (currentModel) {
|
|
869
|
+
const familyCooldownMs =
|
|
870
|
+
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
871
|
+
const recoveryDelayMs =
|
|
872
|
+
familyCooldownMs > 0
|
|
873
|
+
? Math.min(
|
|
874
|
+
familyCooldownMs,
|
|
875
|
+
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
|
|
876
|
+
)
|
|
877
|
+
: calculateRetryDelay(0);
|
|
878
|
+
if (
|
|
879
|
+
recoveryDelayMs > 0 &&
|
|
880
|
+
recoveryCyclesUsed <
|
|
881
|
+
CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
|
|
882
|
+
) {
|
|
883
|
+
recoveryCyclesUsed++;
|
|
884
|
+
log(
|
|
885
|
+
"info",
|
|
886
|
+
`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
|
|
887
|
+
);
|
|
888
|
+
await sleep(recoveryDelayMs);
|
|
889
|
+
attempt = 0;
|
|
890
|
+
fallbacksUsed = 0;
|
|
891
|
+
if (requestedModel) {
|
|
892
|
+
currentModel = requestedModel;
|
|
893
|
+
activeFinalInit =
|
|
894
|
+
swapModelInBody(finalInit, requestedModel) ||
|
|
895
|
+
finalInit;
|
|
896
|
+
}
|
|
897
|
+
attemptedModels.clear();
|
|
898
|
+
if (currentModel) attemptedModels.add(currentModel);
|
|
899
|
+
continue;
|
|
900
|
+
}
|
|
901
|
+
}
|
|
613
902
|
throw new Error(
|
|
614
903
|
`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
|
|
615
904
|
);
|
|
@@ -621,6 +910,15 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
621
910
|
} catch (error) {
|
|
622
911
|
lastError = error as Error;
|
|
623
912
|
|
|
913
|
+
if (
|
|
914
|
+
lastError.message.includes(
|
|
915
|
+
"All fallback models cooling down",
|
|
916
|
+
) ||
|
|
917
|
+
lastError.message.includes("Local request queue saturated")
|
|
918
|
+
) {
|
|
919
|
+
throw lastError;
|
|
920
|
+
}
|
|
921
|
+
|
|
624
922
|
// Network errors might be transient, retry
|
|
625
923
|
if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
|
|
626
924
|
const delay = calculateRetryDelay(attempt);
|
|
@@ -90,24 +90,51 @@ export function createHooks(deps: HookDeps) {
|
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
92
|
|
|
93
|
-
// --- Session error:
|
|
93
|
+
// --- Session error: classify and guide ---
|
|
94
94
|
if (event.type === "session.error") {
|
|
95
95
|
const props = event.properties as Record<string, unknown> | undefined;
|
|
96
96
|
const errorMsg = props?.error
|
|
97
|
-
? String(props.error).slice(0,
|
|
97
|
+
? String(props.error).slice(0, 200)
|
|
98
98
|
: props?.message
|
|
99
|
-
? String(props.message).slice(0,
|
|
99
|
+
? String(props.message).slice(0, 200)
|
|
100
100
|
: "Unknown error";
|
|
101
101
|
|
|
102
|
-
//
|
|
103
|
-
|
|
102
|
+
// Log full error for debugging
|
|
103
|
+
await log(`Session error: ${errorMsg}`, "warn");
|
|
104
|
+
|
|
105
|
+
// Classify error and provide specific guidance
|
|
106
|
+
let guidance: string;
|
|
107
|
+
if (
|
|
104
108
|
/token.{0,20}(exceed|limit)/i.test(errorMsg) ||
|
|
105
|
-
errorMsg.includes("context_length_exceeded")
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
+
errorMsg.includes("context_length_exceeded")
|
|
110
|
+
) {
|
|
111
|
+
guidance = "Context too large — use /compact or start a new session";
|
|
112
|
+
} else if (
|
|
113
|
+
/rate.?limit|429|too many requests/i.test(errorMsg)
|
|
114
|
+
) {
|
|
115
|
+
guidance = "Rate limited — wait a moment and retry";
|
|
116
|
+
} else if (
|
|
117
|
+
/unauthorized|401|403|auth/i.test(errorMsg)
|
|
118
|
+
) {
|
|
119
|
+
guidance = "Auth error — check API key or token";
|
|
120
|
+
} else if (
|
|
121
|
+
/timeout|ETIMEDOUT|ECONNRESET|network|fetch failed/i.test(errorMsg)
|
|
122
|
+
) {
|
|
123
|
+
guidance = "Network error — check connection and retry";
|
|
124
|
+
} else if (
|
|
125
|
+
/invalid.*signature|thinking block/i.test(errorMsg)
|
|
126
|
+
) {
|
|
127
|
+
guidance = "API format error — try starting a new session";
|
|
128
|
+
} else if (
|
|
129
|
+
/500|502|503|504|internal server|service unavailable/i.test(errorMsg)
|
|
130
|
+
) {
|
|
131
|
+
guidance = "Server error — retry in a few seconds";
|
|
132
|
+
} else {
|
|
133
|
+
guidance = "Unexpected error — save work with observation tool if needed";
|
|
134
|
+
}
|
|
109
135
|
|
|
110
|
-
|
|
136
|
+
const short = errorMsg.length > 80 ? `${errorMsg.slice(0, 80)}…` : errorMsg;
|
|
137
|
+
await showToast("Session Error", `${guidance} (${short})`, "warning");
|
|
111
138
|
}
|
|
112
139
|
},
|
|
113
140
|
|