opencodekit 0.18.25 → 0.18.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/dist/template/.opencode/dcp.jsonc +81 -70
- package/dist/template/.opencode/memory.db +0 -0
- package/dist/template/.opencode/memory.db-shm +0 -0
- package/dist/template/.opencode/memory.db-wal +0 -0
- package/dist/template/.opencode/package.json +1 -1
- package/dist/template/.opencode/plugin/copilot-auth.ts +111 -451
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,72 +1,83 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
2
|
+
"$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json",
|
|
3
|
+
"enabled": true,
|
|
4
|
+
"debug": false,
|
|
5
|
+
// "off" | "minimal" | "detailed" — keep minimal for low-noise dev flow
|
|
6
|
+
"pruneNotification": "minimal",
|
|
7
|
+
// "chat" (in-conversation) or "toast" (system notification)
|
|
8
|
+
"pruneNotificationType": "toast",
|
|
9
|
+
// Slash commands: /dcp context, /dcp stats, /dcp sweep, /dcp compress, /dcp decompress, /dcp recompress
|
|
10
|
+
"commands": {
|
|
11
|
+
"enabled": true,
|
|
12
|
+
// Additional tools to protect from /dcp sweep (supports glob wildcards)
|
|
13
|
+
"protectedTools": ["observation", "memory-*"],
|
|
14
|
+
},
|
|
15
|
+
// Manual mode: disables autonomous context management
|
|
16
|
+
"manualMode": {
|
|
17
|
+
"enabled": false,
|
|
18
|
+
"automaticStrategies": true,
|
|
19
|
+
},
|
|
20
|
+
// Protect recent tool outputs from pruning
|
|
21
|
+
"turnProtection": {
|
|
22
|
+
"enabled": false,
|
|
23
|
+
"turns": 4,
|
|
24
|
+
},
|
|
25
|
+
// Glob patterns for files that should never be auto-pruned
|
|
26
|
+
// Keep tight: broad patterns reduce DCP effectiveness
|
|
27
|
+
"protectedFilePatterns": [
|
|
28
|
+
"**/.env*",
|
|
29
|
+
"**/AGENTS.md",
|
|
30
|
+
"**/.opencode/**",
|
|
31
|
+
"**/.beads/**",
|
|
32
|
+
"**/package.json",
|
|
33
|
+
"**/tsconfig.json",
|
|
34
|
+
],
|
|
35
|
+
// Unified context compression tool (v3.1.0)
|
|
36
|
+
"compress": {
|
|
37
|
+
// "range" (stable) compresses spans into block summaries
|
|
38
|
+
// "message" (experimental) compresses individual raw messages
|
|
39
|
+
"mode": "message",
|
|
40
|
+
// "allow" (no prompt) | "ask" (prompt) | "deny" (tool not registered)
|
|
41
|
+
"permission": "allow",
|
|
42
|
+
"showCompression": false,
|
|
43
|
+
// v3.1.0: active summary tokens extend effective maxContextLimit
|
|
44
|
+
"summaryBuffer": true,
|
|
45
|
+
// Soft upper threshold: above this, strong compression nudges fire
|
|
46
|
+
// Accepts number or "X%" of model context window
|
|
47
|
+
"maxContextLimit": "80%",
|
|
48
|
+
// Soft lower threshold: below this, turn/iteration reminders are off
|
|
49
|
+
"minContextLimit": "35%",
|
|
50
|
+
// How often context-limit nudge fires above maxContextLimit (1 = every fetch)
|
|
51
|
+
"nudgeFrequency": 5,
|
|
52
|
+
// Messages since last user message before adding compression reminders
|
|
53
|
+
"iterationNudgeThreshold": 15,
|
|
54
|
+
// "strong" = more likely to compress, "soft" = less likely
|
|
55
|
+
"nudgeForce": "soft",
|
|
56
|
+
// Keep user messages compressible to avoid permanent context growth
|
|
57
|
+
"protectUserMessages": false,
|
|
58
|
+
// Auto-protected by DCP: task, skill, todowrite, todoread, compress, batch, plan_enter, plan_exit, write, edit
|
|
59
|
+
// Only list ADDITIONAL tools whose outputs should be appended to compression summaries
|
|
60
|
+
"protectedTools": ["observation", "memory-*", "tilth_*"],
|
|
61
|
+
},
|
|
62
|
+
// Experimental features
|
|
63
|
+
"experimental": {
|
|
64
|
+
// Allow DCP processing in subagent sessions (default: false)
|
|
65
|
+
"allowSubAgents": false,
|
|
66
|
+
// Enable user-editable prompt overrides under dcp-prompts directories
|
|
67
|
+
"customPrompts": false,
|
|
68
|
+
},
|
|
69
|
+
// Automatic pruning strategies (zero LLM cost)
|
|
70
|
+
"strategies": {
|
|
71
|
+
// Removes duplicate tool calls (same tool + same arguments), keeps most recent
|
|
72
|
+
"deduplication": {
|
|
73
|
+
"enabled": true,
|
|
74
|
+
"protectedTools": [],
|
|
75
|
+
},
|
|
76
|
+
// Prunes inputs from errored tool calls after N turns (error messages preserved)
|
|
77
|
+
"purgeErrors": {
|
|
78
|
+
"enabled": true,
|
|
79
|
+
"turns": 4,
|
|
80
|
+
"protectedTools": [],
|
|
81
|
+
},
|
|
82
|
+
},
|
|
72
83
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -96,11 +96,8 @@ const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
|
96
96
|
|
|
97
97
|
// Rate limit handling configuration
|
|
98
98
|
const RATE_LIMIT_CONFIG = {
|
|
99
|
-
maxRetries: 3,
|
|
100
|
-
baseDelayMs: 2000, // Start with 2 seconds
|
|
101
99
|
maxDelayMs: 60000, // Cap at 60 seconds
|
|
102
100
|
defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
|
|
103
|
-
maxFallbacks: 4, // Max model fallback switches per request
|
|
104
101
|
};
|
|
105
102
|
|
|
106
103
|
// Local request shaping to smooth bursts before they hit Copilot limits
|
|
@@ -110,11 +107,6 @@ const REQUEST_SHAPING_CONFIG = {
|
|
|
110
107
|
maxQueueDelayMs: 15000,
|
|
111
108
|
};
|
|
112
109
|
|
|
113
|
-
const CIRCUIT_BREAKER_CONFIG = {
|
|
114
|
-
maxInlineWaitMs: 30000,
|
|
115
|
-
maxRecoveryCycles: 3,
|
|
116
|
-
};
|
|
117
|
-
|
|
118
110
|
// Per-model rate limit state (in-memory, resets on restart)
|
|
119
111
|
interface RateLimitEntry {
|
|
120
112
|
rateLimitedUntil: number; // Unix timestamp (ms)
|
|
@@ -170,14 +162,14 @@ function parseRetryAfter(response: Response): number | null {
|
|
|
170
162
|
return null;
|
|
171
163
|
}
|
|
172
164
|
|
|
173
|
-
function
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
165
|
+
function clampCooldownMs(
|
|
166
|
+
value: number | null | undefined,
|
|
167
|
+
fallbackMs = 0,
|
|
168
|
+
): number {
|
|
169
|
+
return Math.min(
|
|
170
|
+
Math.max(value ?? fallbackMs, 0),
|
|
171
|
+
RATE_LIMIT_CONFIG.maxDelayMs,
|
|
172
|
+
);
|
|
181
173
|
}
|
|
182
174
|
|
|
183
175
|
function getRateLimitRemainingMs(model: string): number | null {
|
|
@@ -217,10 +209,7 @@ function getFamilyCircuitRemainingMs(model: string): number {
|
|
|
217
209
|
|
|
218
210
|
function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
|
|
219
211
|
const key = getFamilyCircuitKey(model);
|
|
220
|
-
familyCircuitBreakerState.set(
|
|
221
|
-
key,
|
|
222
|
-
Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
|
|
223
|
-
);
|
|
212
|
+
familyCircuitBreakerState.set(key, Date.now() + clampCooldownMs(cooldownMs));
|
|
224
213
|
}
|
|
225
214
|
|
|
226
215
|
function getFamilyMaxCooldownRemainingMs(model: string): number {
|
|
@@ -232,14 +221,6 @@ function getFamilyMaxCooldownRemainingMs(model: string): number {
|
|
|
232
221
|
return maxRemaining;
|
|
233
222
|
}
|
|
234
223
|
|
|
235
|
-
function isEntireModelFamilyCoolingDown(model: string): boolean {
|
|
236
|
-
const family = getModelFamily(model);
|
|
237
|
-
return (
|
|
238
|
-
family.length > 0 &&
|
|
239
|
-
family.every((candidate) => isModelRateLimited(candidate))
|
|
240
|
-
);
|
|
241
|
-
}
|
|
242
|
-
|
|
243
224
|
function formatRetryAfter(seconds: number): string {
|
|
244
225
|
if (seconds < 60) return `${seconds}s`;
|
|
245
226
|
const mins = Math.floor(seconds / 60);
|
|
@@ -319,50 +300,16 @@ async function shapeRequestForModel(model: string): Promise<void> {
|
|
|
319
300
|
}
|
|
320
301
|
|
|
321
302
|
function markModelRateLimited(model: string, cooldownMs: number): void {
|
|
303
|
+
const boundedCooldownMs = clampCooldownMs(cooldownMs);
|
|
322
304
|
rateLimitState.set(model, {
|
|
323
|
-
rateLimitedUntil: Date.now() +
|
|
305
|
+
rateLimitedUntil: Date.now() + boundedCooldownMs,
|
|
324
306
|
});
|
|
325
307
|
log(
|
|
326
308
|
"info",
|
|
327
|
-
`Marked ${model} as rate-limited for ${Math.round(
|
|
309
|
+
`Marked ${model} as rate-limited for ${Math.round(boundedCooldownMs / 1000)}s`,
|
|
328
310
|
);
|
|
329
311
|
}
|
|
330
312
|
|
|
331
|
-
/**
|
|
332
|
-
* Find the next available fallback model in the same family.
|
|
333
|
-
* Skips models that are themselves rate-limited.
|
|
334
|
-
*/
|
|
335
|
-
function getNextFallbackModel(
|
|
336
|
-
model: string,
|
|
337
|
-
attemptedModels: Set<string>,
|
|
338
|
-
): string | null {
|
|
339
|
-
const chain = MODEL_FALLBACK_CHAINS[model];
|
|
340
|
-
if (!chain) return null;
|
|
341
|
-
for (const fallback of chain) {
|
|
342
|
-
if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
|
|
343
|
-
return fallback;
|
|
344
|
-
}
|
|
345
|
-
}
|
|
346
|
-
return null;
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
/**
|
|
350
|
-
* Swap the model field in a fetch RequestInit body.
|
|
351
|
-
*/
|
|
352
|
-
function swapModelInBody(
|
|
353
|
-
init: RequestInit | undefined,
|
|
354
|
-
newModel: string,
|
|
355
|
-
): RequestInit | undefined {
|
|
356
|
-
if (!init?.body || typeof init.body !== "string") return init;
|
|
357
|
-
try {
|
|
358
|
-
const body = JSON.parse(init.body);
|
|
359
|
-
body.model = newModel;
|
|
360
|
-
return { ...init, body: JSON.stringify(body) };
|
|
361
|
-
} catch {
|
|
362
|
-
return init;
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
|
|
366
313
|
// Maximum length for item IDs in the OpenAI Responses API
|
|
367
314
|
const MAX_RESPONSE_API_ID_LENGTH = 64;
|
|
368
315
|
// OpenAI Responses API only allows: letters, numbers, underscores, dashes
|
|
@@ -402,13 +349,18 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
|
|
|
402
349
|
const cleanCore = rawCore.replace(INVALID_ID_CHARS, "_").replace(/_+$/g, "");
|
|
403
350
|
|
|
404
351
|
// Check if any sanitization is actually needed
|
|
405
|
-
const needsSanitization =
|
|
352
|
+
const needsSanitization =
|
|
353
|
+
forcedPrefix ||
|
|
354
|
+
hasInvalidIdChars(rawCore) ||
|
|
406
355
|
id.length > MAX_RESPONSE_API_ID_LENGTH;
|
|
407
356
|
|
|
408
357
|
if (!needsSanitization) return id;
|
|
409
358
|
|
|
410
359
|
// If result fits within length and core is non-empty, use cleaned core directly
|
|
411
|
-
if (
|
|
360
|
+
if (
|
|
361
|
+
cleanCore.length > 0 &&
|
|
362
|
+
prefix.length + cleanCore.length <= MAX_RESPONSE_API_ID_LENGTH
|
|
363
|
+
) {
|
|
412
364
|
return `${prefix}${cleanCore}`;
|
|
413
365
|
}
|
|
414
366
|
|
|
@@ -422,7 +374,10 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
|
|
|
422
374
|
MAX_RESPONSE_API_ID_LENGTH - prefix.length - hashStr.length - 1;
|
|
423
375
|
const middle = cleanCore.slice(0, Math.max(0, maxMiddleLen));
|
|
424
376
|
// Format: prefix + middle + "_" + hash (ensure total <= 64)
|
|
425
|
-
const result = `${prefix}${middle}_${hashStr}`.slice(
|
|
377
|
+
const result = `${prefix}${middle}_${hashStr}`.slice(
|
|
378
|
+
0,
|
|
379
|
+
MAX_RESPONSE_API_ID_LENGTH,
|
|
380
|
+
);
|
|
426
381
|
// Strip trailing underscores from truncation
|
|
427
382
|
return result.replace(/_+$/, "");
|
|
428
383
|
}
|
|
@@ -480,7 +435,11 @@ function sanitizeResponseInputIds(input: any[]): any[] {
|
|
|
480
435
|
|
|
481
436
|
// Check for wrong prefix (e.g., function_call with "h_" instead of "fc_")
|
|
482
437
|
const expectedPrefix = getExpectedPrefix(item);
|
|
483
|
-
if (
|
|
438
|
+
if (
|
|
439
|
+
expectedPrefix &&
|
|
440
|
+
typeof item.id === "string" &&
|
|
441
|
+
!idRemap.has(item.id)
|
|
442
|
+
) {
|
|
484
443
|
const newId = sanitizeResponseId(item.id, expectedPrefix);
|
|
485
444
|
if (newId !== item.id) {
|
|
486
445
|
idRemap.set(item.id, newId);
|
|
@@ -530,26 +489,16 @@ function sanitizeResponseInputIds(input: any[]): any[] {
|
|
|
530
489
|
if (typeof sanitized.id === "string" && idRemap.has(sanitized.id)) {
|
|
531
490
|
sanitized.id = idRemap.get(sanitized.id);
|
|
532
491
|
}
|
|
533
|
-
if (
|
|
492
|
+
if (
|
|
493
|
+
typeof sanitized.call_id === "string" &&
|
|
494
|
+
idRemap.has(sanitized.call_id)
|
|
495
|
+
) {
|
|
534
496
|
sanitized.call_id = idRemap.get(sanitized.call_id);
|
|
535
497
|
}
|
|
536
498
|
return sanitized;
|
|
537
499
|
});
|
|
538
500
|
}
|
|
539
501
|
|
|
540
|
-
/**
|
|
541
|
-
* Retries: 2s, 4s, 8s (with jitter)
|
|
542
|
-
*/
|
|
543
|
-
function calculateRetryDelay(attempt: number): number {
|
|
544
|
-
const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
|
|
545
|
-
const jitter = Math.random() * 1000; // Add 0-1s random jitter
|
|
546
|
-
const delay = Math.min(
|
|
547
|
-
exponentialDelay + jitter,
|
|
548
|
-
RATE_LIMIT_CONFIG.maxDelayMs,
|
|
549
|
-
);
|
|
550
|
-
return Math.round(delay);
|
|
551
|
-
}
|
|
552
|
-
|
|
553
502
|
export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
554
503
|
// Initialize logger with the SDK client
|
|
555
504
|
setLogger(sdk);
|
|
@@ -742,10 +691,14 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
742
691
|
])
|
|
743
692
|
.filter(Boolean);
|
|
744
693
|
if (rawIds.length > 0) {
|
|
745
|
-
log(
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
694
|
+
log(
|
|
695
|
+
"debug",
|
|
696
|
+
"[ID-SANITIZE] Raw input IDs before sanitization",
|
|
697
|
+
{
|
|
698
|
+
ids: rawIds,
|
|
699
|
+
count: rawIds.length,
|
|
700
|
+
},
|
|
701
|
+
);
|
|
749
702
|
}
|
|
750
703
|
|
|
751
704
|
// Sanitize IDs from Copilot backend:
|
|
@@ -753,7 +706,9 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
753
706
|
// 2. Excessive length — Copilot returns 400+ char IDs (max is 64)
|
|
754
707
|
const sanitizedInput = sanitizeResponseInputIds(body.input);
|
|
755
708
|
const refDiffers = sanitizedInput !== body.input;
|
|
756
|
-
const jsonDiffers =
|
|
709
|
+
const jsonDiffers =
|
|
710
|
+
refDiffers &&
|
|
711
|
+
JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
|
|
757
712
|
const inputWasSanitized = refDiffers && jsonDiffers;
|
|
758
713
|
|
|
759
714
|
log("debug", "[ID-SANITIZE] Sanitization result", {
|
|
@@ -764,26 +719,40 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
764
719
|
|
|
765
720
|
if (inputWasSanitized) {
|
|
766
721
|
const fixes = body.input
|
|
767
|
-
.map((item: any, i: number) => ({
|
|
768
|
-
|
|
769
|
-
|
|
722
|
+
.map((item: any, i: number) => ({
|
|
723
|
+
item,
|
|
724
|
+
i,
|
|
725
|
+
si: sanitizedInput[i],
|
|
726
|
+
}))
|
|
727
|
+
.filter(
|
|
728
|
+
({ item, si }: any) =>
|
|
729
|
+
item &&
|
|
730
|
+
si &&
|
|
731
|
+
(item.id !== si.id || item.call_id !== si.call_id),
|
|
770
732
|
);
|
|
771
|
-
log(
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
733
|
+
log(
|
|
734
|
+
"info",
|
|
735
|
+
"[ID-SANITIZE] Fixed IDs in Responses API input",
|
|
736
|
+
{
|
|
737
|
+
items_fixed: fixes.length,
|
|
738
|
+
fixes: fixes.map(({ item, si }: any) => ({
|
|
739
|
+
type: item.type,
|
|
740
|
+
old_id: item.id,
|
|
741
|
+
new_id: si?.id,
|
|
742
|
+
old_call_id: item.call_id,
|
|
743
|
+
new_call_id: si?.call_id,
|
|
744
|
+
})),
|
|
745
|
+
},
|
|
746
|
+
);
|
|
781
747
|
modifiedBody = {
|
|
782
748
|
...(modifiedBody || body),
|
|
783
749
|
input: sanitizedInput,
|
|
784
750
|
};
|
|
785
751
|
} else {
|
|
786
|
-
log(
|
|
752
|
+
log(
|
|
753
|
+
"debug",
|
|
754
|
+
"[ID-SANITIZE] No sanitization needed — all IDs valid",
|
|
755
|
+
);
|
|
787
756
|
}
|
|
788
757
|
|
|
789
758
|
isAgentCall = (sanitizedInput || body.input).some(
|
|
@@ -856,369 +825,60 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
|
|
|
856
825
|
currentModel = bodyObj?.model || "";
|
|
857
826
|
} catch {}
|
|
858
827
|
|
|
859
|
-
// Pre-flight: if current model is
|
|
860
|
-
|
|
861
|
-
const attemptedModels = new Set<string>();
|
|
862
|
-
if (currentModel) attemptedModels.add(currentModel);
|
|
863
|
-
const requestedModel = currentModel;
|
|
828
|
+
// Pre-flight: fail fast if current model family is cooling down
|
|
829
|
+
const activeFinalInit: RequestInit = finalInit;
|
|
864
830
|
if (currentModel) {
|
|
865
|
-
const
|
|
866
|
-
getFamilyCircuitRemainingMs(currentModel)
|
|
867
|
-
|
|
868
|
-
if (
|
|
869
|
-
circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
870
|
-
) {
|
|
871
|
-
log(
|
|
872
|
-
"info",
|
|
873
|
-
`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
|
|
874
|
-
);
|
|
875
|
-
await sleep(circuitRemainingMs);
|
|
876
|
-
} else {
|
|
877
|
-
throw new Error(
|
|
878
|
-
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
|
|
879
|
-
);
|
|
880
|
-
}
|
|
881
|
-
}
|
|
882
|
-
}
|
|
883
|
-
if (currentModel && isModelRateLimited(currentModel)) {
|
|
884
|
-
const fallback = getNextFallbackModel(
|
|
885
|
-
currentModel,
|
|
886
|
-
attemptedModels,
|
|
831
|
+
const familyCooldownMs = Math.max(
|
|
832
|
+
getFamilyCircuitRemainingMs(currentModel),
|
|
833
|
+
getFamilyMaxCooldownRemainingMs(currentModel),
|
|
887
834
|
);
|
|
888
|
-
if (
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
`Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
|
|
835
|
+
if (familyCooldownMs > 0) {
|
|
836
|
+
throw new Error(
|
|
837
|
+
`[Copilot] Rate limited: all fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
892
838
|
);
|
|
893
|
-
activeFinalInit =
|
|
894
|
-
swapModelInBody(finalInit, fallback) || finalInit;
|
|
895
|
-
currentModel = fallback;
|
|
896
|
-
attemptedModels.add(fallback);
|
|
897
|
-
} else {
|
|
898
|
-
const familyCooldownMs =
|
|
899
|
-
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
900
|
-
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
901
|
-
if (
|
|
902
|
-
familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
903
|
-
) {
|
|
904
|
-
log(
|
|
905
|
-
"info",
|
|
906
|
-
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
|
|
907
|
-
);
|
|
908
|
-
await sleep(familyCooldownMs);
|
|
909
|
-
attemptedModels.clear();
|
|
910
|
-
if (currentModel) attemptedModels.add(currentModel);
|
|
911
|
-
} else {
|
|
912
|
-
throw new Error(
|
|
913
|
-
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
914
|
-
);
|
|
915
|
-
}
|
|
916
839
|
}
|
|
917
840
|
}
|
|
918
841
|
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
let attempted400Recovery = false;
|
|
925
|
-
|
|
926
|
-
while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
|
|
927
|
-
try {
|
|
928
|
-
if (currentModel) {
|
|
929
|
-
await shapeRequestForModel(currentModel);
|
|
930
|
-
}
|
|
931
|
-
const response = await fetch(input, activeFinalInit);
|
|
932
|
-
|
|
933
|
-
if (response.status === 429) {
|
|
934
|
-
try {
|
|
935
|
-
await response.body?.cancel();
|
|
936
|
-
} catch {}
|
|
937
|
-
|
|
938
|
-
// Parse Retry-After header for server-suggested cooldown
|
|
939
|
-
const retryAfterMs = parseRetryAfter(response);
|
|
940
|
-
const cooldownMs =
|
|
941
|
-
retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
|
|
942
|
-
|
|
943
|
-
// Mark this model as rate-limited
|
|
944
|
-
if (currentModel) {
|
|
945
|
-
markModelRateLimited(currentModel, cooldownMs);
|
|
946
|
-
}
|
|
947
|
-
|
|
948
|
-
// Try fallback model (doesn't count against retry budget)
|
|
949
|
-
if (
|
|
950
|
-
currentModel &&
|
|
951
|
-
fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
|
|
952
|
-
) {
|
|
953
|
-
const fallback = getNextFallbackModel(
|
|
954
|
-
currentModel,
|
|
955
|
-
attemptedModels,
|
|
956
|
-
);
|
|
957
|
-
if (fallback) {
|
|
958
|
-
log(
|
|
959
|
-
"warn",
|
|
960
|
-
`Rate limited on ${currentModel}, switching to ${fallback}`,
|
|
961
|
-
{
|
|
962
|
-
retry_after_ms: retryAfterMs,
|
|
963
|
-
cooldown_ms: cooldownMs,
|
|
964
|
-
fallbacks_used: fallbacksUsed + 1,
|
|
965
|
-
},
|
|
966
|
-
);
|
|
967
|
-
activeFinalInit =
|
|
968
|
-
swapModelInBody(activeFinalInit, fallback) ||
|
|
969
|
-
activeFinalInit;
|
|
970
|
-
currentModel = fallback;
|
|
971
|
-
attemptedModels.add(fallback);
|
|
972
|
-
fallbacksUsed++;
|
|
973
|
-
continue; // Retry immediately with new model, no delay
|
|
974
|
-
}
|
|
975
|
-
}
|
|
976
|
-
|
|
977
|
-
// No fallback available — use exponential backoff on same model
|
|
978
|
-
if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
|
|
979
|
-
if (
|
|
980
|
-
currentModel &&
|
|
981
|
-
isEntireModelFamilyCoolingDown(currentModel)
|
|
982
|
-
) {
|
|
983
|
-
const familyCooldownMs =
|
|
984
|
-
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
985
|
-
openFamilyCircuitBreaker(currentModel, familyCooldownMs);
|
|
986
|
-
if (
|
|
987
|
-
familyCooldownMs <=
|
|
988
|
-
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
|
|
989
|
-
) {
|
|
990
|
-
log(
|
|
991
|
-
"info",
|
|
992
|
-
`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
|
|
993
|
-
);
|
|
994
|
-
await sleep(familyCooldownMs);
|
|
995
|
-
attemptedModels.clear();
|
|
996
|
-
if (currentModel) attemptedModels.add(currentModel);
|
|
997
|
-
attempt++;
|
|
998
|
-
continue;
|
|
999
|
-
}
|
|
1000
|
-
throw new Error(
|
|
1001
|
-
`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
|
|
1002
|
-
);
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
const modelCooldownMs = currentModel
|
|
1006
|
-
? getRateLimitRemainingMs(currentModel)
|
|
1007
|
-
: null;
|
|
1008
|
-
const delay = Math.min(
|
|
1009
|
-
modelCooldownMs ??
|
|
1010
|
-
retryAfterMs ??
|
|
1011
|
-
calculateRetryDelay(attempt),
|
|
1012
|
-
RATE_LIMIT_CONFIG.maxDelayMs,
|
|
1013
|
-
);
|
|
1014
|
-
log(
|
|
1015
|
-
"warn",
|
|
1016
|
-
`Rate limited (429), no fallback available, waiting ${delay}ms`,
|
|
1017
|
-
{
|
|
1018
|
-
delay_ms: delay,
|
|
1019
|
-
attempt: attempt + 1,
|
|
1020
|
-
max_retries: RATE_LIMIT_CONFIG.maxRetries,
|
|
1021
|
-
fallbacks_exhausted: true,
|
|
1022
|
-
},
|
|
1023
|
-
);
|
|
1024
|
-
await sleep(delay);
|
|
1025
|
-
attemptedModels.clear();
|
|
1026
|
-
if (currentModel) attemptedModels.add(currentModel);
|
|
1027
|
-
attempt++;
|
|
1028
|
-
continue;
|
|
1029
|
-
}
|
|
1030
|
-
|
|
1031
|
-
// Exhausted retries and fallbacks
|
|
1032
|
-
if (currentModel) {
|
|
1033
|
-
const familyCooldownMs =
|
|
1034
|
-
getFamilyMaxCooldownRemainingMs(currentModel);
|
|
1035
|
-
const recoveryDelayMs =
|
|
1036
|
-
familyCooldownMs > 0
|
|
1037
|
-
? Math.min(
|
|
1038
|
-
familyCooldownMs,
|
|
1039
|
-
CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
|
|
1040
|
-
)
|
|
1041
|
-
: calculateRetryDelay(0);
|
|
1042
|
-
if (
|
|
1043
|
-
recoveryDelayMs > 0 &&
|
|
1044
|
-
recoveryCyclesUsed <
|
|
1045
|
-
CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
|
|
1046
|
-
) {
|
|
1047
|
-
recoveryCyclesUsed++;
|
|
1048
|
-
log(
|
|
1049
|
-
"info",
|
|
1050
|
-
`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
|
|
1051
|
-
);
|
|
1052
|
-
await sleep(recoveryDelayMs);
|
|
1053
|
-
attempt = 0;
|
|
1054
|
-
fallbacksUsed = 0;
|
|
1055
|
-
if (requestedModel) {
|
|
1056
|
-
currentModel = requestedModel;
|
|
1057
|
-
activeFinalInit =
|
|
1058
|
-
swapModelInBody(finalInit, requestedModel) ||
|
|
1059
|
-
finalInit;
|
|
1060
|
-
}
|
|
1061
|
-
attemptedModels.clear();
|
|
1062
|
-
if (currentModel) attemptedModels.add(currentModel);
|
|
1063
|
-
continue;
|
|
1064
|
-
}
|
|
1065
|
-
}
|
|
1066
|
-
throw new Error(
|
|
1067
|
-
`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
|
|
1068
|
-
);
|
|
1069
|
-
}
|
|
1070
|
-
|
|
1071
|
-
// Handle 400 Bad Request with auto-recovery
|
|
1072
|
-
if (response.status === 400 && !attempted400Recovery) {
|
|
1073
|
-
let errorDetail = "Bad Request";
|
|
1074
|
-
try {
|
|
1075
|
-
const clonedResponse = response.clone();
|
|
1076
|
-
const errorBody = await clonedResponse.json();
|
|
1077
|
-
errorDetail =
|
|
1078
|
-
errorBody?.error?.message ||
|
|
1079
|
-
errorBody?.message ||
|
|
1080
|
-
"Bad Request";
|
|
1081
|
-
} catch {}
|
|
1082
|
-
|
|
1083
|
-
log(
|
|
1084
|
-
"warn",
|
|
1085
|
-
`[400-RECOVERY] Bad Request from Copilot API`,
|
|
1086
|
-
{
|
|
1087
|
-
model: currentModel,
|
|
1088
|
-
error_detail: errorDetail,
|
|
1089
|
-
attempt,
|
|
1090
|
-
},
|
|
1091
|
-
);
|
|
1092
|
-
|
|
1093
|
-
// Check for recoverable 400 causes
|
|
1094
|
-
const isThinkingBlockError =
|
|
1095
|
-
/thinking.?block|invalid.*signature|reasoning.*invalid/i.test(
|
|
1096
|
-
errorDetail,
|
|
1097
|
-
);
|
|
1098
|
-
const isIdError =
|
|
1099
|
-
/invalid.*\bid\b|item.*\bid\b|unknown.*\bid\b|malformed.*\bid\b/i.test(
|
|
1100
|
-
errorDetail,
|
|
1101
|
-
);
|
|
1102
|
-
|
|
1103
|
-
if (isThinkingBlockError || isIdError) {
|
|
1104
|
-
let bodyObj: any;
|
|
1105
|
-
try {
|
|
1106
|
-
bodyObj =
|
|
1107
|
-
typeof activeFinalInit.body === "string"
|
|
1108
|
-
? JSON.parse(activeFinalInit.body)
|
|
1109
|
-
: activeFinalInit.body;
|
|
1110
|
-
} catch {
|
|
1111
|
-
// Can't parse body — not recoverable
|
|
1112
|
-
log(
|
|
1113
|
-
"warn",
|
|
1114
|
-
`[400-RECOVERY] Cannot parse request body, giving up`,
|
|
1115
|
-
);
|
|
1116
|
-
return response;
|
|
1117
|
-
}
|
|
1118
|
-
|
|
1119
|
-
// Cancel original response body only after confirming we can recover
|
|
1120
|
-
try {
|
|
1121
|
-
await response.body?.cancel();
|
|
1122
|
-
} catch {}
|
|
1123
|
-
|
|
1124
|
-
if (isThinkingBlockError && bodyObj?.messages) {
|
|
1125
|
-
// Strip ALL thinking/reasoning content aggressively
|
|
1126
|
-
bodyObj.messages = bodyObj.messages.map(
|
|
1127
|
-
(msg: any) => {
|
|
1128
|
-
if (msg.role !== "assistant") return msg;
|
|
1129
|
-
const {
|
|
1130
|
-
reasoning_text: _rt,
|
|
1131
|
-
reasoning_opaque: _ro,
|
|
1132
|
-
...cleaned
|
|
1133
|
-
} = msg;
|
|
1134
|
-
if (Array.isArray(cleaned.content)) {
|
|
1135
|
-
cleaned.content = cleaned.content.filter(
|
|
1136
|
-
(part: any) => part.type !== "thinking",
|
|
1137
|
-
);
|
|
1138
|
-
if (cleaned.content.length === 0)
|
|
1139
|
-
cleaned.content = null;
|
|
1140
|
-
}
|
|
1141
|
-
return cleaned;
|
|
1142
|
-
},
|
|
1143
|
-
);
|
|
1144
|
-
delete bodyObj.thinking_budget;
|
|
1145
|
-
recovered = true;
|
|
1146
|
-
log(
|
|
1147
|
-
"info",
|
|
1148
|
-
`[400-RECOVERY] Stripped all thinking/reasoning content for retry`,
|
|
1149
|
-
);
|
|
1150
|
-
}
|
|
1151
|
-
|
|
1152
|
-
if (isIdError && bodyObj?.input) {
|
|
1153
|
-
bodyObj.input = sanitizeResponseInputIds(
|
|
1154
|
-
bodyObj.input,
|
|
1155
|
-
);
|
|
1156
|
-
recovered = true;
|
|
1157
|
-
log(
|
|
1158
|
-
"info",
|
|
1159
|
-
`[400-RECOVERY] Re-sanitized Responses API IDs for retry`,
|
|
1160
|
-
);
|
|
1161
|
-
}
|
|
842
|
+
try {
|
|
843
|
+
if (currentModel) {
|
|
844
|
+
await shapeRequestForModel(currentModel);
|
|
845
|
+
}
|
|
846
|
+
const response = await fetch(input, activeFinalInit);
|
|
1162
847
|
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
body: JSON.stringify(bodyObj),
|
|
1168
|
-
};
|
|
1169
|
-
attempt++;
|
|
1170
|
-
continue;
|
|
1171
|
-
}
|
|
1172
|
-
}
|
|
848
|
+
if (response.status === 429) {
|
|
849
|
+
try {
|
|
850
|
+
await response.body?.cancel();
|
|
851
|
+
} catch {}
|
|
1173
852
|
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
}
|
|
853
|
+
const retryAfterMs = parseRetryAfter(response);
|
|
854
|
+
const cooldownMs = clampCooldownMs(
|
|
855
|
+
retryAfterMs,
|
|
856
|
+
RATE_LIMIT_CONFIG.defaultCooldownMs,
|
|
857
|
+
);
|
|
1180
858
|
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
} catch (error) {
|
|
1185
|
-
lastError = error as Error;
|
|
1186
|
-
|
|
1187
|
-
if (
|
|
1188
|
-
lastError.message.includes(
|
|
1189
|
-
"All fallback models cooling down",
|
|
1190
|
-
) ||
|
|
1191
|
-
lastError.message.includes("Local request queue saturated")
|
|
1192
|
-
) {
|
|
1193
|
-
throw lastError;
|
|
859
|
+
if (currentModel) {
|
|
860
|
+
markModelRateLimited(currentModel, cooldownMs);
|
|
861
|
+
openFamilyCircuitBreaker(currentModel, cooldownMs);
|
|
1194
862
|
}
|
|
1195
863
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
log("warn", `Request failed, retrying`, {
|
|
1200
|
-
delay_ms: delay,
|
|
1201
|
-
attempt: attempt + 1,
|
|
1202
|
-
max_retries: RATE_LIMIT_CONFIG.maxRetries,
|
|
1203
|
-
error: lastError.message,
|
|
1204
|
-
});
|
|
1205
|
-
await sleep(delay);
|
|
1206
|
-
attempt++;
|
|
1207
|
-
continue;
|
|
1208
|
-
}
|
|
1209
|
-
throw error;
|
|
864
|
+
throw new Error(
|
|
865
|
+
`[Copilot] Rate limited: ${currentModel || "model"} cooling down. Retry in ${formatRetryAfter(Math.ceil(cooldownMs / 1000))}.`,
|
|
866
|
+
);
|
|
1210
867
|
}
|
|
1211
|
-
}
|
|
1212
868
|
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
869
|
+
// Response transformation is handled by the custom SDK at
|
|
870
|
+
// .opencode/plugin/sdk/copilot/
|
|
871
|
+
return response;
|
|
872
|
+
} catch (error) {
|
|
873
|
+
const lastError = error as Error;
|
|
874
|
+
if (
|
|
875
|
+
lastError.message.includes("Rate limited") ||
|
|
876
|
+
lastError.message.includes("Local request queue saturated")
|
|
877
|
+
) {
|
|
878
|
+
throw lastError;
|
|
879
|
+
}
|
|
880
|
+
throw error;
|
|
1218
881
|
}
|
|
1219
|
-
throw new Error(
|
|
1220
|
-
`[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
|
|
1221
|
-
);
|
|
1222
882
|
},
|
|
1223
883
|
};
|
|
1224
884
|
},
|