opencodekit 0.18.25 → 0.18.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
20
20
 
21
21
  //#endregion
22
22
  //#region package.json
23
- var version = "0.18.25";
23
+ var version = "0.18.26";
24
24
 
25
25
  //#endregion
26
26
  //#region src/utils/license.ts
@@ -1,72 +1,83 @@
1
1
  {
2
- "$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json",
3
- "enabled": true,
4
- "debug": false,
5
- // "minimal" shows prune activity without noise; "detailed" shows token counts
6
- "pruneNotification": "detailed",
7
- // "chat" (in-conversation) or "toast" (system notification)
8
- "pruneNotificationType": "toast",
9
- // Commands: /dcp context, /dcp stats, /dcp sweep, /dcp compress, /dcp decompress, /dcp recompress
10
- "commands": {
11
- "enabled": true,
12
- // Protect these from /dcp sweep
13
- "protectedTools": ["observation", "memory-update", "memory-search"]
14
- },
15
- // Manual mode: disables autonomous context management
16
- "manualMode": {
17
- "enabled": false,
18
- "automaticStrategies": true
19
- },
20
- "turnProtection": {
21
- "enabled": true,
22
- "turns": 4
23
- },
24
- // Protected file patterns - never auto-prune reads of these files
25
- "protectedFilePatterns": [
26
- "**/.env*",
27
- "**/AGENTS.md",
28
- "**/.opencode/**",
29
- "**/.beads/**",
30
- "**/package.json",
31
- "**/tsconfig.json",
32
- "**/biome.json"
33
- ],
34
- "compress": {
35
- // v3.0.0: single compress tool replaces the old 3-tool system
36
- "permission": "allow",
37
- "showCompression": false,
38
- "maxContextLimit": "80%",
39
- "minContextLimit": 50000,
40
- "nudgeFrequency": 5,
41
- "iterationNudgeThreshold": 15,
42
- "nudgeForce": "soft",
43
- "flatSchema": false,
44
- "protectUserMessages": true,
45
- // v3.0.0 auto-protects: task, skill, todowrite, todoread, compress, batch, plan_enter, plan_exit
46
- // Only list additional tools here
47
- "protectedTools": ["write", "edit", "memory-*", "observation", "tilth_*"]
48
- },
49
- // Experimental features
50
- "experimental": {
51
- "allowSubAgents": true,
52
- "customPrompts": false
53
- },
54
- // Auto strategies
55
- "strategies": {
56
- // Dedup = zero LLM cost, high impact - always enable
57
- "deduplication": {
58
- "enabled": true,
59
- "protectedTools": []
60
- },
61
- // Supersede writes = zero cost, removes redundant write inputs after read
62
- "supersedeWrites": {
63
- "enabled": true
64
- },
65
- // Purge error inputs after N turns
66
- "purgeErrors": {
67
- "enabled": true,
68
- "turns": 4,
69
- "protectedTools": []
70
- }
71
- }
2
+ "$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json",
3
+ "enabled": true,
4
+ "debug": false,
5
+ // "off" | "minimal" | "detailed" — keep "minimal" for a low-noise dev flow
6
+ "pruneNotification": "minimal",
7
+ // "chat" (in-conversation) or "toast" (system notification)
8
+ "pruneNotificationType": "toast",
9
+ // Slash commands: /dcp context, /dcp stats, /dcp sweep, /dcp compress, /dcp decompress, /dcp recompress
10
+ "commands": {
11
+ "enabled": true,
12
+ // Additional tools to protect from /dcp sweep (supports glob wildcards)
13
+ "protectedTools": ["observation", "memory-*"],
14
+ },
15
+ // Manual mode: disables autonomous context management
16
+ "manualMode": {
17
+ "enabled": false,
18
+ "automaticStrategies": true,
19
+ },
20
+ // Protect recent tool outputs from pruning
21
+ "turnProtection": {
22
+ "enabled": false,
23
+ "turns": 4,
24
+ },
25
+ // Glob patterns for files that should never be auto-pruned
26
+ // Keep tight: broad patterns reduce DCP effectiveness
27
+ "protectedFilePatterns": [
28
+ "**/.env*",
29
+ "**/AGENTS.md",
30
+ "**/.opencode/**",
31
+ "**/.beads/**",
32
+ "**/package.json",
33
+ "**/tsconfig.json",
34
+ ],
35
+ // Unified context compression tool (v3.1.0)
36
+ "compress": {
37
+ // "range" (stable) compresses spans into block summaries
38
+ // "message" (experimental) compresses individual raw messages
39
+ "mode": "message",
40
+ // "allow" (no prompt) | "ask" (prompt) | "deny" (tool not registered)
41
+ "permission": "allow",
42
+ "showCompression": false,
43
+ // v3.1.0: active summary tokens extend effective maxContextLimit
44
+ "summaryBuffer": true,
45
+ // Soft upper threshold: above this, strong compression nudges fire
46
+ // Accepts number or "X%" of model context window
47
+ "maxContextLimit": "80%",
48
+ // Soft lower threshold: below this, turn/iteration reminders are off
49
+ "minContextLimit": "35%",
50
+ // How often context-limit nudge fires above maxContextLimit (1 = every fetch)
51
+ "nudgeFrequency": 5,
52
+ // Messages since last user message before adding compression reminders
53
+ "iterationNudgeThreshold": 15,
54
+ // "strong" = more likely to compress, "soft" = less likely
55
+ "nudgeForce": "soft",
56
+ // Keep user messages compressible to avoid permanent context growth
57
+ "protectUserMessages": false,
58
+ // Auto-protected by DCP: task, skill, todowrite, todoread, compress, batch, plan_enter, plan_exit, write, edit
59
+ // Only list ADDITIONAL tools whose outputs should be appended to compression summaries
60
+ "protectedTools": ["observation", "memory-*", "tilth_*"],
61
+ },
62
+ // Experimental features
63
+ "experimental": {
64
+ // Allow DCP processing in subagent sessions (default: false)
65
+ "allowSubAgents": false,
66
+ // Enable user-editable prompt overrides under dcp-prompts directories
67
+ "customPrompts": false,
68
+ },
69
+ // Automatic pruning strategies (zero LLM cost)
70
+ "strategies": {
71
+ // Removes duplicate tool calls (same tool + same arguments), keeps most recent
72
+ "deduplication": {
73
+ "enabled": true,
74
+ "protectedTools": [],
75
+ },
76
+ // Prunes inputs from errored tool calls after N turns (error messages preserved)
77
+ "purgeErrors": {
78
+ "enabled": true,
79
+ "turns": 4,
80
+ "protectedTools": [],
81
+ },
82
+ },
72
83
  }
Binary file
@@ -11,7 +11,7 @@
11
11
  "type-check": "tsc --noEmit"
12
12
  },
13
13
  "dependencies": {
14
- "@opencode-ai/plugin": "1.3.0"
14
+ "@opencode-ai/plugin": "1.3.2"
15
15
  },
16
16
  "devDependencies": {
17
17
  "@types/node": "^25.3.0",
@@ -96,11 +96,8 @@ const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
96
96
 
97
97
  // Rate limit handling configuration
98
98
  const RATE_LIMIT_CONFIG = {
99
- maxRetries: 3,
100
- baseDelayMs: 2000, // Start with 2 seconds
101
99
  maxDelayMs: 60000, // Cap at 60 seconds
102
100
  defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
103
- maxFallbacks: 4, // Max model fallback switches per request
104
101
  };
105
102
 
106
103
  // Local request shaping to smooth bursts before they hit Copilot limits
@@ -110,11 +107,6 @@ const REQUEST_SHAPING_CONFIG = {
110
107
  maxQueueDelayMs: 15000,
111
108
  };
112
109
 
113
- const CIRCUIT_BREAKER_CONFIG = {
114
- maxInlineWaitMs: 30000,
115
- maxRecoveryCycles: 3,
116
- };
117
-
118
110
  // Per-model rate limit state (in-memory, resets on restart)
119
111
  interface RateLimitEntry {
120
112
  rateLimitedUntil: number; // Unix timestamp (ms)
@@ -170,14 +162,14 @@ function parseRetryAfter(response: Response): number | null {
170
162
  return null;
171
163
  }
172
164
 
173
- function isModelRateLimited(model: string): boolean {
174
- const entry = rateLimitState.get(model);
175
- if (!entry) return false;
176
- if (Date.now() >= entry.rateLimitedUntil) {
177
- rateLimitState.delete(model);
178
- return false;
179
- }
180
- return true;
165
+ function clampCooldownMs(
166
+ value: number | null | undefined,
167
+ fallbackMs = 0,
168
+ ): number {
169
+ return Math.min(
170
+ Math.max(value ?? fallbackMs, 0),
171
+ RATE_LIMIT_CONFIG.maxDelayMs,
172
+ );
181
173
  }
182
174
 
183
175
  function getRateLimitRemainingMs(model: string): number | null {
@@ -217,10 +209,7 @@ function getFamilyCircuitRemainingMs(model: string): number {
217
209
 
218
210
  function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
219
211
  const key = getFamilyCircuitKey(model);
220
- familyCircuitBreakerState.set(
221
- key,
222
- Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
223
- );
212
+ familyCircuitBreakerState.set(key, Date.now() + clampCooldownMs(cooldownMs));
224
213
  }
225
214
 
226
215
  function getFamilyMaxCooldownRemainingMs(model: string): number {
@@ -232,14 +221,6 @@ function getFamilyMaxCooldownRemainingMs(model: string): number {
232
221
  return maxRemaining;
233
222
  }
234
223
 
235
- function isEntireModelFamilyCoolingDown(model: string): boolean {
236
- const family = getModelFamily(model);
237
- return (
238
- family.length > 0 &&
239
- family.every((candidate) => isModelRateLimited(candidate))
240
- );
241
- }
242
-
243
224
  function formatRetryAfter(seconds: number): string {
244
225
  if (seconds < 60) return `${seconds}s`;
245
226
  const mins = Math.floor(seconds / 60);
@@ -319,50 +300,16 @@ async function shapeRequestForModel(model: string): Promise<void> {
319
300
  }
320
301
 
321
302
  function markModelRateLimited(model: string, cooldownMs: number): void {
303
+ const boundedCooldownMs = clampCooldownMs(cooldownMs);
322
304
  rateLimitState.set(model, {
323
- rateLimitedUntil: Date.now() + cooldownMs,
305
+ rateLimitedUntil: Date.now() + boundedCooldownMs,
324
306
  });
325
307
  log(
326
308
  "info",
327
- `Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
309
+ `Marked ${model} as rate-limited for ${Math.round(boundedCooldownMs / 1000)}s`,
328
310
  );
329
311
  }
330
312
 
331
- /**
332
- * Find the next available fallback model in the same family.
333
- * Skips models that are themselves rate-limited.
334
- */
335
- function getNextFallbackModel(
336
- model: string,
337
- attemptedModels: Set<string>,
338
- ): string | null {
339
- const chain = MODEL_FALLBACK_CHAINS[model];
340
- if (!chain) return null;
341
- for (const fallback of chain) {
342
- if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
343
- return fallback;
344
- }
345
- }
346
- return null;
347
- }
348
-
349
- /**
350
- * Swap the model field in a fetch RequestInit body.
351
- */
352
- function swapModelInBody(
353
- init: RequestInit | undefined,
354
- newModel: string,
355
- ): RequestInit | undefined {
356
- if (!init?.body || typeof init.body !== "string") return init;
357
- try {
358
- const body = JSON.parse(init.body);
359
- body.model = newModel;
360
- return { ...init, body: JSON.stringify(body) };
361
- } catch {
362
- return init;
363
- }
364
- }
365
-
366
313
  // Maximum length for item IDs in the OpenAI Responses API
367
314
  const MAX_RESPONSE_API_ID_LENGTH = 64;
368
315
  // OpenAI Responses API only allows: letters, numbers, underscores, dashes
@@ -402,13 +349,18 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
402
349
  const cleanCore = rawCore.replace(INVALID_ID_CHARS, "_").replace(/_+$/g, "");
403
350
 
404
351
  // Check if any sanitization is actually needed
405
- const needsSanitization = forcedPrefix || hasInvalidIdChars(rawCore) ||
352
+ const needsSanitization =
353
+ forcedPrefix ||
354
+ hasInvalidIdChars(rawCore) ||
406
355
  id.length > MAX_RESPONSE_API_ID_LENGTH;
407
356
 
408
357
  if (!needsSanitization) return id;
409
358
 
410
359
  // If result fits within length and core is non-empty, use cleaned core directly
411
- if (cleanCore.length > 0 && (prefix.length + cleanCore.length) <= MAX_RESPONSE_API_ID_LENGTH) {
360
+ if (
361
+ cleanCore.length > 0 &&
362
+ prefix.length + cleanCore.length <= MAX_RESPONSE_API_ID_LENGTH
363
+ ) {
412
364
  return `${prefix}${cleanCore}`;
413
365
  }
414
366
 
@@ -422,7 +374,10 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
422
374
  MAX_RESPONSE_API_ID_LENGTH - prefix.length - hashStr.length - 1;
423
375
  const middle = cleanCore.slice(0, Math.max(0, maxMiddleLen));
424
376
  // Format: prefix + middle + "_" + hash (ensure total <= 64)
425
- const result = `${prefix}${middle}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
377
+ const result = `${prefix}${middle}_${hashStr}`.slice(
378
+ 0,
379
+ MAX_RESPONSE_API_ID_LENGTH,
380
+ );
426
381
  // Strip trailing underscores from truncation
427
382
  return result.replace(/_+$/, "");
428
383
  }
@@ -480,7 +435,11 @@ function sanitizeResponseInputIds(input: any[]): any[] {
480
435
 
481
436
  // Check for wrong prefix (e.g., function_call with "h_" instead of "fc_")
482
437
  const expectedPrefix = getExpectedPrefix(item);
483
- if (expectedPrefix && typeof item.id === "string" && !idRemap.has(item.id)) {
438
+ if (
439
+ expectedPrefix &&
440
+ typeof item.id === "string" &&
441
+ !idRemap.has(item.id)
442
+ ) {
484
443
  const newId = sanitizeResponseId(item.id, expectedPrefix);
485
444
  if (newId !== item.id) {
486
445
  idRemap.set(item.id, newId);
@@ -530,26 +489,16 @@ function sanitizeResponseInputIds(input: any[]): any[] {
530
489
  if (typeof sanitized.id === "string" && idRemap.has(sanitized.id)) {
531
490
  sanitized.id = idRemap.get(sanitized.id);
532
491
  }
533
- if (typeof sanitized.call_id === "string" && idRemap.has(sanitized.call_id)) {
492
+ if (
493
+ typeof sanitized.call_id === "string" &&
494
+ idRemap.has(sanitized.call_id)
495
+ ) {
534
496
  sanitized.call_id = idRemap.get(sanitized.call_id);
535
497
  }
536
498
  return sanitized;
537
499
  });
538
500
  }
539
501
 
540
- /**
541
- * Retries: 2s, 4s, 8s (with jitter)
542
- */
543
- function calculateRetryDelay(attempt: number): number {
544
- const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
545
- const jitter = Math.random() * 1000; // Add 0-1s random jitter
546
- const delay = Math.min(
547
- exponentialDelay + jitter,
548
- RATE_LIMIT_CONFIG.maxDelayMs,
549
- );
550
- return Math.round(delay);
551
- }
552
-
553
502
  export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
554
503
  // Initialize logger with the SDK client
555
504
  setLogger(sdk);
@@ -742,10 +691,14 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
742
691
  ])
743
692
  .filter(Boolean);
744
693
  if (rawIds.length > 0) {
745
- log("debug", "[ID-SANITIZE] Raw input IDs before sanitization", {
746
- ids: rawIds,
747
- count: rawIds.length,
748
- });
694
+ log(
695
+ "debug",
696
+ "[ID-SANITIZE] Raw input IDs before sanitization",
697
+ {
698
+ ids: rawIds,
699
+ count: rawIds.length,
700
+ },
701
+ );
749
702
  }
750
703
 
751
704
  // Sanitize IDs from Copilot backend:
@@ -753,7 +706,9 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
753
706
  // 2. Excessive length — Copilot returns 400+ char IDs (max is 64)
754
707
  const sanitizedInput = sanitizeResponseInputIds(body.input);
755
708
  const refDiffers = sanitizedInput !== body.input;
756
- const jsonDiffers = refDiffers && JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
709
+ const jsonDiffers =
710
+ refDiffers &&
711
+ JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
757
712
  const inputWasSanitized = refDiffers && jsonDiffers;
758
713
 
759
714
  log("debug", "[ID-SANITIZE] Sanitization result", {
@@ -764,26 +719,40 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
764
719
 
765
720
  if (inputWasSanitized) {
766
721
  const fixes = body.input
767
- .map((item: any, i: number) => ({ item, i, si: sanitizedInput[i] }))
768
- .filter(({ item, si }: any) =>
769
- item && si && (item.id !== si.id || item.call_id !== si.call_id),
722
+ .map((item: any, i: number) => ({
723
+ item,
724
+ i,
725
+ si: sanitizedInput[i],
726
+ }))
727
+ .filter(
728
+ ({ item, si }: any) =>
729
+ item &&
730
+ si &&
731
+ (item.id !== si.id || item.call_id !== si.call_id),
770
732
  );
771
- log("info", "[ID-SANITIZE] Fixed IDs in Responses API input", {
772
- items_fixed: fixes.length,
773
- fixes: fixes.map(({ item, si }: any) => ({
774
- type: item.type,
775
- old_id: item.id,
776
- new_id: si?.id,
777
- old_call_id: item.call_id,
778
- new_call_id: si?.call_id,
779
- })),
780
- });
733
+ log(
734
+ "info",
735
+ "[ID-SANITIZE] Fixed IDs in Responses API input",
736
+ {
737
+ items_fixed: fixes.length,
738
+ fixes: fixes.map(({ item, si }: any) => ({
739
+ type: item.type,
740
+ old_id: item.id,
741
+ new_id: si?.id,
742
+ old_call_id: item.call_id,
743
+ new_call_id: si?.call_id,
744
+ })),
745
+ },
746
+ );
781
747
  modifiedBody = {
782
748
  ...(modifiedBody || body),
783
749
  input: sanitizedInput,
784
750
  };
785
751
  } else {
786
- log("debug", "[ID-SANITIZE] No sanitization needed — all IDs valid");
752
+ log(
753
+ "debug",
754
+ "[ID-SANITIZE] No sanitization needed — all IDs valid",
755
+ );
787
756
  }
788
757
 
789
758
  isAgentCall = (sanitizedInput || body.input).some(
@@ -856,369 +825,60 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
856
825
  currentModel = bodyObj?.model || "";
857
826
  } catch {}
858
827
 
859
- // Pre-flight: if current model is already known rate-limited, switch to fallback
860
- let activeFinalInit: RequestInit = finalInit;
861
- const attemptedModels = new Set<string>();
862
- if (currentModel) attemptedModels.add(currentModel);
863
- const requestedModel = currentModel;
828
+ // Pre-flight: fail fast if current model family is cooling down
829
+ const activeFinalInit: RequestInit = finalInit;
864
830
  if (currentModel) {
865
- const circuitRemainingMs =
866
- getFamilyCircuitRemainingMs(currentModel);
867
- if (circuitRemainingMs > 0) {
868
- if (
869
- circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
870
- ) {
871
- log(
872
- "info",
873
- `Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
874
- );
875
- await sleep(circuitRemainingMs);
876
- } else {
877
- throw new Error(
878
- `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
879
- );
880
- }
881
- }
882
- }
883
- if (currentModel && isModelRateLimited(currentModel)) {
884
- const fallback = getNextFallbackModel(
885
- currentModel,
886
- attemptedModels,
831
+ const familyCooldownMs = Math.max(
832
+ getFamilyCircuitRemainingMs(currentModel),
833
+ getFamilyMaxCooldownRemainingMs(currentModel),
887
834
  );
888
- if (fallback) {
889
- log(
890
- "info",
891
- `Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
835
+ if (familyCooldownMs > 0) {
836
+ throw new Error(
837
+ `[Copilot] Rate limited: all fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
892
838
  );
893
- activeFinalInit =
894
- swapModelInBody(finalInit, fallback) || finalInit;
895
- currentModel = fallback;
896
- attemptedModels.add(fallback);
897
- } else {
898
- const familyCooldownMs =
899
- getFamilyMaxCooldownRemainingMs(currentModel);
900
- openFamilyCircuitBreaker(currentModel, familyCooldownMs);
901
- if (
902
- familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
903
- ) {
904
- log(
905
- "info",
906
- `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
907
- );
908
- await sleep(familyCooldownMs);
909
- attemptedModels.clear();
910
- if (currentModel) attemptedModels.add(currentModel);
911
- } else {
912
- throw new Error(
913
- `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
914
- );
915
- }
916
839
  }
917
840
  }
918
841
 
919
- // Retry logic with model fallback and exponential backoff for rate limiting
920
- let lastError: Error | undefined;
921
- let fallbacksUsed = 0;
922
- let attempt = 0;
923
- let recoveryCyclesUsed = 0;
924
- let attempted400Recovery = false;
925
-
926
- while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
927
- try {
928
- if (currentModel) {
929
- await shapeRequestForModel(currentModel);
930
- }
931
- const response = await fetch(input, activeFinalInit);
932
-
933
- if (response.status === 429) {
934
- try {
935
- await response.body?.cancel();
936
- } catch {}
937
-
938
- // Parse Retry-After header for server-suggested cooldown
939
- const retryAfterMs = parseRetryAfter(response);
940
- const cooldownMs =
941
- retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
942
-
943
- // Mark this model as rate-limited
944
- if (currentModel) {
945
- markModelRateLimited(currentModel, cooldownMs);
946
- }
947
-
948
- // Try fallback model (doesn't count against retry budget)
949
- if (
950
- currentModel &&
951
- fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
952
- ) {
953
- const fallback = getNextFallbackModel(
954
- currentModel,
955
- attemptedModels,
956
- );
957
- if (fallback) {
958
- log(
959
- "warn",
960
- `Rate limited on ${currentModel}, switching to ${fallback}`,
961
- {
962
- retry_after_ms: retryAfterMs,
963
- cooldown_ms: cooldownMs,
964
- fallbacks_used: fallbacksUsed + 1,
965
- },
966
- );
967
- activeFinalInit =
968
- swapModelInBody(activeFinalInit, fallback) ||
969
- activeFinalInit;
970
- currentModel = fallback;
971
- attemptedModels.add(fallback);
972
- fallbacksUsed++;
973
- continue; // Retry immediately with new model, no delay
974
- }
975
- }
976
-
977
- // No fallback available — use exponential backoff on same model
978
- if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
979
- if (
980
- currentModel &&
981
- isEntireModelFamilyCoolingDown(currentModel)
982
- ) {
983
- const familyCooldownMs =
984
- getFamilyMaxCooldownRemainingMs(currentModel);
985
- openFamilyCircuitBreaker(currentModel, familyCooldownMs);
986
- if (
987
- familyCooldownMs <=
988
- CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
989
- ) {
990
- log(
991
- "info",
992
- `All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
993
- );
994
- await sleep(familyCooldownMs);
995
- attemptedModels.clear();
996
- if (currentModel) attemptedModels.add(currentModel);
997
- attempt++;
998
- continue;
999
- }
1000
- throw new Error(
1001
- `[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
1002
- );
1003
- }
1004
-
1005
- const modelCooldownMs = currentModel
1006
- ? getRateLimitRemainingMs(currentModel)
1007
- : null;
1008
- const delay = Math.min(
1009
- modelCooldownMs ??
1010
- retryAfterMs ??
1011
- calculateRetryDelay(attempt),
1012
- RATE_LIMIT_CONFIG.maxDelayMs,
1013
- );
1014
- log(
1015
- "warn",
1016
- `Rate limited (429), no fallback available, waiting ${delay}ms`,
1017
- {
1018
- delay_ms: delay,
1019
- attempt: attempt + 1,
1020
- max_retries: RATE_LIMIT_CONFIG.maxRetries,
1021
- fallbacks_exhausted: true,
1022
- },
1023
- );
1024
- await sleep(delay);
1025
- attemptedModels.clear();
1026
- if (currentModel) attemptedModels.add(currentModel);
1027
- attempt++;
1028
- continue;
1029
- }
1030
-
1031
- // Exhausted retries and fallbacks
1032
- if (currentModel) {
1033
- const familyCooldownMs =
1034
- getFamilyMaxCooldownRemainingMs(currentModel);
1035
- const recoveryDelayMs =
1036
- familyCooldownMs > 0
1037
- ? Math.min(
1038
- familyCooldownMs,
1039
- CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
1040
- )
1041
- : calculateRetryDelay(0);
1042
- if (
1043
- recoveryDelayMs > 0 &&
1044
- recoveryCyclesUsed <
1045
- CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
1046
- ) {
1047
- recoveryCyclesUsed++;
1048
- log(
1049
- "info",
1050
- `Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
1051
- );
1052
- await sleep(recoveryDelayMs);
1053
- attempt = 0;
1054
- fallbacksUsed = 0;
1055
- if (requestedModel) {
1056
- currentModel = requestedModel;
1057
- activeFinalInit =
1058
- swapModelInBody(finalInit, requestedModel) ||
1059
- finalInit;
1060
- }
1061
- attemptedModels.clear();
1062
- if (currentModel) attemptedModels.add(currentModel);
1063
- continue;
1064
- }
1065
- }
1066
- throw new Error(
1067
- `[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
1068
- );
1069
- }
1070
-
1071
- // Handle 400 Bad Request with auto-recovery
1072
- if (response.status === 400 && !attempted400Recovery) {
1073
- let errorDetail = "Bad Request";
1074
- try {
1075
- const clonedResponse = response.clone();
1076
- const errorBody = await clonedResponse.json();
1077
- errorDetail =
1078
- errorBody?.error?.message ||
1079
- errorBody?.message ||
1080
- "Bad Request";
1081
- } catch {}
1082
-
1083
- log(
1084
- "warn",
1085
- `[400-RECOVERY] Bad Request from Copilot API`,
1086
- {
1087
- model: currentModel,
1088
- error_detail: errorDetail,
1089
- attempt,
1090
- },
1091
- );
1092
-
1093
- // Check for recoverable 400 causes
1094
- const isThinkingBlockError =
1095
- /thinking.?block|invalid.*signature|reasoning.*invalid/i.test(
1096
- errorDetail,
1097
- );
1098
- const isIdError =
1099
- /invalid.*\bid\b|item.*\bid\b|unknown.*\bid\b|malformed.*\bid\b/i.test(
1100
- errorDetail,
1101
- );
1102
-
1103
- if (isThinkingBlockError || isIdError) {
1104
- let bodyObj: any;
1105
- try {
1106
- bodyObj =
1107
- typeof activeFinalInit.body === "string"
1108
- ? JSON.parse(activeFinalInit.body)
1109
- : activeFinalInit.body;
1110
- } catch {
1111
- // Can't parse body — not recoverable
1112
- log(
1113
- "warn",
1114
- `[400-RECOVERY] Cannot parse request body, giving up`,
1115
- );
1116
- return response;
1117
- }
1118
-
1119
- // Cancel original response body only after confirming we can recover
1120
- try {
1121
- await response.body?.cancel();
1122
- } catch {}
1123
-
1124
- if (isThinkingBlockError && bodyObj?.messages) {
1125
- // Strip ALL thinking/reasoning content aggressively
1126
- bodyObj.messages = bodyObj.messages.map(
1127
- (msg: any) => {
1128
- if (msg.role !== "assistant") return msg;
1129
- const {
1130
- reasoning_text: _rt,
1131
- reasoning_opaque: _ro,
1132
- ...cleaned
1133
- } = msg;
1134
- if (Array.isArray(cleaned.content)) {
1135
- cleaned.content = cleaned.content.filter(
1136
- (part: any) => part.type !== "thinking",
1137
- );
1138
- if (cleaned.content.length === 0)
1139
- cleaned.content = null;
1140
- }
1141
- return cleaned;
1142
- },
1143
- );
1144
- delete bodyObj.thinking_budget;
1145
- recovered = true;
1146
- log(
1147
- "info",
1148
- `[400-RECOVERY] Stripped all thinking/reasoning content for retry`,
1149
- );
1150
- }
1151
-
1152
- if (isIdError && bodyObj?.input) {
1153
- bodyObj.input = sanitizeResponseInputIds(
1154
- bodyObj.input,
1155
- );
1156
- recovered = true;
1157
- log(
1158
- "info",
1159
- `[400-RECOVERY] Re-sanitized Responses API IDs for retry`,
1160
- );
1161
- }
842
+ try {
843
+ if (currentModel) {
844
+ await shapeRequestForModel(currentModel);
845
+ }
846
+ const response = await fetch(input, activeFinalInit);
1162
847
 
1163
- if (recovered) {
1164
- attempted400Recovery = true;
1165
- activeFinalInit = {
1166
- ...activeFinalInit,
1167
- body: JSON.stringify(bodyObj),
1168
- };
1169
- attempt++;
1170
- continue;
1171
- }
1172
- }
848
+ if (response.status === 429) {
849
+ try {
850
+ await response.body?.cancel();
851
+ } catch {}
1173
852
 
1174
- // Not recoverable — log detail and return original response
1175
- log(
1176
- "warn",
1177
- `[400-RECOVERY] Non-recoverable 400: ${errorDetail}`,
1178
- );
1179
- }
853
+ const retryAfterMs = parseRetryAfter(response);
854
+ const cooldownMs = clampCooldownMs(
855
+ retryAfterMs,
856
+ RATE_LIMIT_CONFIG.defaultCooldownMs,
857
+ );
1180
858
 
1181
- // Response transformation is handled by the custom SDK at
1182
- // .opencode/plugin/sdk/copilot/
1183
- return response;
1184
- } catch (error) {
1185
- lastError = error as Error;
1186
-
1187
- if (
1188
- lastError.message.includes(
1189
- "All fallback models cooling down",
1190
- ) ||
1191
- lastError.message.includes("Local request queue saturated")
1192
- ) {
1193
- throw lastError;
859
+ if (currentModel) {
860
+ markModelRateLimited(currentModel, cooldownMs);
861
+ openFamilyCircuitBreaker(currentModel, cooldownMs);
1194
862
  }
1195
863
 
1196
- // Network errors might be transient, retry
1197
- if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
1198
- const delay = calculateRetryDelay(attempt);
1199
- log("warn", `Request failed, retrying`, {
1200
- delay_ms: delay,
1201
- attempt: attempt + 1,
1202
- max_retries: RATE_LIMIT_CONFIG.maxRetries,
1203
- error: lastError.message,
1204
- });
1205
- await sleep(delay);
1206
- attempt++;
1207
- continue;
1208
- }
1209
- throw error;
864
+ throw new Error(
865
+ `[Copilot] Rate limited: ${currentModel || "model"} cooling down. Retry in ${formatRetryAfter(Math.ceil(cooldownMs / 1000))}.`,
866
+ );
1210
867
  }
1211
- }
1212
868
 
1213
- // Exhausted all retries
1214
- if (lastError) {
1215
- throw new Error(
1216
- `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
1217
- );
869
+ // Response transformation is handled by the custom SDK at
870
+ // .opencode/plugin/sdk/copilot/
871
+ return response;
872
+ } catch (error) {
873
+ const lastError = error as Error;
874
+ if (
875
+ lastError.message.includes("Rate limited") ||
876
+ lastError.message.includes("Local request queue saturated")
877
+ ) {
878
+ throw lastError;
879
+ }
880
+ throw error;
1218
881
  }
1219
- throw new Error(
1220
- `[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
1221
- );
1222
882
  },
1223
883
  };
1224
884
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencodekit",
3
- "version": "0.18.25",
3
+ "version": "0.18.26",
4
4
  "description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
5
5
  "keywords": [
6
6
  "agents",