opencodekit 0.20.0 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
20
20
 
21
21
  //#endregion
22
22
  //#region package.json
23
- var version = "0.20.0";
23
+ var version = "0.20.2";
24
24
 
25
25
  //#endregion
26
26
  //#region src/utils/license.ts
@@ -24,11 +24,11 @@
24
24
  },
25
25
  // Glob patterns for files that should never be auto-pruned
26
26
  // Keep tight: broad patterns reduce DCP effectiveness
27
+ // .opencode/** and .beads/** removed — memory-* and tilth_* outputs
28
+ // already survive compression via compress.protectedTools
27
29
  "protectedFilePatterns": [
28
30
  "**/.env*",
29
31
  "**/AGENTS.md",
30
- "**/.opencode/**",
31
- "**/.beads/**",
32
32
  "**/package.json",
33
33
  "**/tsconfig.json"
34
34
  ],
@@ -43,34 +43,41 @@
43
43
  // v3.1.0: active summary tokens extend effective maxContextLimit
44
44
  "summaryBuffer": true,
45
45
  // Soft upper threshold: above this, strong compression nudges fire
46
- // Accepts number or "X%" of model context window
47
- "maxContextLimit": "80%",
46
+ // Use numeric values — percentage requires modelContextLimit from runtime
47
+ // which may be unavailable for some provider/model combos (e.g. GitHub Copilot)
48
+ // Rule: must be BELOW OpenCode emergency threshold (model_max - reserved - max_output)
49
+ // For Copilot Claude (216k ctx, 64k out, 16k reserved): emergency = 136k
50
+ // So DCP must start compressing well before 136k
51
+ "maxContextLimit": 100000,
48
52
  // Per-model override for maxContextLimit (takes priority over global)
49
- // Aligned to claude-opus-4.6 (216k context, 64k output) as primary build agent
50
53
  "modelMaxLimits": {
51
- "github-copilot/claude-opus-4.6": 192000,
52
- "github-copilot/claude-opus-4.5": 192000,
53
- "github-copilot/claude-sonnet-4.6": 192000,
54
- "github-copilot/claude-sonnet-4.5": 192000,
55
- "github-copilot/claude-sonnet-4": 192000,
56
- "github-copilot/claude-haiku-4.5": 172000,
57
- "github-copilot/gpt-5.4": 192000,
58
- "github-copilot/gpt-5.3-codex": 192000,
59
- "github-copilot/gemini-3.1-pro-preview": 192000
54
+ // Claude: 216k ctx, 64k out → emergency at 136k → DCP starts at 110k
55
+ "github-copilot/claude-opus-4.6": 110000,
56
+ "github-copilot/claude-opus-4.5": 110000,
57
+ "github-copilot/claude-sonnet-4.6": 110000,
58
+ "github-copilot/claude-sonnet-4.5": 110000,
59
+ "github-copilot/claude-sonnet-4": 110000,
60
+ // Haiku: smaller model, be more conservative
61
+ "github-copilot/claude-haiku-4.5": 90000,
62
+ // GPT/Gemini: assume similar 200k+ windows
63
+ "github-copilot/gpt-5.4": 110000,
64
+ "github-copilot/gpt-5.3-codex": 110000,
65
+ "github-copilot/gemini-3.1-pro-preview": 110000
60
66
  },
61
67
  // Soft lower threshold: below this, turn/iteration reminders are off
62
- "minContextLimit": "35%",
68
+ // Use numeric values — same reason as maxContextLimit above
69
+ "minContextLimit": 50000,
63
70
  // Per-model override for minContextLimit (takes priority over global)
64
71
  "modelMinLimits": {
65
- "github-copilot/claude-opus-4.6": "30%",
66
- "github-copilot/claude-opus-4.5": "35%",
67
- "github-copilot/claude-sonnet-4.6": "35%",
68
- "github-copilot/claude-sonnet-4.5": "35%",
69
- "github-copilot/claude-sonnet-4": "35%",
70
- "github-copilot/claude-haiku-4.5": "25%",
71
- "github-copilot/gpt-5.4": "30%",
72
- "github-copilot/gpt-5.3-codex": "30%",
73
- "github-copilot/gemini-3.1-pro-preview": "30%"
72
+ "github-copilot/claude-opus-4.6": 65000,
73
+ "github-copilot/claude-opus-4.5": 65000,
74
+ "github-copilot/claude-sonnet-4.6": 65000,
75
+ "github-copilot/claude-sonnet-4.5": 65000,
76
+ "github-copilot/claude-sonnet-4": 65000,
77
+ "github-copilot/claude-haiku-4.5": 50000,
78
+ "github-copilot/gpt-5.4": 65000,
79
+ "github-copilot/gpt-5.3-codex": 65000,
80
+ "github-copilot/gemini-3.1-pro-preview": 65000
74
81
  },
75
82
  // How often context-limit nudge fires above maxContextLimit (1 = every fetch)
76
83
  "nudgeFrequency": 5,
Binary file
@@ -175,7 +175,7 @@
175
175
  "output": 32000
176
176
  },
177
177
  "options": {
178
- "thinking_budget": 10000,
178
+ "thinking_budget": 24000,
179
179
  "type": "enabled"
180
180
  },
181
181
  "reasoning": true,
@@ -229,7 +229,7 @@
229
229
  },
230
230
  "options": {
231
231
  "thinking": {
232
- "budget_tokens": 16000,
232
+ "budget_tokens": 24000,
233
233
  "type": "enabled"
234
234
  }
235
235
  },
@@ -324,7 +324,7 @@
324
324
  },
325
325
  "options": {
326
326
  "thinking": {
327
- "budget_tokens": 16000,
327
+ "budget_tokens": 24000,
328
328
  "type": "enabled"
329
329
  }
330
330
  },
@@ -1674,6 +1674,6 @@
1674
1674
  ]
1675
1675
  },
1676
1676
  "compaction": {
1677
- "reserved": 128000
1677
+ "reserved": 16000
1678
1678
  }
1679
1679
  }
@@ -12,7 +12,7 @@
12
12
  },
13
13
  "dependencies": {
14
14
  "@google/stitch-sdk": "^0.0.3",
15
- "@opencode-ai/plugin": "1.3.13"
15
+ "@opencode-ai/plugin": "1.3.17"
16
16
  },
17
17
  "devDependencies": {
18
18
  "@types/node": "^25.3.0",
@@ -94,6 +94,51 @@ function getUrls(domain: string) {
94
94
 
95
95
  const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
96
96
 
97
+ function getRequestUrl(input: RequestInfo | URL): string {
98
+ if (typeof input === "string") return input;
99
+ if (input instanceof URL) return input.toString();
100
+ if (typeof Request !== "undefined" && input instanceof Request) {
101
+ return input.url;
102
+ }
103
+ return input.toString();
104
+ }
105
+
106
+ function extractClaudeThinkingBudget(body: any): number | undefined {
107
+ const candidates = [body?.thinking_budget, body?.thinking?.budget_tokens];
108
+
109
+ for (const candidate of candidates) {
110
+ if (typeof candidate === "number" && Number.isFinite(candidate)) {
111
+ const normalized = Math.trunc(candidate);
112
+ if (normalized > 0) return normalized;
113
+ }
114
+ }
115
+
116
+ return undefined;
117
+ }
118
+
119
+ function getErrorMessage(error: unknown): string {
120
+ if (error instanceof Error) return error.message;
121
+ if (typeof error === "string") return error;
122
+ try {
123
+ return JSON.stringify(error);
124
+ } catch {
125
+ return String(error);
126
+ }
127
+ }
128
+
129
+ function isTransientUpstreamTimeoutError(error: unknown): boolean {
130
+ const message = getErrorMessage(error).toLowerCase();
131
+ return (
132
+ message.includes("upstream idle timeout") ||
133
+ message.includes("mid_stream") ||
134
+ message.includes("sse read timed out") ||
135
+ message.includes("socket connection was closed unexpectedly") ||
136
+ message.includes("connection reset") ||
137
+ message.includes("econnreset") ||
138
+ message.includes("etimedout")
139
+ );
140
+ }
141
+
97
142
  // Rate limit handling configuration
98
143
  const RATE_LIMIT_CONFIG = {
99
144
  maxDelayMs: 60000, // Cap at 60 seconds
@@ -553,7 +598,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
553
598
  ? JSON.parse(init.body)
554
599
  : init?.body;
555
600
 
556
- const url = input.toString();
601
+ const url = getRequestUrl(input);
557
602
 
558
603
  // Check if this is a Claude model request
559
604
  const modelId = body?.model || "";
@@ -574,18 +619,34 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
574
619
  // For Claude models, add thinking_budget to enable reasoning
575
620
  // The Copilot API accepts this parameter and returns reasoning_text/reasoning_opaque
576
621
  if (isClaudeModel) {
577
- // Use configured thinking_budget from model options, or default to 10000
578
- const thinkingBudget = body.thinking_budget || 10000;
579
-
580
- // Fix for "Invalid signature in thinking block" error:
581
- // The Copilot API uses reasoning_text/reasoning_opaque format for thinking
582
- // When these are passed back without proper signature, it causes errors
583
- // Solution: Ensure reasoning_opaque is present when reasoning_text exists,
584
- // or remove reasoning content entirely if signature is invalid/missing
622
+ const thinkingBudget = extractClaudeThinkingBudget(body);
623
+ const isThinkingEnabled = thinkingBudget != null;
624
+
585
625
  const cleanedMessages = body.messages.map(
586
626
  (msg: any, idx: number) => {
587
627
  if (msg.role !== "assistant") return msg;
588
628
 
629
+ // If thinking is disabled, strip all reasoning metadata to prevent
630
+ // stale reasoning context from continuing across turns.
631
+ if (!isThinkingEnabled) {
632
+ const {
633
+ reasoning_text: _reasoningText,
634
+ reasoning_opaque: _reasoningOpaque,
635
+ ...baseMsg
636
+ } = msg;
637
+ if (!Array.isArray(baseMsg.content)) return baseMsg;
638
+
639
+ const cleanedContent = baseMsg.content.filter(
640
+ (part: any) => part.type !== "thinking",
641
+ );
642
+
643
+ return {
644
+ ...baseMsg,
645
+ content:
646
+ cleanedContent.length > 0 ? cleanedContent : null,
647
+ };
648
+ }
649
+
589
650
  // Log message structure for debugging
590
651
  log("debug", `Processing assistant message ${idx}`, {
591
652
  has_reasoning_text: !!msg.reasoning_text,
@@ -605,11 +666,6 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
605
666
  }
606
667
 
607
668
  // If content is an array, strip ALL thinking blocks.
608
- // Reasoning is communicated via reasoning_text/reasoning_opaque
609
- // fields, not via thinking blocks in the content array.
610
- // Even thinking blocks WITH signatures can cause
611
- // "Invalid signature in thinking block" errors when
612
- // signatures are expired or from a different context.
613
669
  if (Array.isArray(msg.content)) {
614
670
  const hasThinkingBlock = msg.content.some(
615
671
  (part: any) => part.type === "thinking",
@@ -634,15 +690,31 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
634
690
  },
635
691
  );
636
692
 
637
- modifiedBody = {
638
- ...body,
693
+ const nextBody: Record<string, any> = {
694
+ ...(modifiedBody || body),
639
695
  messages: cleanedMessages,
640
- thinking_budget: thinkingBudget,
641
696
  };
642
- log("info", `Adding thinking_budget for Claude model`, {
643
- model: modelId,
644
- thinking_budget: thinkingBudget,
645
- });
697
+
698
+ if (isThinkingEnabled) {
699
+ nextBody.thinking_budget = thinkingBudget;
700
+ log("info", `Adding thinking_budget for Claude model`, {
701
+ model: modelId,
702
+ thinking_budget: thinkingBudget,
703
+ });
704
+ } else {
705
+ delete nextBody.thinking_budget;
706
+ log(
707
+ "info",
708
+ `Claude thinking disabled for this request (no thinking budget set)`,
709
+ { model: modelId },
710
+ );
711
+ }
712
+
713
+ // Copilot OpenAI-compatible endpoint expects `thinking_budget`.
714
+ // Remove Anthropic-style `thinking` object to avoid mixed payloads.
715
+ delete nextBody.thinking;
716
+
717
+ modifiedBody = nextBody;
646
718
  }
647
719
 
648
720
  // For GPT models (o1, gpt-5, etc.), add reasoning parameter
@@ -839,46 +911,82 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
839
911
  }
840
912
  }
841
913
 
842
- try {
843
- if (currentModel) {
844
- await shapeRequestForModel(currentModel);
845
- }
846
- const response = await fetch(input, activeFinalInit);
914
+ const maxFetchAttempts = 2;
847
915
 
848
- if (response.status === 429) {
849
- try {
850
- await response.body?.cancel();
851
- } catch {}
916
+ for (let attempt = 1; attempt <= maxFetchAttempts; attempt++) {
917
+ try {
918
+ if (currentModel) {
919
+ await shapeRequestForModel(currentModel);
920
+ }
921
+ const response = await fetch(input, activeFinalInit);
852
922
 
853
- const retryAfterMs = parseRetryAfter(response);
854
- const cooldownMs = clampCooldownMs(
855
- retryAfterMs,
856
- RATE_LIMIT_CONFIG.defaultCooldownMs,
857
- );
923
+ if (response.status === 429) {
924
+ try {
925
+ await response.body?.cancel();
926
+ } catch {}
858
927
 
859
- if (currentModel) {
860
- markModelRateLimited(currentModel, cooldownMs);
861
- openFamilyCircuitBreaker(currentModel, cooldownMs);
928
+ const retryAfterMs = parseRetryAfter(response);
929
+ const cooldownMs = clampCooldownMs(
930
+ retryAfterMs,
931
+ RATE_LIMIT_CONFIG.defaultCooldownMs,
932
+ );
933
+
934
+ if (currentModel) {
935
+ markModelRateLimited(currentModel, cooldownMs);
936
+ openFamilyCircuitBreaker(currentModel, cooldownMs);
937
+ }
938
+
939
+ throw new Error(
940
+ `[Copilot] Rate limited: ${currentModel || "model"} cooling down. Retry in ${formatRetryAfter(Math.ceil(cooldownMs / 1000))}.`,
941
+ );
862
942
  }
863
943
 
864
- throw new Error(
865
- `[Copilot] Rate limited: ${currentModel || "model"} cooling down. Retry in ${formatRetryAfter(Math.ceil(cooldownMs / 1000))}.`,
866
- );
867
- }
944
+ // Response transformation is handled by the custom SDK at
945
+ // .opencode/plugin/sdk/copilot/
946
+ return response;
947
+ } catch (error) {
948
+ const errorMessage = getErrorMessage(error);
949
+ if (
950
+ errorMessage.includes("Rate limited") ||
951
+ errorMessage.includes("Local request queue saturated")
952
+ ) {
953
+ throw error instanceof Error
954
+ ? error
955
+ : new Error(errorMessage);
956
+ }
957
+
958
+ if (
959
+ attempt < maxFetchAttempts &&
960
+ isTransientUpstreamTimeoutError(error)
961
+ ) {
962
+ const retryDelayMs = 750 * attempt;
963
+ log(
964
+ "warn",
965
+ `Transient upstream timeout from Copilot, retrying request`,
966
+ {
967
+ model: currentModel || undefined,
968
+ attempt,
969
+ retry_delay_ms: retryDelayMs,
970
+ error: errorMessage,
971
+ },
972
+ );
973
+ await sleep(retryDelayMs);
974
+ continue;
975
+ }
868
976
 
869
- // Response transformation is handled by the custom SDK at
870
- // .opencode/plugin/sdk/copilot/
871
- return response;
872
- } catch (error) {
873
- const lastError = error as Error;
874
- if (
875
- lastError.message.includes("Rate limited") ||
876
- lastError.message.includes("Local request queue saturated")
877
- ) {
878
- throw lastError;
977
+ if (isTransientUpstreamTimeoutError(error)) {
978
+ throw new Error(
979
+ `[Copilot] Upstream idle timeout while streaming ${currentModel || "request"}. Retry with a lower thinking budget or switch to a lower-latency Claude variant.`,
980
+ );
981
+ }
982
+
983
+ throw error;
879
984
  }
880
- throw error;
881
985
  }
986
+
987
+ throw new Error(
988
+ `[Copilot] Failed request after ${maxFetchAttempts} attempts.`,
989
+ );
882
990
  },
883
991
  };
884
992
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencodekit",
3
- "version": "0.20.0",
3
+ "version": "0.20.2",
4
4
  "description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
5
5
  "keywords": [
6
6
  "agents",