@oh-my-pi/pi-ai 14.1.0 → 14.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.1.1] - 2026-04-14
6
+
7
+ ### Added
8
+
9
+ - Added `toolStrictMode` compatibility option (`"all_strict"` or `"none"`) to OpenAI-compatible model config to force tool schemas to be sent uniformly strict or uniformly non-strict; when the option is unset, the detected provider default applies (mixed per-tool behavior for non-Cerebras providers)
10
+
11
+ ### Changed
12
+
13
+ - Changed Cerebras OpenAI-compatible providers to default `toolStrictMode` to `"all_strict"` unless explicitly overridden
14
+
15
+ ### Fixed
16
+
17
+ - Fixed OpenAI Completions handling for providers that reject mixed `strict` flags by automatically retrying with non-strict tool schemas when an initial all-strict tool request fails with strict-format 400/422 errors
18
+ - Fixed OpenAI-completions error reporting by including details from the captured JSON error body (such as type, param, and code) when the SDK error thrown for a failed request carries no body
19
+ - Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
20
+ - Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/can1357/oh-my-pi/issues/638))
21
+ - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
22
+
5
23
  ## [14.1.0] - 2026-04-11
6
24
  ### Added
7
25
 
@@ -31,6 +49,7 @@
31
49
 
32
50
  ### Removed
33
51
  - Removed Copilot JWT proxy-ep base URL resolution (no longer needed with opencode auth).
52
+
34
53
  ## [14.0.3] - 2026-04-09
35
54
 
36
55
  ### Fixed
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.1.0",
4
+ "version": "14.1.1",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -45,14 +45,15 @@
45
45
  "@aws-sdk/client-bedrock-runtime": "^3",
46
46
  "@bufbuild/protobuf": "^2.11",
47
47
  "@google/genai": "^1.43",
48
- "@oh-my-pi/pi-utils": "14.1.0",
48
+ "@oh-my-pi/pi-natives": "workspace:*",
49
+ "@oh-my-pi/pi-utils": "workspace:*",
49
50
  "@sinclair/typebox": "^0.34",
50
51
  "@smithy/node-http-handler": "^4.4",
51
52
  "ajv": "^8.18",
52
53
  "ajv-formats": "^3.0",
53
54
  "openai": "^6.25",
54
55
  "partial-json": "^0.1",
55
- "zod": "^4.3"
56
+ "zod": "4.3.6"
56
57
  },
57
58
  "devDependencies": {
58
59
  "@types/bun": "^1.3"
@@ -289,13 +289,6 @@ function applyAnthropicCatalogPolicy(model: ApiModel<Api>, parsedModel: Anthropi
289
289
  model.cost.cacheWrite = 6.25;
290
290
  }
291
291
 
292
- // GitHub Copilot Opus 4.6: discovery currently reports a stale 144K prompt window,
293
- // but the model supports a 1M context window. Keep the bundled catalog truthful
294
- // until Copilot fixes the upstream metadata.
295
- if (model.provider === "github-copilot" && parsedModel.kind === "opus" && semverEqual(parsedModel.version, "4.6")) {
296
- model.contextWindow = 1000000;
297
- }
298
-
299
292
  // Bedrock Opus 4.6: upstream metadata is stale for cache pricing and context.
300
293
  if (model.provider === "amazon-bedrock" && parsedModel.kind === "opus" && semverEqual(parsedModel.version, "4.6")) {
301
294
  model.cost.cacheRead = 0.5;
package/src/models.json CHANGED
@@ -4594,8 +4594,8 @@
4594
4594
  "cacheRead": 0,
4595
4595
  "cacheWrite": 0
4596
4596
  },
4597
- "contextWindow": 144000,
4598
- "maxTokens": 32000,
4597
+ "contextWindow": 136000,
4598
+ "maxTokens": 64000,
4599
4599
  "headers": {
4600
4600
  "User-Agent": "opencode/1.3.15"
4601
4601
  },
@@ -4623,7 +4623,7 @@
4623
4623
  "cacheRead": 0,
4624
4624
  "cacheWrite": 0
4625
4625
  },
4626
- "contextWindow": 160000,
4626
+ "contextWindow": 168000,
4627
4627
  "maxTokens": 32000,
4628
4628
  "headers": {
4629
4629
  "User-Agent": "opencode/1.3.15"
@@ -4651,8 +4651,8 @@
4651
4651
  "cacheRead": 0,
4652
4652
  "cacheWrite": 0
4653
4653
  },
4654
- "contextWindow": 1000000,
4655
- "maxTokens": 64000,
4654
+ "contextWindow": 168000,
4655
+ "maxTokens": 32000,
4656
4656
  "headers": {
4657
4657
  "User-Agent": "opencode/1.3.15"
4658
4658
  },
@@ -4680,7 +4680,7 @@
4680
4680
  "cacheRead": 0,
4681
4681
  "cacheWrite": 0
4682
4682
  },
4683
- "contextWindow": 216000,
4683
+ "contextWindow": 128000,
4684
4684
  "maxTokens": 16000,
4685
4685
  "headers": {
4686
4686
  "User-Agent": "opencode/1.3.15"
@@ -4708,7 +4708,7 @@
4708
4708
  "cacheRead": 0,
4709
4709
  "cacheWrite": 0
4710
4710
  },
4711
- "contextWindow": 144000,
4711
+ "contextWindow": 168000,
4712
4712
  "maxTokens": 32000,
4713
4713
  "headers": {
4714
4714
  "User-Agent": "opencode/1.3.15"
@@ -4736,7 +4736,7 @@
4736
4736
  "cacheRead": 0,
4737
4737
  "cacheWrite": 0
4738
4738
  },
4739
- "contextWindow": 200000,
4739
+ "contextWindow": 168000,
4740
4740
  "maxTokens": 32000,
4741
4741
  "headers": {
4742
4742
  "User-Agent": "opencode/1.3.15"
@@ -4858,7 +4858,7 @@
4858
4858
  "cacheRead": 0,
4859
4859
  "cacheWrite": 0
4860
4860
  },
4861
- "contextWindow": 128000,
4861
+ "contextWindow": 136000,
4862
4862
  "maxTokens": 64000,
4863
4863
  "headers": {
4864
4864
  "User-Agent": "opencode/1.3.15"
@@ -4919,7 +4919,7 @@
4919
4919
  "cacheRead": 0,
4920
4920
  "cacheWrite": 0
4921
4921
  },
4922
- "contextWindow": 128000,
4922
+ "contextWindow": 64000,
4923
4923
  "maxTokens": 4096,
4924
4924
  "headers": {
4925
4925
  "User-Agent": "opencode/1.3.15"
@@ -4976,7 +4976,7 @@
4976
4976
  "cacheRead": 0,
4977
4977
  "cacheWrite": 0
4978
4978
  },
4979
- "contextWindow": 264000,
4979
+ "contextWindow": 128000,
4980
4980
  "maxTokens": 64000,
4981
4981
  "headers": {
4982
4982
  "User-Agent": "opencode/1.3.15"
@@ -5004,7 +5004,7 @@
5004
5004
  "cacheRead": 0,
5005
5005
  "cacheWrite": 0
5006
5006
  },
5007
- "contextWindow": 264000,
5007
+ "contextWindow": 128000,
5008
5008
  "maxTokens": 64000,
5009
5009
  "headers": {
5010
5010
  "User-Agent": "opencode/1.3.15"
@@ -5116,8 +5116,8 @@
5116
5116
  "cacheRead": 0,
5117
5117
  "cacheWrite": 0
5118
5118
  },
5119
- "contextWindow": 264000,
5120
- "maxTokens": 64000,
5119
+ "contextWindow": 272000,
5120
+ "maxTokens": 128000,
5121
5121
  "headers": {
5122
5122
  "User-Agent": "opencode/1.3.15"
5123
5123
  },
@@ -5200,7 +5200,7 @@
5200
5200
  "cacheRead": 0,
5201
5201
  "cacheWrite": 0
5202
5202
  },
5203
- "contextWindow": 400000,
5203
+ "contextWindow": 272000,
5204
5204
  "maxTokens": 128000,
5205
5205
  "headers": {
5206
5206
  "User-Agent": "opencode/1.3.15"
@@ -5228,7 +5228,7 @@
5228
5228
  "cacheRead": 0,
5229
5229
  "cacheWrite": 0
5230
5230
  },
5231
- "contextWindow": 400000,
5231
+ "contextWindow": 272000,
5232
5232
  "maxTokens": 128000,
5233
5233
  "headers": {
5234
5234
  "User-Agent": "opencode/1.3.15"
@@ -5256,7 +5256,7 @@
5256
5256
  "cacheRead": 0,
5257
5257
  "cacheWrite": 0
5258
5258
  },
5259
- "contextWindow": 128000,
5259
+ "contextWindow": 192000,
5260
5260
  "maxTokens": 64000,
5261
5261
  "headers": {
5262
5262
  "User-Agent": "opencode/1.3.15"
@@ -173,31 +173,36 @@ function createBundledReferenceMap<TApi extends Api>(
173
173
  return references;
174
174
  }
175
175
 
176
- function shouldReplaceGlobalReference(existing: Model<Api> | undefined, candidate: Model<Api>): boolean {
177
- if (!existing) return true;
178
- if (candidate.contextWindow !== existing.contextWindow) {
179
- return candidate.contextWindow > existing.contextWindow;
180
- }
181
- if (candidate.maxTokens !== existing.maxTokens) {
182
- return candidate.maxTokens > existing.maxTokens;
183
- }
184
- // When limits tie, prefer OpenAI as the canonical reference so generic OpenAI-family
185
- // providers inherit OpenAI pricing/capabilities instead of Copilot-specific metadata.
186
- return existing.provider !== "openai" && candidate.provider === "openai";
187
- }
188
-
189
- function createGlobalReferenceMap(): Map<string, Model<Api>> {
190
- const references = new Map<string, Model<Api>>();
176
+ /**
177
+ * Returns a lookup that resolves a model ID to a bundled reference, preferring
178
+ * the provider-specific entry over a cross-provider fallback. The global fallback
179
+ * picks the best entry across all providers (largest contextWindow, then maxTokens,
180
+ * then canonical OpenAI), but proxy providers (Copilot, nanogpt, etc.) impose their
181
+ * own limits that are typically lower than native provider limits, so the
182
+ * provider-specific entry must win.
183
+ */
184
+ function createReferenceResolver<TApi extends Api>(
185
+ providerRefs: Map<string, Model<TApi>>,
186
+ ): (modelId: string) => Model<TApi> | undefined {
187
+ const globalRefs = new Map<string, Model<Api>>();
191
188
  for (const provider of getBundledProviders()) {
192
189
  for (const model of getBundledModels(provider as Parameters<typeof getBundledModels>[0])) {
193
190
  const candidate = model as Model<Api>;
194
- const existing = references.get(candidate.id);
195
- if (shouldReplaceGlobalReference(existing, candidate)) {
196
- references.set(candidate.id, candidate);
191
+ const existing = globalRefs.get(candidate.id);
192
+ if (!existing) {
193
+ globalRefs.set(candidate.id, candidate);
194
+ } else if (candidate.contextWindow !== existing.contextWindow) {
195
+ if (candidate.contextWindow > existing.contextWindow) globalRefs.set(candidate.id, candidate);
196
+ } else if (candidate.maxTokens !== existing.maxTokens) {
197
+ if (candidate.maxTokens > existing.maxTokens) globalRefs.set(candidate.id, candidate);
198
+ } else if (existing.provider !== "openai" && candidate.provider === "openai") {
199
+ // When limits tie, prefer OpenAI as canonical so generic OpenAI-family
200
+ // providers inherit OpenAI pricing/capabilities instead of proxy metadata.
201
+ globalRefs.set(candidate.id, candidate);
197
202
  }
198
203
  }
199
204
  }
200
- return references;
205
+ return (modelId: string) => providerRefs.get(modelId) ?? (globalRefs.get(modelId) as Model<TApi> | undefined);
201
206
  }
202
207
 
203
208
  function normalizeAnthropicBaseUrl(baseUrl: string | undefined, fallback: string): string {
@@ -1384,10 +1389,9 @@ export function nanoGptModelManagerOptions(
1384
1389
  ): ModelManagerOptions<"openai-completions"> {
1385
1390
  const apiKey = config?.apiKey;
1386
1391
  const baseUrl = config?.baseUrl ?? "https://nano-gpt.com/api/v1";
1387
- const references = createBundledReferenceMap<"openai-completions">(
1388
- "nanogpt" as Parameters<typeof getBundledModels>[0],
1392
+ const resolveReference = createReferenceResolver(
1393
+ createBundledReferenceMap<"openai-completions">("nanogpt" as Parameters<typeof getBundledModels>[0]),
1389
1394
  );
1390
- const globalReferences = createGlobalReferenceMap();
1391
1395
  return {
1392
1396
  providerId: "nanogpt",
1393
1397
  ...(apiKey && {
@@ -1400,14 +1404,7 @@ export function nanoGptModelManagerOptions(
1400
1404
  baseUrl,
1401
1405
  apiKey,
1402
1406
  mapModel: (entry, defaults) => {
1403
- const providerReference = references.get(defaults.id);
1404
- const globalReference = globalReferences.get(defaults.id);
1405
- const reference =
1406
- providerReference && globalReference
1407
- ? providerReference.contextWindow >= globalReference.contextWindow
1408
- ? providerReference
1409
- : globalReference
1410
- : (providerReference ?? globalReference);
1407
+ const reference = resolveReference(defaults.id);
1411
1408
  const mapped = mapWithBundledReference(entry, defaults, reference);
1412
1409
  return { ...mapped, api: "openai-completions", provider: "nanogpt" };
1413
1410
  },
@@ -1475,15 +1472,15 @@ function extractCopilotLimits(entry: OpenAICompatibleModelRecord): {
1475
1472
 
1476
1473
  export function githubCopilotModelManagerOptions(config?: GithubCopilotModelManagerConfig): ModelManagerOptions<Api> {
1477
1474
  const rawApiKey = config?.apiKey;
1478
- const baseUrl = config?.baseUrl ?? "https://api.githubcopilot.com";
1475
+ const configuredBaseUrl = config?.baseUrl ?? "https://api.githubcopilot.com";
1479
1476
  const parsedApiKey = rawApiKey ? parseGitHubCopilotApiKey(rawApiKey) : undefined;
1480
1477
  const apiKey = parsedApiKey?.accessToken;
1481
- const resolvedBaseUrl =
1482
- parsedApiKey?.enterpriseUrl && baseUrl.includes("githubcopilot.com")
1478
+ const baseUrl =
1479
+ parsedApiKey?.enterpriseUrl && configuredBaseUrl.includes("githubcopilot.com")
1483
1480
  ? getGitHubCopilotBaseUrl(parsedApiKey.enterpriseUrl)
1484
- : baseUrl;
1485
- const references = createBundledReferenceMap<Api>("github-copilot");
1486
- const globalReferences = createGlobalReferenceMap();
1481
+ : configuredBaseUrl;
1482
+ const providerRefs = createBundledReferenceMap<Api>("github-copilot");
1483
+ const resolveReference = createReferenceResolver(providerRefs);
1487
1484
  return {
1488
1485
  providerId: "github-copilot",
1489
1486
  ...(apiKey && {
@@ -1491,7 +1488,7 @@ export function githubCopilotModelManagerOptions(config?: GithubCopilotModelMana
1491
1488
  fetchOpenAICompatibleModels<Api>({
1492
1489
  api: "openai-completions",
1493
1490
  provider: "github-copilot",
1494
- baseUrl: resolvedBaseUrl,
1491
+ baseUrl,
1495
1492
  apiKey,
1496
1493
  headers: OPENCODE_HEADERS,
1497
1494
  mapModel: (
@@ -1499,26 +1496,18 @@ export function githubCopilotModelManagerOptions(config?: GithubCopilotModelMana
1499
1496
  defaults: Model<Api>,
1500
1497
  _context: OpenAICompatibleModelMapperContext<Api>,
1501
1498
  ): Model<Api> => {
1502
- const providerReference = references.get(defaults.id);
1503
- const globalReference = globalReferences.get(defaults.id) as Model<Api> | undefined;
1504
- const reference =
1505
- providerReference && globalReference
1506
- ? providerReference.contextWindow >= globalReference.contextWindow
1507
- ? providerReference
1508
- : globalReference
1509
- : (providerReference ?? globalReference);
1499
+ const reference = resolveReference(defaults.id);
1510
1500
  const copilotLimits = extractCopilotLimits(entry);
1511
- // Copilot currently exposes token limits under capabilities.limits.*.
1512
- // Keep OpenAI-compatible fields as outer fallbacks for forward compatibility if
1513
- // `/models` starts returning context_length/max_completion_tokens in the future.
1501
+ // Copilot exposes token limits under capabilities.limits.*.
1502
+ // max_prompt_tokens is the prompt capacity (what OMP calls contextWindow).
1503
+ // max_context_window_tokens is the total window (prompt + output budget)
1504
+ // and must NOT be used for contextWindow — it inflates the limit and
1505
+ // breaks compaction thresholds, overflow detection, and promotion.
1514
1506
  const contextWindow = toPositiveNumber(
1515
1507
  entry.context_length,
1516
1508
  toPositiveNumber(
1517
- copilotLimits.maxContextWindowTokens,
1518
- toPositiveNumber(
1519
- copilotLimits.maxPromptTokens,
1520
- reference?.contextWindow ?? defaults.contextWindow,
1521
- ),
1509
+ copilotLimits.maxPromptTokens,
1510
+ reference?.contextWindow ?? defaults.contextWindow,
1522
1511
  ),
1523
1512
  );
1524
1513
  const maxTokens = toPositiveNumber(
@@ -1545,7 +1534,7 @@ export function githubCopilotModelManagerOptions(config?: GithubCopilotModelMana
1545
1534
  name,
1546
1535
  contextWindow,
1547
1536
  maxTokens,
1548
- headers: { ...OPENCODE_HEADERS, ...(providerReference?.headers ?? {}) },
1537
+ headers: { ...OPENCODE_HEADERS, ...(providerRefs.get(defaults.id)?.headers ?? {}) },
1549
1538
  ...(api === "openai-completions"
1550
1539
  ? {
1551
1540
  compat: {
@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
3
3
  import http2 from "node:http2";
4
4
  import { create, fromBinary, fromJson, type JsonValue, toBinary, toJson } from "@bufbuild/protobuf";
5
5
  import { ValueSchema } from "@bufbuild/protobuf/wkt";
6
+ import { sanitizeText } from "@oh-my-pi/pi-natives";
6
7
  import { $env } from "@oh-my-pi/pi-utils";
7
8
  import { calculateCost } from "../models";
8
9
  import type {
@@ -105,6 +106,7 @@ import {
105
106
  type ShellArgs,
106
107
  ShellFailureSchema,
107
108
  ShellRejectedSchema,
109
+ type ShellResult,
108
110
  ShellResultSchema,
109
111
  type ShellStream,
110
112
  ShellStreamExitSchema,
@@ -674,6 +676,31 @@ function sendShellStreamEvent(
674
676
  sendExecClientMessage(h2Request, execMsg, "shellStream", create(ShellStreamSchema, { event }));
675
677
  }
676
678
 
679
+ function sanitizeShellExecResult(execResult: ShellResult): ShellResult {
680
+ const result = execResult.result;
681
+ if (!result) return execResult;
682
+
683
+ switch (result.case) {
684
+ case "success":
685
+ case "failure": {
686
+ const value = result.value;
687
+ return {
688
+ ...execResult,
689
+ result: {
690
+ case: result.case,
691
+ value: {
692
+ ...value,
693
+ stdout: value.stdout ? sanitizeText(value.stdout) : value.stdout,
694
+ stderr: value.stderr ? sanitizeText(value.stderr) : value.stderr,
695
+ },
696
+ },
697
+ } as ShellResult;
698
+ }
699
+ default:
700
+ return execResult;
701
+ }
702
+ }
703
+
677
704
  async function handleShellStreamArgs(
678
705
  args: ShellArgs,
679
706
  execMsg: ExecServerMessage,
@@ -695,18 +722,95 @@ async function handleShellStreamArgs(
695
722
 
696
723
  sendShellStreamEvent(h2Request, execMsg, { case: "start", value: create(ShellStreamStartSchema, {}) });
697
724
 
725
+ // Buffer for incomplete ANSI sequences across chunks
726
+ let stdoutBuffer = "";
727
+ let stderrBuffer = "";
728
+
729
+ const incompleteEscapeRegex = /\x1b(|\[|\[\d*|\[\?|\[\?\d*|\]\d*;?)$/;
730
+
731
+ const flushStdout = () => {
732
+ if (stdoutBuffer) {
733
+ let safeEnd = stdoutBuffer.length;
734
+ const match = stdoutBuffer.match(incompleteEscapeRegex);
735
+ if (match && match[0].length > 0) {
736
+ safeEnd = stdoutBuffer.length - match[0].length;
737
+ }
738
+ const toSend = stdoutBuffer.slice(0, safeEnd);
739
+ const remaining = stdoutBuffer.slice(safeEnd);
740
+ if (toSend) {
741
+ sendShellStreamEvent(h2Request, execMsg, {
742
+ case: "stdout",
743
+ value: create(ShellStreamStdoutSchema, { data: sanitizeText(toSend) }),
744
+ });
745
+ }
746
+ stdoutBuffer = remaining;
747
+ }
748
+ };
749
+
750
+ const flushStderr = () => {
751
+ if (stderrBuffer) {
752
+ let safeEnd = stderrBuffer.length;
753
+ const match = stderrBuffer.match(incompleteEscapeRegex);
754
+ if (match && match[0].length > 0) {
755
+ safeEnd = stderrBuffer.length - match[0].length;
756
+ }
757
+ const toSend = stderrBuffer.slice(0, safeEnd);
758
+ const remaining = stderrBuffer.slice(safeEnd);
759
+ if (toSend) {
760
+ sendShellStreamEvent(h2Request, execMsg, {
761
+ case: "stderr",
762
+ value: create(ShellStreamStderrSchema, { data: sanitizeText(toSend) }),
763
+ });
764
+ }
765
+ stderrBuffer = remaining;
766
+ }
767
+ };
768
+
769
+ let stdoutFlushTimer: NodeJS.Timeout | null = null;
770
+ let stderrFlushTimer: NodeJS.Timeout | null = null;
771
+
772
+ const scheduleStdoutFlush = () => {
773
+ if (!stdoutFlushTimer) {
774
+ stdoutFlushTimer = setTimeout(() => {
775
+ stdoutFlushTimer = null;
776
+ flushStdout();
777
+ }, 100);
778
+ }
779
+ };
780
+
781
+ const scheduleStderrFlush = () => {
782
+ if (!stderrFlushTimer) {
783
+ stderrFlushTimer = setTimeout(() => {
784
+ stderrFlushTimer = null;
785
+ flushStderr();
786
+ }, 100);
787
+ }
788
+ };
789
+
698
790
  const streamCallbacks: CursorShellStreamCallbacks = {
699
791
  onStdout(data: string) {
700
- sendShellStreamEvent(h2Request, execMsg, {
701
- case: "stdout",
702
- value: create(ShellStreamStdoutSchema, { data }),
703
- });
792
+ stdoutBuffer += data;
793
+ if (stdoutBuffer.includes("\n") || stdoutBuffer.length > 4096) {
794
+ if (stdoutFlushTimer) {
795
+ clearTimeout(stdoutFlushTimer);
796
+ stdoutFlushTimer = null;
797
+ }
798
+ flushStdout();
799
+ } else {
800
+ scheduleStdoutFlush();
801
+ }
704
802
  },
705
803
  onStderr(data: string) {
706
- sendShellStreamEvent(h2Request, execMsg, {
707
- case: "stderr",
708
- value: create(ShellStreamStderrSchema, { data }),
709
- });
804
+ stderrBuffer += data;
805
+ if (stderrBuffer.includes("\n") || stderrBuffer.length > 4096) {
806
+ if (stderrFlushTimer) {
807
+ clearTimeout(stderrFlushTimer);
808
+ stderrFlushTimer = null;
809
+ }
810
+ flushStderr();
811
+ } else {
812
+ scheduleStderrFlush();
813
+ }
710
814
  },
711
815
  };
712
816
 
@@ -730,10 +834,18 @@ async function handleShellStreamArgs(
730
834
  // When using the batch handler (no shellStream), send buffered stdout/stderr
731
835
  // after execution completes. With shellStream these were already sent in real time.
732
836
  const sendBufferedOutput = !streamHandler;
733
- sendShellStreamExitFromResult(h2Request, execMsg, execResult, sendBufferedOutput);
837
+ const sanitizedExecResult = sanitizeShellExecResult(execResult);
838
+
839
+ // Flush any remaining buffered output before sending results
840
+ if (stdoutFlushTimer) clearTimeout(stdoutFlushTimer);
841
+ if (stderrFlushTimer) clearTimeout(stderrFlushTimer);
842
+ flushStdout();
843
+ flushStderr();
844
+
845
+ sendShellStreamExitFromResult(h2Request, execMsg, sanitizedExecResult, sendBufferedOutput);
734
846
  // Cursor can keep the turn pending when it receives only stream deltas.
735
847
  // Send the final structured shellResult as completion acknowledgement.
736
- sendExecClientMessage(h2Request, execMsg, "shellResult", execResult);
848
+ sendExecClientMessage(h2Request, execMsg, "shellResult", sanitizedExecResult);
737
849
  sendExecClientStreamClose(h2Request, execMsg);
738
850
 
739
851
  log("shellStream", "done", { elapsed: Date.now() - startTs });
@@ -742,7 +854,7 @@ async function handleShellStreamArgs(
742
854
  function sendShellStreamExitFromResult(
743
855
  h2Request: http2.ClientHttp2Stream,
744
856
  execMsg: ExecServerMessage,
745
- execResult: { result: { case?: string; value?: any } },
857
+ execResult: ShellResult,
746
858
  sendBufferedOutput: boolean,
747
859
  ): void {
748
860
  const result = execResult.result;
@@ -753,13 +865,13 @@ function sendShellStreamExitFromResult(
753
865
  if (value.stdout) {
754
866
  sendShellStreamEvent(h2Request, execMsg, {
755
867
  case: "stdout",
756
- value: create(ShellStreamStdoutSchema, { data: value.stdout }),
868
+ value: create(ShellStreamStdoutSchema, { data: sanitizeText(value.stdout) }),
757
869
  });
758
870
  }
759
871
  if (value.stderr) {
760
872
  sendShellStreamEvent(h2Request, execMsg, {
761
873
  case: "stderr",
762
- value: create(ShellStreamStderrSchema, { data: value.stderr }),
874
+ value: create(ShellStreamStderrSchema, { data: sanitizeText(value.stderr) }),
763
875
  });
764
876
  }
765
877
  }
@@ -779,13 +891,13 @@ function sendShellStreamExitFromResult(
779
891
  if (value.stdout) {
780
892
  sendShellStreamEvent(h2Request, execMsg, {
781
893
  case: "stdout",
782
- value: create(ShellStreamStdoutSchema, { data: value.stdout }),
894
+ value: create(ShellStreamStdoutSchema, { data: sanitizeText(value.stdout) }),
783
895
  });
784
896
  }
785
897
  if (value.stderr) {
786
898
  sendShellStreamEvent(h2Request, execMsg, {
787
899
  case: "stderr",
788
- value: create(ShellStreamStderrSchema, { data: value.stderr }),
900
+ value: create(ShellStreamStderrSchema, { data: sanitizeText(value.stderr) }),
789
901
  });
790
902
  }
791
903
  }
@@ -970,7 +1082,8 @@ async function handleExecServerMessage(
970
1082
  reason => buildShellRejectedResult(normalizedArgs.command, normalizedArgs.workingDirectory, reason),
971
1083
  error => buildShellFailureResult(normalizedArgs.command, normalizedArgs.workingDirectory, error),
972
1084
  );
973
- sendExecClientMessage(h2Request, execMsg, "shellResult", execResult);
1085
+ const sanitizedExecResult = sanitizeShellExecResult(execResult);
1086
+ sendExecClientMessage(h2Request, execMsg, "shellResult", sanitizedExecResult);
974
1087
  return;
975
1088
  }
976
1089
  case "shellStreamArgs": {
@@ -1,13 +1,15 @@
1
1
  import type { Model, OpenAICompat } from "../types";
2
2
 
3
3
  type OpenAIReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
4
+ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mixed";
4
5
 
5
6
  export type ResolvedOpenAICompat = Required<
6
- Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody">
7
+ Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode">
7
8
  > & {
8
9
  openRouterRouting?: OpenAICompat["openRouterRouting"];
9
10
  vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
10
11
  extraBody?: OpenAICompat["extraBody"];
12
+ toolStrictMode: ResolvedToolStrictMode;
11
13
  };
12
14
 
13
15
  function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
@@ -109,6 +111,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
109
111
  vercelGatewayRouting: undefined,
110
112
  supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
111
113
  extraBody: undefined,
114
+ toolStrictMode: isCerebras ? "all_strict" : "mixed",
112
115
  };
113
116
  }
114
117
 
@@ -151,5 +154,6 @@ export function resolveOpenAICompat(
151
154
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
152
155
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
153
156
  extraBody: model.compat.extraBody,
157
+ toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
154
158
  };
155
159
  }
@@ -31,7 +31,12 @@ import {
31
31
  } from "../types";
32
32
  import { createAbortSourceTracker } from "../utils/abort";
33
33
  import { AssistantMessageEventStream } from "../utils/event-stream";
34
- import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotAuthError } from "../utils/http-inspector";
34
+ import {
35
+ type CapturedHttpErrorResponse,
36
+ finalizeErrorMessage,
37
+ type RawHttpRequestDump,
38
+ rewriteCopilotAuthError,
39
+ } from "../utils/http-inspector";
35
40
  import {
36
41
  createFirstEventWatchdog,
37
42
  getOpenAIStreamIdleTimeoutMs,
@@ -42,6 +47,7 @@ import {
42
47
  import { parseStreamingJson } from "../utils/json-parse";
43
48
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
44
49
  import { getKimiCommonHeaders } from "../utils/oauth/kimi";
50
+ import { extractHttpStatusFromError } from "../utils/retry";
45
51
  import { adaptSchemaForStrict, NO_STRICT } from "../utils/schema";
46
52
  import { mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
47
53
  import {
@@ -126,6 +132,14 @@ type OpenAICompletionsSamplingParams = OpenAI.Chat.Completions.ChatCompletionCre
126
132
  repetition_penalty?: number;
127
133
  };
128
134
 
135
+ type AppliedToolStrictMode = "mixed" | "all_strict" | "none";
136
+ type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;
137
+
138
+ type BuiltOpenAICompletionTools = {
139
+ tools: OpenAI.Chat.Completions.ChatCompletionTool[];
140
+ toolStrictMode: AppliedToolStrictMode;
141
+ };
142
+
129
143
  // LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
130
144
  // If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
131
145
  // they will be incorrectly consumed as thinking delimiters, truncating visible output.
@@ -177,6 +191,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
177
191
  (async () => {
178
192
  const startTime = Date.now();
179
193
  let firstTokenTime: number | undefined;
194
+ let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;
180
195
 
181
196
  const output: AssistantMessage = {
182
197
  role: "assistant",
@@ -203,24 +218,42 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
203
218
  try {
204
219
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
205
220
  const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
206
- const { client, copilotPremiumRequests, baseUrl } = await createClient(
207
- model,
208
- context,
209
- apiKey,
210
- options?.headers,
211
- options?.initiatorOverride,
212
- );
213
- const params = buildParams(model, context, options, baseUrl);
214
- options?.onPayload?.(params);
215
- rawRequestDump = {
216
- provider: model.provider,
217
- api: output.api,
218
- model: model.id,
219
- method: "POST",
220
- url: `${baseUrl}/chat/completions`,
221
- body: params,
221
+ const {
222
+ client,
223
+ copilotPremiumRequests,
224
+ baseUrl,
225
+ requestHeaders,
226
+ getCapturedErrorResponse: captureErrorResponse,
227
+ clearCapturedErrorResponse,
228
+ } = await createClient(model, context, apiKey, options?.headers, options?.initiatorOverride);
229
+ getCapturedErrorResponse = captureErrorResponse;
230
+ let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
231
+ const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
232
+ clearCapturedErrorResponse();
233
+ const { params, toolStrictMode } = buildParams(model, context, options, baseUrl, toolStrictModeOverride);
234
+ appliedToolStrictMode = toolStrictMode;
235
+ options?.onPayload?.(params);
236
+ rawRequestDump = {
237
+ provider: model.provider,
238
+ api: output.api,
239
+ model: model.id,
240
+ method: "POST",
241
+ url: `${baseUrl}/chat/completions`,
242
+ headers: requestHeaders,
243
+ body: params,
244
+ };
245
+ return client.chat.completions.create(params, { signal: requestSignal });
222
246
  };
223
- const openaiStream = await client.chat.completions.create(params, { signal: requestSignal });
247
+ let openaiStream: AsyncIterable<ChatCompletionChunk>;
248
+ try {
249
+ openaiStream = await createCompletionsStream();
250
+ } catch (error) {
251
+ const capturedErrorResponse = getCapturedErrorResponse();
252
+ if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
253
+ throw error;
254
+ }
255
+ openaiStream = await createCompletionsStream("none");
256
+ }
224
257
  const firstEventWatchdog = createFirstEventWatchdog(
225
258
  options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
226
259
  () => abortTracker.abortLocally(firstEventTimeoutAbortError),
@@ -513,7 +546,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
513
546
  for (const block of output.content) delete (block as any).index;
514
547
  const firstEventTimeoutError = abortTracker.getLocalAbortReason();
515
548
  output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
516
- output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
549
+ output.errorMessage =
550
+ firstEventTimeoutError?.message ??
551
+ (await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
517
552
  // Some providers via OpenRouter include extra details here.
518
553
  const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
519
554
  if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
@@ -538,6 +573,9 @@ async function createClient(
538
573
  client: OpenAI;
539
574
  copilotPremiumRequests: number | undefined;
540
575
  baseUrl: string | undefined;
576
+ requestHeaders: Record<string, string>;
577
+ getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
578
+ clearCapturedErrorResponse: () => void;
541
579
  }> {
542
580
  if (!apiKey) {
543
581
  if (!$env.OPENAI_API_KEY) {
@@ -573,6 +611,34 @@ async function createClient(
573
611
  copilotPremiumRequests = copilot.premiumRequests;
574
612
  baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
575
613
  }
614
+ let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
615
+ const wrappedFetch = Object.assign(
616
+ async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
617
+ const response = await fetch(input, init);
618
+ if (response.ok) {
619
+ capturedErrorResponse = undefined;
620
+ return response;
621
+ }
622
+ let bodyText: string | undefined;
623
+ let bodyJson: unknown;
624
+ try {
625
+ bodyText = await response.clone().text();
626
+ if (bodyText.trim().length > 0) {
627
+ try {
628
+ bodyJson = JSON.parse(bodyText);
629
+ } catch {}
630
+ }
631
+ } catch {}
632
+ capturedErrorResponse = {
633
+ status: response.status,
634
+ headers: response.headers,
635
+ bodyText,
636
+ bodyJson,
637
+ };
638
+ return response;
639
+ },
640
+ { preconnect: fetch.preconnect },
641
+ );
576
642
  return {
577
643
  client: new OpenAI({
578
644
  apiKey,
@@ -580,9 +646,15 @@ async function createClient(
580
646
  dangerouslyAllowBrowser: true,
581
647
  maxRetries: 5,
582
648
  defaultHeaders: headers,
649
+ fetch: wrappedFetch,
583
650
  }),
584
651
  copilotPremiumRequests,
585
652
  baseUrl,
653
+ requestHeaders: headers,
654
+ getCapturedErrorResponse: () => capturedErrorResponse,
655
+ clearCapturedErrorResponse: () => {
656
+ capturedErrorResponse = undefined;
657
+ },
586
658
  };
587
659
  }
588
660
 
@@ -591,7 +663,8 @@ function buildParams(
591
663
  context: Context,
592
664
  options: OpenAICompletionsOptions | undefined,
593
665
  resolvedBaseUrl?: string,
594
- ) {
666
+ toolStrictModeOverride?: ToolStrictModeOverride,
667
+ ): { params: OpenAICompletionsSamplingParams; toolStrictMode: AppliedToolStrictMode } {
595
668
  const compat = getCompat(model, resolvedBaseUrl);
596
669
  const messages = convertMessages(model, context, compat);
597
670
  maybeAddOpenRouterAnthropicCacheControl(model, messages);
@@ -607,6 +680,7 @@ function buildParams(
607
680
  messages,
608
681
  stream: true,
609
682
  };
683
+ let toolStrictMode: AppliedToolStrictMode = "none";
610
684
 
611
685
  if (compat.supportsUsageInStreaming !== false) {
612
686
  (params as { stream_options?: { include_usage: boolean } }).stream_options = { include_usage: true };
@@ -647,7 +721,9 @@ function buildParams(
647
721
  }
648
722
 
649
723
  if (context.tools) {
650
- params.tools = convertTools(context.tools, compat);
724
+ const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
725
+ params.tools = builtTools.tools;
726
+ toolStrictMode = builtTools.toolStrictMode;
651
727
  } else if (hasToolHistory(context.messages)) {
652
728
  // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
653
729
  params.tools = [];
@@ -697,7 +773,14 @@ function buildParams(
697
773
  Object.assign(params, compat.extraBody);
698
774
  }
699
775
 
700
- return params;
776
+ return buildParamsResult(params, toolStrictMode);
777
+ }
778
+
779
/**
 * Pairs request parameters with a tool strict mode value in one result object.
 *
 * @param params - Request parameters to return to the caller.
 * @param toolStrictMode - Tool strict mode value to return alongside them.
 * @returns An object holding both arguments under the keys `params` and `toolStrictMode`.
 */
function buildParamsResult(
  params: OpenAICompletionsSamplingParams,
  toolStrictMode: AppliedToolStrictMode,
): { params: OpenAICompletionsSamplingParams; toolStrictMode: AppliedToolStrictMode } {
  const result = { params, toolStrictMode };
  return result;
}
702
785
 
703
786
  function getOptionalNumberProperty(value: object, key: string): number | undefined {
@@ -1102,22 +1185,68 @@ export function convertMessages(
1102
1185
  return params;
1103
1186
  }
1104
1187
 
1105
/**
 * Converts tool definitions into chat-completions tool entries and reports
 * which strict mode was applied to the whole batch.
 *
 * Each tool is first run through `adaptSchemaForStrict`. The batch mode is
 * then resolved from `toolStrictModeOverride`, falling back to
 * `compat.toolStrictMode`:
 * - `"none"`: every tool is sent with its original schema and no `strict` flag.
 * - `"all_strict"`: every tool is sent strict, but only if every tool could be
 *   adapted; otherwise the batch is downgraded to `"none"`.
 * - anything else: `"mixed"`, where each tool keeps its own strict outcome.
 *
 * @param tools - Tool definitions to convert.
 * @param compat - Resolved provider compatibility settings.
 * @param toolStrictModeOverride - Optional mode that takes precedence over `compat.toolStrictMode`.
 * @returns The converted tool entries together with the applied strict mode.
 */
function convertTools(
  tools: Tool[],
  compat: ResolvedOpenAICompat,
  toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
  const strictAllowed = !NO_STRICT && compat.supportsStrictMode !== false;
  const candidates = tools.map(tool => {
    const rawSchema = tool.parameters as unknown as Record<string, unknown>;
    const outcome = adaptSchemaForStrict(rawSchema, strictAllowed && tool.strict !== false);
    return { tool, rawSchema, strictSchema: outcome.schema, strict: outcome.strict };
  });

  const requestedMode = toolStrictModeOverride ?? compat.toolStrictMode;
  const resolveAppliedMode = (): AppliedToolStrictMode => {
    switch (requestedMode) {
      case "none":
        return "none";
      case "all_strict":
        // A single tool that cannot be made strict downgrades the whole batch.
        return candidates.every(candidate => candidate.strict) ? "all_strict" : "none";
      default:
        return "mixed";
    }
  };
  const toolStrictMode = resolveAppliedMode();

  return {
    tools: candidates.map(({ tool, rawSchema, strictSchema, strict }) => {
      const includeStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && strict);
      return {
        type: "function",
        function: {
          name: tool.name,
          description: tool.description || "",
          parameters: includeStrict ? strictSchema : rawSchema,
          // Only include strict if provider supports it. Some reject unknown fields.
          ...(includeStrict ? { strict: true } : {}),
        },
      };
    }),
    toolStrictMode,
  };
}
1232
+
1233
/**
 * Decides whether a failed request should be retried with non-strict tool schemas.
 *
 * A retry is suggested only when all of the following hold:
 * - at least one tool was supplied and the request was sent in `"all_strict"` mode;
 * - the HTTP status (from the error, else from the captured response) is 400 or 422;
 * - the error message or captured body text matches the strict/tool wording pattern.
 *
 * @param error - The error thrown by the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools supplied with the request.
 * @returns `true` when a non-strict retry should be attempted.
 */
function shouldRetryWithoutStrictTools(
  error: unknown,
  capturedErrorResponse: CapturedHttpErrorResponse | undefined,
  toolStrictMode: AppliedToolStrictMode,
  tools: Tool[] | undefined,
): boolean {
  const toolCount = tools?.length ?? 0;
  if (toolStrictMode !== "all_strict" || toolCount === 0) {
    return false;
  }

  const status = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
  const isRejectionStatus = status === 400 || status === 422;
  if (!isRejectionStatus) {
    return false;
  }

  const candidates: unknown[] = [
    error instanceof Error ? error.message : undefined,
    capturedErrorResponse?.bodyText,
  ];
  const fragments: string[] = [];
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      fragments.push(candidate);
    }
  }
  const haystack = fragments.join("\n");
  return /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i.test(haystack);
}
1122
1251
 
1123
1252
  function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
package/src/types.ts CHANGED
@@ -483,6 +483,8 @@ export interface OpenAICompat {
483
483
  extraBody?: Record<string, unknown>;
484
484
  /** Whether the provider supports the `strict` field in tool definitions. Default: auto-detected per provider/baseUrl (conservative for unknown providers). */
485
485
  supportsStrictMode?: boolean;
486
+ /** Whether tool schemas must be sent either all strict or all non-strict. Undefined keeps the existing per-tool mixed behavior. */
487
+ toolStrictMode?: "all_strict" | "none";
486
488
  }
487
489
 
488
490
  /**
@@ -13,6 +13,13 @@ export type RawHttpRequestDump = {
13
13
  body?: unknown;
14
14
  };
15
15
 
16
/** Snapshot of an HTTP error response: status plus optional headers and body. */
export type CapturedHttpErrorResponse = {
  /** HTTP status code of the response. */
  status: number;
  /** Response headers, when available. */
  headers?: Headers;
  /** Raw response body text, when available. */
  bodyText?: string;
  /** Parsed JSON form of the body, when available. */
  bodyJson?: unknown;
};
22
+
16
23
  type ErrorWithStatus = {
17
24
  status?: unknown;
18
25
  };
@@ -44,8 +51,18 @@ export async function appendRawHttpRequestDumpFor400(
44
51
  export async function finalizeErrorMessage(
45
52
  error: unknown,
46
53
  rawRequestDump: RawHttpRequestDump | undefined,
54
+ capturedErrorResponse?: CapturedHttpErrorResponse,
47
55
  ): Promise<string> {
48
- return appendRawHttpRequestDumpFor400(formatErrorMessageWithRetryAfter(error), error, rawRequestDump);
56
+ let message = formatErrorMessageWithRetryAfter(error, capturedErrorResponse?.headers);
57
+ const capturedMessage = formatCapturedHttpError(capturedErrorResponse);
58
+ if (capturedMessage) {
59
+ if (/\bstatus code\s*\(no body\)/i.test(message)) {
60
+ message = `${capturedErrorResponse?.status ?? "HTTP"} status code: ${capturedMessage}`;
61
+ } else if (!message.includes(capturedMessage)) {
62
+ message = `${message}\n${capturedMessage}`;
63
+ }
64
+ }
65
+ return appendRawHttpRequestDumpFor400(message, error, rawRequestDump);
49
66
  }
50
67
 
51
68
  export function withHttpStatus(error: unknown, status: number): Error {
@@ -96,3 +113,53 @@ function redactHeaders(headers: Record<string, string> | undefined): Record<stri
96
113
  }
97
114
  return redacted;
98
115
  }
116
+
117
/**
 * Renders a captured HTTP error response as a single human-readable string.
 *
 * The message is taken from `error.message` of the JSON payload, then from a
 * top-level `message`, and finally falls back to the trimmed body text. Any of
 * `type`, `param` and `code` that are present (nested under `error` first,
 * then top-level) are appended as `(type=… param=… code=…)`.
 *
 * Each label is bound to its own key before missing values are dropped, so a
 * payload without `type` can never have its `param` or `code` reported under
 * the wrong label.
 *
 * @param captured - Captured HTTP error response, if any.
 * @returns The formatted message; the trimmed body text when the body is not a
 *   JSON object; or `undefined` when there is no response or no body text.
 */
function formatCapturedHttpError(captured: CapturedHttpErrorResponse | undefined): string | undefined {
  if (!captured) return undefined;
  const bodyText = captured.bodyText?.trim();
  if (!bodyText) return undefined;
  const payload = parseCapturedErrorPayload(captured);
  if (!payload) return bodyText;

  const errorPayload = getObjectProperty(payload, "error") ?? payload;
  // Prefer the nested `error` object, then fall back to the top-level payload.
  const pick = (key: string): string | undefined =>
    getStringProperty(errorPayload, key) ?? getStringProperty(payload, key);

  const message = pick("message") ?? bodyText;
  const extras: string[] = [];
  for (const label of ["type", "param", "code"]) {
    const value = pick(label);
    if (value !== undefined) extras.push(`${label}=${value}`);
  }
  return extras.length > 0 ? `${message} (${extras.join(" ")})` : message;
}

/**
 * Returns the captured body as a plain object, using the pre-parsed JSON when
 * it is an object and otherwise trying to parse the body text.
 *
 * @param captured - Captured HTTP error response.
 * @returns The payload object, or `undefined` when the body is missing, is not
 *   valid JSON, or is not a plain (non-array) object.
 */
function parseCapturedErrorPayload(captured: CapturedHttpErrorResponse): Record<string, unknown> | undefined {
  if (isObject(captured.bodyJson)) {
    return captured.bodyJson;
  }
  if (!captured.bodyText) return undefined;
  try {
    const parsed: unknown = JSON.parse(captured.bodyText);
    return isObject(parsed) ? parsed : undefined;
  } catch {
    // Non-JSON bodies are expected here; the caller falls back to the raw text.
    return undefined;
  }
}

/** Returns `value[key]` when it is a plain (non-array) object, otherwise `undefined`. */
function getObjectProperty(value: Record<string, unknown>, key: string): Record<string, unknown> | undefined {
  const property = value[key];
  return isObject(property) ? property : undefined;
}

/** Returns `value[key]` when it is a string with non-whitespace content, otherwise `undefined`. */
function getStringProperty(value: Record<string, unknown>, key: string): string | undefined {
  const property = value[key];
  return typeof property === "string" && property.trim().length > 0 ? property : undefined;
}

/** Type guard for non-null, non-array objects. */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
@@ -21,6 +21,7 @@ import type { AssistantMessage } from "../types";
21
21
  * - Kimi For Coding: "Your request exceeded model token limit: X (requested: Y)"
22
22
  * - Anthropic 413: "request_too_large" / "Request exceeds the maximum size" (payload too large)
23
23
  * - HTTP 413 variants: "Payload Too Large" / "Request Entity Too Large"
24
+ * - z.ai / GLM: Returns finish_reason: "model_context_window_exceeded" mapped to error message
24
25
  * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
25
26
  * - Ollama: Silently truncates input - not detectable via error message
26
27
  */
@@ -49,6 +50,7 @@ const OVERFLOW_PATTERNS = [
49
50
  /payload too large/i, // Generic HTTP 413 variant
50
51
  /entity too large/i, // Generic HTTP 413 variant
51
52
  /\b413\b.*\b(request|payload|entity)\b.*\btoo large\b/i, // "413 Request Entity Too Large" variants
53
+ /model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text
52
54
  ];
53
55
  /**
54
56
  * Check if an assistant message represents a context overflow error.
@@ -88,8 +88,12 @@ function hasUnrepresentableStrictObjectMap(schema: Record<string, unknown>, seen
88
88
  export function sanitizeSchemaForStrictMode(
89
89
  schema: Record<string, unknown>,
90
90
  seen?: WeakSet<object>,
91
+ cache?: WeakMap<Record<string, unknown>, Record<string, unknown>>,
91
92
  ): Record<string, unknown> {
92
93
  if (!seen) seen = new WeakSet();
94
+ if (!cache) cache = new WeakMap();
95
+ const cached = cache.get(schema);
96
+ if (cached) return cached;
93
97
  if (seen.has(schema)) return {};
94
98
  seen.add(schema);
95
99
  const typeValue = schema.type;
@@ -98,8 +102,10 @@ export function sanitizeSchemaForStrictMode(
98
102
  const schemaWithoutType = { ...schema };
99
103
  delete schemaWithoutType.type;
100
104
 
101
- const sanitizedWithoutType = sanitizeSchemaForStrictMode(schemaWithoutType, seen);
105
+ const sanitizedWithoutType = sanitizeSchemaForStrictMode(schemaWithoutType, seen, cache);
102
106
  if (typeVariants.length === 0) {
107
+ cache.set(schema, sanitizedWithoutType);
108
+ seen.delete(schema);
103
109
  return sanitizedWithoutType;
104
110
  }
105
111
 
@@ -113,19 +119,25 @@ export function sanitizeSchemaForStrictMode(
113
119
  if (variantType !== "array") {
114
120
  delete variantSchema.items;
115
121
  }
116
- return sanitizeSchemaForStrictMode(variantSchema, seen);
122
+ return sanitizeSchemaForStrictMode(variantSchema, seen, cache);
117
123
  });
118
124
 
119
125
  if (variants.length === 1) {
126
+ cache.set(schema, variants[0] as Record<string, unknown>);
127
+ seen.delete(schema);
120
128
  return variants[0] as Record<string, unknown>;
121
129
  }
122
130
 
123
- return {
131
+ const result = {
124
132
  anyOf: variants,
125
133
  };
134
+ cache.set(schema, result);
135
+ seen.delete(schema);
136
+ return result;
126
137
  }
127
138
 
128
139
  const sanitized: Record<string, unknown> = {};
140
+ cache.set(schema, sanitized);
129
141
  for (const [key, value] of Object.entries(schema)) {
130
142
  if (NON_STRUCTURAL_SCHEMA_KEYS.has(key) || key === "type" || key === "const" || key === "nullable") {
131
143
  continue;
@@ -135,7 +147,7 @@ export function sanitizeSchemaForStrictMode(
135
147
  const properties = Object.fromEntries(
136
148
  Object.entries(value).map(([propertyName, propertySchema]) => [
137
149
  propertyName,
138
- isJsonObject(propertySchema) ? sanitizeSchemaForStrictMode(propertySchema, seen) : propertySchema,
150
+ isJsonObject(propertySchema) ? sanitizeSchemaForStrictMode(propertySchema, seen, cache) : propertySchema,
139
151
  ]),
140
152
  );
141
153
  sanitized.properties = properties;
@@ -144,10 +156,10 @@ export function sanitizeSchemaForStrictMode(
144
156
 
145
157
  if (key === "items") {
146
158
  if (isJsonObject(value)) {
147
- sanitized.items = sanitizeSchemaForStrictMode(value, seen);
159
+ sanitized.items = sanitizeSchemaForStrictMode(value, seen, cache);
148
160
  } else if (Array.isArray(value)) {
149
161
  sanitized.items = value.map(entry =>
150
- isJsonObject(entry) ? sanitizeSchemaForStrictMode(entry, seen) : entry,
162
+ isJsonObject(entry) ? sanitizeSchemaForStrictMode(entry, seen, cache) : entry,
151
163
  );
152
164
  } else {
153
165
  sanitized.items = value;
@@ -156,7 +168,9 @@ export function sanitizeSchemaForStrictMode(
156
168
  }
157
169
 
158
170
  if (COMBINATOR_KEYS.includes(key as (typeof COMBINATOR_KEYS)[number]) && Array.isArray(value)) {
159
- sanitized[key] = value.map(entry => (isJsonObject(entry) ? sanitizeSchemaForStrictMode(entry, seen) : entry));
171
+ sanitized[key] = value.map(entry =>
172
+ isJsonObject(entry) ? sanitizeSchemaForStrictMode(entry, seen, cache) : entry,
173
+ );
160
174
  continue;
161
175
  }
162
176
 
@@ -164,7 +178,9 @@ export function sanitizeSchemaForStrictMode(
164
178
  sanitized[key] = Object.fromEntries(
165
179
  Object.entries(value).map(([definitionName, definitionSchema]) => [
166
180
  definitionName,
167
- isJsonObject(definitionSchema) ? sanitizeSchemaForStrictMode(definitionSchema, seen) : definitionSchema,
181
+ isJsonObject(definitionSchema)
182
+ ? sanitizeSchemaForStrictMode(definitionSchema, seen, cache)
183
+ : definitionSchema,
168
184
  ]),
169
185
  );
170
186
  continue;
@@ -221,9 +237,11 @@ export function sanitizeSchemaForStrictMode(
221
237
 
222
238
  if (schema.nullable === true) {
223
239
  const { nullable: _, ...withoutNullable } = sanitized;
240
+ seen.delete(schema);
224
241
  return { anyOf: [withoutNullable, { type: "null" }] };
225
242
  }
226
243
 
244
+ seen.delete(schema);
227
245
  return sanitized;
228
246
  }
229
247
 
@@ -241,11 +259,23 @@ export function sanitizeSchemaForStrictMode(
241
259
  * i.e. the node is not representable in strict mode. Prefer
242
260
  * {@link tryEnforceStrictSchema} which catches this and degrades gracefully.
243
261
  */
244
- export function enforceStrictSchema(schema: Record<string, unknown>, seen?: WeakSet<object>): Record<string, unknown> {
262
+ export function enforceStrictSchema(
263
+ schema: Record<string, unknown>,
264
+ seen?: WeakSet<object>,
265
+ cache?: WeakMap<Record<string, unknown>, Record<string, unknown>>,
266
+ ): Record<string, unknown> {
245
267
  if (!seen) seen = new WeakSet();
246
- if (seen.has(schema)) return schema;
268
+ if (!cache) cache = new WeakMap();
269
+ if (seen.has(schema)) {
270
+ throw new Error("Schema contains a circular object graph — cannot enforce strict mode");
271
+ }
272
+ const cached = cache.get(schema);
273
+ if (cached) {
274
+ return cached;
275
+ }
247
276
  seen.add(schema);
248
277
  const result = { ...schema };
278
+ cache.set(schema, result);
249
279
  const isObjectType = result.type === "object";
250
280
  if (isObjectType) {
251
281
  result.additionalProperties = false;
@@ -263,7 +293,7 @@ export function enforceStrictSchema(schema: Record<string, unknown>, seen?: Weak
263
293
  Object.entries(props).map(([key, value]) => {
264
294
  const processed =
265
295
  value != null && typeof value === "object" && !Array.isArray(value)
266
- ? enforceStrictSchema(value as Record<string, unknown>, seen)
296
+ ? enforceStrictSchema(value as Record<string, unknown>, seen, cache)
267
297
  : value;
268
298
  // Optional property — wrap as nullable so strict mode accepts it
269
299
  if (!originalRequired.has(key)) {
@@ -287,18 +317,18 @@ export function enforceStrictSchema(schema: Record<string, unknown>, seen?: Weak
287
317
  if (Array.isArray(result.items)) {
288
318
  result.items = result.items.map(entry =>
289
319
  entry != null && typeof entry === "object" && !Array.isArray(entry)
290
- ? enforceStrictSchema(entry as Record<string, unknown>, seen)
320
+ ? enforceStrictSchema(entry as Record<string, unknown>, seen, cache)
291
321
  : entry,
292
322
  );
293
323
  } else {
294
- result.items = enforceStrictSchema(result.items as Record<string, unknown>, seen);
324
+ result.items = enforceStrictSchema(result.items as Record<string, unknown>, seen, cache);
295
325
  }
296
326
  }
297
327
  for (const key of COMBINATOR_KEYS) {
298
328
  if (Array.isArray(result[key])) {
299
329
  result[key] = (result[key] as unknown[]).map(entry =>
300
330
  entry != null && typeof entry === "object" && !Array.isArray(entry)
301
- ? enforceStrictSchema(entry as Record<string, unknown>, seen)
331
+ ? enforceStrictSchema(entry as Record<string, unknown>, seen, cache)
302
332
  : entry,
303
333
  );
304
334
  }
@@ -310,7 +340,7 @@ export function enforceStrictSchema(schema: Record<string, unknown>, seen?: Weak
310
340
  Object.entries(defs).map(([name, def]) => [
311
341
  name,
312
342
  def != null && typeof def === "object" && !Array.isArray(def)
313
- ? enforceStrictSchema(def as Record<string, unknown>, seen)
343
+ ? enforceStrictSchema(def as Record<string, unknown>, seen, cache)
314
344
  : def,
315
345
  ]),
316
346
  );
@@ -326,6 +356,7 @@ export function enforceStrictSchema(schema: Record<string, unknown>, seen?: Weak
326
356
  ) {
327
357
  throw new Error("Schema node has no type, combinator, or $ref — cannot enforce strict mode");
328
358
  }
359
+ seen.delete(schema);
329
360
  return result;
330
361
  }
331
362