@oh-my-pi/pi-ai 14.3.0 → 14.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.4.0] - 2026-04-26
6
+
7
+ ### Added
8
+
9
+ - Added an `examples` option to `StringEnum` to include example values in the generated schema
10
+
11
+ ### Changed
12
+
13
+ - Changed Anthropic tool schema generation to strip unsupported schema fields (including `patternProperties`), add `additionalProperties: false` for object types, and apply Anthropic strict-mode limits when marking tools as strict
14
+ - Changed Anthropic strict tool planning to cap strict `tools` at twenty entries and convert excess optional/union parameters to nullable schemas to stay within provider constraints
15
+
16
+ ### Fixed
17
+
18
+ - Fixed Anthropic tool schema compilation failures by keeping the `write` tool out of the strict-tool allowlist when the full coding-agent tool set is active
19
+ - Fixed Anthropic 400 errors (`tools.*.custom: For 'object' type, property 'minItems' is not supported`) by stripping `minItems` from object-shaped JSON schema nodes (array nodes still keep supported `minItems` values)
20
+ - Fixed Anthropic tool schemas that used tuple-style arrays by stripping unsupported `maxItems` and only preserving provider-supported `minItems` values
21
+ - Fixed Anthropic and OpenRouter Anthropic tool calls that previously failed with `compiled grammar is too large` by retrying automatically without strict tool schemas and reusing non-strict mode for subsequent requests in the same provider session
22
+ - Fixed parsing of JSON tool arguments containing raw control characters inside string values (such as embedded newlines) by escaping them before JSON parsing
23
+ - Fixed `validateToolArguments` to accept stringified objects and arrays that include literal control characters inside string fields
24
+ - Fixed OpenAI Codex Spark OAuth selection to fall back to non-Pro accounts when no ChatGPT Pro account is connected, so users without a Pro account can still attempt Spark requests in case the server permits access
25
+
5
26
  ## [14.3.0] - 2026-04-25
6
27
 
7
28
  ### Added
@@ -2110,4 +2131,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2110
2131
 
2111
2132
  ## [0.9.4] - 2025-11-26
2112
2133
 
2113
- Initial release with multi-provider LLM support.
2134
+ Initial release with multi-provider LLM support.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.3.0",
4
+ "version": "14.4.1",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.36",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.50.1",
49
- "@oh-my-pi/pi-natives": "14.3.0",
50
- "@oh-my-pi/pi-utils": "14.3.0",
49
+ "@oh-my-pi/pi-natives": "14.4.1",
50
+ "@oh-my-pi/pi-utils": "14.4.1",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -1711,6 +1711,11 @@ export class AuthStorage {
1711
1711
  }),
1712
1712
  );
1713
1713
 
1714
+ // Skip the Pro-plan filter when no candidate is confirmed Pro, so users with only
1715
+ // non-Pro accounts can still attempt Spark requests (e.g. trial/grandfathered access).
1716
+ const enforceProRequirement =
1717
+ requiresProModel && candidates.some(candidate => hasOpenAICodexProPlan(candidate.usage));
1718
+
1714
1719
  const fallback = candidates[0];
1715
1720
 
1716
1721
  for (const candidate of candidates) {
@@ -1719,6 +1724,7 @@ export class AuthStorage {
1719
1724
  allowBlocked: false,
1720
1725
  prefetchedUsage: candidate.usage,
1721
1726
  usagePrechecked: candidate.usageChecked,
1727
+ enforceProRequirement,
1722
1728
  });
1723
1729
  if (apiKey) return apiKey;
1724
1730
  }
@@ -1729,6 +1735,7 @@ export class AuthStorage {
1729
1735
  allowBlocked: true,
1730
1736
  prefetchedUsage: fallback.usage,
1731
1737
  usagePrechecked: fallback.usageChecked,
1738
+ enforceProRequirement,
1732
1739
  });
1733
1740
  }
1734
1741
 
@@ -1774,14 +1781,22 @@ export class AuthStorage {
1774
1781
  allowBlocked: boolean;
1775
1782
  prefetchedUsage?: UsageReport | null;
1776
1783
  usagePrechecked?: boolean;
1784
+ enforceProRequirement?: boolean;
1777
1785
  },
1778
1786
  ): Promise<string | undefined> {
1779
- const { checkUsage, allowBlocked, prefetchedUsage = null, usagePrechecked = false } = usageOptions;
1787
+ const {
1788
+ checkUsage,
1789
+ allowBlocked,
1790
+ prefetchedUsage = null,
1791
+ usagePrechecked = false,
1792
+ enforceProRequirement,
1793
+ } = usageOptions;
1780
1794
  if (!allowBlocked && this.#isCredentialBlocked(providerKey, selection.index)) {
1781
1795
  return undefined;
1782
1796
  }
1783
1797
 
1784
1798
  const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
1799
+ const applyProFilter = enforceProRequirement ?? requiresProModel;
1785
1800
  let usage: UsageReport | null = null;
1786
1801
  let usageChecked = false;
1787
1802
 
@@ -1796,7 +1811,7 @@ export class AuthStorage {
1796
1811
  });
1797
1812
  usageChecked = true;
1798
1813
  }
1799
- if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
1814
+ if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
1800
1815
  return undefined;
1801
1816
  }
1802
1817
  if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
@@ -1846,7 +1861,7 @@ export class AuthStorage {
1846
1861
  });
1847
1862
  usageChecked = true;
1848
1863
  }
1849
- if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
1864
+ if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
1850
1865
  return undefined;
1851
1866
  }
1852
1867
  if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
package/src/models.json CHANGED
@@ -4751,6 +4751,56 @@
4751
4751
  }
4752
4752
  }
4753
4753
  },
4754
+ "deepseek": {
4755
+ "deepseek-v4-flash": {
4756
+ "id": "deepseek-v4-flash",
4757
+ "name": "DeepSeek V4 Flash",
4758
+ "api": "openai-completions",
4759
+ "provider": "deepseek",
4760
+ "baseUrl": "https://api.deepseek.com",
4761
+ "reasoning": true,
4762
+ "input": [
4763
+ "text"
4764
+ ],
4765
+ "cost": {
4766
+ "input": 0.14,
4767
+ "output": 0.28,
4768
+ "cacheRead": 0,
4769
+ "cacheWrite": 0
4770
+ },
4771
+ "contextWindow": 1000000,
4772
+ "maxTokens": 384000,
4773
+ "thinking": {
4774
+ "mode": "effort",
4775
+ "minLevel": "minimal",
4776
+ "maxLevel": "high"
4777
+ }
4778
+ },
4779
+ "deepseek-v4-pro": {
4780
+ "id": "deepseek-v4-pro",
4781
+ "name": "DeepSeek V4 Pro",
4782
+ "api": "openai-completions",
4783
+ "provider": "deepseek",
4784
+ "baseUrl": "https://api.deepseek.com",
4785
+ "reasoning": true,
4786
+ "input": [
4787
+ "text"
4788
+ ],
4789
+ "cost": {
4790
+ "input": 1.74,
4791
+ "output": 3.48,
4792
+ "cacheRead": 0,
4793
+ "cacheWrite": 0
4794
+ },
4795
+ "contextWindow": 1000000,
4796
+ "maxTokens": 384000,
4797
+ "thinking": {
4798
+ "mode": "effort",
4799
+ "minLevel": "minimal",
4800
+ "maxLevel": "high"
4801
+ }
4802
+ }
4803
+ },
4754
4804
  "github-copilot": {
4755
4805
  "claude-haiku-4.5": {
4756
4806
  "id": "claude-haiku-4.5",
@@ -19,6 +19,7 @@ import type {
19
19
  ImageContent,
20
20
  Message,
21
21
  Model,
22
+ ProviderSessionState,
22
23
  RedactedThinkingContent,
23
24
  SimpleStreamOptions,
24
25
  StopReason,
@@ -31,7 +32,7 @@ import type {
31
32
  ToolResultMessage,
32
33
  Usage,
33
34
  } from "../types";
34
- import { isAnthropicOAuthToken, normalizeToolCallId, resolveCacheRetention } from "../utils";
35
+ import { isAnthropicOAuthToken, isRecord, normalizeToolCallId, resolveCacheRetention } from "../utils";
35
36
  import { createAbortSourceTracker } from "../utils/abort";
36
37
  import { AssistantMessageEventStream } from "../utils/event-stream";
37
38
  import { isFoundryEnabled } from "../utils/foundry";
@@ -39,7 +40,8 @@ import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } fr
39
40
  import { createWatchdog, getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
40
41
  import { parseStreamingJson } from "../utils/json-parse";
41
42
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
42
- import { isCopilotRetryableError } from "../utils/retry";
43
+ import { extractHttpStatusFromError, isCopilotRetryableError } from "../utils/retry";
44
+ import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
43
45
  import {
44
46
  buildCopilotDynamicHeaders,
45
47
  hasCopilotVisionInput,
@@ -82,7 +84,6 @@ export function buildBetaHeader(baseBetas: string[], extraBetas: string[]): stri
82
84
  const claudeCodeBetaDefaults = [
83
85
  "claude-code-20250219",
84
86
  "oauth-2025-04-20",
85
- "interleaved-thinking-2025-05-14",
86
87
  "context-management-2025-06-27",
87
88
  "prompt-caching-scope-2026-01-05",
88
89
  ];
@@ -180,6 +181,58 @@ function supportsAdaptiveThinkingDisplay(modelId: string): boolean {
180
181
  const minor = Number(match[2]);
181
182
  return major > 4 || (major === 4 && minor >= 7);
182
183
  }
184
+
185
+ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
186
+
187
+ type AnthropicProviderSessionState = ProviderSessionState & {
188
+ strictToolsDisabled: boolean;
189
+ };
190
+
191
+ function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
192
+ const state: AnthropicProviderSessionState = {
193
+ strictToolsDisabled: false,
194
+ close: () => {
195
+ state.strictToolsDisabled = false;
196
+ },
197
+ };
198
+ return state;
199
+ }
200
+
201
+ function getAnthropicProviderSessionState(
202
+ providerSessionState: Map<string, ProviderSessionState> | undefined,
203
+ ): AnthropicProviderSessionState | undefined {
204
+ if (!providerSessionState) return undefined;
205
+ const existing = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
206
+ | AnthropicProviderSessionState
207
+ | undefined;
208
+ if (existing) return existing;
209
+ const created = createAnthropicProviderSessionState();
210
+ providerSessionState.set(ANTHROPIC_PROVIDER_SESSION_STATE_KEY, created);
211
+ return created;
212
+ }
213
+
214
+ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
215
+ if (extractHttpStatusFromError(error) !== 400) return false;
216
+ const message = error instanceof Error ? error.message : String(error);
217
+ const isStrictGrammarTooLarge = /compiled grammar/i.test(message) && /too large/i.test(message);
218
+ const isSchemaCompilationTooComplex =
219
+ /schema/i.test(message) && /too complex/i.test(message) && /compil/i.test(message);
220
+ return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
221
+ }
222
+
223
+ function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
224
+ const tools = params.tools as Array<{ strict?: unknown }> | undefined;
225
+ return tools?.some(tool => tool.strict === true) ?? false;
226
+ }
227
+
228
+ function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
229
+ const tools = params.tools as Array<{ strict?: unknown }> | undefined;
230
+ if (!tools) return;
231
+ for (const tool of tools) {
232
+ delete tool.strict;
233
+ }
234
+ }
235
+
183
236
  function getCacheControl(
184
237
  baseUrl: string,
185
238
  cacheRetention?: CacheRetention,
@@ -705,19 +758,29 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
705
758
  const baseUrl =
706
759
  resolveAnthropicBaseUrl(model, options?.apiKey ?? getEnvApiKey(model.provider) ?? "") ??
707
760
  "https://api.anthropic.com";
708
- let params = buildParams(model, baseUrl, context, isOAuthToken, options);
709
- const replacementPayload = await options?.onPayload?.(params, model);
710
- if (replacementPayload !== undefined) {
711
- params = replacementPayload as typeof params;
712
- }
713
- rawRequestDump = {
714
- provider: model.provider,
715
- api: output.api,
716
- model: model.id,
717
- method: "POST",
718
- url: `${baseUrl}/v1/messages`,
719
- body: params,
761
+ const providerSessionState = getAnthropicProviderSessionState(options?.providerSessionState);
762
+ let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
763
+ let strictFallbackErrorMessage: string | undefined;
764
+ const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
765
+ let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
766
+ const replacementPayload = await options?.onPayload?.(nextParams, model);
767
+ if (replacementPayload !== undefined) {
768
+ nextParams = replacementPayload as typeof nextParams;
769
+ }
770
+ if (disableStrictTools) {
771
+ dropAnthropicStrictTools(nextParams);
772
+ }
773
+ rawRequestDump = {
774
+ provider: model.provider,
775
+ api: output.api,
776
+ model: model.id,
777
+ method: "POST",
778
+ url: `${baseUrl}/v1/messages`,
779
+ body: nextParams,
780
+ };
781
+ return nextParams;
720
782
  };
783
+ let params = await prepareParams();
721
784
 
722
785
  type Block = (
723
786
  | ThinkingContent
@@ -954,6 +1017,28 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
954
1017
  break;
955
1018
  } catch (streamError) {
956
1019
  const streamFailure = activeAbortTracker.getLocalAbortReason() ?? streamError;
1020
+ if (
1021
+ !disableStrictTools &&
1022
+ firstTokenTime === undefined &&
1023
+ hasStrictAnthropicTools(params) &&
1024
+ isAnthropicStrictGrammarTooLargeError(streamFailure)
1025
+ ) {
1026
+ strictFallbackErrorMessage = await finalizeErrorMessage(streamFailure, rawRequestDump);
1027
+ output.errorMessage = strictFallbackErrorMessage;
1028
+ if (providerSessionState) {
1029
+ providerSessionState.strictToolsDisabled = true;
1030
+ }
1031
+ disableStrictTools = true;
1032
+ params = await prepareParams();
1033
+ providerRetryAttempt = 0;
1034
+ output.content.length = 0;
1035
+ output.responseId = undefined;
1036
+ output.providerPayload = undefined;
1037
+ output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1038
+ output.stopReason = "stop";
1039
+ firstTokenTime = undefined;
1040
+ continue;
1041
+ }
957
1042
  const isTransientEnvelopeFailure =
958
1043
  isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
959
1044
  const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -971,7 +1056,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
971
1056
  await abortableSleep(delayMs, options?.signal);
972
1057
  output.content.length = 0;
973
1058
  output.responseId = undefined;
974
- output.errorMessage = undefined;
1059
+ output.errorMessage = strictFallbackErrorMessage;
975
1060
  output.providerPayload = undefined;
976
1061
  output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
977
1062
  output.stopReason = "stop";
@@ -1074,6 +1159,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1074
1159
  dynamicHeaders,
1075
1160
  isOAuth,
1076
1161
  } = args;
1162
+ const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinkingDisplay(model.id);
1077
1163
  const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
1078
1164
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
1079
1165
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
@@ -1107,7 +1193,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1107
1193
  }
1108
1194
 
1109
1195
  const betaFeatures = [...extraBetas];
1110
- if (interleavedThinking) {
1196
+ if (needsInterleavedBeta) {
1111
1197
  betaFeatures.push("interleaved-thinking-2025-05-14");
1112
1198
  }
1113
1199
 
@@ -1402,6 +1488,7 @@ function buildParams(
1402
1488
  context: Context,
1403
1489
  isOAuthToken: boolean,
1404
1490
  options?: AnthropicOptions,
1491
+ disableStrictTools = false,
1405
1492
  ): MessageCreateParamsStreaming {
1406
1493
  const { cacheControl } = getCacheControl(baseUrl, options?.cacheRetention);
1407
1494
  const params: AnthropicSamplingParams = {
@@ -1411,9 +1498,6 @@ function buildParams(
1411
1498
  stream: true,
1412
1499
  };
1413
1500
 
1414
- if (options?.temperature !== undefined) {
1415
- params.temperature = options.temperature;
1416
- }
1417
1501
  if (options?.topP !== undefined) {
1418
1502
  params.top_p = options.topP;
1419
1503
  }
@@ -1423,13 +1507,16 @@ function buildParams(
1423
1507
 
1424
1508
  // Opus 4.7+ rejects non-default sampling parameters with 400 error.
1425
1509
  if (hasOpus47ApiRestrictions(model.id)) {
1426
- delete params.temperature;
1427
- delete (params as AnthropicSamplingParams).top_p;
1428
- delete (params as AnthropicSamplingParams).top_k;
1510
+ delete params.top_p;
1511
+ delete params.top_k;
1429
1512
  }
1430
1513
 
1431
1514
  if (context.tools) {
1432
- params.tools = convertTools(context.tools, isOAuthToken);
1515
+ params.tools = convertTools(
1516
+ context.tools,
1517
+ isOAuthToken,
1518
+ disableStrictTools || model.provider === "github-copilot",
1519
+ );
1433
1520
  }
1434
1521
 
1435
1522
  if (options?.thinkingEnabled && model.reasoning && model.provider !== "github-copilot") {
@@ -1667,20 +1754,309 @@ export function convertAnthropicMessages(
1667
1754
  return params;
1668
1755
  }
1669
1756
 
1670
- function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
1671
- if (!tools) return [];
1757
+ const ANTHROPIC_UNSUPPORTED_TOOL_SCHEMA_FIELDS = new Set(["maxItems", "patternProperties"]);
1758
+ const ANTHROPIC_STRICT_TOOL_ALLOWLIST = new Set(["bash", "python", "edit", "find"]);
1759
+ const MAX_ANTHROPIC_STRICT_TOOLS = 20;
1760
+ const MAX_ANTHROPIC_STRICT_OPTIONAL_PARAMETERS = 24;
1761
+ const MAX_ANTHROPIC_STRICT_UNION_PARAMETERS = 16;
1762
+
1763
+ /** `minItems` / `maxItems` apply to arrays; Anthropic rejects them on `type: "object"` (including `minItems: 0`/`1`). */
1764
+ function isJsonSchemaArrayNode(schema: Record<string, unknown>): boolean {
1765
+ const t = schema.type;
1766
+ if (t === "array") return true;
1767
+ if (Array.isArray(t) && t.includes("array") && !t.includes("object")) return true;
1768
+ return false;
1769
+ }
1770
+
1771
+ function isJsonSchemaObjectNode(schema: Record<string, unknown>): boolean {
1772
+ if (isJsonSchemaArrayNode(schema)) return false;
1773
+ if (schema.type === "object") return true;
1774
+ if (Array.isArray(schema.type) && schema.type.includes("object")) return true;
1775
+ if (isRecord(schema.properties)) return true;
1776
+ return false;
1777
+ }
1778
+
1779
+ function normalizeAnthropicToolSchema(
1780
+ schema: unknown,
1781
+ cache: WeakMap<Record<string, unknown>, Record<string, unknown>> = new WeakMap(),
1782
+ ): unknown {
1783
+ if (!isRecord(schema)) return schema;
1784
+
1785
+ const cached = cache.get(schema);
1786
+ if (cached) return cached;
1787
+
1788
+ const result = Object.fromEntries(
1789
+ Object.entries(schema).filter(([key]) => !ANTHROPIC_UNSUPPORTED_TOOL_SCHEMA_FIELDS.has(key)),
1790
+ );
1791
+ cache.set(schema, result);
1792
+ if (isJsonSchemaObjectNode(result)) {
1793
+ delete result.minItems;
1794
+ } else {
1795
+ const minItems = result.minItems;
1796
+ if (typeof minItems === "number" && minItems !== 0 && minItems !== 1) {
1797
+ delete result.minItems;
1798
+ }
1799
+ }
1800
+
1801
+ const type = result.type;
1802
+ const canBeObject =
1803
+ type === "object" || (Array.isArray(type) && type.includes("object")) || isRecord(result.properties);
1804
+ if (canBeObject) {
1805
+ result.additionalProperties = false;
1806
+ }
1807
+
1808
+ if (isRecord(result.properties)) {
1809
+ result.properties = Object.fromEntries(
1810
+ Object.entries(result.properties).map(([propertyName, propertySchema]) => [
1811
+ propertyName,
1812
+ normalizeAnthropicToolSchema(propertySchema, cache),
1813
+ ]),
1814
+ );
1815
+ }
1816
+
1817
+ if (Array.isArray(result.items)) {
1818
+ result.items = result.items.map(item => normalizeAnthropicToolSchema(item, cache));
1819
+ } else if (isRecord(result.items)) {
1820
+ result.items = normalizeAnthropicToolSchema(result.items, cache);
1821
+ }
1822
+
1823
+ for (const key of COMBINATOR_KEYS) {
1824
+ const variants = result[key];
1825
+ if (Array.isArray(variants)) {
1826
+ result[key] = variants.map(variant => normalizeAnthropicToolSchema(variant, cache));
1827
+ }
1828
+ }
1829
+
1830
+ for (const defsKey of ["$defs", "definitions"] as const) {
1831
+ const definitions = result[defsKey];
1832
+ if (!isRecord(definitions)) continue;
1833
+ result[defsKey] = Object.fromEntries(
1834
+ Object.entries(definitions).map(([definitionName, definitionSchema]) => [
1835
+ definitionName,
1836
+ normalizeAnthropicToolSchema(definitionSchema, cache),
1837
+ ]),
1838
+ );
1839
+ }
1840
+
1841
+ return result;
1842
+ }
1843
+
1844
+ type AnthropicToolInputSchema = Anthropic.Messages.Tool["input_schema"];
1845
+
1846
+ type AnthropicToolSchemaPlan = {
1847
+ inputSchema: AnthropicToolInputSchema;
1848
+ strict: boolean;
1849
+ };
1850
+
1851
+ type AnthropicStrictBudget = {
1852
+ optionalRemaining: number;
1853
+ unionRemaining: number;
1854
+ optionalCount: number;
1855
+ unionCount: number;
1856
+ };
1857
+
1858
+ function hasAnthropicUnionType(schema: Record<string, unknown>): boolean {
1859
+ return Array.isArray(schema.type) || Array.isArray(schema.anyOf);
1860
+ }
1861
+
1862
+ function hasNullVariant(schema: Record<string, unknown>): boolean {
1863
+ if (Array.isArray(schema.type) && schema.type.includes("null")) return true;
1864
+ return Array.isArray(schema.anyOf) && schema.anyOf.some(variant => isRecord(variant) && variant.type === "null");
1865
+ }
1866
+
1867
+ function makeAnthropicNullableSchema(schema: unknown, budget: AnthropicStrictBudget): unknown | undefined {
1868
+ if (isRecord(schema)) {
1869
+ if (hasNullVariant(schema)) return schema;
1870
+ if (Array.isArray(schema.anyOf)) {
1871
+ return { ...schema, anyOf: [...schema.anyOf, { type: "null" }] };
1872
+ }
1873
+ if (Array.isArray(schema.type)) {
1874
+ return { ...schema, type: [...schema.type, "null"] };
1875
+ }
1876
+ }
1877
+
1878
+ if (budget.unionRemaining <= 0) return undefined;
1879
+ budget.unionRemaining--;
1880
+ budget.unionCount++;
1881
+ return { anyOf: [schema, { type: "null" }] };
1882
+ }
1883
+
1884
+ function normalizeAnthropicStrictSchemaNode(
1885
+ schema: unknown,
1886
+ budget: AnthropicStrictBudget,
1887
+ cache: WeakMap<Record<string, unknown>, Record<string, unknown>>,
1888
+ ): unknown | undefined {
1889
+ if (Array.isArray(schema)) {
1890
+ const result: unknown[] = [];
1891
+ for (const entry of schema) {
1892
+ const normalized = normalizeAnthropicStrictSchemaNode(entry, budget, cache);
1893
+ if (normalized === undefined) return undefined;
1894
+ result.push(normalized);
1895
+ }
1896
+ return result;
1897
+ }
1898
+
1899
+ if (!isRecord(schema)) return schema;
1900
+
1901
+ const cached = cache.get(schema);
1902
+ if (cached) return cached;
1903
+
1904
+ const result: Record<string, unknown> = { ...schema };
1905
+ cache.set(schema, result);
1906
+
1907
+ if (hasAnthropicUnionType(result)) {
1908
+ if (budget.unionRemaining <= 0) return undefined;
1909
+ budget.unionRemaining--;
1910
+ budget.unionCount++;
1911
+ }
1912
+
1913
+ if (isRecord(result.properties)) {
1914
+ const originalRequired = new Set(
1915
+ Array.isArray(result.required)
1916
+ ? result.required.filter((entry): entry is string => typeof entry === "string")
1917
+ : [],
1918
+ );
1919
+ const properties: Record<string, unknown> = {};
1920
+ const required: string[] = [];
1921
+
1922
+ for (const [propertyName, propertySchema] of Object.entries(result.properties)) {
1923
+ const normalizedProperty = normalizeAnthropicStrictSchemaNode(propertySchema, budget, cache);
1924
+ if (normalizedProperty === undefined) return undefined;
1925
+
1926
+ if (originalRequired.has(propertyName)) {
1927
+ properties[propertyName] = normalizedProperty;
1928
+ required.push(propertyName);
1929
+ continue;
1930
+ }
1931
+
1932
+ if (budget.optionalRemaining > 0) {
1933
+ budget.optionalRemaining--;
1934
+ budget.optionalCount++;
1935
+ properties[propertyName] = normalizedProperty;
1936
+ continue;
1937
+ }
1938
+
1939
+ const nullableProperty = makeAnthropicNullableSchema(normalizedProperty, budget);
1940
+ if (nullableProperty === undefined) return undefined;
1941
+ properties[propertyName] = nullableProperty;
1942
+ required.push(propertyName);
1943
+ }
1944
+
1945
+ result.properties = properties;
1946
+ result.required = required;
1947
+ }
1948
+
1949
+ if (Array.isArray(result.items)) {
1950
+ const items = normalizeAnthropicStrictSchemaNode(result.items, budget, cache);
1951
+ if (items === undefined) return undefined;
1952
+ result.items = items;
1953
+ } else if (isRecord(result.items)) {
1954
+ const items = normalizeAnthropicStrictSchemaNode(result.items, budget, cache);
1955
+ if (items === undefined) return undefined;
1956
+ result.items = items;
1957
+ }
1958
+
1959
+ for (const key of COMBINATOR_KEYS) {
1960
+ const variants = result[key];
1961
+ if (!Array.isArray(variants)) continue;
1962
+ const normalizedVariants = normalizeAnthropicStrictSchemaNode(variants, budget, cache);
1963
+ if (normalizedVariants === undefined) return undefined;
1964
+ result[key] = normalizedVariants;
1965
+ }
1672
1966
 
1673
- return tools.map(tool => {
1674
- const jsonSchema = tool.parameters as any; // TypeBox already generates JSON Schema
1967
+ for (const defsKey of ["$defs", "definitions"] as const) {
1968
+ const definitions = result[defsKey];
1969
+ if (!isRecord(definitions)) continue;
1970
+ const normalizedDefinitions: Record<string, unknown> = {};
1971
+ for (const [definitionName, definitionSchema] of Object.entries(definitions)) {
1972
+ const normalizedDefinition = normalizeAnthropicStrictSchemaNode(definitionSchema, budget, cache);
1973
+ if (normalizedDefinition === undefined) return undefined;
1974
+ normalizedDefinitions[definitionName] = normalizedDefinition;
1975
+ }
1976
+ result[defsKey] = normalizedDefinitions;
1977
+ }
1675
1978
 
1979
+ return result;
1980
+ }
1981
+
1982
+ function normalizeAnthropicStrictSchema(
1983
+ schema: Record<string, unknown>,
1984
+ optionalRemaining: number,
1985
+ unionRemaining: number,
1986
+ ): { schema: Record<string, unknown>; optionalCount: number; unionCount: number } | undefined {
1987
+ const budget: AnthropicStrictBudget = {
1988
+ optionalRemaining,
1989
+ unionRemaining,
1990
+ optionalCount: 0,
1991
+ unionCount: 0,
1992
+ };
1993
+ const normalized = normalizeAnthropicStrictSchemaNode(schema, budget, new WeakMap());
1994
+ if (!isRecord(normalized)) return undefined;
1995
+ return { schema: normalized, optionalCount: budget.optionalCount, unionCount: budget.unionCount };
1996
+ }
1997
+
1998
+ function buildAnthropicBaseToolInputSchema(tool: Tool): Record<string, unknown> {
1999
+ const jsonSchema = tool.parameters as Record<string, unknown>;
2000
+ return normalizeAnthropicToolSchema({
2001
+ ...jsonSchema,
2002
+ type: "object",
2003
+ properties: isRecord(jsonSchema.properties) ? jsonSchema.properties : {},
2004
+ required: Array.isArray(jsonSchema.required)
2005
+ ? jsonSchema.required.filter((entry): entry is string => typeof entry === "string")
2006
+ : [],
2007
+ }) as Record<string, unknown>;
2008
+ }
2009
+
2010
+ function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false): AnthropicToolSchemaPlan[] {
2011
+ const plans = tools.map(
2012
+ (tool): AnthropicToolSchemaPlan => ({
2013
+ inputSchema: buildAnthropicBaseToolInputSchema(tool) as AnthropicToolInputSchema,
2014
+ strict: false,
2015
+ }),
2016
+ );
2017
+ if (NO_STRICT || disableStrictTools) return plans;
2018
+
2019
+ const candidateIndexes = tools.flatMap((tool, index) => {
2020
+ if (!ANTHROPIC_STRICT_TOOL_ALLOWLIST.has(tool.name)) return [];
2021
+ return tool.strict === false ? [] : [index];
2022
+ });
2023
+
2024
+ let strictToolCount = 0;
2025
+ let strictOptionalParameterCount = 0;
2026
+ let strictUnionParameterCount = 0;
2027
+ for (const index of candidateIndexes) {
2028
+ if (strictToolCount >= MAX_ANTHROPIC_STRICT_TOOLS) break;
2029
+
2030
+ const strictResult = normalizeAnthropicStrictSchema(
2031
+ plans[index].inputSchema as Record<string, unknown>,
2032
+ MAX_ANTHROPIC_STRICT_OPTIONAL_PARAMETERS - strictOptionalParameterCount,
2033
+ MAX_ANTHROPIC_STRICT_UNION_PARAMETERS - strictUnionParameterCount,
2034
+ );
2035
+ if (!strictResult) continue;
2036
+
2037
+ plans[index] = {
2038
+ inputSchema: strictResult.schema as AnthropicToolInputSchema,
2039
+ strict: true,
2040
+ };
2041
+ strictToolCount++;
2042
+ strictOptionalParameterCount += strictResult.optionalCount;
2043
+ strictUnionParameterCount += strictResult.unionCount;
2044
+ }
2045
+
2046
+ return plans;
2047
+ }
2048
+
2049
+ function convertTools(tools: Tool[], isOAuthToken: boolean, disableStrictTools = false): Anthropic.Messages.Tool[] {
2050
+ if (!tools) return [];
2051
+ const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
2052
+
2053
+ return tools.map((tool, index) => {
2054
+ const plan = schemaPlans[index];
1676
2055
  return {
1677
2056
  name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
1678
2057
  description: tool.description || "",
1679
- input_schema: {
1680
- type: "object" as const,
1681
- properties: jsonSchema.properties || {},
1682
- required: jsonSchema.required || [],
1683
- },
2058
+ input_schema: plan.inputSchema,
2059
+ ...(plan.strict ? { strict: true } : {}),
1684
2060
  };
1685
2061
  });
1686
2062
  }
@@ -10,11 +10,11 @@ import {
10
10
  type Api,
11
11
  type AssistantMessage,
12
12
  type Context,
13
- isSpecialServiceTier,
14
13
  type Model,
15
14
  type ServiceTier,
16
15
  type StreamFunction,
17
16
  type StreamOptions,
17
+ shouldSendServiceTier,
18
18
  type Tool,
19
19
  type ToolChoice,
20
20
  } from "../types";
@@ -298,7 +298,7 @@ function buildParams(
298
298
  if (options?.repetitionPenalty !== undefined) {
299
299
  params.repetition_penalty = options.repetitionPenalty;
300
300
  }
301
- if (isSpecialServiceTier(options?.serviceTier)) {
301
+ if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
302
302
  params.service_tier = options.serviceTier;
303
303
  }
304
304
 
@@ -19,12 +19,12 @@ import {
19
19
  type Api,
20
20
  type AssistantMessage,
21
21
  type Context,
22
- isSpecialServiceTier,
23
22
  type Model,
24
23
  type ProviderSessionState,
25
24
  type ServiceTier,
26
25
  type StreamFunction,
27
26
  type StreamOptions,
27
+ shouldSendServiceTier,
28
28
  type TextContent,
29
29
  type ThinkingContent,
30
30
  type Tool,
@@ -75,27 +75,6 @@ export interface OpenAICodexResponsesOptions extends StreamOptions {
75
75
  serviceTier?: ServiceTier;
76
76
  }
77
77
 
78
- export const CODEX_INSTRUCTIONS = `You are an expert coding assistant operating inside pi, a coding agent harness.`;
79
-
80
- export interface CodexSystemPrompt {
81
- instructions: string;
82
- developerMessages: string[];
83
- }
84
-
85
- export function buildCodexSystemPrompt(args: { userSystemPrompt?: string }): CodexSystemPrompt {
86
- const { userSystemPrompt } = args;
87
- const developerMessages: string[] = [];
88
-
89
- if (userSystemPrompt && userSystemPrompt.trim().length > 0) {
90
- developerMessages.push(userSystemPrompt.trim());
91
- }
92
-
93
- return {
94
- instructions: CODEX_INSTRUCTIONS,
95
- developerMessages,
96
- };
97
- }
98
-
99
78
  const CODEX_DEBUG = $flag("PI_CODEX_DEBUG");
100
79
  const CODEX_MAX_RETRIES = 5;
101
80
  const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
@@ -514,7 +493,7 @@ async function buildTransformedCodexRequestBody(
514
493
  if (options?.repetitionPenalty !== undefined) {
515
494
  params.repetition_penalty = options.repetitionPenalty;
516
495
  }
517
- if (isSpecialServiceTier(options?.serviceTier)) {
496
+ if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
518
497
  params.service_tier = options.serviceTier;
519
498
  }
520
499
  if (context.tools && context.tools.length > 0) {
@@ -537,8 +516,7 @@ async function buildTransformedCodexRequestBody(
537
516
  }
538
517
  }
539
518
 
540
- const systemPrompt = buildCodexSystemPrompt({ userSystemPrompt: context.systemPrompt });
541
- params.instructions = systemPrompt.instructions;
519
+ params.instructions = context.systemPrompt;
542
520
 
543
521
  const codexOptions: CodexRequestOptions = {
544
522
  reasoningEffort: options?.reasoning,
@@ -547,7 +525,7 @@ async function buildTransformedCodexRequestBody(
547
525
  include: options?.include,
548
526
  };
549
527
 
550
- return transformRequestBody(params, model, codexOptions, systemPrompt);
528
+ return transformRequestBody(params, model, codexOptions);
551
529
  }
552
530
 
553
531
  async function openInitialCodexEventStream(
@@ -14,14 +14,15 @@ import { getEnvApiKey } from "../stream";
14
14
  import {
15
15
  type AssistantMessage,
16
16
  type Context,
17
- isSpecialServiceTier,
18
17
  type Message,
19
18
  type MessageAttribution,
20
19
  type Model,
20
+ type ProviderSessionState,
21
21
  type ServiceTier,
22
22
  type StopReason,
23
23
  type StreamFunction,
24
24
  type StreamOptions,
25
+ shouldSendServiceTier,
25
26
  type TextContent,
26
27
  type ThinkingContent,
27
28
  type Tool,
@@ -139,6 +140,56 @@ type BuiltOpenAICompletionTools = {
139
140
  toolStrictMode: AppliedToolStrictMode;
140
141
  };
141
142
 
143
+ const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";
144
+
145
+ type OpenAICompletionsProviderSessionState = ProviderSessionState & {
146
+ strictToolsDisabled: boolean;
147
+ };
148
+
149
+ function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
150
+ const state: OpenAICompletionsProviderSessionState = {
151
+ strictToolsDisabled: false,
152
+ close: () => {
153
+ state.strictToolsDisabled = false;
154
+ },
155
+ };
156
+ return state;
157
+ }
158
+
159
+ function getOpenAICompletionsProviderSessionState(
160
+ model: Model<"openai-completions">,
161
+ baseUrl: string | undefined,
162
+ providerSessionState: Map<string, ProviderSessionState> | undefined,
163
+ ): OpenAICompletionsProviderSessionState | undefined {
164
+ if (!providerSessionState) return undefined;
165
+ const key = `${OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX}${model.provider}:${baseUrl ?? ""}:${model.id}`;
166
+ const existing = providerSessionState.get(key) as OpenAICompletionsProviderSessionState | undefined;
167
+ if (existing) return existing;
168
+ const created = createOpenAICompletionsProviderSessionState();
169
+ providerSessionState.set(key, created);
170
+ return created;
171
+ }
172
+
173
+ function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
174
+ return model.provider === "openrouter" && model.id.toLowerCase().startsWith("anthropic/");
175
+ }
176
+
177
+ function isCompiledGrammarTooLargeStrictError(
178
+ error: unknown,
179
+ capturedErrorResponse: CapturedHttpErrorResponse | undefined,
180
+ ): boolean {
181
+ const status = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
182
+ if (status !== 400) return false;
183
+ const messageParts = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText]
184
+ .filter((value): value is string => typeof value === "string" && value.trim().length > 0)
185
+ .join("\n");
186
+ return (
187
+ /invalid_request_error/i.test(messageParts) &&
188
+ /compiled grammar/i.test(messageParts) &&
189
+ /too large/i.test(messageParts)
190
+ );
191
+ }
192
+
142
193
  // LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
143
194
  // If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
144
195
  // they will be incorrectly consumed as thinking delimiters, truncating visible output.
@@ -227,9 +278,23 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
227
278
  } = await createClient(model, context, apiKey, options?.headers, options?.initiatorOverride);
228
279
  getCapturedErrorResponse = captureErrorResponse;
229
280
  let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
281
+ const providerSessionState = getOpenAICompletionsProviderSessionState(
282
+ model,
283
+ baseUrl,
284
+ options?.providerSessionState,
285
+ );
286
+ let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
287
+ let strictFallbackErrorMessage: string | undefined;
230
288
  const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
231
289
  clearCapturedErrorResponse();
232
- const { params, toolStrictMode } = buildParams(model, context, options, baseUrl, toolStrictModeOverride);
290
+ const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
291
+ const { params, toolStrictMode } = buildParams(
292
+ model,
293
+ context,
294
+ options,
295
+ baseUrl,
296
+ effectiveToolStrictModeOverride,
297
+ );
233
298
  appliedToolStrictMode = toolStrictMode;
234
299
  options?.onPayload?.(params);
235
300
  rawRequestDump = {
@@ -251,10 +316,24 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
251
316
  });
252
317
  } catch (error) {
253
318
  const capturedErrorResponse = getCapturedErrorResponse();
254
- if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
255
- throw error;
319
+ if (
320
+ isOpenRouterAnthropicModel(model) &&
321
+ !disableStrictTools &&
322
+ isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
323
+ ) {
324
+ strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
325
+ output.errorMessage = strictFallbackErrorMessage;
326
+ if (providerSessionState) {
327
+ providerSessionState.strictToolsDisabled = true;
328
+ }
329
+ disableStrictTools = true;
330
+ openaiStream = await createCompletionsStream("none");
331
+ } else {
332
+ if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
333
+ throw error;
334
+ }
335
+ openaiStream = await createCompletionsStream("none");
256
336
  }
257
- openaiStream = await createCompletionsStream("none");
258
337
  }
259
338
  const firstEventWatchdog = createWatchdog(
260
339
  options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
@@ -541,6 +620,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
541
620
  throw new Error(output.errorMessage || "Provider returned an error stop reason");
542
621
  }
543
622
 
623
+ output.errorMessage = strictFallbackErrorMessage;
544
624
  output.duration = Date.now() - startTime;
545
625
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
546
626
  stream.push({ type: "done", reason: output.stopReason, message: output });
@@ -614,6 +694,16 @@ async function createClient(
614
694
  copilotPremiumRequests = copilot.premiumRequests;
615
695
  baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
616
696
  }
697
+ // Azure OpenAI requires /deployments/{id}/chat/completions?api-version=YYYY-MM-DD.
698
+ // The generic openai-completions path adds neither, producing silent 404s.
699
+ let azureDefaultQuery: Record<string, string> | undefined;
700
+ if (baseUrl?.includes(".openai.azure.com")) {
701
+ const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
702
+ if (!baseUrl.includes("/deployments/")) {
703
+ baseUrl = `${baseUrl}/deployments/${model.id}`;
704
+ }
705
+ azureDefaultQuery = { "api-version": apiVersion };
706
+ }
617
707
  let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
618
708
  const wrappedFetch = Object.assign(
619
709
  async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
@@ -649,6 +739,7 @@ async function createClient(
649
739
  dangerouslyAllowBrowser: true,
650
740
  maxRetries: 5,
651
741
  defaultHeaders: headers,
742
+ defaultQuery: azureDefaultQuery,
652
743
  fetch: wrappedFetch,
653
744
  }),
654
745
  copilotPremiumRequests,
@@ -720,7 +811,7 @@ function buildParams(
720
811
  if (options?.repetitionPenalty !== undefined) {
721
812
  params.repetition_penalty = options.repetitionPenalty;
722
813
  }
723
- if (isSpecialServiceTier(options?.serviceTier)) {
814
+ if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
724
815
  params.service_tier = options.serviceTier;
725
816
  }
726
817
 
@@ -1095,6 +1186,10 @@ export function convertMessages(
1095
1186
  (assistantMsg as any).reasoning_details = reasoningDetails;
1096
1187
  }
1097
1188
  }
1189
+ // DeepSeek requires non-null content when reasoning_content is present
1190
+ if (assistantMsg.content === null && hasReasoningField) {
1191
+ assistantMsg.content = "";
1192
+ }
1098
1193
  // Skip assistant messages that have no content, no tool calls, and no reasoning payload.
1099
1194
  // Some OpenAI-compatible backends require replaying reasoning-only assistant turns
1100
1195
  // so follow-up requests preserve the provider-specific reasoning field name.
@@ -11,13 +11,13 @@ import {
11
11
  type AssistantMessage,
12
12
  type CacheRetention,
13
13
  type Context,
14
- isSpecialServiceTier,
15
14
  type MessageAttribution,
16
15
  type Model,
17
16
  type ProviderSessionState,
18
17
  type ServiceTier,
19
18
  type StreamFunction,
20
19
  type StreamOptions,
20
+ shouldSendServiceTier,
21
21
  type Tool,
22
22
  type ToolChoice,
23
23
  } from "../types";
@@ -384,7 +384,7 @@ function buildParams(
384
384
  if (options?.repetitionPenalty !== undefined) {
385
385
  params.repetition_penalty = options.repetitionPenalty;
386
386
  }
387
- if (isSpecialServiceTier(options?.serviceTier)) {
387
+ if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
388
388
  params.service_tier = options.serviceTier;
389
389
  }
390
390
 
package/src/stream.ts CHANGED
@@ -72,6 +72,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
72
72
  "opencode-go": "OPENCODE_API_KEY",
73
73
  "opencode-zen": "OPENCODE_API_KEY",
74
74
  cursor: "CURSOR_ACCESS_TOKEN",
75
+ deepseek: "DEEPSEEK_API_KEY",
75
76
  "openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
76
77
  "azure-openai-responses": "AZURE_OPENAI_API_KEY",
77
78
  exa: "EXA_API_KEY",
package/src/types.ts CHANGED
@@ -100,6 +100,7 @@ export type KnownProvider =
100
100
  | "github-copilot"
101
101
  | "gitlab-duo"
102
102
  | "cursor"
103
+ | "deepseek"
103
104
  | "xai"
104
105
  | "groq"
105
106
  | "cerebras"
@@ -151,7 +152,13 @@ export type CacheRetention = "none" | "short" | "long";
151
152
  /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
152
153
  export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
153
154
 
154
- export function isSpecialServiceTier(serviceTier?: ServiceTier | null): serviceTier is "flex" | "scale" | "priority" {
155
+ export function shouldSendServiceTier(
156
+ serviceTier?: ServiceTier | null,
157
+ provider?: Provider,
158
+ ): serviceTier is "flex" | "scale" | "priority" {
159
+ if (provider !== "openai" && provider !== "openai-codex") {
160
+ return false;
161
+ }
155
162
  return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
156
163
  }
157
164
 
@@ -17,13 +17,14 @@ import { isJsonObject } from "./types";
17
17
  */
18
18
  export function StringEnum<const T extends readonly string[]>(
19
19
  values: T,
20
- options?: { description?: string; default?: T[number] },
20
+ options?: { description?: string; default?: T[number]; examples?: readonly T[number][] },
21
21
  ): TUnsafe<T[number]> {
22
22
  return Type.Unsafe<T[number]>({
23
23
  type: "string",
24
24
  enum: values as unknown as string[],
25
25
  ...(options?.description && { description: options.description }),
26
26
  ...(options?.default && { default: options.default }),
27
+ ...(options?.examples && { examples: options.examples }),
27
28
  });
28
29
  }
29
30
 
@@ -141,6 +141,13 @@ function tryParseLeadingJsonContainer(value: string): unknown | undefined {
141
141
  return JSON.parse(cleaned) as unknown;
142
142
  } catch {}
143
143
  }
144
+ // Try escaping raw control chars that appear inside string literals.
145
+ const escapedControls = escapeRawControlsInJsonStrings(prefix);
146
+ if (escapedControls !== prefix) {
147
+ try {
148
+ return JSON.parse(escapedControls) as unknown;
149
+ } catch {}
150
+ }
144
151
  // Also try single-char healing on the extracted prefix.
145
152
  return tryHealMalformedJson(prefix);
146
153
  }
@@ -191,6 +198,72 @@ function cleanLiteralEscapes(value: string): string {
191
198
  }
192
199
  return result;
193
200
  }
201
+ /**
202
+ * Escape raw control characters (0x00–0x1F) that appear *inside* JSON string
203
+ * literals. LLMs sometimes emit literal newlines/tabs/etc. inside string
204
+ * content instead of `\n` / `\t` escape sequences, which `JSON.parse` rejects
205
+ * even though the surrounding structure is valid.
206
+ *
207
+ * This function only rewrites characters while inside a string; structural
208
+ * whitespace outside of strings is preserved unchanged.
209
+ */
210
+ function escapeRawControlsInJsonStrings(value: string): string {
211
+ let result = "";
212
+ let inString = false;
213
+ let escaped = false;
214
+ let changed = false;
215
+ for (let i = 0; i < value.length; i += 1) {
216
+ const ch = value[i];
217
+ if (inString) {
218
+ if (escaped) {
219
+ result += ch;
220
+ escaped = false;
221
+ continue;
222
+ }
223
+ if (ch === "\\") {
224
+ result += ch;
225
+ escaped = true;
226
+ continue;
227
+ }
228
+ if (ch === '"') {
229
+ result += ch;
230
+ inString = false;
231
+ continue;
232
+ }
233
+ const code = ch.charCodeAt(0);
234
+ if (code < 0x20) {
235
+ changed = true;
236
+ switch (ch) {
237
+ case "\n":
238
+ result += "\\n";
239
+ break;
240
+ case "\r":
241
+ result += "\\r";
242
+ break;
243
+ case "\t":
244
+ result += "\\t";
245
+ break;
246
+ case "\b":
247
+ result += "\\b";
248
+ break;
249
+ case "\f":
250
+ result += "\\f";
251
+ break;
252
+ default:
253
+ result += `\\u${code.toString(16).padStart(4, "0")}`;
254
+ }
255
+ continue;
256
+ }
257
+ result += ch;
258
+ continue;
259
+ }
260
+ if (ch === '"') {
261
+ inString = true;
262
+ }
263
+ result += ch;
264
+ }
265
+ return changed ? result : value;
266
+ }
194
267
 
195
268
  /** Maximum single-character edits to attempt when healing malformed JSON. */
196
269
  const MAX_HEAL_DISTANCE = 3;
@@ -286,6 +359,17 @@ function tryParseJsonForTypes(value: string, expectedTypes: string[]): { value:
286
359
  }
287
360
  } catch {
288
361
  if (looksJsonObject || looksJsonArray) {
362
+ // Try escaping raw control chars inside string literals (LLMs sometimes
363
+ // emit literal newlines/tabs inside string content rather than `\n`/`\t`).
364
+ const escapedControls = escapeRawControlsInJsonStrings(trimmed);
365
+ if (escapedControls !== trimmed) {
366
+ try {
367
+ const parsed = JSON.parse(escapedControls) as unknown;
368
+ if (matchesExpectedType(parsed, expectedTypes)) {
369
+ return { value: parsed, changed: true };
370
+ }
371
+ } catch {}
372
+ }
289
373
  // Try extracting a valid JSON prefix (handles trailing junk after balanced container)
290
374
  const leading = tryParseLeadingJsonContainer(trimmed);
291
375
  if (leading !== undefined && matchesExpectedType(leading, expectedTypes)) {
@@ -501,6 +585,26 @@ function normalizeOptionalNullsForSchema(schema: unknown, value: unknown): { val
501
585
  nextValue[key] = normalized.value;
502
586
  }
503
587
 
588
+ // Strip unknown keys with null/"null" values when the schema forbids extras.
589
+ // LLMs sometimes hallucinate verbs alongside valid ones (e.g. `split: null`,
590
+ // `original: null`). Rejecting the entire tool call wastes a turn; treating
591
+ // these the same as null on known optional fields is a safer fallback. Keys
592
+ // with non-null unknown values are left intact so genuine schema mistakes
593
+ // still surface as validation errors.
594
+ if (schemaObject.additionalProperties === false) {
595
+ const knownKeys = new Set(Object.keys(properties));
596
+ for (const key of Object.keys(nextValue)) {
597
+ if (knownKeys.has(key)) continue;
598
+ const v = nextValue[key];
599
+ if (v !== null && v !== "null") continue;
600
+ if (!changed) {
601
+ nextValue = { ...nextValue };
602
+ changed = true;
603
+ }
604
+ delete nextValue[key];
605
+ }
606
+ }
607
+
504
608
  return { value: changed ? nextValue : value, changed };
505
609
  }
506
610