@oh-my-pi/pi-ai 14.2.1 → 14.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,40 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.4.0] - 2026-04-26
6
+
7
+ ### Added
8
+
9
+ - Added an `examples` option to `StringEnum` to include example values in the generated schema
10
+
11
+ ### Changed
12
+
13
+ - Changed Anthropic tool schema generation to strip unsupported schema fields (including `patternProperties`), add `additionalProperties: false` for object types, and apply Anthropic strict-mode limits when marking tools as strict
14
+ - Changed Anthropic strict tool planning to cap strict `tools` at twenty entries and convert excess optional/union parameters to nullable schemas to stay within provider constraints
15
+
16
+ ### Fixed
17
+
18
+ - Fixed Anthropic tool schema compilation failures by keeping the `write` tool out of the strict-tool allowlist when the full coding-agent tool set is active
19
+ - Fixed Anthropic 400 `tools.*.custom: For 'object' type, property 'minItems' is not supported` by stripping `minItems` from object-shaped JSON schema nodes (array nodes still keep supported `minItems` values)
20
+ - Fixed Anthropic tool schemas that used tuple-style arrays by stripping unsupported `maxItems` and only preserving provider-supported `minItems` values
21
+ - Fixed Anthropic and OpenRouter Anthropic tool calls that previously failed with `compiled grammar is too large` by retrying automatically without strict tool schemas and reusing non-strict mode for subsequent requests in the same provider session
22
+ - Fixed parsing of JSON tool arguments containing raw control characters inside string values (such as embedded newlines) by escaping them before JSON parsing
23
+ - Fixed `validateToolArguments` to accept stringified objects and arrays that include literal control characters inside string fields
24
+ - Fixed OpenAI Codex Spark OAuth selection to fall back to non-Pro accounts when no ChatGPT Pro account is connected, so users without a Pro account can still attempt Spark requests in case the server permits access
25
+
26
+ ## [14.3.0] - 2026-04-25
27
+
28
+ ### Added
29
+
30
+ - Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/can1357/oh-my-pi/issues/726))
31
+ - Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
32
+ - Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
33
+
34
+ ### Fixed
35
+
36
+ - Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
37
+ - Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/can1357/oh-my-pi/issues/678))
38
+
5
39
  ## [14.2.1] - 2026-04-24
6
40
 
7
41
  ### Fixed
@@ -2097,4 +2131,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2097
2131
 
2098
2132
  ## [0.9.4] - 2025-11-26
2099
2133
 
2100
- Initial release with multi-provider LLM support.
2134
+ Initial release with multi-provider LLM support.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.2.1",
4
+ "version": "14.4.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -41,24 +41,24 @@
41
41
  "generate-models": "bun scripts/generate-models.ts"
42
42
  },
43
43
  "dependencies": {
44
- "@anthropic-ai/sdk": "^0.78",
45
- "@aws-sdk/client-bedrock-runtime": "^3",
46
- "@aws-sdk/credential-provider-node": "^3",
47
- "@bufbuild/protobuf": "^2.11",
48
- "@google/genai": "^1.43",
49
- "@oh-my-pi/pi-natives": "14.2.1",
50
- "@oh-my-pi/pi-utils": "14.2.1",
51
- "@sinclair/typebox": "^0.34",
52
- "@smithy/node-http-handler": "^4.4",
53
- "ajv": "^8.18",
54
- "ajv-formats": "^3.0",
55
- "openai": "^6.25",
56
- "partial-json": "^0.1",
57
- "proxy-agent": "^6.5",
44
+ "@anthropic-ai/sdk": "^0.91.1",
45
+ "@aws-sdk/client-bedrock-runtime": "^3.1037.0",
46
+ "@aws-sdk/credential-provider-node": "^3.972.36",
47
+ "@bufbuild/protobuf": "^2.12.0",
48
+ "@google/genai": "^1.50.1",
49
+ "@oh-my-pi/pi-natives": "14.4.0",
50
+ "@oh-my-pi/pi-utils": "14.4.0",
51
+ "@sinclair/typebox": "^0.34.49",
52
+ "@smithy/node-http-handler": "^4.6.1",
53
+ "ajv": "^8.20.0",
54
+ "ajv-formats": "^3.0.1",
55
+ "openai": "^6.34.0",
56
+ "partial-json": "^0.1.7",
57
+ "proxy-agent": "^8.0.1",
58
58
  "zod": "4.3.6"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3"
61
+ "@types/bun": "^1.3.13"
62
62
  },
63
63
  "engines": {
64
64
  "bun": ">=1.3.7"
@@ -1711,6 +1711,11 @@ export class AuthStorage {
1711
1711
  }),
1712
1712
  );
1713
1713
 
1714
+ // Skip the Pro-plan filter when no candidate is confirmed Pro, so users with only
1715
+ // non-Pro accounts can still attempt Spark requests (e.g. trial/grandfathered access).
1716
+ const enforceProRequirement =
1717
+ requiresProModel && candidates.some(candidate => hasOpenAICodexProPlan(candidate.usage));
1718
+
1714
1719
  const fallback = candidates[0];
1715
1720
 
1716
1721
  for (const candidate of candidates) {
@@ -1719,6 +1724,7 @@ export class AuthStorage {
1719
1724
  allowBlocked: false,
1720
1725
  prefetchedUsage: candidate.usage,
1721
1726
  usagePrechecked: candidate.usageChecked,
1727
+ enforceProRequirement,
1722
1728
  });
1723
1729
  if (apiKey) return apiKey;
1724
1730
  }
@@ -1729,6 +1735,7 @@ export class AuthStorage {
1729
1735
  allowBlocked: true,
1730
1736
  prefetchedUsage: fallback.usage,
1731
1737
  usagePrechecked: fallback.usageChecked,
1738
+ enforceProRequirement,
1732
1739
  });
1733
1740
  }
1734
1741
 
@@ -1774,14 +1781,22 @@ export class AuthStorage {
1774
1781
  allowBlocked: boolean;
1775
1782
  prefetchedUsage?: UsageReport | null;
1776
1783
  usagePrechecked?: boolean;
1784
+ enforceProRequirement?: boolean;
1777
1785
  },
1778
1786
  ): Promise<string | undefined> {
1779
- const { checkUsage, allowBlocked, prefetchedUsage = null, usagePrechecked = false } = usageOptions;
1787
+ const {
1788
+ checkUsage,
1789
+ allowBlocked,
1790
+ prefetchedUsage = null,
1791
+ usagePrechecked = false,
1792
+ enforceProRequirement,
1793
+ } = usageOptions;
1780
1794
  if (!allowBlocked && this.#isCredentialBlocked(providerKey, selection.index)) {
1781
1795
  return undefined;
1782
1796
  }
1783
1797
 
1784
1798
  const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
1799
+ const applyProFilter = enforceProRequirement ?? requiresProModel;
1785
1800
  let usage: UsageReport | null = null;
1786
1801
  let usageChecked = false;
1787
1802
 
@@ -1796,7 +1811,7 @@ export class AuthStorage {
1796
1811
  });
1797
1812
  usageChecked = true;
1798
1813
  }
1799
- if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
1814
+ if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
1800
1815
  return undefined;
1801
1816
  }
1802
1817
  if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
@@ -1846,7 +1861,7 @@ export class AuthStorage {
1846
1861
  });
1847
1862
  usageChecked = true;
1848
1863
  }
1849
- if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
1864
+ if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
1850
1865
  return undefined;
1851
1866
  }
1852
1867
  if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
@@ -154,19 +154,27 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
154
154
  }
155
155
 
156
156
  /**
157
- * Link `-spark` model variants to their base models for context promotion.
157
+ * Link OpenAI model variants to their context promotion targets.
158
158
  *
159
- * When a spark model's context is exhausted, the agent can promote to the
160
- * corresponding full model. This sets `contextPromotionTarget` on each
161
- * spark variant that has a matching base model.
159
+ * When a model's context is exhausted, the agent can promote to a sibling
160
+ * model with a larger context window on the same provider:
161
+ * - `-spark` variants promote to `gpt-5.5`.
162
+ * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
162
163
  */
163
- export function linkSparkPromotionTargets(models: ApiModel<Api>[]): void {
164
+ export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
164
165
  for (const candidate of models) {
165
166
  const parsedCandidate = parseKnownModel(candidate.id);
166
- if (parsedCandidate.family !== "openai" || parsedCandidate.variant !== "codex-spark") continue;
167
- const baseId = candidate.id.slice(0, -"-spark".length);
167
+ if (parsedCandidate.family !== "openai") continue;
168
+ let targetId: string | undefined;
169
+ if (parsedCandidate.variant === "codex-spark") {
170
+ targetId = "gpt-5.5";
171
+ } else if (parsedCandidate.variant === "base" && semverEqual(parsedCandidate.version, "5.5")) {
172
+ targetId = "gpt-5.4";
173
+ } else {
174
+ continue;
175
+ }
168
176
  const fallback = models.find(
169
- model => model.provider === candidate.provider && model.api === candidate.api && model.id === baseId,
177
+ model => model.provider === candidate.provider && model.api === candidate.api && model.id === targetId,
170
178
  );
171
179
  if (!fallback) continue;
172
180
  candidate.contextPromotionTarget = `${fallback.provider}/${fallback.id}`;
@@ -283,6 +291,17 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
283
291
  }
284
292
  }
285
293
 
294
+ /**
295
+ * Returns true for Anthropic Opus models at version 4.7 or later, which carry these API restrictions:
296
+ * - Sampling parameters (temperature/top_p/top_k) are rejected with a 400 error
297
+ * - Thinking content is omitted by default (needs display: "summarized")
298
+ */
299
+ export function hasOpus47ApiRestrictions(modelId: string): boolean {
300
+ const parsed = parseAnthropicModel(getCanonicalModelId(modelId));
301
+ if (!parsed) return false;
302
+ return semverGte(parsed.version, "4.7") && parsed.kind === "opus";
303
+ }
304
+
286
305
  function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
287
306
  if (model.api !== "anthropic-messages") return false;
288
307
  const parsedModel = parseKnownModel(model.id);
package/src/models.json CHANGED
@@ -4751,6 +4751,56 @@
4751
4751
  }
4752
4752
  }
4753
4753
  },
4754
+ "deepseek": {
4755
+ "deepseek-v4-flash": {
4756
+ "id": "deepseek-v4-flash",
4757
+ "name": "DeepSeek V4 Flash",
4758
+ "api": "openai-completions",
4759
+ "provider": "deepseek",
4760
+ "baseUrl": "https://api.deepseek.com",
4761
+ "reasoning": true,
4762
+ "input": [
4763
+ "text"
4764
+ ],
4765
+ "cost": {
4766
+ "input": 0.14,
4767
+ "output": 0.28,
4768
+ "cacheRead": 0,
4769
+ "cacheWrite": 0
4770
+ },
4771
+ "contextWindow": 1000000,
4772
+ "maxTokens": 384000,
4773
+ "thinking": {
4774
+ "mode": "effort",
4775
+ "minLevel": "minimal",
4776
+ "maxLevel": "high"
4777
+ }
4778
+ },
4779
+ "deepseek-v4-pro": {
4780
+ "id": "deepseek-v4-pro",
4781
+ "name": "DeepSeek V4 Pro",
4782
+ "api": "openai-completions",
4783
+ "provider": "deepseek",
4784
+ "baseUrl": "https://api.deepseek.com",
4785
+ "reasoning": true,
4786
+ "input": [
4787
+ "text"
4788
+ ],
4789
+ "cost": {
4790
+ "input": 1.74,
4791
+ "output": 3.48,
4792
+ "cacheRead": 0,
4793
+ "cacheWrite": 0
4794
+ },
4795
+ "contextWindow": 1000000,
4796
+ "maxTokens": 384000,
4797
+ "thinking": {
4798
+ "mode": "effort",
4799
+ "minLevel": "minimal",
4800
+ "maxLevel": "high"
4801
+ }
4802
+ }
4803
+ },
4754
4804
  "github-copilot": {
4755
4805
  "claude-haiku-4.5": {
4756
4806
  "id": "claude-haiku-4.5",
@@ -16931,7 +16981,7 @@
16931
16981
  },
16932
16982
  "contextWindow": 128000,
16933
16983
  "maxTokens": 128000,
16934
- "contextPromotionTarget": "litellm/gpt-5.3-codex",
16984
+ "contextPromotionTarget": "litellm/gpt-5.5",
16935
16985
  "thinking": {
16936
16986
  "mode": "effort",
16937
16987
  "minLevel": "low",
@@ -17011,7 +17061,8 @@
17011
17061
  "mode": "effort",
17012
17062
  "minLevel": "low",
17013
17063
  "maxLevel": "xhigh"
17014
- }
17064
+ },
17065
+ "contextPromotionTarget": "litellm/gpt-5.4"
17015
17066
  },
17016
17067
  "gpt-image-2": {
17017
17068
  "id": "gpt-image-2",
@@ -32938,7 +32989,7 @@
32938
32989
  "maxLevel": "xhigh"
32939
32990
  },
32940
32991
  "applyPatchToolType": "freeform",
32941
- "contextPromotionTarget": "openai/gpt-5.3-codex"
32992
+ "contextPromotionTarget": "openai/gpt-5.5"
32942
32993
  },
32943
32994
  "gpt-5.4": {
32944
32995
  "id": "gpt-5.4",
@@ -33068,7 +33119,8 @@
33068
33119
  "minLevel": "low",
33069
33120
  "maxLevel": "xhigh"
33070
33121
  },
33071
- "applyPatchToolType": "freeform"
33122
+ "applyPatchToolType": "freeform",
33123
+ "contextPromotionTarget": "openai/gpt-5.4"
33072
33124
  },
33073
33125
  "o1": {
33074
33126
  "id": "o1",
@@ -33597,7 +33649,7 @@
33597
33649
  },
33598
33650
  "contextWindow": 128000,
33599
33651
  "maxTokens": 128000,
33600
- "contextPromotionTarget": "openai-codex/gpt-5.3-codex",
33652
+ "contextPromotionTarget": "openai-codex/gpt-5.5",
33601
33653
  "thinking": {
33602
33654
  "mode": "effort",
33603
33655
  "minLevel": "low",
@@ -33715,7 +33767,8 @@
33715
33767
  "minLevel": "low",
33716
33768
  "maxLevel": "xhigh"
33717
33769
  },
33718
- "applyPatchToolType": "freeform"
33770
+ "applyPatchToolType": "freeform",
33771
+ "contextPromotionTarget": "openai-codex/gpt-5.4"
33719
33772
  }
33720
33773
  },
33721
33774
  "opencode": {
@@ -33765,8 +33818,7 @@
33765
33818
  "mode": "effort",
33766
33819
  "minLevel": "low",
33767
33820
  "maxLevel": "xhigh"
33768
- },
33769
- "contextPromotionTarget": "opencode/gpt-5.3-codex"
33821
+ }
33770
33822
  },
33771
33823
  "gpt-5.4": {
33772
33824
  "id": "gpt-5.4",
@@ -34828,8 +34880,7 @@
34828
34880
  "mode": "effort",
34829
34881
  "minLevel": "low",
34830
34882
  "maxLevel": "xhigh"
34831
- },
34832
- "contextPromotionTarget": "opencode-zen/gpt-5.3-codex"
34883
+ }
34833
34884
  },
34834
34885
  "gpt-5.4": {
34835
34886
  "id": "gpt-5.4",
@@ -246,26 +246,64 @@ async function fetchOllamaNativeModels(baseUrl: string): Promise<Model<"openai-r
246
246
  }
247
247
  const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
248
248
  const entries = payload.models ?? [];
249
- const models: Model<"openai-responses">[] = [];
250
- for (const entry of entries) {
251
- const id = entry.model ?? entry.name;
252
- if (!id) {
253
- continue;
254
- }
255
- models.push({
256
- id,
257
- name: entry.name ?? id,
258
- api: "openai-responses",
259
- provider: "ollama",
260
- baseUrl,
261
- reasoning: false,
262
- input: ["text"],
263
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
264
- contextWindow: 128000,
265
- maxTokens: 8192,
249
+ const resolved = await Promise.all(
250
+ entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
251
+ const id = entry.model ?? entry.name;
252
+ if (!id) return null;
253
+ const { contextWindow, maxTokens } = await fetchOllamaModelLimits(nativeBaseUrl, id);
254
+ return {
255
+ id,
256
+ name: entry.name ?? id,
257
+ api: "openai-responses",
258
+ provider: "ollama",
259
+ baseUrl,
260
+ reasoning: false,
261
+ input: ["text"],
262
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
263
+ contextWindow,
264
+ maxTokens,
265
+ };
266
+ }),
267
+ );
268
+ const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
269
+ return models.sort((left, right) => left.id.localeCompare(right.id));
270
+ }
271
+
272
+ /** Ollama's default `num_ctx` when the runtime request does not override it. */
273
+ const OLLAMA_DEFAULT_CONTEXT_WINDOW = 4096;
274
+ /** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
275
+ const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
276
+
277
+ /**
278
+ * Query Ollama's `/api/show` endpoint for a single model and pull its native
279
+ * context length out of `model_info.<arch>.context_length`. Falls back to
280
+ * Ollama's default context window when the endpoint or field is unavailable
281
+ * so discovery still succeeds against older Ollama builds.
282
+ */
283
+ async function fetchOllamaModelLimits(
284
+ nativeBaseUrl: string,
285
+ modelId: string,
286
+ ): Promise<{ contextWindow: number; maxTokens: number }> {
287
+ try {
288
+ const response = await fetch(`${nativeBaseUrl}/api/show`, {
289
+ method: "POST",
290
+ headers: { "Content-Type": "application/json", Accept: "application/json" },
291
+ body: JSON.stringify({ model: modelId }),
266
292
  });
293
+ if (!response.ok) {
294
+ return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
295
+ }
296
+ const payload = (await response.json()) as { model_info?: Record<string, unknown> };
297
+ const info = payload.model_info ?? {};
298
+ for (const [key, value] of Object.entries(info)) {
299
+ if (key.endsWith(".context_length") && typeof value === "number" && value > 0) {
300
+ return { contextWindow: value, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
301
+ }
302
+ }
303
+ } catch {
304
+ // fall through to default
267
305
  }
268
- return models.sort((left, right) => left.id.localeCompare(right.id));
306
+ return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
269
307
  }
270
308
 
271
309
  const OPENAI_NON_RESPONSES_PREFIXES = [