npm - @oh-my-pi/pi-ai - Versions diffs - 14.2.1 → 14.4.0 - Mend

@oh-my-pi/pi-ai 14.2.1 → 14.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/CHANGELOG.md +35 -1
package/package.json +16 -16
package/src/auth-storage.ts +18 -3
package/src/model-thinking.ts +27 -8
package/src/models.json +61 -10
package/src/provider-models/openai-compat.ts +56 -18
package/src/providers/anthropic.ts +409 -26
package/src/providers/cursor.ts +98 -12
package/src/providers/openai-codex-responses.ts +2 -24
package/src/providers/openai-completions.ts +99 -4
package/src/stream.ts +1 -0
package/src/types.ts +1 -0
package/src/utils/schema/strict-mode.ts +2 -1
package/src/utils/validation.ts +84 -0

package/CHANGELOG.md (changed)

@@ -2,6 +2,40 @@
 ## [Unreleased]
+## [14.4.0] - 2026-04-26
+### Added
+- Added an `examples` option to `StringEnum` to include example values in the generated schema
+### Changed
+- Changed Anthropic tool schema generation to strip unsupported schema fields (including `patternProperties`), add `additionalProperties: false` for object types, and apply Anthropic strict-mode limits when marking tools as strict
+- Changed Anthropic strict tool planning to cap strict `tools` at twenty entries and convert excess optional/union parameters to nullable schemas to stay within provider constraints
+### Fixed
+- Fixed Anthropic tool schema compilation failures by keeping the `write` tool out of the strict-tool allowlist when the full coding-agent tool set is active
+- Fixed Anthropic 400 `tools.*.custom: For 'object' type, property 'minItems' is not supported` by stripping `minItems` from object-shaped JSON schema nodes (array nodes still keep supported `minItems` values)
+- Fixed Anthropic tool schemas that used tuple-style arrays by stripping unsupported `maxItems` and only preserving provider-supported `minItems` values
+- Fixed Anthropic and OpenRouter Anthropic tool calls that previously failed with `compiled grammar is too large` by retrying automatically without strict tool schemas and reusing non-strict mode for subsequent requests in the same provider session
+- Fixed parsing of JSON tool arguments containing raw control characters inside string values (such as embedded newlines) by escaping them before JSON parsing
+- Fixed `validateToolArguments` to accept stringified objects and arrays that include literal control characters inside string fields
+- Fixed OpenAI Codex Spark OAuth selection to fall back to non-Pro accounts when no ChatGPT Pro account is connected, so users without a Pro account can still attempt Spark requests in case the server permits access.
+## [14.3.0] - 2026-04-25
+### Added
+- Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/can1357/oh-my-pi/issues/726))
+  - Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
+  - Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
+### Fixed
+- Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
+- Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/can1357/oh-my-pi/issues/678))
 ## [14.2.1] - 2026-04-24
 ### Fixed
@@ -2097,4 +2131,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/package.json (changed)

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "14.2.1",
+	"version": "14.4.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -41,24 +41,24 @@
 		"generate-models": "bun scripts/generate-models.ts"
 	},
 	"dependencies": {
-		"@anthropic-ai/sdk": "^0.78",
-		"@aws-sdk/client-bedrock-runtime": "^3",
-		"@aws-sdk/credential-provider-node": "^3",
-		"@bufbuild/protobuf": "^2.11",
-		"@google/genai": "^1.43",
-		"@oh-my-pi/pi-natives": "14.2.1",
-		"@oh-my-pi/pi-utils": "14.2.1",
-		"@sinclair/typebox": "^0.34",
-		"@smithy/node-http-handler": "^4.4",
-		"ajv": "^8.18",
-		"ajv-formats": "^3.0",
-		"openai": "^6.25",
-		"partial-json": "^0.1",
-		"proxy-agent": "^6.5",
+		"@anthropic-ai/sdk": "^0.91.1",
+		"@aws-sdk/client-bedrock-runtime": "^3.1037.0",
+		"@aws-sdk/credential-provider-node": "^3.972.36",
+		"@bufbuild/protobuf": "^2.12.0",
+		"@google/genai": "^1.50.1",
+		"@oh-my-pi/pi-natives": "14.4.0",
+		"@oh-my-pi/pi-utils": "14.4.0",
+		"@sinclair/typebox": "^0.34.49",
+		"@smithy/node-http-handler": "^4.6.1",
+		"ajv": "^8.20.0",
+		"ajv-formats": "^3.0.1",
+		"openai": "^6.34.0",
+		"partial-json": "^0.1.7",
+		"proxy-agent": "^8.0.1",
 		"zod": "4.3.6"
 	},
 	"devDependencies": {
-		"@types/bun": "^1.3"
+		"@types/bun": "^1.3.13"
 	},
 	"engines": {
 		"bun": ">=1.3.7"

package/src/auth-storage.ts (changed)

@@ -1711,6 +1711,11 @@ export class AuthStorage {
 			}),
 		);
+		// Skip the Pro-plan filter when no candidate is confirmed Pro, so users with only
+		// non-Pro accounts can still attempt Spark requests (e.g. trial/grandfathered access).
+		const enforceProRequirement =
+			requiresProModel && candidates.some(candidate => hasOpenAICodexProPlan(candidate.usage));
 		const fallback = candidates[0];
 		for (const candidate of candidates) {
@@ -1719,6 +1724,7 @@ export class AuthStorage {
 				allowBlocked: false,
 				prefetchedUsage: candidate.usage,
 				usagePrechecked: candidate.usageChecked,
+				enforceProRequirement,
 			});
 			if (apiKey) return apiKey;
 		}
@@ -1729,6 +1735,7 @@ export class AuthStorage {
 				allowBlocked: true,
 				prefetchedUsage: fallback.usage,
 				usagePrechecked: fallback.usageChecked,
+				enforceProRequirement,
 			});
 		}
@@ -1774,14 +1781,22 @@ export class AuthStorage {
 			allowBlocked: boolean;
 			prefetchedUsage?: UsageReport | null;
 			usagePrechecked?: boolean;
+			enforceProRequirement?: boolean;
 		},
 	): Promise<string | undefined> {
-		const { checkUsage, allowBlocked, prefetchedUsage = null, usagePrechecked = false } = usageOptions;
+		const {
+			checkUsage,
+			allowBlocked,
+			prefetchedUsage = null,
+			usagePrechecked = false,
+			enforceProRequirement,
+		} = usageOptions;
 		if (!allowBlocked && this.#isCredentialBlocked(providerKey, selection.index)) {
 			return undefined;
 		}
 		const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
+		const applyProFilter = enforceProRequirement ?? requiresProModel;
 		let usage: UsageReport | null = null;
 		let usageChecked = false;
@@ -1796,7 +1811,7 @@ export class AuthStorage {
 				});
 				usageChecked = true;
 			}
-			if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
+			if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
 				return undefined;
 			}
 			if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
@@ -1846,7 +1861,7 @@ export class AuthStorage {
 					});
 					usageChecked = true;
 				}
-				if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
+				if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
 					return undefined;
 				}
 				if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {

package/src/model-thinking.ts (changed)

@@ -154,19 +154,27 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
 }
 /**
- * Link `-spark` model variants to their base models for context promotion.
+ * Link OpenAI model variants to their context promotion targets.
  *
- * When a spark model's context is exhausted, the agent can promote to the
- * corresponding full model. This sets `contextPromotionTarget` on each
- * spark variant that has a matching base model.
+ * When a model's context is exhausted, the agent can promote to a sibling
+ * model with a larger context window on the same provider:
+ * - `-spark` variants promote to `gpt-5.5`.
+ * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
  */
-export function linkSparkPromotionTargets(models: ApiModel<Api>[]): void {
+export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
 	for (const candidate of models) {
 		const parsedCandidate = parseKnownModel(candidate.id);
-		if (parsedCandidate.family !== "openai" || parsedCandidate.variant !== "codex-spark") continue;
-		const baseId = candidate.id.slice(0, -"-spark".length);
+		if (parsedCandidate.family !== "openai") continue;
+		let targetId: string | undefined;
+		if (parsedCandidate.variant === "codex-spark") {
+			targetId = "gpt-5.5";
+		} else if (parsedCandidate.variant === "base" && semverEqual(parsedCandidate.version, "5.5")) {
+			targetId = "gpt-5.4";
+		} else {
+			continue;
+		}
 		const fallback = models.find(
-			model => model.provider === candidate.provider && model.api === candidate.api && model.id === baseId,
+			model => model.provider === candidate.provider && model.api === candidate.api && model.id === targetId,
 		);
 		if (!fallback) continue;
 		candidate.contextPromotionTarget = `${fallback.provider}/${fallback.id}`;
@@ -283,6 +291,17 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
 	}
 }
+/**
+ * Returns true for Anthropic models with Opus 4.7 API restrictions:
+ * - Sampling parameters (temperature/top_p/top_k) return 400 error
+ * - Thinking content is omitted by default (needs display: "summarized")
+ */
+export function hasOpus47ApiRestrictions(modelId: string): boolean {
+	const parsed = parseAnthropicModel(getCanonicalModelId(modelId));
+	if (!parsed) return false;
+	return semverGte(parsed.version, "4.7") && parsed.kind === "opus";
+}
 function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
 	if (model.api !== "anthropic-messages") return false;
 	const parsedModel = parseKnownModel(model.id);

package/src/models.json (changed)

@@ -4751,6 +4751,56 @@
 			}
 		}
 	},
+	"deepseek": {
+		"deepseek-v4-flash": {
+			"id": "deepseek-v4-flash",
+			"name": "DeepSeek V4 Flash",
+			"api": "openai-completions",
+			"provider": "deepseek",
+			"baseUrl": "https://api.deepseek.com",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0.14,
+				"output": 0.28,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 1000000,
+			"maxTokens": 384000,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		},
+		"deepseek-v4-pro": {
+			"id": "deepseek-v4-pro",
+			"name": "DeepSeek V4 Pro",
+			"api": "openai-completions",
+			"provider": "deepseek",
+			"baseUrl": "https://api.deepseek.com",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 1.74,
+				"output": 3.48,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 1000000,
+			"maxTokens": 384000,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "high"
+			}
+		}
+	},
 	"github-copilot": {
 		"claude-haiku-4.5": {
 			"id": "claude-haiku-4.5",
@@ -16931,7 +16981,7 @@
 			},
 			"contextWindow": 128000,
 			"maxTokens": 128000,
-			"contextPromotionTarget": "litellm/gpt-5.3-codex",
+			"contextPromotionTarget": "litellm/gpt-5.5",
 			"thinking": {
 				"mode": "effort",
 				"minLevel": "low",
@@ -17011,7 +17061,8 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			}
+			},
+			"contextPromotionTarget": "litellm/gpt-5.4"
 		},
 		"gpt-image-2": {
 			"id": "gpt-image-2",
@@ -32938,7 +32989,7 @@
 				"maxLevel": "xhigh"
 			},
 			"applyPatchToolType": "freeform",
-			"contextPromotionTarget": "openai/gpt-5.3-codex"
+			"contextPromotionTarget": "openai/gpt-5.5"
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",
@@ -33068,7 +33119,8 @@
 				"minLevel": "low",
 				"maxLevel": "xhigh"
 			},
-			"applyPatchToolType": "freeform"
+			"applyPatchToolType": "freeform",
+			"contextPromotionTarget": "openai/gpt-5.4"
 		},
 		"o1": {
 			"id": "o1",
@@ -33597,7 +33649,7 @@
 			},
 			"contextWindow": 128000,
 			"maxTokens": 128000,
-			"contextPromotionTarget": "openai-codex/gpt-5.3-codex",
+			"contextPromotionTarget": "openai-codex/gpt-5.5",
 			"thinking": {
 				"mode": "effort",
 				"minLevel": "low",
@@ -33715,7 +33767,8 @@
 				"minLevel": "low",
 				"maxLevel": "xhigh"
 			},
-			"applyPatchToolType": "freeform"
+			"applyPatchToolType": "freeform",
+			"contextPromotionTarget": "openai-codex/gpt-5.4"
 		}
 	},
 	"opencode": {
@@ -33765,8 +33818,7 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			},
-			"contextPromotionTarget": "opencode/gpt-5.3-codex"
+			}
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",
@@ -34828,8 +34880,7 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			},
-			"contextPromotionTarget": "opencode-zen/gpt-5.3-codex"
+			}
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",

package/src/provider-models/openai-compat.ts (changed)

@@ -246,26 +246,64 @@ async function fetchOllamaNativeModels(baseUrl: string): Promise<Model<"openai-r
 	}
 	const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
 	const entries = payload.models ?? [];
-	const models: Model<"openai-responses">[] = [];
-	for (const entry of entries) {
-		const id = entry.model ?? entry.name;
-		if (!id) {
-			continue;
-		}
-		models.push({
-			id,
-			name: entry.name ?? id,
-			api: "openai-responses",
-			provider: "ollama",
-			baseUrl,
-			reasoning: false,
-			input: ["text"],
-			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-			contextWindow: 128000,
-			maxTokens: 8192,
+	const resolved = await Promise.all(
+		entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
+			const id = entry.model ?? entry.name;
+			if (!id) return null;
+			const { contextWindow, maxTokens } = await fetchOllamaModelLimits(nativeBaseUrl, id);
+			return {
+				id,
+				name: entry.name ?? id,
+				api: "openai-responses",
+				provider: "ollama",
+				baseUrl,
+				reasoning: false,
+				input: ["text"],
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+				contextWindow,
+				maxTokens,
+			};
+		}),
+	);
+	const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
+	return models.sort((left, right) => left.id.localeCompare(right.id));
+}
+/** Ollama's default `num_ctx` when the runtime request does not override it. */
+const OLLAMA_DEFAULT_CONTEXT_WINDOW = 4096;
+/** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
+const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
+/**
+ * Query Ollama's `/api/show` endpoint for a single model and pull its native
+ * context length out of `model_info.<arch>.context_length`. Falls back to
+ * Ollama's default context window when the endpoint or field is unavailable
+ * so discovery still succeeds against older Ollama builds.
+ */
+async function fetchOllamaModelLimits(
+	nativeBaseUrl: string,
+	modelId: string,
+): Promise<{ contextWindow: number; maxTokens: number }> {
+	try {
+		const response = await fetch(`${nativeBaseUrl}/api/show`, {
+			method: "POST",
+			headers: { "Content-Type": "application/json", Accept: "application/json" },
+			body: JSON.stringify({ model: modelId }),
 		});
+		if (!response.ok) {
+			return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
+		}
+		const payload = (await response.json()) as { model_info?: Record<string, unknown> };
+		const info = payload.model_info ?? {};
+		for (const [key, value] of Object.entries(info)) {
+			if (key.endsWith(".context_length") && typeof value === "number" && value > 0) {
+				return { contextWindow: value, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
+			}
+		}
+	} catch {
+		// fall through to default
 	}
-	return models.sort((left, right) => left.id.localeCompare(right.id));
+	return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
 }
 const OPENAI_NON_RESPONSES_PREFIXES = [