npm - @oh-my-pi/pi-ai - Versions diffs - 14.2.1 → 14.3.0 - Mend

@oh-my-pi/pi-ai 14.2.1 → 14.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md +13 -0
package/package.json +16 -16
package/src/model-thinking.ts +27 -8
package/src/models.json +11 -10
package/src/provider-models/openai-compat.ts +56 -18
package/src/providers/anthropic.ts +8 -1
package/src/providers/cursor.ts +98 -12

package/CHANGELOG.md CHANGED

@@ -2,6 +2,19 @@
 ## [Unreleased]
+## [14.3.0] - 2026-04-25
+### Added
+- Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/can1357/oh-my-pi/issues/726))
+  - Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
+  - Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
+### Fixed
+- Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
+- Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/can1357/oh-my-pi/issues/678))
 ## [14.2.1] - 2026-04-24
 ### Fixed

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "14.2.1",
+	"version": "14.3.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -41,24 +41,24 @@
 		"generate-models": "bun scripts/generate-models.ts"
 	},
 	"dependencies": {
-		"@anthropic-ai/sdk": "^0.78",
-		"@aws-sdk/client-bedrock-runtime": "^3",
-		"@aws-sdk/credential-provider-node": "^3",
-		"@bufbuild/protobuf": "^2.11",
-		"@google/genai": "^1.43",
-		"@oh-my-pi/pi-natives": "14.2.1",
-		"@oh-my-pi/pi-utils": "14.2.1",
-		"@sinclair/typebox": "^0.34",
-		"@smithy/node-http-handler": "^4.4",
-		"ajv": "^8.18",
-		"ajv-formats": "^3.0",
-		"openai": "^6.25",
-		"partial-json": "^0.1",
-		"proxy-agent": "^6.5",
+		"@anthropic-ai/sdk": "^0.91.1",
+		"@aws-sdk/client-bedrock-runtime": "^3.1037.0",
+		"@aws-sdk/credential-provider-node": "^3.972.36",
+		"@bufbuild/protobuf": "^2.12.0",
+		"@google/genai": "^1.50.1",
+		"@oh-my-pi/pi-natives": "14.3.0",
+		"@oh-my-pi/pi-utils": "14.3.0",
+		"@sinclair/typebox": "^0.34.49",
+		"@smithy/node-http-handler": "^4.6.1",
+		"ajv": "^8.20.0",
+		"ajv-formats": "^3.0.1",
+		"openai": "^6.34.0",
+		"partial-json": "^0.1.7",
+		"proxy-agent": "^8.0.1",
 		"zod": "4.3.6"
 	},
 	"devDependencies": {
-		"@types/bun": "^1.3"
+		"@types/bun": "^1.3.13"
 	},
 	"engines": {
 		"bun": ">=1.3.7"

package/src/model-thinking.ts CHANGED

@@ -154,19 +154,27 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
 }
 /**
- * Link `-spark` model variants to their base models for context promotion.
+ * Link OpenAI model variants to their context promotion targets.
  *
- * When a spark model's context is exhausted, the agent can promote to the
- * corresponding full model. This sets `contextPromotionTarget` on each
- * spark variant that has a matching base model.
+ * When a model's context is exhausted, the agent can promote to a sibling
+ * model with a larger context window on the same provider:
+ * - `-spark` variants promote to `gpt-5.5`.
+ * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
  */
-export function linkSparkPromotionTargets(models: ApiModel<Api>[]): void {
+export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
 	for (const candidate of models) {
 		const parsedCandidate = parseKnownModel(candidate.id);
-		if (parsedCandidate.family !== "openai" || parsedCandidate.variant !== "codex-spark") continue;
-		const baseId = candidate.id.slice(0, -"-spark".length);
+		if (parsedCandidate.family !== "openai") continue;
+		let targetId: string | undefined;
+		if (parsedCandidate.variant === "codex-spark") {
+			targetId = "gpt-5.5";
+		} else if (parsedCandidate.variant === "base" && semverEqual(parsedCandidate.version, "5.5")) {
+			targetId = "gpt-5.4";
+		} else {
+			continue;
+		}
 		const fallback = models.find(
-			model => model.provider === candidate.provider && model.api === candidate.api && model.id === baseId,
+			model => model.provider === candidate.provider && model.api === candidate.api && model.id === targetId,
 		);
 		if (!fallback) continue;
 		candidate.contextPromotionTarget = `${fallback.provider}/${fallback.id}`;
@@ -283,6 +291,17 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
 	}
 }
+/**
+ * Returns true for Anthropic models with Opus 4.7 API restrictions:
+ * - Sampling parameters (temperature/top_p/top_k) return 400 error
+ * - Thinking content is omitted by default (needs display: "summarized")
+ */
+export function hasOpus47ApiRestrictions(modelId: string): boolean {
+	const parsed = parseAnthropicModel(getCanonicalModelId(modelId));
+	if (!parsed) return false;
+	return semverGte(parsed.version, "4.7") && parsed.kind === "opus";
+}
 function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
 	if (model.api !== "anthropic-messages") return false;
 	const parsedModel = parseKnownModel(model.id);

package/src/models.json CHANGED

@@ -16931,7 +16931,7 @@
 			},
 			"contextWindow": 128000,
 			"maxTokens": 128000,
-			"contextPromotionTarget": "litellm/gpt-5.3-codex",
+			"contextPromotionTarget": "litellm/gpt-5.5",
 			"thinking": {
 				"mode": "effort",
 				"minLevel": "low",
@@ -17011,7 +17011,8 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			}
+			},
+			"contextPromotionTarget": "litellm/gpt-5.4"
 		},
 		"gpt-image-2": {
 			"id": "gpt-image-2",
@@ -32938,7 +32939,7 @@
 				"maxLevel": "xhigh"
 			},
 			"applyPatchToolType": "freeform",
-			"contextPromotionTarget": "openai/gpt-5.3-codex"
+			"contextPromotionTarget": "openai/gpt-5.5"
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",
@@ -33068,7 +33069,8 @@
 				"minLevel": "low",
 				"maxLevel": "xhigh"
 			},
-			"applyPatchToolType": "freeform"
+			"applyPatchToolType": "freeform",
+			"contextPromotionTarget": "openai/gpt-5.4"
 		},
 		"o1": {
 			"id": "o1",
@@ -33597,7 +33599,7 @@
 			},
 			"contextWindow": 128000,
 			"maxTokens": 128000,
-			"contextPromotionTarget": "openai-codex/gpt-5.3-codex",
+			"contextPromotionTarget": "openai-codex/gpt-5.5",
 			"thinking": {
 				"mode": "effort",
 				"minLevel": "low",
@@ -33715,7 +33717,8 @@
 				"minLevel": "low",
 				"maxLevel": "xhigh"
 			},
-			"applyPatchToolType": "freeform"
+			"applyPatchToolType": "freeform",
+			"contextPromotionTarget": "openai-codex/gpt-5.4"
 		}
 	},
 	"opencode": {
@@ -33765,8 +33768,7 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			},
-			"contextPromotionTarget": "opencode/gpt-5.3-codex"
+			}
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",
@@ -34828,8 +34830,7 @@
 				"mode": "effort",
 				"minLevel": "low",
 				"maxLevel": "xhigh"
-			},
-			"contextPromotionTarget": "opencode-zen/gpt-5.3-codex"
+			}
 		},
 		"gpt-5.4": {
 			"id": "gpt-5.4",

package/src/provider-models/openai-compat.ts CHANGED

@@ -246,26 +246,64 @@ async function fetchOllamaNativeModels(baseUrl: string): Promise<Model<"openai-r
 	}
 	const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
 	const entries = payload.models ?? [];
-	const models: Model<"openai-responses">[] = [];
-	for (const entry of entries) {
-		const id = entry.model ?? entry.name;
-		if (!id) {
-			continue;
-		}
-		models.push({
-			id,
-			name: entry.name ?? id,
-			api: "openai-responses",
-			provider: "ollama",
-			baseUrl,
-			reasoning: false,
-			input: ["text"],
-			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-			contextWindow: 128000,
-			maxTokens: 8192,
+	const resolved = await Promise.all(
+		entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
+			const id = entry.model ?? entry.name;
+			if (!id) return null;
+			const { contextWindow, maxTokens } = await fetchOllamaModelLimits(nativeBaseUrl, id);
+			return {
+				id,
+				name: entry.name ?? id,
+				api: "openai-responses",
+				provider: "ollama",
+				baseUrl,
+				reasoning: false,
+				input: ["text"],
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+				contextWindow,
+				maxTokens,
+			};
+		}),
+	);
+	const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
+	return models.sort((left, right) => left.id.localeCompare(right.id));
+}
+/** Ollama's default `num_ctx` when the runtime request does not override it. */
+const OLLAMA_DEFAULT_CONTEXT_WINDOW = 4096;
+/** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
+const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
+/**
+ * Query Ollama's `/api/show` endpoint for a single model and pull its native
+ * context length out of `model_info.<arch>.context_length`. Falls back to
+ * Ollama's default context window when the endpoint or field is unavailable
+ * so discovery still succeeds against older Ollama builds.
+ */
+async function fetchOllamaModelLimits(
+	nativeBaseUrl: string,
+	modelId: string,
+): Promise<{ contextWindow: number; maxTokens: number }> {
+	try {
+		const response = await fetch(`${nativeBaseUrl}/api/show`, {
+			method: "POST",
+			headers: { "Content-Type": "application/json", Accept: "application/json" },
+			body: JSON.stringify({ model: modelId }),
 		});
+		if (!response.ok) {
+			return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
+		}
+		const payload = (await response.json()) as { model_info?: Record<string, unknown> };
+		const info = payload.model_info ?? {};
+		for (const [key, value] of Object.entries(info)) {
+			if (key.endsWith(".context_length") && typeof value === "number" && value > 0) {
+				return { contextWindow: value, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
+			}
+		}
+	} catch {
+		// fall through to default
 	}
-	return models.sort((left, right) => left.id.localeCompare(right.id));
+	return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
 }
 const OPENAI_NON_RESPONSES_PREFIXES = [

package/src/providers/anthropic.ts CHANGED

@@ -8,7 +8,7 @@ import type {
 	MessageParam,
 } from "@anthropic-ai/sdk/resources/messages";
 import { $env, abortableSleep, isEnoent } from "@oh-my-pi/pi-utils";
-import { mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
+import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
 import { calculateCost } from "../models";
 import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
 import type {
@@ -1421,6 +1421,13 @@ function buildParams(
 		params.top_k = options.topK;
 	}
+	// Opus 4.7+ rejects non-default sampling parameters with 400 error.
+	if (hasOpus47ApiRestrictions(model.id)) {
+		delete params.temperature;
+		delete (params as AnthropicSamplingParams).top_p;
+		delete (params as AnthropicSamplingParams).top_k;
+	}
 	if (context.tools) {
 		params.tools = convertTools(context.tools, isOAuthToken);
 	}

package/src/providers/cursor.ts CHANGED

@@ -2109,10 +2109,86 @@ function extractAssistantMessageText(msg: Message): string {
 }
 /**
- * Convert context.messages to Cursor's serialized ConversationTurn format.
+ * Derive a stable, UUID-formatted `message_id` from a content key.
+ * Ensures identical historical messages hash to the same blob IDs across
+ * requests, so `conversationBlobStores` does not grow unboundedly and
+ * unchanged history reuses existing blob IDs.
+ */
+function deterministicMessageId(key: string): string {
+	const hex = createHash("sha256").update(key).digest("hex");
+	return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
+}
+/**
+ * Index of the last user/developer message in `messages`, or -1 if none.
+ * Used to exclude the current user turn from history builders — it goes in
+ * `ConversationActionSchema.userMessageAction`, not in history structures.
+ */
+function findLastUserMessageIndex(messages: Message[]): number {
+	for (let i = messages.length - 1; i >= 0; i--) {
+		const role = messages[i].role;
+		if (role === "user" || role === "developer") {
+			return i;
+		}
+	}
+	return -1;
+}
+/**
+ * Build `ConversationStateStructure.rootPromptMessagesJson` blob IDs for the
+ * system prompt plus prior conversation history, as JSON blobs matching
+ * Cursor's internal Vercel-AI-SDK-shaped message format.
+ *
+ * Cursor's server uses `rootPromptMessagesJson` (not `turns[]`) to build the
+ * actual model prompt. `turns[]` is UI/display metadata. Without populating
+ * this field, multi-turn conversations lose prior context — the model sees
+ * only an empty placeholder where historical user turns should be.
+ * The last user message is excluded because it is sent in the action.
+ */
+function buildRootPromptMessagesJson(
+	messages: Message[],
+	systemPromptId: Uint8Array,
+	blobStore: Map<string, Uint8Array>,
+): Uint8Array[] {
+	const entries: Uint8Array[] = [systemPromptId];
+	const lastUserIdx = findLastUserMessageIndex(messages);
+	const pushJson = (obj: unknown) => {
+		const bytes = new TextEncoder().encode(JSON.stringify(obj));
+		entries.push(storeCursorBlob(blobStore, bytes));
+	};
+	for (let i = 0; i < messages.length; i++) {
+		if (i === lastUserIdx) break;
+		const msg = messages[i];
+		if (msg.role === "user" || msg.role === "developer") {
+			const text = extractUserMessageText(msg);
+			if (!text) continue;
+			pushJson({ role: "user", content: [{ type: "text", text }] });
+		} else if (msg.role === "assistant") {
+			const text = extractAssistantMessageText(msg);
+			if (!text) continue;
+			pushJson({ role: "assistant", content: [{ type: "text", text }] });
+		} else if (msg.role === "toolResult") {
+			const text = toolResultToText(msg);
+			if (!text) continue;
+			pushJson({
+				role: "user",
+				content: [{ type: "text", text: `[Tool Result]\n${text}` }],
+			});
+		}
+	}
+	return entries;
+}
+/**
+ * Convert context.messages to Cursor's ConversationTurnStructure blob IDs.
  * Groups messages into turns: each turn is a user message followed by the assistant's response.
  * Excludes the last user message (which goes in the action).
- * Returns blob IDs for ConversationStateStructure.turns field.
+ *
+ * Each `AgentConversationTurnStructure.user_message`, `steps[]`, and the outer
+ * `ConversationStateStructure.turns[]` entry is a blob ID into `blobStore`.
  */
 function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint8Array>): Uint8Array[] {
 	const turns: Uint8Array[] = [];
@@ -2149,10 +2225,10 @@ function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint
 		const userMessage = create(UserMessageSchema, {
 			text: userText,
-			messageId: crypto.randomUUID(),
+			messageId: deterministicMessageId(`u:${turns.length}:${userText}`),
 		});
 		const userMessageBytes = toBinary(UserMessageSchema, userMessage);
-		const userMessageId = storeCursorBlob(blobStore, userMessageBytes);
+		const userMessageBlobId = storeCursorBlob(blobStore, userMessageBytes);
 		// Collect and serialize steps until next user message
 		const stepBlobIds: Uint8Array[] = [];
@@ -2189,9 +2265,10 @@ function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint
 			i++;
 		}
-		// Cursor stores turn parts in the KV blob channel; these fields carry blob IDs.
+		// Create the serialized turn using Structure types. The bytes fields
+		// (user_message, steps) are blob IDs resolved through the KV store.
 		const agentTurn = create(AgentConversationTurnStructureSchema, {
-			userMessage: userMessageId,
+			userMessage: userMessageBlobId,
 			steps: stepBlobIds,
 		});
 		const turn = create(ConversationTurnStructureSchema, {
@@ -2254,15 +2331,21 @@ function buildGrpcRequest(
 		},
 	});
-	// Build conversation turns from prior messages (excluding the last user message)
+	// Build conversation turns from prior messages (excluding the last user message).
+	// This populates the UI-side history view (`turns[]`).
 	const turns = buildConversationTurns(context.messages, blobStore);
+	// Build `rootPromptMessagesJson` from prior messages. Cursor's server uses this
+	// field (not `turns[]`) to construct the actual model prompt; if we only send the
+	// system prompt here, multi-turn conversations lose prior context and the model
+	// sees only the current user message.
+	const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptId, blobStore);
+	// Preserve cached non-history state fields (todos, file states, summaries, etc.)
+	// when the system prompt is unchanged; otherwise start fresh.
 	const hasMatchingPrompt = state.conversationState?.rootPromptMessagesJson?.some(entry =>
 		Buffer.from(entry).equals(systemPromptId),
 	);
-	// Use cached state if available and system prompt matches, but always update turns
-	// from context.messages to ensure full conversation history is sent
 	const baseState =
 		state.conversationState && hasMatchingPrompt
 			? state.conversationState
@@ -2281,10 +2364,13 @@ function buildGrpcRequest(
 					readPaths: [],
 				});
-	// Always populate turns from context.messages to ensure Cursor sees full conversation
+	// Always override `rootPromptMessagesJson` and `turns` with content freshly built from
+	// `context.messages`. The server-echoed checkpoint replaces historical user entries
+	// with empty placeholders, so we cannot rely on the cached `rootPromptMessagesJson`.
 	const conversationState = create(ConversationStateStructureSchema, {
 		...baseState,
-		turns: turns.length > 0 ? turns : baseState.turns,
+		rootPromptMessagesJson,
+		turns,
 	});
 	const modelDetails = create(ModelDetailsSchema, {