@oh-my-pi/pi-ai 14.2.0 → 14.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.3.0] - 2026-04-25
6
+
7
+ ### Added
8
+
9
+ - Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/can1357/oh-my-pi/issues/726))
10
+ - Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
11
+ - Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
12
+
13
+ ### Fixed
14
+
15
+ - Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
16
+ - Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/can1357/oh-my-pi/issues/678))
17
+
18
+ ## [14.2.1] - 2026-04-24
19
+
5
20
  ### Fixed
6
21
 
7
22
  - Fixed OpenAI Codex Spark OAuth selection to require a verified ChatGPT Pro account instead of falling back to Plus or unknown-plan accounts.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.2.0",
4
+ "version": "14.3.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -41,24 +41,24 @@
41
41
  "generate-models": "bun scripts/generate-models.ts"
42
42
  },
43
43
  "dependencies": {
44
- "@anthropic-ai/sdk": "^0.78",
45
- "@aws-sdk/client-bedrock-runtime": "^3",
46
- "@aws-sdk/credential-provider-node": "^3",
47
- "@bufbuild/protobuf": "^2.11",
48
- "@google/genai": "^1.43",
49
- "@oh-my-pi/pi-natives": "14.2.0",
50
- "@oh-my-pi/pi-utils": "14.2.0",
51
- "@sinclair/typebox": "^0.34",
52
- "@smithy/node-http-handler": "^4.4",
53
- "ajv": "^8.18",
54
- "ajv-formats": "^3.0",
55
- "openai": "^6.25",
56
- "partial-json": "^0.1",
57
- "proxy-agent": "^6.5",
44
+ "@anthropic-ai/sdk": "^0.91.1",
45
+ "@aws-sdk/client-bedrock-runtime": "^3.1037.0",
46
+ "@aws-sdk/credential-provider-node": "^3.972.36",
47
+ "@bufbuild/protobuf": "^2.12.0",
48
+ "@google/genai": "^1.50.1",
49
+ "@oh-my-pi/pi-natives": "14.3.0",
50
+ "@oh-my-pi/pi-utils": "14.3.0",
51
+ "@sinclair/typebox": "^0.34.49",
52
+ "@smithy/node-http-handler": "^4.6.1",
53
+ "ajv": "^8.20.0",
54
+ "ajv-formats": "^3.0.1",
55
+ "openai": "^6.34.0",
56
+ "partial-json": "^0.1.7",
57
+ "proxy-agent": "^8.0.1",
58
58
  "zod": "4.3.6"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3"
61
+ "@types/bun": "^1.3.13"
62
62
  },
63
63
  "engines": {
64
64
  "bun": ">=1.3.7"
@@ -154,19 +154,27 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
154
154
  }
155
155
 
156
156
  /**
157
- * Link `-spark` model variants to their base models for context promotion.
157
+ * Link OpenAI model variants to their context promotion targets.
158
158
  *
159
- * When a spark model's context is exhausted, the agent can promote to the
160
- * corresponding full model. This sets `contextPromotionTarget` on each
161
- * spark variant that has a matching base model.
159
+ * When a model's context is exhausted, the agent can promote to a sibling
160
+ * model with a larger context window on the same provider:
161
+ * - `-spark` variants promote to `gpt-5.5`.
162
+ * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
162
163
  */
163
- export function linkSparkPromotionTargets(models: ApiModel<Api>[]): void {
164
+ export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
164
165
  for (const candidate of models) {
165
166
  const parsedCandidate = parseKnownModel(candidate.id);
166
- if (parsedCandidate.family !== "openai" || parsedCandidate.variant !== "codex-spark") continue;
167
- const baseId = candidate.id.slice(0, -"-spark".length);
167
+ if (parsedCandidate.family !== "openai") continue;
168
+ let targetId: string | undefined;
169
+ if (parsedCandidate.variant === "codex-spark") {
170
+ targetId = "gpt-5.5";
171
+ } else if (parsedCandidate.variant === "base" && semverEqual(parsedCandidate.version, "5.5")) {
172
+ targetId = "gpt-5.4";
173
+ } else {
174
+ continue;
175
+ }
168
176
  const fallback = models.find(
169
- model => model.provider === candidate.provider && model.api === candidate.api && model.id === baseId,
177
+ model => model.provider === candidate.provider && model.api === candidate.api && model.id === targetId,
170
178
  );
171
179
  if (!fallback) continue;
172
180
  candidate.contextPromotionTarget = `${fallback.provider}/${fallback.id}`;
@@ -283,6 +291,17 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
283
291
  }
284
292
  }
285
293
 
294
+ /**
295
+ * Returns true for Anthropic Opus models at version 4.7 or later, which have these API restrictions:
296
+ * - Requests that set sampling parameters (temperature/top_p/top_k) are rejected with a 400 error
297
+ * - Thinking content is omitted by default (needs display: "summarized")
298
+ */
299
+ export function hasOpus47ApiRestrictions(modelId: string): boolean {
300
+ const parsed = parseAnthropicModel(getCanonicalModelId(modelId));
301
+ if (!parsed) return false;
302
+ return semverGte(parsed.version, "4.7") && parsed.kind === "opus";
303
+ }
304
+
286
305
  function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
287
306
  if (model.api !== "anthropic-messages") return false;
288
307
  const parsedModel = parseKnownModel(model.id);
package/src/models.json CHANGED
@@ -16931,7 +16931,7 @@
16931
16931
  },
16932
16932
  "contextWindow": 128000,
16933
16933
  "maxTokens": 128000,
16934
- "contextPromotionTarget": "litellm/gpt-5.3-codex",
16934
+ "contextPromotionTarget": "litellm/gpt-5.5",
16935
16935
  "thinking": {
16936
16936
  "mode": "effort",
16937
16937
  "minLevel": "low",
@@ -17011,7 +17011,8 @@
17011
17011
  "mode": "effort",
17012
17012
  "minLevel": "low",
17013
17013
  "maxLevel": "xhigh"
17014
- }
17014
+ },
17015
+ "contextPromotionTarget": "litellm/gpt-5.4"
17015
17016
  },
17016
17017
  "gpt-image-2": {
17017
17018
  "id": "gpt-image-2",
@@ -32938,7 +32939,7 @@
32938
32939
  "maxLevel": "xhigh"
32939
32940
  },
32940
32941
  "applyPatchToolType": "freeform",
32941
- "contextPromotionTarget": "openai/gpt-5.3-codex"
32942
+ "contextPromotionTarget": "openai/gpt-5.5"
32942
32943
  },
32943
32944
  "gpt-5.4": {
32944
32945
  "id": "gpt-5.4",
@@ -33068,7 +33069,8 @@
33068
33069
  "minLevel": "low",
33069
33070
  "maxLevel": "xhigh"
33070
33071
  },
33071
- "applyPatchToolType": "freeform"
33072
+ "applyPatchToolType": "freeform",
33073
+ "contextPromotionTarget": "openai/gpt-5.4"
33072
33074
  },
33073
33075
  "o1": {
33074
33076
  "id": "o1",
@@ -33597,7 +33599,7 @@
33597
33599
  },
33598
33600
  "contextWindow": 128000,
33599
33601
  "maxTokens": 128000,
33600
- "contextPromotionTarget": "openai-codex/gpt-5.3-codex",
33602
+ "contextPromotionTarget": "openai-codex/gpt-5.5",
33601
33603
  "thinking": {
33602
33604
  "mode": "effort",
33603
33605
  "minLevel": "low",
@@ -33715,7 +33717,8 @@
33715
33717
  "minLevel": "low",
33716
33718
  "maxLevel": "xhigh"
33717
33719
  },
33718
- "applyPatchToolType": "freeform"
33720
+ "applyPatchToolType": "freeform",
33721
+ "contextPromotionTarget": "openai-codex/gpt-5.4"
33719
33722
  }
33720
33723
  },
33721
33724
  "opencode": {
@@ -33765,8 +33768,7 @@
33765
33768
  "mode": "effort",
33766
33769
  "minLevel": "low",
33767
33770
  "maxLevel": "xhigh"
33768
- },
33769
- "contextPromotionTarget": "opencode/gpt-5.3-codex"
33771
+ }
33770
33772
  },
33771
33773
  "gpt-5.4": {
33772
33774
  "id": "gpt-5.4",
@@ -34828,8 +34830,7 @@
34828
34830
  "mode": "effort",
34829
34831
  "minLevel": "low",
34830
34832
  "maxLevel": "xhigh"
34831
- },
34832
- "contextPromotionTarget": "opencode-zen/gpt-5.3-codex"
34833
+ }
34833
34834
  },
34834
34835
  "gpt-5.4": {
34835
34836
  "id": "gpt-5.4",
@@ -246,26 +246,64 @@ async function fetchOllamaNativeModels(baseUrl: string): Promise<Model<"openai-r
246
246
  }
247
247
  const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
248
248
  const entries = payload.models ?? [];
249
- const models: Model<"openai-responses">[] = [];
250
- for (const entry of entries) {
251
- const id = entry.model ?? entry.name;
252
- if (!id) {
253
- continue;
254
- }
255
- models.push({
256
- id,
257
- name: entry.name ?? id,
258
- api: "openai-responses",
259
- provider: "ollama",
260
- baseUrl,
261
- reasoning: false,
262
- input: ["text"],
263
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
264
- contextWindow: 128000,
265
- maxTokens: 8192,
249
+ const resolved = await Promise.all(
250
+ entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
251
+ const id = entry.model ?? entry.name;
252
+ if (!id) return null;
253
+ const { contextWindow, maxTokens } = await fetchOllamaModelLimits(nativeBaseUrl, id);
254
+ return {
255
+ id,
256
+ name: entry.name ?? id,
257
+ api: "openai-responses",
258
+ provider: "ollama",
259
+ baseUrl,
260
+ reasoning: false,
261
+ input: ["text"],
262
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
263
+ contextWindow,
264
+ maxTokens,
265
+ };
266
+ }),
267
+ );
268
+ const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
269
+ return models.sort((left, right) => left.id.localeCompare(right.id));
270
+ }
271
+
272
+ /** Ollama's default `num_ctx` when the runtime request does not override it. */
273
+ const OLLAMA_DEFAULT_CONTEXT_WINDOW = 4096;
274
+ /** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
275
+ const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
276
+
277
+ /**
278
+ * Query Ollama's `/api/show` endpoint for a single model and pull its native
279
+ * context length out of `model_info.<arch>.context_length`. Falls back to
280
+ * Ollama's default context window when the endpoint or field is unavailable
281
+ * so discovery still succeeds against older Ollama builds.
282
+ */
283
+ async function fetchOllamaModelLimits(
284
+ nativeBaseUrl: string,
285
+ modelId: string,
286
+ ): Promise<{ contextWindow: number; maxTokens: number }> {
287
+ try {
288
+ const response = await fetch(`${nativeBaseUrl}/api/show`, {
289
+ method: "POST",
290
+ headers: { "Content-Type": "application/json", Accept: "application/json" },
291
+ body: JSON.stringify({ model: modelId }),
266
292
  });
293
+ if (!response.ok) {
294
+ return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
295
+ }
296
+ const payload = (await response.json()) as { model_info?: Record<string, unknown> };
297
+ const info = payload.model_info ?? {};
298
+ for (const [key, value] of Object.entries(info)) {
299
+ if (key.endsWith(".context_length") && typeof value === "number" && value > 0) {
300
+ return { contextWindow: value, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
301
+ }
302
+ }
303
+ } catch {
304
+ // fall through to default
267
305
  }
268
- return models.sort((left, right) => left.id.localeCompare(right.id));
306
+ return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
269
307
  }
270
308
 
271
309
  const OPENAI_NON_RESPONSES_PREFIXES = [
@@ -8,7 +8,7 @@ import type {
8
8
  MessageParam,
9
9
  } from "@anthropic-ai/sdk/resources/messages";
10
10
  import { $env, abortableSleep, isEnoent } from "@oh-my-pi/pi-utils";
11
- import { mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
11
+ import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
12
12
  import { calculateCost } from "../models";
13
13
  import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
14
14
  import type {
@@ -1421,6 +1421,13 @@ function buildParams(
1421
1421
  params.top_k = options.topK;
1422
1422
  }
1423
1423
 
1424
+ // Opus 4.7+ rejects non-default sampling parameters with a 400 error, so strip them from the request.
1425
+ if (hasOpus47ApiRestrictions(model.id)) {
1426
+ delete params.temperature;
1427
+ delete (params as AnthropicSamplingParams).top_p;
1428
+ delete (params as AnthropicSamplingParams).top_k;
1429
+ }
1430
+
1424
1431
  if (context.tools) {
1425
1432
  params.tools = convertTools(context.tools, isOAuthToken);
1426
1433
  }
@@ -2109,10 +2109,86 @@ function extractAssistantMessageText(msg: Message): string {
2109
2109
  }
2110
2110
 
2111
2111
  /**
2112
- * Convert context.messages to Cursor's serialized ConversationTurn format.
2112
+ * Derive a stable, UUID-formatted `message_id` from a content key.
2113
+ * Ensures identical historical messages hash to the same blob IDs across
2114
+ * requests, so `conversationBlobStores` does not grow unboundedly and
2115
+ * unchanged history reuses existing blob IDs.
2116
+ */
2117
+ function deterministicMessageId(key: string): string {
2118
+ const hex = createHash("sha256").update(key).digest("hex");
2119
+ return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
2120
+ }
2121
+
2122
+ /**
2123
+ * Index of the last user/developer message in `messages`, or -1 if none.
2124
+ * Used to exclude the current user turn from history builders — it goes in
2125
+ * `ConversationActionSchema.userMessageAction`, not in history structures.
2126
+ */
2127
+ function findLastUserMessageIndex(messages: Message[]): number {
2128
+ for (let i = messages.length - 1; i >= 0; i--) {
2129
+ const role = messages[i].role;
2130
+ if (role === "user" || role === "developer") {
2131
+ return i;
2132
+ }
2133
+ }
2134
+ return -1;
2135
+ }
2136
+
2137
+ /**
2138
+ * Build `ConversationStateStructure.rootPromptMessagesJson` blob IDs for the
2139
+ * system prompt plus prior conversation history, as JSON blobs matching
2140
+ * Cursor's internal Vercel-AI-SDK-shaped message format.
2141
+ *
2142
+ * Cursor's server uses `rootPromptMessagesJson` (not `turns[]`) to build the
2143
+ * actual model prompt. `turns[]` is UI/display metadata. Without populating
2144
+ * this field, multi-turn conversations lose prior context — the model sees
2145
+ * only an empty placeholder where historical user turns should be.
2146
+ * The last user message is excluded because it is sent in the action.
2147
+ */
2148
+ function buildRootPromptMessagesJson(
2149
+ messages: Message[],
2150
+ systemPromptId: Uint8Array,
2151
+ blobStore: Map<string, Uint8Array>,
2152
+ ): Uint8Array[] {
2153
+ const entries: Uint8Array[] = [systemPromptId];
2154
+ const lastUserIdx = findLastUserMessageIndex(messages);
2155
+
2156
+ const pushJson = (obj: unknown) => {
2157
+ const bytes = new TextEncoder().encode(JSON.stringify(obj));
2158
+ entries.push(storeCursorBlob(blobStore, bytes));
2159
+ };
2160
+
2161
+ for (let i = 0; i < messages.length; i++) {
2162
+ if (i === lastUserIdx) break;
2163
+ const msg = messages[i];
2164
+ if (msg.role === "user" || msg.role === "developer") {
2165
+ const text = extractUserMessageText(msg);
2166
+ if (!text) continue;
2167
+ pushJson({ role: "user", content: [{ type: "text", text }] });
2168
+ } else if (msg.role === "assistant") {
2169
+ const text = extractAssistantMessageText(msg);
2170
+ if (!text) continue;
2171
+ pushJson({ role: "assistant", content: [{ type: "text", text }] });
2172
+ } else if (msg.role === "toolResult") {
2173
+ const text = toolResultToText(msg);
2174
+ if (!text) continue;
2175
+ pushJson({
2176
+ role: "user",
2177
+ content: [{ type: "text", text: `[Tool Result]\n${text}` }],
2178
+ });
2179
+ }
2180
+ }
2181
+
2182
+ return entries;
2183
+ }
2184
+
2185
+ /**
2186
+ * Convert context.messages to Cursor's ConversationTurnStructure blob IDs.
2113
2187
  * Groups messages into turns: each turn is a user message followed by the assistant's response.
2114
2188
  * Excludes the last user message (which goes in the action).
2115
- * Returns blob IDs for ConversationStateStructure.turns field.
2189
+ *
2190
+ * Each `AgentConversationTurnStructure.user_message`, `steps[]`, and the outer
2191
+ * `ConversationStateStructure.turns[]` entry is a blob ID into `blobStore`.
2116
2192
  */
2117
2193
  function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint8Array>): Uint8Array[] {
2118
2194
  const turns: Uint8Array[] = [];
@@ -2149,10 +2225,10 @@ function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint
2149
2225
 
2150
2226
  const userMessage = create(UserMessageSchema, {
2151
2227
  text: userText,
2152
- messageId: crypto.randomUUID(),
2228
+ messageId: deterministicMessageId(`u:${turns.length}:${userText}`),
2153
2229
  });
2154
2230
  const userMessageBytes = toBinary(UserMessageSchema, userMessage);
2155
- const userMessageId = storeCursorBlob(blobStore, userMessageBytes);
2231
+ const userMessageBlobId = storeCursorBlob(blobStore, userMessageBytes);
2156
2232
 
2157
2233
  // Collect and serialize steps until next user message
2158
2234
  const stepBlobIds: Uint8Array[] = [];
@@ -2189,9 +2265,10 @@ function buildConversationTurns(messages: Message[], blobStore: Map<string, Uint
2189
2265
  i++;
2190
2266
  }
2191
2267
 
2192
- // Cursor stores turn parts in the KV blob channel; these fields carry blob IDs.
2268
+ // Create the serialized turn using Structure types. The bytes fields
2269
+ // (user_message, steps) are blob IDs resolved through the KV store.
2193
2270
  const agentTurn = create(AgentConversationTurnStructureSchema, {
2194
- userMessage: userMessageId,
2271
+ userMessage: userMessageBlobId,
2195
2272
  steps: stepBlobIds,
2196
2273
  });
2197
2274
  const turn = create(ConversationTurnStructureSchema, {
@@ -2254,15 +2331,21 @@ function buildGrpcRequest(
2254
2331
  },
2255
2332
  });
2256
2333
 
2257
- // Build conversation turns from prior messages (excluding the last user message)
2334
+ // Build conversation turns from prior messages (excluding the last user message).
2335
+ // This populates the UI-side history view (`turns[]`).
2258
2336
  const turns = buildConversationTurns(context.messages, blobStore);
2259
2337
 
2338
+ // Build `rootPromptMessagesJson` from prior messages. Cursor's server uses this
2339
+ // field (not `turns[]`) to construct the actual model prompt; if we only send the
2340
+ // system prompt here, multi-turn conversations lose prior context and the model
2341
+ // sees only the current user message.
2342
+ const rootPromptMessagesJson = buildRootPromptMessagesJson(context.messages, systemPromptId, blobStore);
2343
+
2344
+ // Preserve cached non-history state fields (todos, file states, summaries, etc.)
2345
+ // when the system prompt is unchanged; otherwise start fresh.
2260
2346
  const hasMatchingPrompt = state.conversationState?.rootPromptMessagesJson?.some(entry =>
2261
2347
  Buffer.from(entry).equals(systemPromptId),
2262
2348
  );
2263
-
2264
- // Use cached state if available and system prompt matches, but always update turns
2265
- // from context.messages to ensure full conversation history is sent
2266
2349
  const baseState =
2267
2350
  state.conversationState && hasMatchingPrompt
2268
2351
  ? state.conversationState
@@ -2281,10 +2364,13 @@ function buildGrpcRequest(
2281
2364
  readPaths: [],
2282
2365
  });
2283
2366
 
2284
- // Always populate turns from context.messages to ensure Cursor sees full conversation
2367
+ // Always override `rootPromptMessagesJson` and `turns` with content freshly built from
2368
+ // `context.messages`. The server-echoed checkpoint replaces historical user entries
2369
+ // with empty placeholders, so we cannot rely on the cached `rootPromptMessagesJson`.
2285
2370
  const conversationState = create(ConversationStateStructureSchema, {
2286
2371
  ...baseState,
2287
- turns: turns.length > 0 ? turns : baseState.turns,
2372
+ rootPromptMessagesJson,
2373
+ turns,
2288
2374
  });
2289
2375
 
2290
2376
  const modelDetails = create(ModelDetailsSchema, {