@oh-my-pi/pi-ai 8.13.0 → 9.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "8.13.0",
+  "version": "9.1.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
@@ -63,7 +63,7 @@
     "@connectrpc/connect-node": "^2.1.1",
     "@google/genai": "^1.38.0",
     "@mistralai/mistralai": "^1.13.0",
-    "@oh-my-pi/pi-utils": "8.13.0",
+    "@oh-my-pi/pi-utils": "9.1.0",
     "@sinclair/typebox": "^0.34.48",
     "@smithy/node-http-handler": "^4.4.8",
     "ajv": "^8.17.1",
@@ -914,7 +914,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.
 	});
 }
 
-function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
+function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {
 	switch (reason) {
 		case "end_turn":
 			return "stop";
@@ -928,9 +928,10 @@ function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
 			return "stop";
 		case "stop_sequence":
 			return "stop"; // We don't supply stop sequences, so this should never happen
-		default: {
-			const _exhaustive: never = reason;
-			throw new Error(`Unhandled stop reason: ${_exhaustive}`);
-		}
+		case "sensitive": // Content flagged by safety filters (not yet in SDK types)
+			return "error";
+		default:
+			// Handle unknown stop reasons gracefully (API may add new values)
+			throw new Error(`Unhandled stop reason: ${reason}`);
 	}
 }
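
For reference, a minimal sketch of the behavior after this hunk; the literal argument values below are illustrative, not taken from the SDK:

	mapStopReason("end_turn");      // "stop"
	mapStopReason("stop_sequence"); // "stop"
	mapStopReason("sensitive");     // "error" (new case for safety-filtered content)
	mapStopReason("made_up_value"); // throws: Unhandled stop reason: made_up_value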
@@ -70,7 +70,7 @@ const GEMINI_CLI_HEADERS = {
 
 // Headers for Antigravity (sandbox endpoint) - requires specific User-Agent
 const ANTIGRAVITY_HEADERS = {
-	"User-Agent": "antigravity/1.11.5 darwin/arm64",
+	"User-Agent": "antigravity/1.15.8 darwin/arm64",
 	"X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
 	"Client-Metadata": JSON.stringify({
 		ideType: "IDE_UNSPECIFIED",
@@ -789,6 +789,7 @@ function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat
 		provider === "mistral" ||
 		baseUrl.includes("mistral.ai") ||
 		baseUrl.includes("chutes.ai") ||
+		baseUrl.includes("deepseek.com") ||
 		isZai ||
 		provider === "opencode" ||
 		baseUrl.includes("opencode.ai");
@@ -33,6 +33,22 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
 import { transformMessages } from "./transform-messages";
 
+/**
+ * Get prompt cache retention based on PI_CACHE_RETENTION env var.
+ * Only applies to direct OpenAI API calls (api.openai.com).
+ * Returns '24h' for long retention, undefined for default (in-memory).
+ */
+function getPromptCacheRetention(baseUrl: string): "24h" | undefined {
+	if (
+		typeof process !== "undefined" &&
+		process.env.PI_CACHE_RETENTION === "long" &&
+		baseUrl.includes("api.openai.com")
+	) {
+		return "24h";
+	}
+	return undefined;
+}
+
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
 	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
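
A rough usage sketch of the new helper; the base URLs below are illustrative:

	process.env.PI_CACHE_RETENTION = "long";
	getPromptCacheRetention("https://api.openai.com/v1");   // "24h"
	getPromptCacheRetention("https://my-proxy.example/v1"); // undefined (default in-memory cache)

	delete process.env.PI_CACHE_RETENTION;
	getPromptCacheRetention("https://api.openai.com/v1");   // undefined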
@@ -395,6 +411,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
 		input: messages,
 		stream: true,
 		prompt_cache_key: options?.sessionId,
+		prompt_cache_retention: getPromptCacheRetention(model.baseUrl),
 	};
 
 	if (options?.maxTokens) {
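
Because an undefined property is dropped during JSON serialization, the field only reaches the wire for direct api.openai.com calls with PI_CACHE_RETENTION=long; a hypothetical request body would then look roughly like:

	// { "input": [...], "stream": true, "prompt_cache_key": "<sessionId>", "prompt_cache_retention": "24h" }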
@@ -6,13 +6,33 @@ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage
  * Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars).
  */
 function normalizeToolCallId(id: string): string {
+	// Handle pipe-separated IDs from OpenAI Responses API
+	// Format: {call_id}|{item_id} where {item_id} can be 400+ chars with special chars (+, /, =)
+	// Extract just the call_id part and normalize it
+	if (id.includes("|")) {
+		const [callId] = id.split("|");
+		// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
+		return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
+	}
 	return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
 }
 
 function normalizeResponsesToolCallId(id: string): string {
 	const [callId, itemId] = id.split("|");
 	if (callId && itemId) {
-		return id;
+		// Sanitize invalid characters and ensure proper format
+		const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
+		let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
+		// OpenAI Responses API requires item id to start with "fc"
+		if (!sanitizedItemId.startsWith("fc")) {
+			sanitizedItemId = `fc_${sanitizedItemId}`;
+		}
+		// Truncate to 64 chars and strip trailing underscores (OpenAI Codex rejects them)
+		let normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId;
+		let normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId;
+		normalizedCallId = normalizedCallId.replace(/_+$/, "");
+		normalizedItemId = normalizedItemId.replace(/_+$/, "");
+		return `${normalizedCallId}|${normalizedItemId}`;
 	}
 	const hash = Bun.hash.xxHash64(id).toString(36);
 	return `call_${hash}|item_${hash}`;
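
A worked example of the new normalization; the pipe-separated IDs below are made up to show the sanitization rules:

	normalizeToolCallId("call_abc+123|fc_item/xyz=");
	// -> "call_abc_123"               (call_id part only, sanitized, truncated to 40 chars)

	normalizeResponsesToolCallId("call_abc+123|fc_item/xyz=");
	// -> "call_abc_123|fc_item_xyz"   (both parts sanitized, trailing "_" from "=" stripped)

	normalizeResponsesToolCallId("call_abc|item_123");
	// -> "call_abc|fc_item_123"       (item id gains the required "fc" prefix)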
@@ -17,13 +17,16 @@ import type { AssistantMessage } from "../types";
  * - llama.cpp: "the request exceeds the available context size, try increasing it"
  * - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
  * - GitHub Copilot: "prompt token count of X exceeds the limit of Y"
- * - Cerebras: Returns "400 status code (no body)" - handled separately below
- * - Mistral: Returns "400 status code (no body)" - handled separately below
+ * - MiniMax: "invalid params, context window exceeds limit"
+ * - Kimi For Coding: "Your request exceeded model token limit: X (requested: Y)"
+ * - Cerebras: Returns "400/413 status code (no body)" - handled separately below
+ * - Mistral: Returns "400/413 status code (no body)" - handled separately below
  * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
  * - Ollama: Silently truncates input - not detectable via error message
  */
 const OVERFLOW_PATTERNS = [
 	/prompt is too long/i, // Anthropic
+	/input is too long for requested model/i, // Amazon Bedrock
 	/exceeds the context window/i, // OpenAI (Completions & Responses API)
 	/input token count.*exceeds the maximum/i, // Google (Gemini)
 	/maximum prompt length is \d+/i, // xAI (Grok)
@@ -32,6 +35,8 @@ const OVERFLOW_PATTERNS = [
 	/exceeds the limit of \d+/i, // GitHub Copilot
 	/exceeds the available context size/i, // llama.cpp server
 	/greater than the context length/i, // LM Studio
+	/context window exceeds limit/i, // MiniMax
+	/exceeded model token limit/i, // Kimi For Coding
 	/context[_ ]length[_ ]exceeded/i, // Generic fallback
 	/too many tokens/i, // Generic fallback
 	/token limit exceeded/i, // Generic fallback
@@ -54,11 +59,12 @@ const OVERFLOW_PATTERNS = [
  * - Google Gemini: "input token count exceeds the maximum"
  * - xAI (Grok): "maximum prompt length is X but request contains Y"
  * - Groq: "reduce the length of the messages"
- * - Cerebras: 400/413/429 status code (no body)
- * - Mistral: 400/413/429 status code (no body)
+ * - Cerebras: 400/413 status code (no body)
+ * - Mistral: 400/413 status code (no body)
  * - OpenRouter (all backends): "maximum context length is X tokens"
  * - llama.cpp: "exceeds the available context size"
  * - LM Studio: "greater than the context length"
+ * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
  *
  * **Unreliable detection:**
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
@@ -89,9 +95,9 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num
 			return true;
 		}
 
-		// Cerebras and Mistral return 400/413/429 with no body - check for status code pattern
-		// 429 can indicate token-based rate limiting which correlates with context overflow
-		if (/^4(00|13|29)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) {
+		// Cerebras and Mistral return 400/413 with no body for context overflow
+		// Note: 429 is rate limiting (requests/tokens per time), NOT context overflow
+		if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) {
 			return true;
 		}
 	}
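
A minimal sketch of how these checks are exercised, assuming isContextOverflow ultimately tests message.errorMessage against OVERFLOW_PATTERNS and the status-code regex; the error strings below are illustrative:

	const overflowMessages = [
		"Your request exceeded model token limit: 262144 (requested: 300000)", // Kimi For Coding
		"invalid params, context window exceeds limit",                        // MiniMax
		"input is too long for requested model",                               // Amazon Bedrock
		"400 status code (no body)",                                           // Cerebras / Mistral
	];
	overflowMessages.every((m) =>
		OVERFLOW_PATTERNS.some((p) => p.test(m)) ||
		/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(m),
	); // true

	// 429 with no body is now treated as rate limiting, not context overflow
	/^4(00|13)\s*(status code)?\s*\(no body\)/i.test("429 status code (no body)"); // false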