@oh-my-pi/pi-ai 8.13.0 → 9.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/providers/anthropic.ts +6 -5
- package/src/providers/google-gemini-cli.ts +1 -1
- package/src/providers/openai-completions.ts +1 -0
- package/src/providers/openai-responses.ts +17 -0
- package/src/providers/transform-messages.ts +21 -1
- package/src/utils/overflow.ts +13 -7
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "8.13.0",
+  "version": "9.1.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
@@ -63,7 +63,7 @@
   "@connectrpc/connect-node": "^2.1.1",
   "@google/genai": "^1.38.0",
   "@mistralai/mistralai": "^1.13.0",
-  "@oh-my-pi/pi-utils": "8.13.0",
+  "@oh-my-pi/pi-utils": "9.1.0",
   "@sinclair/typebox": "^0.34.48",
   "@smithy/node-http-handler": "^4.4.8",
   "ajv": "^8.17.1",
package/src/providers/anthropic.ts
CHANGED

@@ -914,7 +914,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.
 	});
 }

-function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
+function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {
 	switch (reason) {
 		case "end_turn":
 			return "stop";
@@ -928,9 +928,10 @@ function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
 			return "stop";
 		case "stop_sequence":
 			return "stop"; // We don't supply stop sequences, so this should never happen
-
-
-
-
+		case "sensitive": // Content flagged by safety filters (not yet in SDK types)
+			return "error";
+		default:
+			// Handle unknown stop reasons gracefully (API may add new values)
+			throw new Error(`Unhandled stop reason: ${reason}`);
 	}
 }
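Why widen the parameter to `Anthropic.Messages.StopReason | string`: the SDK's union type can lag behind the live API, which has started returning values such as "sensitive". A minimal sketch of the same pattern, runnable standalone; the StopReason union and its member names here are illustrative assumptions, not pi-ai's actual exports:

type StopReason = "stop" | "length" | "toolUse" | "error";

function mapStopReasonSketch(reason: string): StopReason {
	switch (reason) {
		case "end_turn":
		case "stop_sequence":
			return "stop";
		case "max_tokens":
			return "length";
		case "tool_use":
			return "toolUse";
		case "sensitive": // safety-filtered content, absent from current SDK types
			return "error";
		default:
			// Accepting plain strings lets unknown API values reach this branch
			// instead of being rejected by the TypeScript type checker upstream.
			throw new Error(`Unhandled stop reason: ${reason}`);
	}
}

mapStopReasonSketch("sensitive"); // "error"
// mapStopReasonSketch("some_future_reason") throws instead of silently misreporting.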
package/src/providers/google-gemini-cli.ts
CHANGED

@@ -70,7 +70,7 @@ const GEMINI_CLI_HEADERS = {

 // Headers for Antigravity (sandbox endpoint) - requires specific User-Agent
 const ANTIGRAVITY_HEADERS = {
-	"User-Agent": "antigravity/1.
+	"User-Agent": "antigravity/1.15.8 darwin/arm64",
 	"X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
 	"Client-Metadata": JSON.stringify({
 		ideType: "IDE_UNSPECIFIED",
package/src/providers/openai-completions.ts
CHANGED

@@ -789,6 +789,7 @@ function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat
 		provider === "mistral" ||
 		baseUrl.includes("mistral.ai") ||
 		baseUrl.includes("chutes.ai") ||
+		baseUrl.includes("deepseek.com") ||
 		isZai ||
 		provider === "opencode" ||
 		baseUrl.includes("opencode.ai");
package/src/providers/openai-responses.ts
CHANGED

@@ -33,6 +33,22 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
 import { transformMessages } from "./transform-messages";

+/**
+ * Get prompt cache retention based on PI_CACHE_RETENTION env var.
+ * Only applies to direct OpenAI API calls (api.openai.com).
+ * Returns '24h' for long retention, undefined for default (in-memory).
+ */
+function getPromptCacheRetention(baseUrl: string): "24h" | undefined {
+	if (
+		typeof process !== "undefined" &&
+		process.env.PI_CACHE_RETENTION === "long" &&
+		baseUrl.includes("api.openai.com")
+	) {
+		return "24h";
+	}
+	return undefined;
+}
+
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
 	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -395,6 +411,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
 		input: messages,
 		stream: true,
 		prompt_cache_key: options?.sessionId,
+		prompt_cache_retention: getPromptCacheRetention(model.baseUrl),
 	};

 	if (options?.maxTokens) {
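Taken together, the two hunks make 24-hour prompt cache retention an explicit opt-in that only fires against the official OpenAI endpoint. A hedged usage sketch of getPromptCacheRetention as added above (URLs invented for illustration; assumes a Node-style process.env):

process.env.PI_CACHE_RETENTION = "long";
getPromptCacheRetention("https://api.openai.com/v1");    // "24h" - direct OpenAI call
getPromptCacheRetention("https://proxy.example.com/v1"); // undefined - not api.openai.com

delete process.env.PI_CACHE_RETENTION;
getPromptCacheRetention("https://api.openai.com/v1");    // undefined - default in-memory retention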
package/src/providers/transform-messages.ts
CHANGED

@@ -6,13 +6,33 @@ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage
  * Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars).
  */
 function normalizeToolCallId(id: string): string {
+	// Handle pipe-separated IDs from OpenAI Responses API
+	// Format: {call_id}|{item_id} where {item_id} can be 400+ chars with special chars (+, /, =)
+	// Extract just the call_id part and normalize it
+	if (id.includes("|")) {
+		const [callId] = id.split("|");
+		// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
+		return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
+	}
 	return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
 }

 function normalizeResponsesToolCallId(id: string): string {
 	const [callId, itemId] = id.split("|");
 	if (callId && itemId) {
-
+		// Sanitize invalid characters and ensure proper format
+		const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
+		let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
+		// OpenAI Responses API requires item id to start with "fc"
+		if (!sanitizedItemId.startsWith("fc")) {
+			sanitizedItemId = `fc_${sanitizedItemId}`;
+		}
+		// Truncate to 64 chars and strip trailing underscores (OpenAI Codex rejects them)
+		let normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId;
+		let normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId;
+		normalizedCallId = normalizedCallId.replace(/_+$/, "");
+		normalizedItemId = normalizedItemId.replace(/_+$/, "");
+		return `${normalizedCallId}|${normalizedItemId}`;
 	}
 	const hash = Bun.hash.xxHash64(id).toString(36);
 	return `call_${hash}|item_${hash}`;
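A sketch of what the new branches produce for a pipe-separated Responses ID; the inputs are invented for illustration, the outputs follow the code above:

// call_id and item_id are sanitized, the item id gains the required "fc"
// prefix, and trailing underscores are stripped:
normalizeResponsesToolCallId("call_abc123|AAA+BBB/CCC==");
// => "call_abc123|fc_AAA_BBB_CCC"

// normalizeToolCallId keeps only the call_id part of a pipe-separated ID,
// capped at 40 chars:
normalizeToolCallId("call_abc123|" + "x".repeat(400));
// => "call_abc123"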
package/src/utils/overflow.ts
CHANGED

@@ -17,13 +17,16 @@ import type { AssistantMessage } from "../types";
  * - llama.cpp: "the request exceeds the available context size, try increasing it"
  * - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
  * - GitHub Copilot: "prompt token count of X exceeds the limit of Y"
- * -
- * -
+ * - MiniMax: "invalid params, context window exceeds limit"
+ * - Kimi For Coding: "Your request exceeded model token limit: X (requested: Y)"
+ * - Cerebras: Returns "400/413 status code (no body)" - handled separately below
+ * - Mistral: Returns "400/413 status code (no body)" - handled separately below
  * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
  * - Ollama: Silently truncates input - not detectable via error message
  */
 const OVERFLOW_PATTERNS = [
 	/prompt is too long/i, // Anthropic
+	/input is too long for requested model/i, // Amazon Bedrock
 	/exceeds the context window/i, // OpenAI (Completions & Responses API)
 	/input token count.*exceeds the maximum/i, // Google (Gemini)
 	/maximum prompt length is \d+/i, // xAI (Grok)
@@ -32,6 +35,8 @@ const OVERFLOW_PATTERNS = [
 	/exceeds the limit of \d+/i, // GitHub Copilot
 	/exceeds the available context size/i, // llama.cpp server
 	/greater than the context length/i, // LM Studio
+	/context window exceeds limit/i, // MiniMax
+	/exceeded model token limit/i, // Kimi For Coding
 	/context[_ ]length[_ ]exceeded/i, // Generic fallback
 	/too many tokens/i, // Generic fallback
 	/token limit exceeded/i, // Generic fallback
@@ -54,11 +59,12 @@ const OVERFLOW_PATTERNS = [
  * - Google Gemini: "input token count exceeds the maximum"
  * - xAI (Grok): "maximum prompt length is X but request contains Y"
  * - Groq: "reduce the length of the messages"
- * - Cerebras: 400/413
- * - Mistral: 400/413
+ * - Cerebras: 400/413 status code (no body)
+ * - Mistral: 400/413 status code (no body)
  * - OpenRouter (all backends): "maximum context length is X tokens"
  * - llama.cpp: "exceeds the available context size"
  * - LM Studio: "greater than the context length"
+ * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
  *
  * **Unreliable detection:**
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
@@ -89,9 +95,9 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num
 			return true;
 		}

-		// Cerebras and Mistral return 400/413
-		// 429
-		if (/^4(00|13
+		// Cerebras and Mistral return 400/413 with no body for context overflow
+		// Note: 429 is rate limiting (requests/tokens per time), NOT context overflow
+		if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) {
 			return true;
 		}
 	}
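A sketch of how the expanded pattern list classifies provider errors; the messages are quoted from the doc comment above, and the wrapper helper is illustrative, not part of the file:

const isOverflowMessage = (msg: string): boolean =>
	OVERFLOW_PATTERNS.some((pattern) => pattern.test(msg));

isOverflowMessage("invalid params, context window exceeds limit");    // true (MiniMax)
isOverflowMessage("Your request exceeded model token limit: 262144"); // true (Kimi For Coding)
isOverflowMessage("input is too long for requested model");           // true (Amazon Bedrock)

// The body-less Cerebras/Mistral case is caught by the status-line regex instead:
/^4(00|13)\s*(status code)?\s*\(no body\)/i.test("400 status code (no body)"); // true
/^4(00|13)\s*(status code)?\s*\(no body\)/i.test("429 (no body)"); // false - rate limit, not overflow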