@oh-my-pi/pi-ai 12.4.0 → 12.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -8
- package/src/providers/openai-completions.ts +141 -36
- package/src/utils/overflow.ts +6 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oh-my-pi/pi-ai",
|
|
3
|
-
"version": "12.4.0",
|
|
3
|
+
"version": "12.5.0",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -57,19 +57,19 @@
|
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
59
|
"@anthropic-ai/sdk": "^0.74.0",
|
|
60
|
-
"@aws-sdk/client-bedrock-runtime": "^3.
|
|
60
|
+
"@aws-sdk/client-bedrock-runtime": "^3.990.0",
|
|
61
61
|
"@bufbuild/protobuf": "^2.11.0",
|
|
62
62
|
"@connectrpc/connect": "^2.1.1",
|
|
63
63
|
"@connectrpc/connect-node": "^2.1.1",
|
|
64
|
-
"@google/genai": "^1.
|
|
65
|
-
"@mistralai/mistralai": "^1.
|
|
66
|
-
"@oh-my-pi/pi-utils": "12.4.0",
|
|
64
|
+
"@google/genai": "^1.41.0",
|
|
65
|
+
"@mistralai/mistralai": "^1.14.0",
|
|
66
|
+
"@oh-my-pi/pi-utils": "12.5.0",
|
|
67
67
|
"@sinclair/typebox": "^0.34.48",
|
|
68
|
-
"@smithy/node-http-handler": "^4.4.
|
|
69
|
-
"ajv": "^8.
|
|
68
|
+
"@smithy/node-http-handler": "^4.4.10",
|
|
69
|
+
"ajv": "^8.18.0",
|
|
70
70
|
"ajv-formats": "^3.0.1",
|
|
71
71
|
"chalk": "^5.6.2",
|
|
72
|
-
"openai": "^6.
|
|
72
|
+
"openai": "^6.22.0",
|
|
73
73
|
"partial-json": "^0.1.7",
|
|
74
74
|
"zod-to-json-schema": "^3.25.1"
|
|
75
75
|
},
|
|
@@ -83,6 +83,44 @@ export interface OpenAICompletionsOptions extends StreamOptions {
|
|
|
83
83
|
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
// LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
|
|
87
|
+
// If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
|
|
88
|
+
// they will be incorrectly consumed as thinking delimiters, truncating visible output.
|
|
89
|
+
// A streaming parser with arbitrary chunk boundaries cannot reliably detect code block context.
|
|
90
|
+
// This is acceptable because: (1) only enabled for minimax-code providers, (2) MiniMax models
|
|
91
|
+
// use these tags as their actual thinking format, and (3) false positives are rare in practice.
|
|
92
|
+
/** Literal strings that open a tagged thinking section in streamed text content. */
const MINIMAX_THINK_OPEN_TAGS = ["<think>", "<thinking>"] as const;
|
|
93
|
+
/** Literal strings that close a tagged thinking section in streamed text content. */
const MINIMAX_THINK_CLOSE_TAGS = ["</think>", "</thinking>"] as const;
|
|
94
|
+
|
|
95
|
+
/**
 * Locates the earliest occurrence of any of the given tags in `text`.
 *
 * Every tag is searched with `indexOf`; the match with the smallest index wins. When two tags
 * match at the same index, the one listed first in `tags` is kept.
 *
 * Empty-string tags are ignored: they would "match" at index 0 of every input without
 * delimiting anything, and would otherwise hide a real tag found at the same or a later index.
 *
 * @param text - Text to scan.
 * @param tags - Candidate tag strings.
 * @returns The matched tag and its index in `text`, or `undefined` when no tag occurs.
 */
function findFirstTag(text: string, tags: readonly string[]): { index: number; tag: string } | undefined {
	let best: { index: number; tag: string } | undefined;
	for (const tag of tags) {
		// Skip empty tags so they can never mask a genuine match.
		if (tag.length === 0) continue;
		const index = text.indexOf(tag);
		if (index === -1) continue;
		// Strict comparison keeps the first-listed tag on ties.
		if (best === undefined || index < best.index) {
			best = { index, tag };
		}
	}
	return best;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Returns the longest suffix of `text` that is an incomplete prefix of one of `tags`.
 *
 * Only proper prefixes are considered (at most `tag.length - 1` characters), so a suffix that
 * is a complete tag on its own is not reported. Callers use the result to hold back text that
 * might turn into a full tag once more input arrives.
 *
 * @param text - Text whose tail is inspected.
 * @param tags - Candidate tag strings.
 * @returns The matching tail of `text`, or `""` when the tail starts no tag.
 */
function getTrailingPartialTag(text: string, tags: readonly string[]): string {
	let longest = 0;
	for (const tag of tags) {
		// Start from the longest proper prefix that fits and shrink until the tail matches.
		let size = Math.min(tag.length - 1, text.length);
		while (size > 0 && !text.endsWith(tag.slice(0, size))) {
			size--;
		}
		longest = Math.max(longest, size);
	}
	return longest === 0 ? "" : text.slice(-longest);
}
|
|
123
|
+
|
|
86
124
|
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
87
125
|
model: Model<"openai-completions">,
|
|
88
126
|
context: Context,
|
|
@@ -152,6 +190,93 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
152
190
|
}
|
|
153
191
|
};
|
|
154
192
|
|
|
193
|
+
const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
|
|
194
|
+
let taggedTextBuffer = "";
|
|
195
|
+
let insideTaggedThinking = false;
|
|
196
|
+
|
|
197
|
+
/**
 * Appends visible text to the current text block, opening a new one when needed.
 *
 * When there is no current block, or the current block is not a text block, the current block
 * is finished, a fresh empty text block is pushed onto `output.content`, and a `text_start`
 * event is emitted. The delta is then added to the block and a `text_delta` event is emitted.
 *
 * @param delta - Text to append; an empty string is ignored.
 */
const appendTextDelta = (delta: string) => {
	if (delta === "") return;

	if (currentBlock?.type !== "text") {
		// Close whatever block was open (if any) before switching to text.
		finishCurrentBlock(currentBlock);
		currentBlock = { type: "text", text: "" };
		output.content.push(currentBlock);
		stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
	}

	if (currentBlock.type !== "text") return;

	currentBlock.text += delta;
	stream.push({
		type: "text_delta",
		contentIndex: blockIndex(),
		delta,
		partial: output,
	});
};
|
|
215
|
+
|
|
216
|
+
/**
 * Appends reasoning text to the current thinking block, opening a new one when needed.
 *
 * A new thinking block is started (after finishing the current block) when there is no current
 * block, the current block is not a thinking block, or a `signature` is supplied that differs
 * from the current block's `thinkingSignature`. A delta without a signature always continues
 * the current thinking block. Opening a block emits `thinking_start`; every non-empty delta
 * emits `thinking_delta`.
 *
 * Because any supplied signature that differs from the block's forces a new block, a signed
 * delta is never merged into a block with a different or missing signature; the block's
 * signature therefore never needs patching after creation.
 *
 * @param delta - Reasoning text to append; an empty string is ignored.
 * @param signature - Optional value stored as the new block's `thinkingSignature` (the
 *   streaming loop passes the name of the delta field the reasoning text was read from).
 */
const appendThinkingDelta = (delta: string, signature?: string) => {
	if (delta.length === 0) return;
	if (
		!currentBlock ||
		currentBlock.type !== "thinking" ||
		(signature !== undefined && currentBlock.thinkingSignature !== signature)
	) {
		// Close whatever block was open (if any) before starting a new thinking block.
		finishCurrentBlock(currentBlock);
		currentBlock = {
			type: "thinking",
			thinking: "",
			thinkingSignature: signature,
		};
		output.content.push(currentBlock);
		stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
	}
	if (currentBlock.type === "thinking") {
		currentBlock.thinking += delta;
		stream.push({
			type: "thinking_delta",
			contentIndex: blockIndex(),
			delta,
			partial: output,
		});
	}
};
|
|
245
|
+
|
|
246
|
+
/**
 * Drains `taggedTextBuffer`, routing its content to text or thinking output.
 *
 * While outside a thinking section the buffer is scanned for an opening tag; while inside one
 * it is scanned for a closing tag. Content before a found tag is emitted to the current mode,
 * the tag itself is dropped, and the mode flips. When no full tag is present, everything except
 * a trailing fragment that could still grow into a tag is emitted, and that fragment stays in
 * the buffer for the next call.
 */
const flushTaggedTextBuffer = () => {
	while (taggedTextBuffer.length > 0) {
		// Pick the delimiter set and sink for the current mode before any state changes.
		const tags = insideTaggedThinking ? MINIMAX_THINK_CLOSE_TAGS : MINIMAX_THINK_OPEN_TAGS;
		const emit = insideTaggedThinking
			? (chunk: string) => appendThinkingDelta(chunk)
			: (chunk: string) => appendTextDelta(chunk);

		const match = findFirstTag(taggedTextBuffer, tags);
		if (!match) {
			// No complete tag: hold back only a tail that may be the start of one.
			const held = getTrailingPartialTag(taggedTextBuffer, tags);
			emit(taggedTextBuffer.slice(0, taggedTextBuffer.length - held.length));
			taggedTextBuffer = held;
			return;
		}

		emit(taggedTextBuffer.slice(0, match.index));
		taggedTextBuffer = taggedTextBuffer.slice(match.index + match.tag.length);
		insideTaggedThinking = !insideTaggedThinking;
	}
};
|
|
279
|
+
|
|
155
280
|
for await (const chunk of openaiStream) {
|
|
156
281
|
if (chunk.usage) {
|
|
157
282
|
// Check for cached_tokens at root level (Kimi) or in prompt_tokens_details (OpenAI)
|
|
@@ -196,21 +321,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
196
321
|
choice.delta.content.length > 0
|
|
197
322
|
) {
|
|
198
323
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
199
|
-
if (
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
if (currentBlock.type === "text") {
|
|
207
|
-
currentBlock.text += choice.delta.content;
|
|
208
|
-
stream.push({
|
|
209
|
-
type: "text_delta",
|
|
210
|
-
contentIndex: blockIndex(),
|
|
211
|
-
delta: choice.delta.content,
|
|
212
|
-
partial: output,
|
|
213
|
-
});
|
|
324
|
+
if (parseMiniMaxThinkTags) {
|
|
325
|
+
taggedTextBuffer += choice.delta.content;
|
|
326
|
+
flushTaggedTextBuffer();
|
|
327
|
+
} else {
|
|
328
|
+
appendTextDelta(choice.delta.content);
|
|
214
329
|
}
|
|
215
330
|
}
|
|
216
331
|
|
|
@@ -234,27 +349,8 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
234
349
|
}
|
|
235
350
|
|
|
236
351
|
if (foundReasoningField) {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
currentBlock = {
|
|
240
|
-
type: "thinking",
|
|
241
|
-
thinking: "",
|
|
242
|
-
thinkingSignature: foundReasoningField,
|
|
243
|
-
};
|
|
244
|
-
output.content.push(currentBlock);
|
|
245
|
-
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
if (currentBlock.type === "thinking") {
|
|
249
|
-
const delta = (choice.delta as any)[foundReasoningField];
|
|
250
|
-
currentBlock.thinking += delta;
|
|
251
|
-
stream.push({
|
|
252
|
-
type: "thinking_delta",
|
|
253
|
-
contentIndex: blockIndex(),
|
|
254
|
-
delta,
|
|
255
|
-
partial: output,
|
|
256
|
-
});
|
|
257
|
-
}
|
|
352
|
+
const delta = (choice.delta as any)[foundReasoningField];
|
|
353
|
+
appendThinkingDelta(delta, foundReasoningField);
|
|
258
354
|
}
|
|
259
355
|
|
|
260
356
|
if (choice?.delta?.tool_calls) {
|
|
@@ -311,6 +407,15 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
311
407
|
}
|
|
312
408
|
}
|
|
313
409
|
|
|
410
|
+
if (parseMiniMaxThinkTags && taggedTextBuffer.length > 0) {
|
|
411
|
+
if (insideTaggedThinking) {
|
|
412
|
+
appendThinkingDelta(taggedTextBuffer);
|
|
413
|
+
} else {
|
|
414
|
+
appendTextDelta(taggedTextBuffer);
|
|
415
|
+
}
|
|
416
|
+
taggedTextBuffer = "";
|
|
417
|
+
}
|
|
418
|
+
|
|
314
419
|
finishCurrentBlock(currentBlock);
|
|
315
420
|
|
|
316
421
|
if (options?.signal?.aborted) {
|
package/src/utils/overflow.ts
CHANGED
|
@@ -35,6 +35,10 @@ const OVERFLOW_PATTERNS = [
|
|
|
35
35
|
/maximum context length is \d+ tokens/i, // OpenRouter (all backends)
|
|
36
36
|
/exceeds the limit of \d+/i, // GitHub Copilot
|
|
37
37
|
/exceeds the available context size/i, // llama.cpp server
|
|
38
|
+
/requested tokens?.*exceed.*context (window|length|size)/i, // llama.cpp / OpenAI-compatible local servers
|
|
39
|
+
/context (window|length|size).*(exceeded|overflow|too small)/i, // Generic local server variants
|
|
40
|
+
/(prompt|input).*(too long|too large).*(context|n_ctx)/i, // llama.cpp phrasing variants
|
|
41
|
+
/requested tokens?.*(exceeds?|greater than).*(n_ctx|context)/i, // llama.cpp n_ctx variants
|
|
38
42
|
/greater than the context length/i, // LM Studio
|
|
39
43
|
/context window exceeds limit/i, // MiniMax
|
|
40
44
|
/exceeded model token limit/i, // Kimi For Coding
|
|
@@ -105,8 +109,8 @@ export function isContextOverflow(message: AssistantMessage, contextWindow?: num
|
|
|
105
109
|
}
|
|
106
110
|
}
|
|
107
111
|
|
|
108
|
-
// Case 2:
|
|
109
|
-
if (contextWindow
|
|
112
|
+
// Case 2: Usage-based overflow (silent or provider-specific)
|
|
113
|
+
if (contextWindow) {
|
|
110
114
|
const inputTokens = message.usage.input + message.usage.cacheRead;
|
|
111
115
|
if (inputTokens > contextWindow) {
|
|
112
116
|
return true;
|