@mariozechner/pi-ai 0.67.68 → 0.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/models.generated.d.ts +64 -35
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +77 -56
- package/dist/models.generated.js.map +1 -1
- package/dist/providers/amazon-bedrock.d.ts.map +1 -1
- package/dist/providers/amazon-bedrock.js +9 -1
- package/dist/providers/amazon-bedrock.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +1 -2
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/google-shared.d.ts.map +1 -1
- package/dist/providers/google-shared.js +30 -4
- package/dist/providers/google-shared.js.map +1 -1
- package/dist/providers/openai-completions.d.ts +5 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +149 -60
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses-shared.d.ts.map +1 -1
- package/dist/providers/openai-responses-shared.js +2 -5
- package/dist/providers/openai-responses-shared.js.map +1 -1
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +1 -1
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/providers/transform-messages.d.ts.map +1 -1
- package/dist/providers/transform-messages.js +41 -2
- package/dist/providers/transform-messages.js.map +1 -1
- package/dist/types.d.ts +4 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/oauth/anthropic.d.ts.map +1 -1
- package/dist/utils/oauth/anthropic.js +1 -1
- package/dist/utils/oauth/anthropic.js.map +1 -1
- package/dist/utils/oauth/google-antigravity.d.ts.map +1 -1
- package/dist/utils/oauth/google-antigravity.js +2 -1
- package/dist/utils/oauth/google-antigravity.js.map +1 -1
- package/dist/utils/oauth/google-gemini-cli.d.ts.map +1 -1
- package/dist/utils/oauth/google-gemini-cli.js +2 -1
- package/dist/utils/oauth/google-gemini-cli.js.map +1 -1
- package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
- package/dist/utils/oauth/openai-codex.js +3 -2
- package/dist/utils/oauth/openai-codex.js.map +1 -1
- package/package.json +1 -1
|
@@ -26,6 +26,27 @@ function hasToolHistory(messages) {
|
|
|
26
26
|
}
|
|
27
27
|
return false;
|
|
28
28
|
}
|
|
29
|
+
function isTextContentBlock(block) {
|
|
30
|
+
return block.type === "text";
|
|
31
|
+
}
|
|
32
|
+
function isThinkingContentBlock(block) {
|
|
33
|
+
return block.type === "thinking";
|
|
34
|
+
}
|
|
35
|
+
function isToolCallBlock(block) {
|
|
36
|
+
return block.type === "toolCall";
|
|
37
|
+
}
|
|
38
|
+
function isImageContentBlock(block) {
|
|
39
|
+
return block.type === "image";
|
|
40
|
+
}
|
|
41
|
+
function resolveCacheRetention(cacheRetention) {
|
|
42
|
+
if (cacheRetention) {
|
|
43
|
+
return cacheRetention;
|
|
44
|
+
}
|
|
45
|
+
if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
|
|
46
|
+
return "long";
|
|
47
|
+
}
|
|
48
|
+
return "short";
|
|
49
|
+
}
|
|
29
50
|
export const streamOpenAICompletions = (model, context, options) => {
|
|
30
51
|
const stream = new AssistantMessageEventStream();
|
|
31
52
|
(async () => {
|
|
@@ -48,8 +69,11 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
48
69
|
};
|
|
49
70
|
try {
|
|
50
71
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
51
|
-
const
|
|
52
|
-
|
|
72
|
+
const compat = getCompat(model);
|
|
73
|
+
const cacheRetention = resolveCacheRetention(options?.cacheRetention);
|
|
74
|
+
const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
|
|
75
|
+
const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
|
|
76
|
+
let params = buildParams(model, context, options, compat, cacheRetention);
|
|
53
77
|
const nextParams = await options?.onPayload?.(params, model);
|
|
54
78
|
if (nextParams !== undefined) {
|
|
55
79
|
params = nextParams;
|
|
@@ -270,7 +294,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
|
|
|
270
294
|
toolChoice,
|
|
271
295
|
});
|
|
272
296
|
};
|
|
273
|
-
function createClient(model, context, apiKey, optionsHeaders) {
|
|
297
|
+
function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
|
|
274
298
|
if (!apiKey) {
|
|
275
299
|
if (!process.env.OPENAI_API_KEY) {
|
|
276
300
|
throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
|
|
@@ -286,6 +310,11 @@ function createClient(model, context, apiKey, optionsHeaders) {
|
|
|
286
310
|
});
|
|
287
311
|
Object.assign(headers, copilotHeaders);
|
|
288
312
|
}
|
|
313
|
+
if (sessionId && compat.sendSessionAffinityHeaders) {
|
|
314
|
+
headers.session_id = sessionId;
|
|
315
|
+
headers["x-client-request-id"] = sessionId;
|
|
316
|
+
headers["x-session-affinity"] = sessionId;
|
|
317
|
+
}
|
|
289
318
|
// Merge options headers last so they can override defaults
|
|
290
319
|
if (optionsHeaders) {
|
|
291
320
|
Object.assign(headers, optionsHeaders);
|
|
@@ -297,14 +326,15 @@ function createClient(model, context, apiKey, optionsHeaders) {
|
|
|
297
326
|
defaultHeaders: headers,
|
|
298
327
|
});
|
|
299
328
|
}
|
|
300
|
-
function buildParams(model, context, options) {
|
|
301
|
-
const compat = getCompat(model);
|
|
329
|
+
function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
|
|
302
330
|
const messages = convertMessages(model, context, compat);
|
|
303
|
-
|
|
331
|
+
const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
|
|
304
332
|
const params = {
|
|
305
333
|
model: model.id,
|
|
306
334
|
messages,
|
|
307
335
|
stream: true,
|
|
336
|
+
prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
|
|
337
|
+
prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
|
|
308
338
|
};
|
|
309
339
|
if (compat.supportsUsageInStreaming !== false) {
|
|
310
340
|
params.stream_options = { include_usage: true };
|
|
@@ -333,6 +363,9 @@ function buildParams(model, context, options) {
|
|
|
333
363
|
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
|
|
334
364
|
params.tools = [];
|
|
335
365
|
}
|
|
366
|
+
if (cacheControl) {
|
|
367
|
+
applyAnthropicCacheControl(messages, params.tools, cacheControl);
|
|
368
|
+
}
|
|
336
369
|
if (options?.toolChoice) {
|
|
337
370
|
params.tool_choice = options.toolChoice;
|
|
338
371
|
}
|
|
@@ -385,34 +418,80 @@ function buildParams(model, context, options) {
|
|
|
385
418
|
function mapReasoningEffort(effort, reasoningEffortMap) {
|
|
386
419
|
return reasoningEffortMap[effort] ?? effort;
|
|
387
420
|
}
|
|
388
|
-
function
|
|
389
|
-
if (
|
|
390
|
-
return;
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
421
|
+
function getCompatCacheControl(model, compat, cacheRetention) {
|
|
422
|
+
if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
|
|
423
|
+
return undefined;
|
|
424
|
+
}
|
|
425
|
+
const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
|
|
426
|
+
return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
|
|
427
|
+
}
|
|
428
|
+
function applyAnthropicCacheControl(messages, tools, cacheControl) {
|
|
429
|
+
addCacheControlToSystemPrompt(messages, cacheControl);
|
|
430
|
+
addCacheControlToLastTool(tools, cacheControl);
|
|
431
|
+
addCacheControlToLastConversationMessage(messages, cacheControl);
|
|
432
|
+
}
|
|
433
|
+
function addCacheControlToSystemPrompt(messages, cacheControl) {
|
|
434
|
+
for (const message of messages) {
|
|
435
|
+
if (message.role === "system" || message.role === "developer") {
|
|
436
|
+
addCacheControlToInstructionMessage(message, cacheControl);
|
|
402
437
|
return;
|
|
403
438
|
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
function addCacheControlToLastConversationMessage(messages, cacheControl) {
|
|
442
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
443
|
+
const message = messages[i];
|
|
444
|
+
if (message.role === "user" || message.role === "assistant") {
|
|
445
|
+
if (addCacheControlToMessage(message, cacheControl)) {
|
|
411
446
|
return;
|
|
412
447
|
}
|
|
413
448
|
}
|
|
414
449
|
}
|
|
415
450
|
}
|
|
451
|
+
function addCacheControlToLastTool(tools, cacheControl) {
|
|
452
|
+
if (!tools || tools.length === 0) {
|
|
453
|
+
return;
|
|
454
|
+
}
|
|
455
|
+
const lastTool = tools[tools.length - 1];
|
|
456
|
+
lastTool.cache_control = cacheControl;
|
|
457
|
+
}
|
|
458
|
+
function addCacheControlToInstructionMessage(message, cacheControl) {
|
|
459
|
+
return addCacheControlToTextContent(message, cacheControl);
|
|
460
|
+
}
|
|
461
|
+
function addCacheControlToMessage(message, cacheControl) {
|
|
462
|
+
if (message.role === "user" || message.role === "assistant") {
|
|
463
|
+
return addCacheControlToTextContent(message, cacheControl);
|
|
464
|
+
}
|
|
465
|
+
return false;
|
|
466
|
+
}
|
|
467
|
+
function addCacheControlToTextContent(message, cacheControl) {
|
|
468
|
+
const content = message.content;
|
|
469
|
+
if (typeof content === "string") {
|
|
470
|
+
if (content.length === 0) {
|
|
471
|
+
return false;
|
|
472
|
+
}
|
|
473
|
+
message.content = [
|
|
474
|
+
{
|
|
475
|
+
type: "text",
|
|
476
|
+
text: content,
|
|
477
|
+
cache_control: cacheControl,
|
|
478
|
+
},
|
|
479
|
+
];
|
|
480
|
+
return true;
|
|
481
|
+
}
|
|
482
|
+
if (!Array.isArray(content)) {
|
|
483
|
+
return false;
|
|
484
|
+
}
|
|
485
|
+
for (let i = content.length - 1; i >= 0; i--) {
|
|
486
|
+
const part = content[i];
|
|
487
|
+
if (part?.type === "text") {
|
|
488
|
+
const textPart = part;
|
|
489
|
+
textPart.cache_control = cacheControl;
|
|
490
|
+
return true;
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
return false;
|
|
494
|
+
}
|
|
416
495
|
export function convertMessages(model, context, compat) {
|
|
417
496
|
const params = [];
|
|
418
497
|
const normalizeToolCallId = (id) => {
|
|
@@ -470,14 +549,11 @@ export function convertMessages(model, context, compat) {
|
|
|
470
549
|
};
|
|
471
550
|
}
|
|
472
551
|
});
|
|
473
|
-
|
|
474
|
-
? content.filter((c) => c.type !== "image_url")
|
|
475
|
-
: content;
|
|
476
|
-
if (filteredContent.length === 0)
|
|
552
|
+
if (content.length === 0)
|
|
477
553
|
continue;
|
|
478
554
|
params.push({
|
|
479
555
|
role: "user",
|
|
480
|
-
content
|
|
556
|
+
content,
|
|
481
557
|
});
|
|
482
558
|
}
|
|
483
559
|
}
|
|
@@ -487,42 +563,50 @@ export function convertMessages(model, context, compat) {
|
|
|
487
563
|
role: "assistant",
|
|
488
564
|
content: compat.requiresAssistantAfterToolResult ? "" : null,
|
|
489
565
|
};
|
|
490
|
-
const
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
// Handle thinking blocks
|
|
502
|
-
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking");
|
|
503
|
-
// Filter out empty thinking blocks to avoid API validation errors
|
|
504
|
-
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
|
|
566
|
+
const assistantTextParts = msg.content
|
|
567
|
+
.filter(isTextContentBlock)
|
|
568
|
+
.filter((block) => block.text.trim().length > 0)
|
|
569
|
+
.map((block) => ({
|
|
570
|
+
type: "text",
|
|
571
|
+
text: sanitizeSurrogates(block.text),
|
|
572
|
+
}));
|
|
573
|
+
const assistantText = assistantTextParts.map((part) => part.text).join("");
|
|
574
|
+
const nonEmptyThinkingBlocks = msg.content
|
|
575
|
+
.filter(isThinkingContentBlock)
|
|
576
|
+
.filter((block) => block.thinking.trim().length > 0);
|
|
505
577
|
if (nonEmptyThinkingBlocks.length > 0) {
|
|
506
578
|
if (compat.requiresThinkingAsText) {
|
|
507
579
|
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
508
|
-
const thinkingText = nonEmptyThinkingBlocks
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
}
|
|
513
|
-
else {
|
|
514
|
-
assistantMsg.content = [{ type: "text", text: thinkingText }];
|
|
515
|
-
}
|
|
580
|
+
const thinkingText = nonEmptyThinkingBlocks
|
|
581
|
+
.map((block) => sanitizeSurrogates(block.thinking))
|
|
582
|
+
.join("\n\n");
|
|
583
|
+
assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
|
|
516
584
|
}
|
|
517
585
|
else {
|
|
586
|
+
// Always send assistant content as a plain string (OpenAI Chat Completions
|
|
587
|
+
// API standard format). Sending as an array of {type:"text", text:"..."}
|
|
588
|
+
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
|
|
589
|
+
// NVIDIA NIM) to mirror the content-block structure literally in their
|
|
590
|
+
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
|
|
591
|
+
if (assistantText.length > 0) {
|
|
592
|
+
assistantMsg.content = assistantText;
|
|
593
|
+
}
|
|
518
594
|
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
|
|
519
595
|
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
|
520
596
|
if (signature && signature.length > 0) {
|
|
521
|
-
assistantMsg[signature] = nonEmptyThinkingBlocks.map((
|
|
597
|
+
assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
|
|
522
598
|
}
|
|
523
599
|
}
|
|
524
600
|
}
|
|
525
|
-
|
|
601
|
+
else if (assistantText.length > 0) {
|
|
602
|
+
// Always send assistant content as a plain string (OpenAI Chat Completions
|
|
603
|
+
// API standard format). Sending as an array of {type:"text", text:"..."}
|
|
604
|
+
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
|
|
605
|
+
// NVIDIA NIM) to mirror the content-block structure literally in their
|
|
606
|
+
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
|
|
607
|
+
assistantMsg.content = assistantText;
|
|
608
|
+
}
|
|
609
|
+
const toolCalls = msg.content.filter(isToolCallBlock);
|
|
526
610
|
if (toolCalls.length > 0) {
|
|
527
611
|
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
|
528
612
|
id: tc.id,
|
|
@@ -567,8 +651,8 @@ export function convertMessages(model, context, compat) {
|
|
|
567
651
|
const toolMsg = transformedMessages[j];
|
|
568
652
|
// Extract text and image content
|
|
569
653
|
const textResult = toolMsg.content
|
|
570
|
-
.filter(
|
|
571
|
-
.map((
|
|
654
|
+
.filter(isTextContentBlock)
|
|
655
|
+
.map((block) => block.text)
|
|
572
656
|
.join("\n");
|
|
573
657
|
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
|
574
658
|
// Always send tool result with text (or placeholder if only images)
|
|
@@ -585,7 +669,7 @@ export function convertMessages(model, context, compat) {
|
|
|
585
669
|
params.push(toolResultMsg);
|
|
586
670
|
if (hasImages && model.input.includes("image")) {
|
|
587
671
|
for (const block of toolMsg.content) {
|
|
588
|
-
if (block
|
|
672
|
+
if (isImageContentBlock(block)) {
|
|
589
673
|
imageBlocks.push({
|
|
590
674
|
type: "image_url",
|
|
591
675
|
image_url: {
|
|
@@ -707,6 +791,7 @@ function detectCompat(model) {
|
|
|
707
791
|
const useMaxTokens = baseUrl.includes("chutes.ai");
|
|
708
792
|
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
|
709
793
|
const isGroq = provider === "groq" || baseUrl.includes("groq.com");
|
|
794
|
+
const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
|
|
710
795
|
const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
|
|
711
796
|
? {
|
|
712
797
|
minimal: "default",
|
|
@@ -735,6 +820,8 @@ function detectCompat(model) {
|
|
|
735
820
|
vercelGatewayRouting: {},
|
|
736
821
|
zaiToolStream: false,
|
|
737
822
|
supportsStrictMode: true,
|
|
823
|
+
cacheControlFormat,
|
|
824
|
+
sendSessionAffinityHeaders: false,
|
|
738
825
|
};
|
|
739
826
|
}
|
|
740
827
|
/**
|
|
@@ -760,6 +847,8 @@ function getCompat(model) {
|
|
|
760
847
|
vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
|
761
848
|
zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
|
|
762
849
|
supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
|
850
|
+
cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
|
|
851
|
+
sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
|
|
763
852
|
};
|
|
764
853
|
}
|
|
765
854
|
//# sourceMappingURL=openai-completions.js.map
|