@mariozechner/pi-ai 0.67.68 → 0.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +3 -1
  2. package/dist/models.generated.d.ts +64 -35
  3. package/dist/models.generated.d.ts.map +1 -1
  4. package/dist/models.generated.js +77 -56
  5. package/dist/models.generated.js.map +1 -1
  6. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  7. package/dist/providers/amazon-bedrock.js +9 -1
  8. package/dist/providers/amazon-bedrock.js.map +1 -1
  9. package/dist/providers/anthropic.d.ts.map +1 -1
  10. package/dist/providers/anthropic.js +1 -2
  11. package/dist/providers/anthropic.js.map +1 -1
  12. package/dist/providers/google-shared.d.ts.map +1 -1
  13. package/dist/providers/google-shared.js +30 -4
  14. package/dist/providers/google-shared.js.map +1 -1
  15. package/dist/providers/openai-completions.d.ts +5 -1
  16. package/dist/providers/openai-completions.d.ts.map +1 -1
  17. package/dist/providers/openai-completions.js +149 -60
  18. package/dist/providers/openai-completions.js.map +1 -1
  19. package/dist/providers/openai-responses-shared.d.ts.map +1 -1
  20. package/dist/providers/openai-responses-shared.js +2 -5
  21. package/dist/providers/openai-responses-shared.js.map +1 -1
  22. package/dist/providers/simple-options.d.ts.map +1 -1
  23. package/dist/providers/simple-options.js +1 -1
  24. package/dist/providers/simple-options.js.map +1 -1
  25. package/dist/providers/transform-messages.d.ts.map +1 -1
  26. package/dist/providers/transform-messages.js +41 -2
  27. package/dist/providers/transform-messages.js.map +1 -1
  28. package/dist/types.d.ts +4 -0
  29. package/dist/types.d.ts.map +1 -1
  30. package/dist/types.js.map +1 -1
  31. package/dist/utils/oauth/anthropic.d.ts.map +1 -1
  32. package/dist/utils/oauth/anthropic.js +1 -1
  33. package/dist/utils/oauth/anthropic.js.map +1 -1
  34. package/dist/utils/oauth/google-antigravity.d.ts.map +1 -1
  35. package/dist/utils/oauth/google-antigravity.js +2 -1
  36. package/dist/utils/oauth/google-antigravity.js.map +1 -1
  37. package/dist/utils/oauth/google-gemini-cli.d.ts.map +1 -1
  38. package/dist/utils/oauth/google-gemini-cli.js +2 -1
  39. package/dist/utils/oauth/google-gemini-cli.js.map +1 -1
  40. package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
  41. package/dist/utils/oauth/openai-codex.js +3 -2
  42. package/dist/utils/oauth/openai-codex.js.map +1 -1
  43. package/package.json +1 -1
package/dist/providers/openai-completions.js
@@ -26,6 +26,27 @@ function hasToolHistory(messages) {
  }
  return false;
  }
+ function isTextContentBlock(block) {
+ return block.type === "text";
+ }
+ function isThinkingContentBlock(block) {
+ return block.type === "thinking";
+ }
+ function isToolCallBlock(block) {
+ return block.type === "toolCall";
+ }
+ function isImageContentBlock(block) {
+ return block.type === "image";
+ }
+ function resolveCacheRetention(cacheRetention) {
+ if (cacheRetention) {
+ return cacheRetention;
+ }
+ if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
+ return "long";
+ }
+ return "short";
+ }
  export const streamOpenAICompletions = (model, context, options) => {
  const stream = new AssistantMessageEventStream();
  (async () => {
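Note: the new resolveCacheRetention helper resolves the retention mode in a fixed order: an explicit per-call option wins, then the PI_CACHE_RETENTION environment variable (only the value "long" is honored), otherwise it falls back to "short". A rough usage sketch in plain JavaScript, assuming it runs in the same module as the function above:

// Sketch of the precedence implemented by resolveCacheRetention above.
process.env.PI_CACHE_RETENTION = "long";
console.log(resolveCacheRetention("none"));     // "none"  - the explicit option always wins
console.log(resolveCacheRetention(undefined));  // "long"  - falls back to the env var
delete process.env.PI_CACHE_RETENTION;
console.log(resolveCacheRetention(undefined));  // "short" - default when nothing is set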
@@ -48,8 +69,11 @@ export const streamOpenAICompletions = (model, context, options) => {
  };
  try {
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
- const client = createClient(model, context, apiKey, options?.headers);
- let params = buildParams(model, context, options);
+ const compat = getCompat(model);
+ const cacheRetention = resolveCacheRetention(options?.cacheRetention);
+ const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
+ const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
+ let params = buildParams(model, context, options, compat, cacheRetention);
  const nextParams = await options?.onPayload?.(params, model);
  if (nextParams !== undefined) {
  params = nextParams;
@@ -270,7 +294,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
  toolChoice,
  });
  };
- function createClient(model, context, apiKey, optionsHeaders) {
+ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
  if (!apiKey) {
  if (!process.env.OPENAI_API_KEY) {
  throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
@@ -286,6 +310,11 @@ function createClient(model, context, apiKey, optionsHeaders) {
  });
  Object.assign(headers, copilotHeaders);
  }
+ if (sessionId && compat.sendSessionAffinityHeaders) {
+ headers.session_id = sessionId;
+ headers["x-client-request-id"] = sessionId;
+ headers["x-session-affinity"] = sessionId;
+ }
  // Merge options headers last so they can override defaults
  if (optionsHeaders) {
  Object.assign(headers, optionsHeaders);
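Note: because detectCompat defaults sendSessionAffinityHeaders to false (see the detectCompat hunks further down), these three headers are only sent for models whose compat explicitly opts in. A hedged sketch of such an opt-in; only the compat flag comes from this diff, while id/provider/baseUrl are illustrative placeholders rather than the package's actual model definitions:

// Hypothetical model entry opting into session affinity headers.
// Only compat.sendSessionAffinityHeaders is taken from the diff above;
// the other fields are placeholder values for illustration.
const model = {
    id: "example-model",
    provider: "custom-proxy",
    baseUrl: "https://llm.example.com/v1",
    compat: {
        sendSessionAffinityHeaders: true,
    },
};
// createClient(model, context, apiKey, headers, sessionId) would then attach
// session_id, x-client-request-id, and x-session-affinity to every request.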
@@ -297,14 +326,15 @@ function createClient(model, context, apiKey, optionsHeaders) {
  defaultHeaders: headers,
  });
  }
- function buildParams(model, context, options) {
- const compat = getCompat(model);
+ function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
  const messages = convertMessages(model, context, compat);
- maybeAddOpenRouterAnthropicCacheControl(model, messages);
+ const cacheControl = getCompatCacheControl(model, compat, cacheRetention);
  const params = {
  model: model.id,
  messages,
  stream: true,
+ prompt_cache_key: model.baseUrl.includes("api.openai.com") && cacheRetention !== "none" ? options?.sessionId : undefined,
+ prompt_cache_retention: model.baseUrl.includes("api.openai.com") && cacheRetention === "long" ? "24h" : undefined,
  };
  if (compat.supportsUsageInStreaming !== false) {
  params.stream_options = { include_usage: true };
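Note: for models served directly from api.openai.com, the cache-related request fields depend only on the resolved retention mode and the sessionId option. A summary sketch of the mapping (comments only, other params omitted):

// Cache-related params for a model whose baseUrl contains "api.openai.com":
//   cacheRetention "none"  -> prompt_cache_key: undefined,         prompt_cache_retention: undefined
//   cacheRetention "short" -> prompt_cache_key: options.sessionId, prompt_cache_retention: undefined
//   cacheRetention "long"  -> prompt_cache_key: options.sessionId, prompt_cache_retention: "24h"
// For any other baseUrl, both fields stay undefined.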
@@ -333,6 +363,9 @@ function buildParams(model, context, options) {
  // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
  params.tools = [];
  }
+ if (cacheControl) {
+ applyAnthropicCacheControl(messages, params.tools, cacheControl);
+ }
  if (options?.toolChoice) {
  params.tool_choice = options.toolChoice;
  }
@@ -385,34 +418,80 @@ function buildParams(model, context, options) {
  function mapReasoningEffort(effort, reasoningEffortMap) {
  return reasoningEffortMap[effort] ?? effort;
  }
- function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
- if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
- return;
- // Anthropic-style caching requires cache_control on a text part. Add a breakpoint
- // on the last user/assistant message (walking backwards until we find text content).
- for (let i = messages.length - 1; i >= 0; i--) {
- const msg = messages[i];
- if (msg.role !== "user" && msg.role !== "assistant")
- continue;
- const content = msg.content;
- if (typeof content === "string") {
- msg.content = [
- Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
- ];
+ function getCompatCacheControl(model, compat, cacheRetention) {
+ if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
+ return undefined;
+ }
+ const ttl = cacheRetention === "long" && model.baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
+ return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
+ }
+ function applyAnthropicCacheControl(messages, tools, cacheControl) {
+ addCacheControlToSystemPrompt(messages, cacheControl);
+ addCacheControlToLastTool(tools, cacheControl);
+ addCacheControlToLastConversationMessage(messages, cacheControl);
+ }
+ function addCacheControlToSystemPrompt(messages, cacheControl) {
+ for (const message of messages) {
+ if (message.role === "system" || message.role === "developer") {
+ addCacheControlToInstructionMessage(message, cacheControl);
  return;
  }
- if (!Array.isArray(content))
- continue;
- // Find last text part and add cache_control
- for (let j = content.length - 1; j >= 0; j--) {
- const part = content[j];
- if (part?.type === "text") {
- Object.assign(part, { cache_control: { type: "ephemeral" } });
+ }
+ }
+ function addCacheControlToLastConversationMessage(messages, cacheControl) {
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const message = messages[i];
+ if (message.role === "user" || message.role === "assistant") {
+ if (addCacheControlToMessage(message, cacheControl)) {
  return;
  }
  }
  }
  }
+ function addCacheControlToLastTool(tools, cacheControl) {
+ if (!tools || tools.length === 0) {
+ return;
+ }
+ const lastTool = tools[tools.length - 1];
+ lastTool.cache_control = cacheControl;
+ }
+ function addCacheControlToInstructionMessage(message, cacheControl) {
+ return addCacheControlToTextContent(message, cacheControl);
+ }
+ function addCacheControlToMessage(message, cacheControl) {
+ if (message.role === "user" || message.role === "assistant") {
+ return addCacheControlToTextContent(message, cacheControl);
+ }
+ return false;
+ }
+ function addCacheControlToTextContent(message, cacheControl) {
+ const content = message.content;
+ if (typeof content === "string") {
+ if (content.length === 0) {
+ return false;
+ }
+ message.content = [
+ {
+ type: "text",
+ text: content,
+ cache_control: cacheControl,
+ },
+ ];
+ return true;
+ }
+ if (!Array.isArray(content)) {
+ return false;
+ }
+ for (let i = content.length - 1; i >= 0; i--) {
+ const part = content[i];
+ if (part?.type === "text") {
+ const textPart = part;
+ textPart.cache_control = cacheControl;
+ return true;
+ }
+ }
+ return false;
+ }
  export function convertMessages(model, context, compat) {
  const params = [];
  const normalizeToolCallId = (id) => {
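Note: taken together, these helpers place up to three Anthropic-style cache breakpoints: on the system/developer prompt, on the last tool definition, and on the last user or assistant message with non-empty text. A small sketch of the effect, as if the module-internal helpers were callable directly; the message shapes follow the diff, and the tool entry is the usual Chat Completions function-tool format, used only for illustration:

// Sketch of applyAnthropicCacheControl on a tiny conversation.
const cacheControl = { type: "ephemeral" };
const messages = [
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Hello" },
];
const tools = [{ type: "function", function: { name: "read_file", parameters: { type: "object", properties: {} } } }];
applyAnthropicCacheControl(messages, tools, cacheControl);
// messages[0].content -> [{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } }]
// tools[0].cache_control -> { type: "ephemeral" }   (the last tool gets a breakpoint)
// messages[1].content -> [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }]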
@@ -470,14 +549,11 @@ export function convertMessages(model, context, compat) {
  };
  }
  });
- const filteredContent = !model.input.includes("image")
- ? content.filter((c) => c.type !== "image_url")
- : content;
- if (filteredContent.length === 0)
+ if (content.length === 0)
  continue;
  params.push({
  role: "user",
- content: filteredContent,
+ content,
  });
  }
  }
@@ -487,42 +563,50 @@ export function convertMessages(model, context, compat) {
  role: "assistant",
  content: compat.requiresAssistantAfterToolResult ? "" : null,
  };
- const textBlocks = msg.content.filter((b) => b.type === "text");
- // Filter out empty text blocks to avoid API validation errors
- const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
- if (nonEmptyTextBlocks.length > 0) {
- // Always send assistant content as a plain string (OpenAI Chat Completions
- // API standard format). Sending as an array of {type:"text", text:"..."}
- // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
- // NVIDIA NIM) to mirror the content-block structure literally in their
- // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
- assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
- }
- // Handle thinking blocks
- const thinkingBlocks = msg.content.filter((b) => b.type === "thinking");
- // Filter out empty thinking blocks to avoid API validation errors
- const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
+ const assistantTextParts = msg.content
+ .filter(isTextContentBlock)
+ .filter((block) => block.text.trim().length > 0)
+ .map((block) => ({
+ type: "text",
+ text: sanitizeSurrogates(block.text),
+ }));
+ const assistantText = assistantTextParts.map((part) => part.text).join("");
+ const nonEmptyThinkingBlocks = msg.content
+ .filter(isThinkingContentBlock)
+ .filter((block) => block.thinking.trim().length > 0);
  if (nonEmptyThinkingBlocks.length > 0) {
  if (compat.requiresThinkingAsText) {
  // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
- const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
- const textContent = assistantMsg.content;
- if (textContent) {
- textContent.unshift({ type: "text", text: thinkingText });
- }
- else {
- assistantMsg.content = [{ type: "text", text: thinkingText }];
- }
+ const thinkingText = nonEmptyThinkingBlocks
+ .map((block) => sanitizeSurrogates(block.thinking))
+ .join("\n\n");
+ assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
  }
  else {
+ // Always send assistant content as a plain string (OpenAI Chat Completions
+ // API standard format). Sending as an array of {type:"text", text:"..."}
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
+ // NVIDIA NIM) to mirror the content-block structure literally in their
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
+ if (assistantText.length > 0) {
+ assistantMsg.content = assistantText;
+ }
  // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
  const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
  if (signature && signature.length > 0) {
- assistantMsg[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
+ assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
  }
  }
  }
- const toolCalls = msg.content.filter((b) => b.type === "toolCall");
+ else if (assistantText.length > 0) {
+ // Always send assistant content as a plain string (OpenAI Chat Completions
+ // API standard format). Sending as an array of {type:"text", text:"..."}
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
+ // NVIDIA NIM) to mirror the content-block structure literally in their
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
+ assistantMsg.content = assistantText;
+ }
+ const toolCalls = msg.content.filter(isToolCallBlock);
  if (toolCalls.length > 0) {
  assistantMsg.tool_calls = toolCalls.map((tc) => ({
  id: tc.id,
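Note: the net effect of this hunk is that plain assistant text always goes out as a string in assistantMsg.content (never as content blocks), while non-empty thinking is either prepended as a text part (requiresThinkingAsText) or attached under the block's own thinkingSignature field. A shape sketch, with placeholder strings and the signature field kept symbolic; tool_calls are elided:

// compat.requiresThinkingAsText === true:
//   { role: "assistant", content: [{ type: "text", text: "<thinking>" }, { type: "text", text: "<answer>" }], ... }
// otherwise, when the first thinking block carries a thinkingSignature:
//   { role: "assistant", content: "<answer>", [thinkingSignature]: "<thinking>", ... }
// no thinking blocks at all:
//   { role: "assistant", content: "<answer>", ... }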
@@ -567,8 +651,8 @@ export function convertMessages(model, context, compat) {
  const toolMsg = transformedMessages[j];
  // Extract text and image content
  const textResult = toolMsg.content
- .filter((c) => c.type === "text")
- .map((c) => c.text)
+ .filter(isTextContentBlock)
+ .map((block) => block.text)
  .join("\n");
  const hasImages = toolMsg.content.some((c) => c.type === "image");
  // Always send tool result with text (or placeholder if only images)
@@ -585,7 +669,7 @@ export function convertMessages(model, context, compat) {
  params.push(toolResultMsg);
  if (hasImages && model.input.includes("image")) {
  for (const block of toolMsg.content) {
- if (block.type === "image") {
+ if (isImageContentBlock(block)) {
  imageBlocks.push({
  type: "image_url",
  image_url: {
@@ -707,6 +791,7 @@ function detectCompat(model) {
  const useMaxTokens = baseUrl.includes("chutes.ai");
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
  const isGroq = provider === "groq" || baseUrl.includes("groq.com");
+ const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
  const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
  ? {
  minimal: "default",
@@ -735,6 +820,8 @@ function detectCompat(model) {
  vercelGatewayRouting: {},
  zaiToolStream: false,
  supportsStrictMode: true,
+ cacheControlFormat,
+ sendSessionAffinityHeaders: false,
  };
  }
  /**
@@ -760,6 +847,8 @@ function getCompat(model) {
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
  zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
+ cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
+ sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
  };
  }
  //# sourceMappingURL=openai-completions.js.map