@oh-my-pi/pi-ai 6.8.3 → 6.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "6.8.3",
3
+ "version": "6.8.5",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -17,7 +17,7 @@
17
17
  "test": "bun test"
18
18
  },
19
19
  "dependencies": {
20
- "@oh-my-pi/pi-utils": "6.8.3",
20
+ "@oh-my-pi/pi-utils": "6.8.5",
21
21
  "@anthropic-ai/sdk": "0.71.2",
22
22
  "@aws-sdk/client-bedrock-runtime": "^3.968.0",
23
23
  "@bufbuild/protobuf": "^2.10.2",
@@ -292,11 +292,21 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
292
292
  if (event.delta.stop_reason) {
293
293
  output.stopReason = mapStopReason(event.delta.stop_reason);
294
294
  }
295
- output.usage.input = event.usage.input_tokens || 0;
296
- output.usage.output = event.usage.output_tokens || 0;
297
- output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
298
- output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
299
- // Anthropic doesn't provide total_tokens, compute from components
295
+ // message_delta.usage only contains output_tokens (cumulative), not input_tokens
296
+ // Preserve input token counts from message_start, only update output
297
+ if (event.usage.output_tokens !== undefined && event.usage.output_tokens !== null) {
298
+ output.usage.output = event.usage.output_tokens;
299
+ }
300
+ // These fields may or may not be present in message_delta
301
+ if (event.usage.cache_read_input_tokens !== undefined && event.usage.cache_read_input_tokens !== null) {
302
+ output.usage.cacheRead = event.usage.cache_read_input_tokens;
303
+ }
304
+ if (
305
+ event.usage.cache_creation_input_tokens !== undefined &&
306
+ event.usage.cache_creation_input_tokens !== null
307
+ ) {
308
+ output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
309
+ }
300
310
  output.usage.totalTokens =
301
311
  output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
302
312
  calculateCost(model, output.usage);
@@ -468,16 +478,13 @@ export type AnthropicSystemBlock = {
468
478
  };
469
479
 
470
480
  type CacheControlBlock = {
471
- cache_control?: { type: "ephemeral" };
481
+ cache_control?: { type: "ephemeral" } | null;
472
482
  };
473
483
 
474
- type CacheControlMode = "none" | "toolBlocks" | "userText";
475
-
476
484
  const cacheControlEphemeral = { type: "ephemeral" as const };
477
485
 
478
486
  type SystemBlockOptions = {
479
487
  includeClaudeCodeInstruction?: boolean;
480
- includeCacheControl?: boolean;
481
488
  extraInstructions?: string[];
482
489
  };
483
490
 
@@ -485,17 +492,15 @@ export function buildAnthropicSystemBlocks(
485
492
  systemPrompt: string | undefined,
486
493
  options: SystemBlockOptions = {},
487
494
  ): AnthropicSystemBlock[] | undefined {
488
- const { includeClaudeCodeInstruction = false, includeCacheControl = true, extraInstructions = [] } = options;
495
+ const { includeClaudeCodeInstruction = false, extraInstructions = [] } = options;
489
496
  const blocks: AnthropicSystemBlock[] = [];
490
497
  const sanitizedPrompt = systemPrompt ? sanitizeSurrogates(systemPrompt) : "";
491
498
  const hasClaudeCodeInstruction = sanitizedPrompt.includes(claudeCodeSystemInstruction);
492
- const cacheControl = includeCacheControl ? { type: "ephemeral" as const } : undefined;
493
499
 
494
500
  if (includeClaudeCodeInstruction && !hasClaudeCodeInstruction) {
495
501
  blocks.push({
496
502
  type: "text",
497
503
  text: claudeCodeSystemInstruction,
498
- ...(cacheControl ? { cache_control: cacheControl } : {}),
499
504
  });
500
505
  }
501
506
 
@@ -505,7 +510,6 @@ export function buildAnthropicSystemBlocks(
505
510
  blocks.push({
506
511
  type: "text",
507
512
  text: trimmed,
508
- ...(cacheControl ? { cache_control: cacheControl } : {}),
509
513
  });
510
514
  }
511
515
 
@@ -513,7 +517,6 @@ export function buildAnthropicSystemBlocks(
513
517
  blocks.push({
514
518
  type: "text",
515
519
  text: sanitizedPrompt,
516
- ...(cacheControl ? { cache_control: cacheControl } : {}),
517
520
  });
518
521
  }
519
522
 
@@ -548,11 +551,9 @@ function buildParams(
548
551
  isOAuthToken: boolean,
549
552
  options?: AnthropicOptions,
550
553
  ): MessageCreateParamsStreaming {
551
- const hasTools = Boolean(context.tools?.length);
552
- const cacheControlMode = resolveCacheControlMode(context.messages, hasTools && isOAuthToken);
553
554
  const params: MessageCreateParamsStreaming = {
554
555
  model: model.id,
555
- messages: convertMessages(context.messages, model, isOAuthToken, cacheControlMode),
556
+ messages: convertMessages(context.messages, model, isOAuthToken),
556
557
  max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
557
558
  stream: true,
558
559
  };
@@ -560,7 +561,6 @@ function buildParams(
560
561
  const includeClaudeCodeSystem = !model.id.startsWith("claude-3-5-haiku");
561
562
  const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
562
563
  includeClaudeCodeInstruction: includeClaudeCodeSystem,
563
- includeCacheControl: cacheControlMode !== "none",
564
564
  });
565
565
  if (systemBlocks) {
566
566
  params.system = systemBlocks;
@@ -598,6 +598,8 @@ function buildParams(
598
598
  ensureMaxTokensForThinking(params, model);
599
599
  }
600
600
 
601
+ applyPromptCaching(params);
602
+
601
603
  return params;
602
604
  }
603
605
 
@@ -607,75 +609,141 @@ function sanitizeToolCallId(id: string): string {
607
609
  return id.replace(/[^a-zA-Z0-9_-]/g, "_");
608
610
  }
609
611
 
610
- function resolveCacheControlMode(messages: Message[], includeCacheControl: boolean): CacheControlMode {
611
- if (!includeCacheControl) return "none";
612
+ function stripCacheControl<T extends CacheControlBlock>(blocks: T[]): void {
613
+ for (const block of blocks) {
614
+ if ("cache_control" in block) {
615
+ delete block.cache_control;
616
+ }
617
+ }
618
+ }
619
+
620
+ function applyCacheControlToLastBlock<T extends CacheControlBlock>(blocks: T[]): void {
621
+ if (blocks.length === 0) return;
622
+ const lastIndex = blocks.length - 1;
623
+ blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControlEphemeral };
624
+ }
612
625
 
613
- for (const message of messages) {
614
- if (message.role === "toolResult") return "toolBlocks";
615
- if (message.role === "assistant") {
616
- const hasToolCall = message.content.some((block) => block.type === "toolCall");
617
- if (hasToolCall) return "toolBlocks";
626
+ function applyCacheControlToLastTextBlock(blocks: Array<ContentBlockParam & CacheControlBlock>): void {
627
+ if (blocks.length === 0) return;
628
+ for (let i = blocks.length - 1; i >= 0; i--) {
629
+ if (blocks[i].type === "text") {
630
+ blocks[i] = { ...blocks[i], cache_control: cacheControlEphemeral };
631
+ return;
618
632
  }
619
633
  }
634
+ applyCacheControlToLastBlock(blocks);
635
+ }
636
+
637
+ function applyPromptCaching(params: MessageCreateParamsStreaming): void {
638
+ // Anthropic allows max 4 cache breakpoints
639
+ const MAX_CACHE_BREAKPOINTS = 4;
620
640
 
621
- return "userText";
641
+ // First, strip ALL existing cache_control to ensure clean slate
642
+ if (params.tools) {
643
+ for (const tool of params.tools) {
644
+ delete (tool as CacheControlBlock).cache_control;
645
+ }
646
+ }
647
+
648
+ if (params.system && Array.isArray(params.system)) {
649
+ stripCacheControl(params.system);
650
+ }
651
+
652
+ for (const message of params.messages) {
653
+ if (Array.isArray(message.content)) {
654
+ stripCacheControl(message.content as Array<ContentBlockParam & CacheControlBlock>);
655
+ }
656
+ }
657
+
658
+ let cacheBreakpointsUsed = 0;
659
+
660
+ // Cache hierarchy order: tools -> system -> messages
661
+ // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
662
+
663
+ // 1. Cache tools - place breakpoint on last tool definition
664
+ if (params.tools && params.tools.length > 0) {
665
+ applyCacheControlToLastBlock(params.tools as Array<CacheControlBlock>);
666
+ cacheBreakpointsUsed++;
667
+ }
668
+
669
+ if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
670
+
671
+ // 2. Cache system prompt
672
+ if (params.system && Array.isArray(params.system) && params.system.length > 0) {
673
+ applyCacheControlToLastBlock(params.system);
674
+ cacheBreakpointsUsed++;
675
+ }
676
+
677
+ if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
678
+
679
+ // 3. Cache penultimate user message for conversation history caching
680
+ const userIndexes = params.messages
681
+ .map((message, index) => (message.role === "user" ? index : -1))
682
+ .filter((index) => index >= 0);
683
+
684
+ if (userIndexes.length >= 2) {
685
+ const penultimateUserIndex = userIndexes[userIndexes.length - 2];
686
+ const penultimateUser = params.messages[penultimateUserIndex];
687
+ if (penultimateUser) {
688
+ if (typeof penultimateUser.content === "string") {
689
+ penultimateUser.content = [
690
+ { type: "text", text: penultimateUser.content, cache_control: cacheControlEphemeral },
691
+ ];
692
+ cacheBreakpointsUsed++;
693
+ } else if (Array.isArray(penultimateUser.content) && penultimateUser.content.length > 0) {
694
+ applyCacheControlToLastTextBlock(penultimateUser.content as Array<ContentBlockParam & CacheControlBlock>);
695
+ cacheBreakpointsUsed++;
696
+ }
697
+ }
698
+ }
699
+
700
+ if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
701
+
702
+ // 4. Cache final user message for current turn (enables cache hit on next request)
703
+ if (userIndexes.length >= 1) {
704
+ const lastUserIndex = userIndexes[userIndexes.length - 1];
705
+ const lastUser = params.messages[lastUserIndex];
706
+ if (lastUser) {
707
+ if (typeof lastUser.content === "string") {
708
+ lastUser.content = [{ type: "text", text: lastUser.content, cache_control: cacheControlEphemeral }];
709
+ } else if (Array.isArray(lastUser.content) && lastUser.content.length > 0) {
710
+ applyCacheControlToLastTextBlock(lastUser.content as Array<ContentBlockParam & CacheControlBlock>);
711
+ }
712
+ }
713
+ }
622
714
  }
623
715
 
624
716
  function convertMessages(
625
717
  messages: Message[],
626
718
  model: Model<"anthropic-messages">,
627
719
  isOAuthToken: boolean,
628
- cacheControlMode: CacheControlMode,
629
720
  ): MessageParam[] {
630
721
  const params: MessageParam[] = [];
631
- const applyToolCacheControl = cacheControlMode === "toolBlocks";
632
- const applyUserTextCacheControl = cacheControlMode === "userText";
633
- const withCacheControl = <T extends object>(block: T, enabled: boolean): T | (T & CacheControlBlock) => {
634
- if (!enabled) return block;
635
- return { ...block, cache_control: cacheControlEphemeral };
636
- };
637
722
 
638
723
  // Transform messages for cross-provider compatibility
639
724
  const transformedMessages = transformMessages(messages, model);
640
-
641
725
  for (let i = 0; i < transformedMessages.length; i++) {
642
726
  const msg = transformedMessages[i];
643
727
 
644
728
  if (msg.role === "user") {
729
+ // Skip messages with undefined/null content
730
+ if (!msg.content) continue;
731
+
645
732
  if (typeof msg.content === "string") {
646
733
  if (msg.content.trim().length > 0) {
647
734
  const text = sanitizeSurrogates(msg.content);
648
- if (applyUserTextCacheControl) {
649
- const blocks: Array<ContentBlockParam & CacheControlBlock> = [
650
- withCacheControl(
651
- {
652
- type: "text",
653
- text,
654
- },
655
- true,
656
- ),
657
- ];
658
- params.push({
659
- role: "user",
660
- content: blocks,
661
- });
662
- } else {
663
- params.push({
664
- role: "user",
665
- content: text,
666
- });
667
- }
735
+ params.push({
736
+ role: "user",
737
+ content: text,
738
+ });
668
739
  }
669
- } else {
740
+ } else if (Array.isArray(msg.content)) {
670
741
  const blocks: Array<ContentBlockParam & CacheControlBlock> = msg.content.map((item) => {
671
742
  if (item.type === "text") {
672
- return withCacheControl(
673
- {
674
- type: "text",
675
- text: sanitizeSurrogates(item.text),
676
- },
677
- applyUserTextCacheControl,
678
- );
743
+ return {
744
+ type: "text",
745
+ text: sanitizeSurrogates(item.text),
746
+ };
679
747
  }
680
748
  return {
681
749
  type: "image",
@@ -700,6 +768,9 @@ function convertMessages(
700
768
  });
701
769
  }
702
770
  } else if (msg.role === "assistant") {
771
+ // Skip messages with undefined/null content
772
+ if (!msg.content || !Array.isArray(msg.content)) continue;
773
+
703
774
  const blocks: Array<ContentBlockParam & CacheControlBlock> = [];
704
775
 
705
776
  for (const block of msg.content) {
@@ -727,17 +798,12 @@ function convertMessages(
727
798
  });
728
799
  }
729
800
  } else if (block.type === "toolCall") {
730
- blocks.push(
731
- withCacheControl(
732
- {
733
- type: "tool_use",
734
- id: sanitizeToolCallId(block.id),
735
- name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
736
- input: block.arguments,
737
- },
738
- applyToolCacheControl,
739
- ),
740
- );
801
+ blocks.push({
802
+ type: "tool_use",
803
+ id: sanitizeToolCallId(block.id),
804
+ name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
805
+ input: block.arguments,
806
+ });
741
807
  }
742
808
  }
743
809
  if (blocks.length === 0) continue;
@@ -750,33 +816,23 @@ function convertMessages(
750
816
  const toolResults: Array<ContentBlockParam & CacheControlBlock> = [];
751
817
 
752
818
  // Add the current tool result
753
- toolResults.push(
754
- withCacheControl(
755
- {
756
- type: "tool_result",
757
- tool_use_id: sanitizeToolCallId(msg.toolCallId),
758
- content: convertContentBlocks(msg.content),
759
- is_error: msg.isError,
760
- },
761
- applyToolCacheControl,
762
- ),
763
- );
819
+ toolResults.push({
820
+ type: "tool_result",
821
+ tool_use_id: sanitizeToolCallId(msg.toolCallId),
822
+ content: convertContentBlocks(msg.content),
823
+ is_error: msg.isError,
824
+ });
764
825
 
765
826
  // Look ahead for consecutive toolResult messages
766
827
  let j = i + 1;
767
828
  while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
768
829
  const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
769
- toolResults.push(
770
- withCacheControl(
771
- {
772
- type: "tool_result",
773
- tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
774
- content: convertContentBlocks(nextMsg.content),
775
- is_error: nextMsg.isError,
776
- },
777
- applyToolCacheControl,
778
- ),
779
- );
830
+ toolResults.push({
831
+ type: "tool_result",
832
+ tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
833
+ content: convertContentBlocks(nextMsg.content),
834
+ is_error: nextMsg.isError,
835
+ });
780
836
  j++;
781
837
  }
782
838
 
@@ -784,14 +840,22 @@ function convertMessages(
784
840
  i = j - 1;
785
841
 
786
842
  // Add a single user message with all tool results
787
- params.push({
788
- role: "user",
789
- content: toolResults,
790
- });
843
+ if (toolResults.length > 0) {
844
+ params.push({
845
+ role: "user",
846
+ content: toolResults,
847
+ });
848
+ }
791
849
  }
792
850
  }
793
851
 
794
- return params;
852
+ // Final validation: filter out any messages with invalid content
853
+ return params.filter((msg) => {
854
+ if (!msg.content) return false;
855
+ if (typeof msg.content === "string") return msg.content.length > 0;
856
+ if (Array.isArray(msg.content)) return msg.content.length > 0;
857
+ return false;
858
+ });
795
859
  }
796
860
 
797
861
  function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
package/src/stream.ts CHANGED
@@ -215,11 +215,11 @@ export const OUTPUT_FALLBACK_BUFFER = 4000;
215
215
  const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
216
216
 
217
217
  const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
218
- minimal: 3072,
219
- low: 6144,
220
- medium: 12288,
221
- high: 24576,
222
- xhigh: 49152,
218
+ minimal: 1024,
219
+ low: 4096,
220
+ medium: 8192,
221
+ high: 16384,
222
+ xhigh: 32768,
223
223
  };
224
224
 
225
225
  const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
@@ -14,7 +14,7 @@
14
14
  import templateHtml from "./oauth.html" with { type: "text" };
15
15
  import type { OAuthController, OAuthCredentials } from "./types";
16
16
 
17
- const DEFAULT_TIMEOUT = 120;
17
+ const DEFAULT_TIMEOUT = 120_000;
18
18
  const DEFAULT_HOSTNAME = "localhost";
19
19
  const CALLBACK_PATH = "/callback";
20
20
 
@@ -182,7 +182,7 @@ export abstract class OAuthCallbackFlow {
182
182
  * Wait for OAuth callback or manual input (whichever comes first).
183
183
  */
184
184
  private waitForCallback(expectedState: string): Promise<CallbackResult> {
185
- const timeoutSignal = AbortSignal.timeout(DEFAULT_TIMEOUT * 1000);
185
+ const timeoutSignal = AbortSignal.timeout(DEFAULT_TIMEOUT);
186
186
  const signal = this.ctrl.signal ? AbortSignal.any([this.ctrl.signal, timeoutSignal]) : timeoutSignal;
187
187
 
188
188
  const callbackPromise = new Promise<CallbackResult>((resolve, reject) => {