@oh-my-pi/pi-ai 11.5.1 → 11.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "11.5.1",
3
+ "version": "11.6.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -63,7 +63,7 @@
63
63
  "@connectrpc/connect-node": "^2.1.1",
64
64
  "@google/genai": "^1.39.0",
65
65
  "@mistralai/mistralai": "^1.13.0",
66
- "@oh-my-pi/pi-utils": "11.5.1",
66
+ "@oh-my-pi/pi-utils": "11.6.0",
67
67
  "@sinclair/typebox": "^0.34.48",
68
68
  "@smithy/node-http-handler": "^4.4.9",
69
69
  "ajv": "^8.17.1",
package/src/index.ts CHANGED
@@ -1,3 +1,5 @@
1
+ export type { Static, TSchema } from "@sinclair/typebox";
2
+ export { Type } from "@sinclair/typebox";
1
3
  export * from "./models";
2
4
  export * from "./providers/anthropic";
3
5
  export * from "./providers/azure-openai-responses";
@@ -4,6 +4,7 @@ import {
4
4
  StopReason as BedrockStopReason,
5
5
  type Tool as BedrockTool,
6
6
  CachePointType,
7
+ CacheTTL,
7
8
  type ContentBlock,
8
9
  type ContentBlockDeltaEvent,
9
10
  type ContentBlockStartEvent,
@@ -23,8 +24,10 @@ import { calculateCost } from "../models";
23
24
  import type {
24
25
  Api,
25
26
  AssistantMessage,
27
+ CacheRetention,
26
28
  Context,
27
29
  Model,
30
+ SimpleStreamOptions,
28
31
  StopReason,
29
32
  StreamFunction,
30
33
  StreamOptions,
@@ -99,6 +102,19 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
99
102
  // in Node.js/Bun environment only
100
103
  if (typeof process !== "undefined" && (process.versions?.node || process.versions?.bun)) {
101
104
  config.region = config.region || $env.AWS_REGION || $env.AWS_DEFAULT_REGION;
105
+
106
+ // Support proxies that don't need authentication
107
+ if ($env.AWS_BEDROCK_SKIP_AUTH === "1") {
108
+ config.credentials = {
109
+ accessKeyId: "dummy-access-key",
110
+ secretAccessKey: "dummy-secret-key",
111
+ };
112
+ }
113
+
114
+ if ($env.AWS_BEDROCK_FORCE_HTTP1 === "1") {
115
+ const nodeHttpHandler = await import("@smithy/node-http-handler");
116
+ config.requestHandler = new nodeHttpHandler.NodeHttpHandler();
117
+ }
102
118
  }
103
119
 
104
120
  config.region = config.region || "us-east-1";
@@ -106,10 +122,12 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
106
122
  try {
107
123
  const client = new BedrockRuntimeClient(config);
108
124
 
125
+ const cacheRetention = resolveCacheRetention(options.cacheRetention);
126
+
109
127
  const commandInput = {
110
128
  modelId: model.id,
111
- messages: convertMessages(context, model),
112
- system: buildSystemPrompt(context.systemPrompt, model),
129
+ messages: convertMessages(context, model, cacheRetention),
130
+ system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
113
131
  inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
114
132
  toolConfig: convertToolConfig(context.tools, options.toolChoice),
115
133
  additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
@@ -301,11 +319,22 @@ function handleContentBlockStop(
301
319
  }
302
320
  }
303
321
 
/**
 * Pick the effective cache retention for a request.
 *
 * An explicit (truthy) `cacheRetention` always wins. When none is given, the
 * `PI_CACHE_RETENTION` environment variable is consulted for backward
 * compatibility; only the exact value "long" is recognized there. Everything
 * else falls back to "short".
 *
 * @param cacheRetention - Caller-supplied preference, if any.
 * @returns The caller's preference, "long" when requested via the environment, otherwise "short".
 */
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
	if (cacheRetention) {
		return cacheRetention;
	}

	// `process` may not exist outside Node/Bun, so check for it before touching `process.env`.
	const envPreference = typeof process === "undefined" ? undefined : process.env.PI_CACHE_RETENTION;
	return envPreference === "long" ? "long" : "short";
}
331
+
304
332
  /**
305
333
  * Check if the model supports prompt caching.
306
334
  * Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
307
335
  */
308
336
  function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
337
+ if (model.cost.cacheRead || model.cost.cacheWrite) return true;
309
338
  const id = model.id.toLowerCase();
310
339
  // Claude 4.x models (opus-4, sonnet-4, haiku-4)
311
340
  if (id.includes("claude") && (id.includes("-4-") || id.includes("-4."))) return true;
@@ -330,20 +359,27 @@ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boo
330
359
  function buildSystemPrompt(
331
360
  systemPrompt: string | undefined,
332
361
  model: Model<"bedrock-converse-stream">,
362
+ cacheRetention: CacheRetention,
333
363
  ): SystemContentBlock[] | undefined {
334
364
  if (!systemPrompt) return undefined;
335
365
 
336
366
  const blocks: SystemContentBlock[] = [{ text: sanitizeSurrogates(systemPrompt) }];
337
367
 
338
368
  // Add cache point for supported Claude models
339
- if (supportsPromptCaching(model)) {
340
- blocks.push({ cachePoint: { type: CachePointType.DEFAULT } });
369
+ if (cacheRetention !== "none" && supportsPromptCaching(model)) {
370
+ blocks.push({
371
+ cachePoint: { type: CachePointType.DEFAULT, ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}) },
372
+ });
341
373
  }
342
374
 
343
375
  return blocks;
344
376
  }
345
377
 
346
- function convertMessages(context: Context, model: Model<"bedrock-converse-stream">): Message[] {
378
+ function convertMessages(
379
+ context: Context,
380
+ model: Model<"bedrock-converse-stream">,
381
+ cacheRetention: CacheRetention,
382
+ ): Message[] {
347
383
  const result: Message[] = [];
348
384
  const transformedMessages = transformMessages(context.messages, model);
349
385
 
@@ -494,10 +530,15 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
494
530
  }
495
531
 
496
532
  // Add cache point to the last user message for supported Claude models
497
- if (supportsPromptCaching(model) && result.length > 0) {
533
+ if (cacheRetention !== "none" && supportsPromptCaching(model) && result.length > 0) {
498
534
  const lastMessage = result[result.length - 1];
499
535
  if (lastMessage.role === ConversationRole.USER && lastMessage.content) {
500
- (lastMessage.content as ContentBlock[]).push({ cachePoint: { type: CachePointType.DEFAULT } });
536
+ (lastMessage.content as ContentBlock[]).push({
537
+ cachePoint: {
538
+ type: CachePointType.DEFAULT,
539
+ ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
540
+ },
541
+ });
501
542
  }
502
543
  }
503
544
 
@@ -550,6 +591,28 @@ function mapStopReason(reason: string | undefined): StopReason {
550
591
  }
551
592
  }
552
593
 
/**
 * Check whether the model uses adaptive thinking.
 *
 * Only ids containing "opus-4-6" or "opus-4.6" are matched; later versions are
 * not detected by this check and have to be added here explicitly.
 * The id is lowercased before matching, the same way `supportsPromptCaching`
 * normalizes ids, so a mixed-case id cannot select different code paths in the
 * two checks.
 *
 * @param modelId - Bedrock model id.
 * @returns `true` when the id names an Opus 4.6 model.
 */
function supportsAdaptiveThinking(modelId: string): boolean {
	const id = modelId.toLowerCase();
	return id.includes("opus-4-6") || id.includes("opus-4.6");
}
598
+
/**
 * Translate a reasoning level into the effort value used with adaptive thinking.
 *
 * - "minimal" and "low" map to "low"
 * - "medium" maps to "medium"
 * - "xhigh" maps to "max"
 * - "high" and any other value (including `undefined`) map to "high"
 *
 * @param level - Requested reasoning level; may be absent.
 * @returns The matching effort value.
 */
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"]): "low" | "medium" | "high" | "max" {
	if (level === "minimal" || level === "low") {
		return "low";
	}
	if (level === "medium") {
		return "medium";
	}
	if (level === "xhigh") {
		return "max";
	}
	// "high" itself and every unrecognized or missing level share the same result.
	return "high";
}
615
+
553
616
  function buildAdditionalModelRequestFields(
554
617
  model: Model<"bedrock-converse-stream">,
555
618
  options: BedrockOptions,
@@ -559,6 +622,15 @@ function buildAdditionalModelRequestFields(
559
622
  }
560
623
 
561
624
  if (model.id.includes("anthropic.claude")) {
625
+ // Opus 4.6+ uses adaptive thinking with effort levels
626
+ if (supportsAdaptiveThinking(model.id)) {
627
+ const result: Record<string, any> = {
628
+ thinking: { type: "adaptive" },
629
+ output_config: { effort: mapThinkingLevelToEffort(options.reasoning) },
630
+ };
631
+ return result;
632
+ }
633
+
562
634
  const defaultBudgets: Record<ThinkingLevel, number> = {
563
635
  minimal: 1024,
564
636
  low: 2048,
@@ -578,7 +650,7 @@ function buildAdditionalModelRequestFields(
578
650
  },
579
651
  };
580
652
 
581
- if (options.interleavedThinking) {
653
+ if (options.interleavedThinking && !supportsAdaptiveThinking(model.id)) {
582
654
  result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
583
655
  }
584
656
 
@@ -51,7 +51,7 @@ function clampReasoningEffort(model: string, effort: ReasoningConfig["effort"]):
51
51
  return "high";
52
52
  }
53
53
 
54
- if ((modelId === "gpt-5.2" || modelId === "gpt-5.2-codex") && effort === "minimal") {
54
+ if ((modelId.startsWith("gpt-5.2") || modelId.startsWith("gpt-5.3")) && effort === "minimal") {
55
55
  return "low";
56
56
  }
57
57
 
@@ -428,6 +428,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
428
428
  stream: true,
429
429
  prompt_cache_key: promptCacheKey,
430
430
  prompt_cache_retention: promptCacheKey ? getPromptCacheRetention(model.baseUrl, cacheRetention) : undefined,
431
+ store: false,
431
432
  };
432
433
 
433
434
  if (options?.maxTokens) {