@oh-my-pi/pi-ai 11.5.1 → 11.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oh-my-pi/pi-ai",
|
|
3
|
-
"version": "11.5.1",
|
|
3
|
+
"version": "11.6.0",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
"@connectrpc/connect-node": "^2.1.1",
|
|
64
64
|
"@google/genai": "^1.39.0",
|
|
65
65
|
"@mistralai/mistralai": "^1.13.0",
|
|
66
|
-
"@oh-my-pi/pi-utils": "11.5.1",
|
|
66
|
+
"@oh-my-pi/pi-utils": "11.6.0",
|
|
67
67
|
"@sinclair/typebox": "^0.34.48",
|
|
68
68
|
"@smithy/node-http-handler": "^4.4.9",
|
|
69
69
|
"ajv": "^8.17.1",
|
package/src/index.ts
CHANGED
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
StopReason as BedrockStopReason,
|
|
5
5
|
type Tool as BedrockTool,
|
|
6
6
|
CachePointType,
|
|
7
|
+
CacheTTL,
|
|
7
8
|
type ContentBlock,
|
|
8
9
|
type ContentBlockDeltaEvent,
|
|
9
10
|
type ContentBlockStartEvent,
|
|
@@ -23,8 +24,10 @@ import { calculateCost } from "../models";
|
|
|
23
24
|
import type {
|
|
24
25
|
Api,
|
|
25
26
|
AssistantMessage,
|
|
27
|
+
CacheRetention,
|
|
26
28
|
Context,
|
|
27
29
|
Model,
|
|
30
|
+
SimpleStreamOptions,
|
|
28
31
|
StopReason,
|
|
29
32
|
StreamFunction,
|
|
30
33
|
StreamOptions,
|
|
@@ -99,6 +102,19 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
|
|
|
99
102
|
// in Node.js/Bun environment only
|
|
100
103
|
if (typeof process !== "undefined" && (process.versions?.node || process.versions?.bun)) {
|
|
101
104
|
config.region = config.region || $env.AWS_REGION || $env.AWS_DEFAULT_REGION;
|
|
105
|
+
|
|
106
|
+
// Support proxies that don't need authentication
|
|
107
|
+
if ($env.AWS_BEDROCK_SKIP_AUTH === "1") {
|
|
108
|
+
config.credentials = {
|
|
109
|
+
accessKeyId: "dummy-access-key",
|
|
110
|
+
secretAccessKey: "dummy-secret-key",
|
|
111
|
+
};
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
if ($env.AWS_BEDROCK_FORCE_HTTP1 === "1") {
|
|
115
|
+
const nodeHttpHandler = await import("@smithy/node-http-handler");
|
|
116
|
+
config.requestHandler = new nodeHttpHandler.NodeHttpHandler();
|
|
117
|
+
}
|
|
102
118
|
}
|
|
103
119
|
|
|
104
120
|
config.region = config.region || "us-east-1";
|
|
@@ -106,10 +122,12 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
|
|
|
106
122
|
try {
|
|
107
123
|
const client = new BedrockRuntimeClient(config);
|
|
108
124
|
|
|
125
|
+
const cacheRetention = resolveCacheRetention(options.cacheRetention);
|
|
126
|
+
|
|
109
127
|
const commandInput = {
|
|
110
128
|
modelId: model.id,
|
|
111
|
-
messages: convertMessages(context, model),
|
|
112
|
-
system: buildSystemPrompt(context.systemPrompt, model),
|
|
129
|
+
messages: convertMessages(context, model, cacheRetention),
|
|
130
|
+
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
|
|
113
131
|
inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
|
|
114
132
|
toolConfig: convertToolConfig(context.tools, options.toolChoice),
|
|
115
133
|
additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
|
|
@@ -301,11 +319,22 @@ function handleContentBlockStop(
|
|
|
301
319
|
}
|
|
302
320
|
}
|
|
303
321
|
|
|
322
|
+
/**
|
|
323
|
+
* Resolve cache retention preference.
|
|
324
|
+
* Defaults to "short" and uses PI_CACHE_RETENTION for backward compatibility.
|
|
325
|
+
*/
|
|
326
|
+
function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {
|
|
327
|
+
if (cacheRetention) return cacheRetention;
|
|
328
|
+
if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") return "long";
|
|
329
|
+
return "short";
|
|
330
|
+
}
|
|
331
|
+
|
|
304
332
|
/**
|
|
305
333
|
* Check if the model supports prompt caching.
|
|
306
334
|
* Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
|
|
307
335
|
*/
|
|
308
336
|
function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
|
|
337
|
+
if (model.cost.cacheRead || model.cost.cacheWrite) return true;
|
|
309
338
|
const id = model.id.toLowerCase();
|
|
310
339
|
// Claude 4.x models (opus-4, sonnet-4, haiku-4)
|
|
311
340
|
if (id.includes("claude") && (id.includes("-4-") || id.includes("-4."))) return true;
|
|
@@ -330,20 +359,27 @@ function supportsThinkingSignature(model: Model<"bedrock-converse-stream">): boo
|
|
|
330
359
|
function buildSystemPrompt(
|
|
331
360
|
systemPrompt: string | undefined,
|
|
332
361
|
model: Model<"bedrock-converse-stream">,
|
|
362
|
+
cacheRetention: CacheRetention,
|
|
333
363
|
): SystemContentBlock[] | undefined {
|
|
334
364
|
if (!systemPrompt) return undefined;
|
|
335
365
|
|
|
336
366
|
const blocks: SystemContentBlock[] = [{ text: sanitizeSurrogates(systemPrompt) }];
|
|
337
367
|
|
|
338
368
|
// Add cache point for supported Claude models
|
|
339
|
-
if (supportsPromptCaching(model)) {
|
|
340
|
-
blocks.push({
|
|
369
|
+
if (cacheRetention !== "none" && supportsPromptCaching(model)) {
|
|
370
|
+
blocks.push({
|
|
371
|
+
cachePoint: { type: CachePointType.DEFAULT, ...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}) },
|
|
372
|
+
});
|
|
341
373
|
}
|
|
342
374
|
|
|
343
375
|
return blocks;
|
|
344
376
|
}
|
|
345
377
|
|
|
346
|
-
function convertMessages(
|
|
378
|
+
function convertMessages(
|
|
379
|
+
context: Context,
|
|
380
|
+
model: Model<"bedrock-converse-stream">,
|
|
381
|
+
cacheRetention: CacheRetention,
|
|
382
|
+
): Message[] {
|
|
347
383
|
const result: Message[] = [];
|
|
348
384
|
const transformedMessages = transformMessages(context.messages, model);
|
|
349
385
|
|
|
@@ -494,10 +530,15 @@ function convertMessages(context: Context, model: Model<"bedrock-converse-stream
|
|
|
494
530
|
}
|
|
495
531
|
|
|
496
532
|
// Add cache point to the last user message for supported Claude models
|
|
497
|
-
if (supportsPromptCaching(model) && result.length > 0) {
|
|
533
|
+
if (cacheRetention !== "none" && supportsPromptCaching(model) && result.length > 0) {
|
|
498
534
|
const lastMessage = result[result.length - 1];
|
|
499
535
|
if (lastMessage.role === ConversationRole.USER && lastMessage.content) {
|
|
500
|
-
(lastMessage.content as ContentBlock[]).push({
|
|
536
|
+
(lastMessage.content as ContentBlock[]).push({
|
|
537
|
+
cachePoint: {
|
|
538
|
+
type: CachePointType.DEFAULT,
|
|
539
|
+
...(cacheRetention === "long" ? { ttl: CacheTTL.ONE_HOUR } : {}),
|
|
540
|
+
},
|
|
541
|
+
});
|
|
501
542
|
}
|
|
502
543
|
}
|
|
503
544
|
|
|
@@ -550,6 +591,28 @@ function mapStopReason(reason: string | undefined): StopReason {
|
|
|
550
591
|
}
|
|
551
592
|
}
|
|
552
593
|
|
|
594
|
+
/** Check if the model supports adaptive thinking (Opus 4.6+). */
|
|
595
|
+
function supportsAdaptiveThinking(modelId: string): boolean {
|
|
596
|
+
return modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
/** Map a thinking level to an adaptive effort value. */
|
|
600
|
+
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"]): "low" | "medium" | "high" | "max" {
|
|
601
|
+
switch (level) {
|
|
602
|
+
case "minimal":
|
|
603
|
+
case "low":
|
|
604
|
+
return "low";
|
|
605
|
+
case "medium":
|
|
606
|
+
return "medium";
|
|
607
|
+
case "high":
|
|
608
|
+
return "high";
|
|
609
|
+
case "xhigh":
|
|
610
|
+
return "max";
|
|
611
|
+
default:
|
|
612
|
+
return "high";
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
|
|
553
616
|
function buildAdditionalModelRequestFields(
|
|
554
617
|
model: Model<"bedrock-converse-stream">,
|
|
555
618
|
options: BedrockOptions,
|
|
@@ -559,6 +622,15 @@ function buildAdditionalModelRequestFields(
|
|
|
559
622
|
}
|
|
560
623
|
|
|
561
624
|
if (model.id.includes("anthropic.claude")) {
|
|
625
|
+
// Opus 4.6+ uses adaptive thinking with effort levels
|
|
626
|
+
if (supportsAdaptiveThinking(model.id)) {
|
|
627
|
+
const result: Record<string, any> = {
|
|
628
|
+
thinking: { type: "adaptive" },
|
|
629
|
+
output_config: { effort: mapThinkingLevelToEffort(options.reasoning) },
|
|
630
|
+
};
|
|
631
|
+
return result;
|
|
632
|
+
}
|
|
633
|
+
|
|
562
634
|
const defaultBudgets: Record<ThinkingLevel, number> = {
|
|
563
635
|
minimal: 1024,
|
|
564
636
|
low: 2048,
|
|
@@ -578,7 +650,7 @@ function buildAdditionalModelRequestFields(
|
|
|
578
650
|
},
|
|
579
651
|
};
|
|
580
652
|
|
|
581
|
-
if (options.interleavedThinking) {
|
|
653
|
+
if (options.interleavedThinking && !supportsAdaptiveThinking(model.id)) {
|
|
582
654
|
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
|
|
583
655
|
}
|
|
584
656
|
|
|
@@ -51,7 +51,7 @@ function clampReasoningEffort(model: string, effort: ReasoningConfig["effort"]):
|
|
|
51
51
|
return "high";
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
if ((modelId
|
|
54
|
+
if ((modelId.startsWith("gpt-5.2") || modelId.startsWith("gpt-5.3")) && effort === "minimal") {
|
|
55
55
|
return "low";
|
|
56
56
|
}
|
|
57
57
|
|
|
@@ -428,6 +428,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
|
|
|
428
428
|
stream: true,
|
|
429
429
|
prompt_cache_key: promptCacheKey,
|
|
430
430
|
prompt_cache_retention: promptCacheKey ? getPromptCacheRetention(model.baseUrl, cacheRetention) : undefined,
|
|
431
|
+
store: false,
|
|
431
432
|
};
|
|
432
433
|
|
|
433
434
|
if (options?.maxTokens) {
|