@oh-my-pi/pi-ai 5.0.1 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/models.ts CHANGED
@@ -12,12 +12,14 @@ for (const [provider, models] of Object.entries(MODELS)) {
12
12
  modelRegistry.set(provider, providerModels);
13
13
  }
14
14
 
15
+ type GeneratedProvider = keyof typeof MODELS;
16
+
15
17
  type ModelApi<
16
- TProvider extends KnownProvider,
18
+ TProvider extends GeneratedProvider,
17
19
  TModelId extends keyof (typeof MODELS)[TProvider],
18
20
  > = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
19
21
 
20
- export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof MODELS)[TProvider]>(
22
+ export function getModel<TProvider extends GeneratedProvider, TModelId extends keyof (typeof MODELS)[TProvider]>(
21
23
  provider: TProvider,
22
24
  modelId: TModelId,
23
25
  ): Model<ModelApi<TProvider, TModelId>> {
@@ -29,7 +31,7 @@ export function getProviders(): KnownProvider[] {
29
31
  return Array.from(modelRegistry.keys()) as KnownProvider[];
30
32
  }
31
33
 
32
- export function getModels<TProvider extends KnownProvider>(
34
+ export function getModels<TProvider extends GeneratedProvider>(
33
35
  provider: TProvider,
34
36
  ): Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[] {
35
37
  const models = modelRegistry.get(provider);
@@ -0,0 +1,549 @@
1
+ import {
2
+ BedrockRuntimeClient,
3
+ StopReason as BedrockStopReason,
4
+ type Tool as BedrockTool,
5
+ CachePointType,
6
+ type ContentBlock,
7
+ type ContentBlockDeltaEvent,
8
+ type ContentBlockStartEvent,
9
+ type ContentBlockStopEvent,
10
+ ConversationRole,
11
+ ConverseStreamCommand,
12
+ type ConverseStreamMetadataEvent,
13
+ ImageFormat,
14
+ type Message,
15
+ type SystemContentBlock,
16
+ type ToolChoice,
17
+ type ToolConfiguration,
18
+ ToolResultStatus,
19
+ } from "@aws-sdk/client-bedrock-runtime";
20
+
21
+ import { calculateCost } from "../models";
22
+ import type {
23
+ Api,
24
+ AssistantMessage,
25
+ Context,
26
+ Model,
27
+ StopReason,
28
+ StreamFunction,
29
+ StreamOptions,
30
+ TextContent,
31
+ ThinkingBudgets,
32
+ ThinkingContent,
33
+ ThinkingLevel,
34
+ Tool,
35
+ ToolCall,
36
+ ToolResultMessage,
37
+ } from "../types";
38
+ import { AssistantMessageEventStream } from "../utils/event-stream";
39
+ import { parseStreamingJson } from "../utils/json-parse";
40
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
41
+ import { transformMessages } from "./transform-messages";
42
+
43
+ export interface BedrockOptions extends StreamOptions {
44
+ region?: string; // AWS region; when unset the client falls back to AWS_REGION, then AWS_DEFAULT_REGION, then "us-east-1"
45
+ profile?: string; // Forwarded as the `profile` setting of the BedrockRuntimeClient
46
+ toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string }; // "none" omits the tool configuration from the request entirely
47
+ /** Requested reasoning level. See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
48
+ reasoning?: ThinkingLevel;
49
+ /** Custom token budgets per thinking level. A budget given here takes precedence over the built-in default for that level. */
50
+ thinkingBudgets?: ThinkingBudgets;
51
+ /** Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
52
+ interleavedThinking?: boolean;
53
+ }
54
+
55
// A content block while it is still being streamed. `index` holds the Bedrock
// `contentBlockIndex` the block was opened with and `partialJson` accumulates the
// raw tool-input JSON received so far; both are deleted once the block stops or
// the request fails.
+ type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };
56
+
57
/**
 * Streams a completion from Amazon Bedrock through the ConverseStream API.
 *
 * The event stream is returned immediately; the request itself runs in a
 * detached async task that pushes events onto the stream as they arrive.
 * Errors raised while sending the request or reading the response are not
 * thrown to the caller: they are reported as a single `error` event whose
 * message has `stopReason` set to "aborted" (when `options.signal` has fired)
 * or "error", together with `errorMessage`.
 *
 * @param model - Bedrock model to invoke; `model.id` is sent as the `modelId`.
 * @param context - System prompt, conversation messages and tool definitions.
 * @param options - Region/profile, sampling, tool-choice and reasoning settings.
 * @returns The event stream that receives start/delta/end/done/error events.
 */
export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
  model: Model<"bedrock-converse-stream">,
  context: Context,
  options: BedrockOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: "bedrock-converse-stream" as Api,
      provider: model.provider,
      model: model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "stop",
      timestamp: Date.now(),
    };

    // Same array as `output.content`, viewed with the streaming-only bookkeeping fields.
    const blocks = output.content as Block[];

    try {
      const client = new BedrockRuntimeClient({
        region: options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || "us-east-1",
        profile: options.profile,
      });

      const command = new ConverseStreamCommand({
        modelId: model.id,
        messages: convertMessages(context, model),
        system: buildSystemPrompt(context.systemPrompt, model),
        inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
        toolConfig: convertToolConfig(context.tools, options.toolChoice),
        additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
      });

      const response = await client.send(command, { abortSignal: options.signal });

      // `stream` is optional on the SDK output type; fail with a readable message
      // instead of the TypeError that iterating `undefined` would produce.
      if (!response.stream) {
        throw new Error("Bedrock returned no response stream");
      }

      for await (const item of response.stream) {
        if (item.messageStart) {
          if (item.messageStart.role !== ConversationRole.ASSISTANT) {
            throw new Error(
              `Expected assistant message start but got ${item.messageStart.role ?? "unknown"} message start instead`,
            );
          }
          stream.push({ type: "start", partial: output });
        } else if (item.contentBlockStart) {
          handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
        } else if (item.contentBlockDelta) {
          handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
        } else if (item.contentBlockStop) {
          handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
        } else if (item.messageStop) {
          output.stopReason = mapStopReason(item.messageStop.stopReason);
        } else if (item.metadata) {
          handleMetadata(item.metadata, model, output);
        } else if (item.internalServerException) {
          throw new Error(`Internal server error: ${item.internalServerException.message}`);
        } else if (item.modelStreamErrorException) {
          throw new Error(`Model stream error: ${item.modelStreamErrorException.message}`);
        } else if (item.validationException) {
          throw new Error(`Validation error: ${item.validationException.message}`);
        } else if (item.throttlingException) {
          throw new Error(`Throttling error: ${item.throttlingException.message}`);
        } else if (item.serviceUnavailableException) {
          throw new Error(`Service unavailable: ${item.serviceUnavailableException.message}`);
        }
      }

      if (options.signal?.aborted) {
        throw new Error("Request was aborted");
      }

      // `mapStopReason` yields "error" for stop reasons it does not recognise.
      if (output.stopReason === "error" || output.stopReason === "aborted") {
        throw new Error("An unknown error occurred");
      }

      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Strip the streaming bookkeeping fields from blocks that never received a stop event.
      for (const block of output.content) {
        delete (block as Block).index;
        delete (block as Block).partialJson;
      }
      output.stopReason = options.signal?.aborted ? "aborted" : "error";
      output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
155
+
156
/**
 * Handles a `contentBlockStart` event.
 *
 * Only tool-use starts are acted on: a new tool-call block (empty arguments,
 * empty partial JSON, tagged with the event's content block index) is appended
 * to the message and a `toolcall_start` event is emitted. Any other start
 * event is ignored.
 *
 * @param event - The start event received from Bedrock.
 * @param blocks - Streaming view of the message content, used to compute the content index.
 * @param output - Assistant message being assembled; the new block is appended to its content.
 * @param stream - Event stream that receives the `toolcall_start` event.
 */
function handleContentBlockStart(
  event: ContentBlockStartEvent,
  blocks: Block[],
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void {
  const toolUse = event.start?.toolUse;
  if (!toolUse) return;

  const toolCall: Block = {
    type: "toolCall",
    id: toolUse.toolUseId || "",
    name: toolUse.name || "",
    arguments: {},
    partialJson: "",
    index: event.contentBlockIndex!,
  };

  output.content.push(toolCall);
  stream.push({ type: "toolcall_start", contentIndex: blocks.length - 1, partial: output });
}
178
+
179
/**
 * Applies a `contentBlockDelta` event to the block it belongs to.
 *
 * - Text deltas are appended to the matching text block. The block is created
 *   on first use (emitting `text_start`) because no start event precedes it.
 * - Tool-use deltas extend the tool call's partial JSON, re-parse the
 *   arguments from it and emit `toolcall_delta`.
 * - Reasoning deltas are appended to the matching thinking block (created on
 *   first use, emitting `thinking_start`). Reasoning text emits
 *   `thinking_delta`; signature fragments are accumulated without an event.
 *
 * @param event - The delta event received from Bedrock.
 * @param blocks - Streaming view of the message content, searched by content block index.
 * @param output - Assistant message being assembled; new blocks are appended to its content.
 * @param stream - Event stream that receives the start/delta events.
 */
function handleContentBlockDelta(
  event: ContentBlockDeltaEvent,
  blocks: Block[],
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void {
  const { delta } = event;
  const sourceIndex = event.contentBlockIndex!;
  let position = blocks.findIndex((candidate) => candidate.index === sourceIndex);
  let current = blocks[position];

  if (delta?.text !== undefined) {
    if (!current) {
      const created: Block = { type: "text", text: "", index: sourceIndex };
      output.content.push(created);
      position = blocks.length - 1;
      current = blocks[position];
      stream.push({ type: "text_start", contentIndex: position, partial: output });
    }
    if (current.type === "text") {
      current.text += delta.text;
      stream.push({ type: "text_delta", contentIndex: position, delta: delta.text, partial: output });
    }
    return;
  }

  if (delta?.toolUse && current?.type === "toolCall") {
    current.partialJson = (current.partialJson || "") + (delta.toolUse.input || "");
    current.arguments = parseStreamingJson(current.partialJson);
    stream.push({
      type: "toolcall_delta",
      contentIndex: position,
      delta: delta.toolUse.input || "",
      partial: output,
    });
    return;
  }

  if (!delta?.reasoningContent) return;

  const reasoning = delta.reasoningContent;
  if (!current) {
    const created: Block = { type: "thinking", thinking: "", thinkingSignature: "", index: sourceIndex };
    output.content.push(created);
    position = blocks.length - 1;
    current = blocks[position];
    stream.push({ type: "thinking_start", contentIndex: position, partial: output });
  }

  if (current?.type !== "thinking") return;

  if (reasoning.text) {
    current.thinking += reasoning.text;
    stream.push({ type: "thinking_delta", contentIndex: position, delta: reasoning.text, partial: output });
  }
  if (reasoning.signature) {
    current.thinkingSignature = (current.thinkingSignature || "") + reasoning.signature;
  }
}
236
+
237
/**
 * Copies the token usage reported in a metadata event onto the message and
 * recomputes its cost.
 *
 * Missing counters are treated as 0. When Bedrock reports no (or a zero)
 * total, the total is taken as input + output tokens. Events without a
 * `usage` section leave the message untouched.
 *
 * @param event - The metadata event received from Bedrock.
 * @param model - Model passed to `calculateCost` together with the updated usage.
 * @param output - Assistant message whose `usage` is updated in place.
 */
function handleMetadata(
  event: ConverseStreamMetadataEvent,
  model: Model<"bedrock-converse-stream">,
  output: AssistantMessage,
): void {
  const reported = event.usage;
  if (!reported) return;

  const usage = output.usage;
  usage.input = reported.inputTokens || 0;
  usage.output = reported.outputTokens || 0;
  usage.cacheRead = reported.cacheReadInputTokens || 0;
  usage.cacheWrite = reported.cacheWriteInputTokens || 0;
  usage.totalTokens = reported.totalTokens || usage.input + usage.output;

  calculateCost(model, usage);
}
251
+
252
/**
 * Finalises the block that a `contentBlockStop` event refers to.
 *
 * The block's streaming `index` tag is removed and the matching `*_end` event
 * is emitted. For tool calls the arguments are parsed one last time from the
 * accumulated JSON, which is then removed as well. Stop events for an unknown
 * block index are ignored.
 *
 * @param event - The stop event received from Bedrock.
 * @param blocks - Streaming view of the message content, searched by content block index.
 * @param output - Assistant message being assembled, attached to the emitted event.
 * @param stream - Event stream that receives the end event.
 */
function handleContentBlockStop(
  event: ContentBlockStopEvent,
  blocks: Block[],
  output: AssistantMessage,
  stream: AssistantMessageEventStream,
): void {
  const position = blocks.findIndex((candidate) => candidate.index === event.contentBlockIndex);
  const finished = blocks[position];
  if (!finished) return;

  delete (finished as Block).index;

  if (finished.type === "text") {
    stream.push({ type: "text_end", contentIndex: position, content: finished.text, partial: output });
  } else if (finished.type === "thinking") {
    stream.push({ type: "thinking_end", contentIndex: position, content: finished.thinking, partial: output });
  } else if (finished.type === "toolCall") {
    finished.arguments = parseStreamingJson(finished.partialJson);
    delete (finished as Block).partialJson;
    stream.push({ type: "toolcall_end", contentIndex: position, toolCall: finished, partial: output });
  }
}
277
+
278
/**
 * Reports whether prompt cache points should be sent for the given model.
 *
 * The decision is made purely from the lower-cased model id:
 * - any id containing "claude" together with "-4-" or "-4." (Claude 4.x),
 * - any id containing "claude-3-7-sonnet",
 * - any id containing "claude-3-5-haiku".
 *
 * @param model - Model whose `id` is inspected.
 * @returns `true` when the id matches one of the patterns above.
 */
function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
  const normalizedId = model.id.toLowerCase();

  const isClaude4 =
    normalizedId.includes("claude") && ["-4-", "-4."].some((marker) => normalizedId.includes(marker));

  return isClaude4 || normalizedId.includes("claude-3-7-sonnet") || normalizedId.includes("claude-3-5-haiku");
}
292
+
293
/**
 * Builds the `system` section of the Converse request.
 *
 * @param systemPrompt - Raw system prompt; when empty or missing no system section is sent.
 * @param model - Model used to decide whether a cache point is appended.
 * @returns `undefined` without a prompt; otherwise the sanitized prompt text,
 *   followed by a default cache point when the model supports prompt caching.
 */
function buildSystemPrompt(
  systemPrompt: string | undefined,
  model: Model<"bedrock-converse-stream">,
): SystemContentBlock[] | undefined {
  if (!systemPrompt) return undefined;

  const promptBlock: SystemContentBlock = { text: sanitizeSurrogates(systemPrompt) };
  if (!supportsPromptCaching(model)) {
    return [promptBlock];
  }

  // Cache everything up to and including the system prompt.
  return [promptBlock, { cachePoint: { type: CachePointType.DEFAULT } }];
}
308
+
309
/**
 * Converts the conversation into Bedrock Converse messages.
 *
 * Messages are first passed through `transformMessages`, then mapped by role:
 * - user: text is sanitized and images are converted to image blocks;
 * - assistant: text, tool calls and thinking are mapped to their Bedrock
 *   counterparts; blank text/thinking blocks are dropped and a message left
 *   without any block is skipped, since Bedrock rejects empty content arrays;
 * - toolResult: a run of consecutive tool results is merged into one user
 *   message, since Bedrock requires all tool results in a single message.
 *
 * When the model supports prompt caching and the final converted message is a
 * user message, a default cache point is appended to it.
 *
 * @param context - Conversation context providing the messages.
 * @param model - Target model, passed to `transformMessages` and used for the caching decision.
 * @returns The converted message list.
 * @throws Error on an unknown message role or content type.
 */
function convertMessages(context: Context, model: Model<"bedrock-converse-stream">): Message[] {
  const converted: Message[] = [];
  const messages = transformMessages(context.messages, model);

  // Maps one tool result message to a Bedrock tool-result block.
  const toToolResult = (toolMessage: ToolResultMessage): ContentBlock.ToolResultMember => ({
    toolResult: {
      toolUseId: toolMessage.toolCallId,
      content: toolMessage.content.map((part) =>
        part.type === "image"
          ? { image: createImageBlock(part.mimeType, part.data) }
          : { text: sanitizeSurrogates(part.text) },
      ),
      status: toolMessage.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,
    },
  });

  let cursor = 0;
  while (cursor < messages.length) {
    const message = messages[cursor];
    cursor++;

    if (message.role === "user") {
      converted.push({
        role: ConversationRole.USER,
        content:
          typeof message.content === "string"
            ? [{ text: sanitizeSurrogates(message.content) }]
            : message.content.map((part) => {
                if (part.type === "text") {
                  return { text: sanitizeSurrogates(part.text) };
                }
                if (part.type === "image") {
                  return { image: createImageBlock(part.mimeType, part.data) };
                }
                throw new Error("Unknown user content type");
              }),
      });
      continue;
    }

    if (message.role === "assistant") {
      // Empty assistant messages (e.g. from aborted requests) would be rejected by Bedrock.
      if (message.content.length === 0) continue;

      const assistantBlocks: ContentBlock[] = [];
      for (const part of message.content) {
        if (part.type === "text") {
          // Blank text blocks are dropped.
          if (part.text.trim().length === 0) continue;
          assistantBlocks.push({ text: sanitizeSurrogates(part.text) });
        } else if (part.type === "toolCall") {
          assistantBlocks.push({
            toolUse: { toolUseId: part.id, name: part.name, input: part.arguments },
          });
        } else if (part.type === "thinking") {
          // Blank thinking blocks are dropped.
          if (part.thinking.trim().length === 0) continue;
          assistantBlocks.push({
            reasoningContent: {
              reasoningText: { text: sanitizeSurrogates(part.thinking), signature: part.thinkingSignature },
            },
          });
        } else {
          throw new Error("Unknown assistant content type");
        }
      }

      // Every block was filtered out: skip the message altogether.
      if (assistantBlocks.length === 0) continue;

      converted.push({ role: ConversationRole.ASSISTANT, content: assistantBlocks });
      continue;
    }

    if (message.role === "toolResult") {
      const toolResults: ContentBlock.ToolResultMember[] = [toToolResult(message)];

      // Absorb the tool results that directly follow; the cursor ends on the
      // first message that is not a tool result.
      while (cursor < messages.length && messages[cursor].role === "toolResult") {
        toolResults.push(toToolResult(messages[cursor] as ToolResultMessage));
        cursor++;
      }

      converted.push({ role: ConversationRole.USER, content: toolResults });
      continue;
    }

    throw new Error("Unknown message role");
  }

  // Cache the conversation up to the last user message for supported models.
  if (supportsPromptCaching(model) && converted.length > 0) {
    const tail = converted[converted.length - 1];
    if (tail.role === ConversationRole.USER && tail.content) {
      (tail.content as ContentBlock[]).push({ cachePoint: { type: CachePointType.DEFAULT } });
    }
  }

  return converted;
}
437
+
438
/**
 * Builds the Bedrock tool configuration for a request.
 *
 * @param tools - Tool definitions from the context; each becomes a `toolSpec`
 *   whose input schema is the tool's `parameters`.
 * @param toolChoice - Requested tool choice. "auto", "any" and a named tool map
 *   to the corresponding Bedrock choice; when omitted the `toolChoice` field is
 *   left `undefined`.
 * @returns `undefined` when there are no tools or `toolChoice` is "none";
 *   otherwise the tool list together with the mapped choice.
 */
function convertToolConfig(
  tools: Tool[] | undefined,
  toolChoice: BedrockOptions["toolChoice"],
): ToolConfiguration | undefined {
  if (toolChoice === "none" || !tools || tools.length === 0) {
    return undefined;
  }

  const specs: BedrockTool[] = tools.map(({ name, description, parameters }) => ({
    toolSpec: { name, description, inputSchema: { json: parameters } },
  }));

  let choice: ToolChoice | undefined;
  if (toolChoice === "auto") {
    choice = { auto: {} };
  } else if (toolChoice === "any") {
    choice = { any: {} };
  } else if (toolChoice?.type === "tool") {
    choice = { tool: { name: toolChoice.name } };
  }

  return { tools: specs, toolChoice: choice };
}
468
+
469
/**
 * Translates a Bedrock stop reason into the library's stop reason.
 *
 * @param reason - Stop reason reported by the `messageStop` event.
 * @returns "stop" for end-of-turn and stop-sequence, "length" for max-tokens
 *   and context-window-exceeded, "toolUse" for tool use, and "error" for
 *   anything else (including `undefined`).
 */
function mapStopReason(reason: string | undefined): StopReason {
  if (reason === BedrockStopReason.END_TURN || reason === BedrockStopReason.STOP_SEQUENCE) {
    return "stop";
  }
  if (reason === BedrockStopReason.MAX_TOKENS || reason === BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED) {
    return "length";
  }
  if (reason === BedrockStopReason.TOOL_USE) {
    return "toolUse";
  }
  return "error";
}
483
+
484
/**
 * Builds the model-specific request fields that enable extended thinking.
 *
 * Fields are only produced when a reasoning level is requested, the model is
 * flagged as reasoning-capable, and its id contains "anthropic.claude".
 *
 * @param model - Target model (`id` and `reasoning` are inspected).
 * @param options - Supplies the reasoning level, optional custom budgets and
 *   the interleaved-thinking switch.
 * @returns `undefined` when thinking does not apply; otherwise a `thinking`
 *   section with the token budget, plus the interleaved-thinking beta flag
 *   when `options.interleavedThinking` is set.
 */
function buildAdditionalModelRequestFields(
  model: Model<"bedrock-converse-stream">,
  options: BedrockOptions,
): Record<string, any> | undefined {
  const { reasoning } = options;
  if (!reasoning || !model.reasoning) return undefined;
  if (!model.id.includes("anthropic.claude")) return undefined;

  const fallbackBudgets: Record<ThinkingLevel, number> = {
    minimal: 1024,
    low: 2048,
    medium: 8192,
    high: 16384,
    xhigh: 16384, // Claude doesn't support xhigh, clamp to high
  };

  // Custom budgets have no "xhigh" entry, so that level reads the "high" budget.
  const budgetKey = reasoning === "xhigh" ? "high" : reasoning;
  const budgetTokens = options.thinkingBudgets?.[budgetKey] ?? fallbackBudgets[reasoning];

  const fields: Record<string, any> = {
    thinking: { type: "enabled", budget_tokens: budgetTokens },
  };
  if (options.interleavedThinking) {
    fields.anthropic_beta = ["interleaved-thinking-2025-05-14"];
  }
  return fields;
}
521
+
522
/**
 * Converts a base64-encoded image into a Bedrock image block.
 *
 * @param mimeType - Image MIME type; JPEG ("image/jpeg" or "image/jpg"), PNG,
 *   GIF and WebP are accepted.
 * @param data - Base64-encoded image data.
 * @returns The image block carrying the decoded bytes and the Bedrock format.
 * @throws Error when the MIME type is not one of the accepted types.
 */
function createImageBlock(mimeType: string, data: string) {
  let format: ImageFormat;
  if (mimeType === "image/jpeg" || mimeType === "image/jpg") {
    format = ImageFormat.JPEG;
  } else if (mimeType === "image/png") {
    format = ImageFormat.PNG;
  } else if (mimeType === "image/gif") {
    format = ImageFormat.GIF;
  } else if (mimeType === "image/webp") {
    format = ImageFormat.WEBP;
  } else {
    throw new Error(`Unknown image type: ${mimeType}`);
  }

  // `atob` yields one character per decoded byte, so each char code is the byte value.
  const bytes = Uint8Array.from(atob(data), (ch) => ch.charCodeAt(0));

  return { source: { bytes }, format };
}
@@ -27,7 +27,7 @@ import { parseStreamingJson } from "../utils/json-parse";
27
27
  import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
28
28
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
29
29
 
30
- import { transformMessages } from "./transorm-messages";
30
+ import { transformMessages } from "./transform-messages";
31
31
 
32
32
  // Stealth mode: Mimic Claude Code headers while avoiding tool name collisions.
33
33
  export const claudeCodeVersion = "2.1.2";