@hebo-ai/gateway 0.9.4 → 0.10.1

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their public registries.
@@ -0,0 +1,661 @@
+ import { Output, jsonSchema, tool } from "ai";
+ import { parseBase64, parseImageInput, parsePromptCachingOptions, normalizeToolName, stripEmptyKeys, resolveResponseServiceTier, extractReasoningMetadata, parseJsonOrText, } from "../shared/converters";
+ // --- Request Flow ---
+ export function convertToTextCallOptions(inputs) {
+     const options = {
+         messages: convertToModelMessages(inputs.messages, inputs.system),
+         temperature: inputs.temperature,
+         maxOutputTokens: inputs.max_tokens,
+         topP: inputs.top_p,
+         stopSequences: inputs.stop_sequences,
+         providerOptions: {},
+     };
+     // Tools
+     const toolSet = convertToToolSet(inputs.tools);
+     if (toolSet)
+         options.tools = toolSet;
+     const toolChoice = convertToToolChoiceOptions(inputs.tool_choice);
+     if (toolChoice)
+         options.toolChoice = toolChoice;
+     // Build providerOptions.unknown in one pass — reasoning, cache control, metadata,
+     // and service tier all go into the same object for middleware consumption.
+     const unknown = {};
+     // Thinking/reasoning — convert to the shared `reasoning` config format so the
+     // model middleware (claudeReasoningMiddleware) and provider middleware
+     // (bedrockClaudeReasoningMiddleware) handle provider-specific conversion.
+     const reasoningResult = convertThinkingToReasoning(inputs.thinking, inputs.output_config);
+     if (reasoningResult) {
+         unknown["reasoning"] = reasoningResult.reasoning;
+         unknown["reasoning_effort"] = reasoningResult.reasoning_effort;
+     }
+     // Per-block cache control is handled in convertToModelMessages.
+     // Top-level automatic caching:
+     if (inputs.cache_control) {
+         Object.assign(unknown, parsePromptCachingOptions(undefined, undefined, inputs.cache_control));
+     }
+     // Metadata passthrough
+     if (inputs.metadata) {
+         unknown["metadata"] = inputs.metadata;
+     }
+     // Service tier — map Anthropic-native values to internal representation
+     if (inputs.service_tier) {
+         unknown["service_tier"] = toInternalServiceTier(inputs.service_tier);
+     }
+     if (Object.keys(unknown).length > 0) {
+         options.providerOptions["unknown"] = unknown;
+     }
+     // Structured output
+     if (inputs.output_config) {
+         options.output = convertToOutput(inputs.output_config);
+     }
+     return options;
+ }
+ function convertToOutput(config) {
+     if (!config.format || config.format.type !== "json_schema")
+         return undefined;
+     return Output.object({
+         schema: jsonSchema(config.format.schema),
+     });
+ }
+ export function convertThinkingToReasoning(thinking, outputConfig) {
+     // Map Anthropic "max" effort → internal "xhigh"
+     const effort = outputConfig?.effort === "max" ? "xhigh" : outputConfig?.effort;
+     if (!thinking) {
+         return effort ? { reasoning: { enabled: true, effort }, reasoning_effort: effort } : undefined;
+     }
+     if (thinking.type === "disabled") {
+         return { reasoning: { enabled: false } };
+     }
+     const summary = thinking.display === "summarized"
+         ? "auto"
+         : thinking.display === "omitted"
+             ? "none"
+             : undefined;
+     if (thinking.type === "enabled") {
+         const reasoning = {
+             enabled: true,
+             max_tokens: thinking.budget_tokens,
+             summary,
+         };
+         if (effort)
+             reasoning.effort = effort;
+         return { reasoning, reasoning_effort: effort };
+     }
+     // adaptive — no fixed token budget; effort defaults to "high" per API spec
+     const adaptiveEffort = effort ?? "high";
+     return {
+         reasoning: { enabled: true, effort: adaptiveEffort, summary },
+         reasoning_effort: adaptiveEffort,
+     };
+ }
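
Editor's note (not part of the published diff): since convertThinkingToReasoning is exported, its branch behavior can be checked directly. The expected values below are inferred from the branches above; the literal "adaptive" type tag is assumed from the trailing comment, not confirmed by the package.

    // Inferred behavior, assuming "adaptive" is the third thinking.type value:
    convertThinkingToReasoning({ type: "enabled", budget_tokens: 2048, display: "summarized" });
    // -> { reasoning: { enabled: true, max_tokens: 2048, summary: "auto" }, reasoning_effort: undefined }
    convertThinkingToReasoning({ type: "adaptive" }, { effort: "max" });
    // -> { reasoning: { enabled: true, effort: "xhigh", summary: undefined }, reasoning_effort: "xhigh" }
    convertThinkingToReasoning(undefined, { effort: "low" });
    // -> { reasoning: { enabled: true, effort: "low" }, reasoning_effort: "low" }
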
+ // --- Message Conversion ---
+ export function convertToModelMessages(messages, system) {
+     const modelMessages = [];
+     // System prompt
+     if (system) {
+         if (typeof system === "string") {
+             modelMessages.push({ role: "system", content: system });
+         }
+         else {
+             const text = system.map((block) => block.text).join("");
+             const msg = { role: "system", content: text };
+             // Pass through cache_control from the last system block that has it
+             for (let i = system.length - 1; i >= 0; i--) {
+                 if (system[i].cache_control) {
+                     msg.providerOptions = { unknown: { cache_control: system[i].cache_control } };
+                     break;
+                 }
+             }
+             modelMessages.push(msg);
+         }
+     }
+     // Tool call id → name map built incrementally; assistant messages always
+     // precede their corresponding tool results in a valid conversation.
+     const toolNameMap = new Map();
+     for (const message of messages) {
+         if (message.role === "user") {
+             const userMessages = fromUserMessage(message, toolNameMap);
+             for (let i = 0; i < userMessages.length; i++) {
+                 modelMessages.push(userMessages[i]);
+             }
+         }
+         else if (message.role === "assistant") {
+             if (Array.isArray(message.content)) {
+                 for (const block of message.content) {
+                     // oxlint-disable-next-line max-depth
+                     if (block.type === "tool_use")
+                         toolNameMap.set(block.id, block.name);
+                 }
+             }
+             modelMessages.push(fromAssistantMessage(message));
+         }
+     }
+     return modelMessages;
+ }
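
Editor's note (illustration only): a user turn that mixes tool_result blocks with ordinary content is split into separate user and tool model messages, with tool names recovered from the tool_use ids of the preceding assistant turn. A sketch of the expected shape, inferred from the fromUserMessage and fromToolResultBlock helpers that follow; the exact output value depends on parseJsonOrText from ../shared/converters:

    const out = convertToModelMessages([
        { role: "assistant", content: [{ type: "tool_use", id: "toolu_1", name: "get_weather", input: { city: "Paris" } }] },
        { role: "user", content: [
            { type: "text", text: "Thanks!" },
            { type: "tool_result", tool_use_id: "toolu_1", content: "Sunny" },
        ] },
    ]);
    // out is roughly:
    // [
    //   { role: "assistant", content: [{ type: "tool-call", toolCallId: "toolu_1", toolName: "get_weather", input: { city: "Paris" } }] },
    //   { role: "user", content: [{ type: "text", text: "Thanks!" }] },
    //   { role: "tool", content: [{ type: "tool-result", toolCallId: "toolu_1", toolName: "get_weather", output: /* parseJsonOrText("Sunny") */ }] },
    // ]
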
+ function fromUserMessage(message, toolNameMap) {
+     const result = [];
+     if (typeof message.content === "string") {
+         result.push({ role: "user", content: message.content });
+         return result;
+     }
+     const userParts = [];
+     const toolResultParts = [];
+     for (const block of message.content) {
+         if (block.type === "tool_result") {
+             toolResultParts.push(fromToolResultBlock(block, toolNameMap));
+         }
+         else {
+             const part = fromUserContentBlock(block);
+             if (part)
+                 userParts.push(part);
+         }
+     }
+     if (userParts.length > 0) {
+         result.push({ role: "user", content: userParts });
+     }
+     if (toolResultParts.length > 0) {
+         result.push({ role: "tool", content: toolResultParts });
+     }
+     // If only tool results and no user parts, still valid
+     if (userParts.length === 0 && toolResultParts.length === 0) {
+         result.push({ role: "user", content: "" });
+     }
+     return result;
+ }
+ function fromUserContentBlock(block) {
+     // tool_result blocks are handled separately in fromUserMessage
+     // oxlint-disable-next-line switch-exhaustiveness-check
+     switch (block.type) {
+         case "text": {
+             const part = { type: "text", text: block.text };
+             if (block.cache_control) {
+                 part.providerOptions = { unknown: { cache_control: block.cache_control } };
+             }
+             return part;
+         }
+         case "image": {
+             if (block.source.type === "base64") {
+                 const part = {
+                     type: "image",
+                     image: parseBase64(block.source.data),
+                     mediaType: block.source.media_type,
+                 };
+                 if (block.cache_control) {
+                     part.providerOptions = { unknown: { cache_control: block.cache_control } };
+                 }
+                 return part;
+             }
+             // URL source
+             const { image, mediaType } = parseImageInput(block.source.url);
+             const part = { type: "image", image, mediaType };
+             if (block.cache_control) {
+                 part.providerOptions = { unknown: { cache_control: block.cache_control } };
+             }
+             return part;
+         }
+         case "document": {
+             if (block.source.type === "base64") {
+                 const filePart = {
+                     type: "file",
+                     data: parseBase64(block.source.data),
+                     mediaType: block.source.media_type,
+                 };
+                 if (block.cache_control) {
+                     filePart.providerOptions = { unknown: { cache_control: block.cache_control } };
+                 }
+                 return filePart;
+             }
+             if (block.source.type === "url") {
+                 const filePart = {
+                     type: "file",
+                     data: new URL(block.source.url),
+                     mediaType: "application/octet-stream",
+                 };
+                 if (block.cache_control) {
+                     filePart.providerOptions = { unknown: { cache_control: block.cache_control } };
+                 }
+                 return filePart;
+             }
+             // text source
+             const textPart = { type: "text", text: block.source.data };
+             if (block.cache_control) {
+                 textPart.providerOptions = { unknown: { cache_control: block.cache_control } };
+             }
+             return textPart;
+         }
+         default:
+             return undefined;
+     }
+ }
+ function fromToolResultBlock(block, toolNameMap) {
+     let output;
+     if (block.content === undefined) {
+         output = { type: "text", value: "" };
+     }
+     else if (typeof block.content === "string") {
+         output = parseJsonOrText(block.content);
+     }
+     else {
+         const parts = [];
+         for (const part of block.content) {
+             if (part.type === "text") {
+                 parts.push({ type: "text", text: part.text });
+             }
+             else if (part.type === "image") {
+                 if (part.source.type === "base64") {
+                     parts.push({
+                         type: "image-data",
+                         data: part.source.data,
+                         mediaType: part.source.media_type,
+                     });
+                 }
+                 else {
+                     parts.push({ type: "image-url", url: part.source.url });
+                 }
+             }
+         }
+         output = { type: "content", value: parts };
+     }
+     const result = {
+         type: "tool-result",
+         toolCallId: block.tool_use_id,
+         toolName: toolNameMap.get(block.tool_use_id) ?? "",
+         output,
+     };
+     if (block.cache_control) {
+         result.providerOptions = { unknown: { cache_control: block.cache_control } };
+     }
+     return result;
+ }
+ function fromAssistantMessage(message) {
+     if (typeof message.content === "string") {
+         return { role: "assistant", content: message.content };
+     }
+     const parts = [];
+     for (const block of message.content) {
+         switch (block.type) {
+             case "text":
+                 parts.push({ type: "text", text: block.text });
+                 break;
+             case "tool_use": {
+                 const toolCallPart = {
+                     type: "tool-call",
+                     toolCallId: block.id,
+                     toolName: block.name,
+                     input: block.input,
+                 };
+                 if (block.extra_content)
+                     toolCallPart.providerOptions = block.extra_content;
+                 parts.push(toolCallPart);
+                 break;
+             }
+             case "thinking":
+                 parts.push({
+                     type: "reasoning",
+                     text: block.thinking,
+                     providerOptions: {
+                         unknown: { signature: block.signature },
+                     },
+                 });
+                 break;
+             case "redacted_thinking":
+                 parts.push({
+                     type: "reasoning",
+                     text: "",
+                     providerOptions: {
+                         unknown: { redactedData: block.data },
+                     },
+                 });
+                 break;
+         }
+     }
+     return {
+         role: "assistant",
+         content: parts.length > 0 ? parts : "",
+     };
+ }
+ // --- Tool Conversion ---
+ export function convertToToolSet(tools) {
+     if (!tools || tools.length === 0)
+         return undefined;
+     const toolSet = {};
+     for (const t of tools) {
+         toolSet[t.name] = tool({
+             description: t.description,
+             inputSchema: jsonSchema(t.input_schema),
+             strict: t.strict,
+         });
+     }
+     return toolSet;
+ }
+ export function convertToToolChoiceOptions(toolChoice) {
+     if (!toolChoice)
+         return undefined;
+     switch (toolChoice.type) {
+         case "auto":
+             return "auto";
+         case "any":
+             return "required";
+         case "none":
+             return "none";
+         case "tool":
+             return { type: "tool", toolName: toolChoice.name };
+         default:
+             return undefined;
+     }
+ }
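
Editor's note: the tool_choice mapping above, spelled out (inferred directly from the switch):

    convertToToolChoiceOptions({ type: "auto" });                 // -> "auto"
    convertToToolChoiceOptions({ type: "any" });                  // -> "required"
    convertToToolChoiceOptions({ type: "none" });                 // -> "none"
    convertToToolChoiceOptions({ type: "tool", name: "search" }); // -> { type: "tool", toolName: "search" }
    convertToToolChoiceOptions(undefined);                        // -> undefined (SDK default applies)
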
+ // --- Response Flow ---
+ export function toMessages(result, modelId) {
+     const content = [];
+     // Thinking blocks
+     for (const part of result.content) {
+         if (part.type === "reasoning") {
+             content.push(toThinkingBlock(part));
+         }
+     }
+     // Text blocks
+     for (const part of result.content) {
+         if (part.type === "text" && part.text) {
+             content.push({ type: "text", text: part.text });
+         }
+     }
+     // Tool use blocks
+     const toolCalls = result.toolCalls;
+     for (let i = 0; i < toolCalls.length; i++) {
+         const tc = toolCalls[i];
+         const toolUseBlock = {
+             type: "tool_use",
+             id: tc.toolCallId,
+             name: normalizeToolName(tc.toolName),
+             input: stripEmptyKeys(tc.input) ?? {},
+         };
+         if (tc.providerMetadata)
+             toolUseBlock.extra_content = tc.providerMetadata;
+         content.push(toolUseBlock);
+     }
+     return {
+         id: `msg_${crypto.randomUUID()}`,
+         type: "message",
+         role: "assistant",
+         content,
+         model: modelId,
+         stop_reason: mapStopReason(result.finishReason),
+         stop_sequence: null,
+         usage: mapUsage(result.totalUsage),
+         service_tier: toMessagesServiceTier(resolveResponseServiceTier(result.providerMetadata)),
+     };
+ }
+ function toThinkingBlock(reasoning) {
+     const { redactedData, signature } = extractReasoningMetadata(reasoning.providerMetadata);
+     if (redactedData) {
+         return { type: "redacted_thinking", data: redactedData };
+     }
+     return {
+         type: "thinking",
+         thinking: reasoning.text,
+         signature: signature ?? "",
+     };
+ }
+ export function mapStopReason(reason) {
+     switch (reason) {
+         case "stop":
+             return "end_turn";
+         case "tool-calls":
+             return "tool_use";
+         case "length":
+             return "max_tokens";
+         case "content-filter":
+             return "end_turn";
+         case "error":
+         case "other":
+             return null;
+         default:
+             return null;
+     }
+ }
+ export function mapUsage(usage) {
+     const result = {
+         input_tokens: usage?.inputTokens ?? 0,
+         output_tokens: usage?.outputTokens ?? 0,
+     };
+     if (usage?.inputTokenDetails?.cacheWriteTokens !== undefined) {
+         result.cache_creation_input_tokens = usage.inputTokenDetails.cacheWriteTokens;
+     }
+     if (usage?.inputTokenDetails?.cacheReadTokens !== undefined) {
+         result.cache_read_input_tokens = usage.inputTokenDetails.cacheReadTokens;
+     }
+     return result;
+ }
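
Editor's note: mapUsage emits the cache token fields only when the provider reports them, so responses from non-caching providers keep the plain two-field Anthropic usage shape. Inferred examples:

    mapUsage({ inputTokens: 1200, outputTokens: 80, inputTokenDetails: { cacheReadTokens: 1000 } });
    // -> { input_tokens: 1200, output_tokens: 80, cache_read_input_tokens: 1000 }
    mapUsage(undefined);
    // -> { input_tokens: 0, output_tokens: 0 }
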
+ // --- Service Tier Mapping ---
+ function toInternalServiceTier(tier) {
+     if (tier === "standard_only")
+         return "default";
+     return tier; // "auto" maps directly
+ }
+ function toMessagesServiceTier(tier) {
+     if (!tier)
+         return undefined;
+     if (tier === "default")
+         return "standard_only";
+     if (tier === "auto")
+         return "auto";
+     return undefined; // flex, scale, priority don't have Anthropic equivalents
+ }
+ // --- Streaming ---
+ export function toMessagesStream(result, modelId) {
+     return result.fullStream.pipeThrough(new MessagesTransformStream(modelId));
+ }
+ export class MessagesTransformStream extends TransformStream {
+     constructor(modelId) {
+         let blockIndex = 0;
+         let currentToolCallId;
+         super({
+             start(controller) {
+                 const emptyMessage = {
+                     id: `msg_${crypto.randomUUID()}`,
+                     type: "message",
+                     role: "assistant",
+                     content: [],
+                     model: modelId,
+                     stop_reason: null,
+                     stop_sequence: null,
+                     usage: { input_tokens: 0, output_tokens: 0 },
+                 };
+                 controller.enqueue({
+                     event: "message_start",
+                     data: { type: "message_start", message: emptyMessage },
+                 });
+             },
+             transform(part, controller) {
+                 // Not all TextStreamPart types are relevant for Messages SSE format
+                 // oxlint-disable-next-line switch-exhaustiveness-check
+                 switch (part.type) {
+                     case "reasoning-start": {
+                         controller.enqueue({
+                             event: "content_block_start",
+                             data: {
+                                 type: "content_block_start",
+                                 index: blockIndex,
+                                 content_block: { type: "thinking", thinking: "" },
+                             },
+                         });
+                         break;
+                     }
+                     case "reasoning-delta": {
+                         controller.enqueue({
+                             event: "content_block_delta",
+                             data: {
+                                 type: "content_block_delta",
+                                 index: blockIndex,
+                                 delta: { type: "thinking_delta", thinking: part.text },
+                             },
+                         });
+                         break;
+                     }
+                     case "reasoning-end": {
+                         // Emit signature delta if available from provider metadata
+                         const { signature } = extractReasoningMetadata(part.providerMetadata);
+                         if (signature) {
+                             controller.enqueue({
+                                 event: "content_block_delta",
+                                 data: {
+                                     type: "content_block_delta",
+                                     index: blockIndex,
+                                     delta: { type: "signature_delta", signature },
+                                 },
+                             });
+                         }
+                         controller.enqueue({
+                             event: "content_block_stop",
+                             data: { type: "content_block_stop", index: blockIndex },
+                         });
+                         blockIndex++;
+                         break;
+                     }
+                     case "text-start": {
+                         controller.enqueue({
+                             event: "content_block_start",
+                             data: {
+                                 type: "content_block_start",
+                                 index: blockIndex,
+                                 content_block: { type: "text", text: "" },
+                             },
+                         });
+                         break;
+                     }
+                     case "text-delta": {
+                         controller.enqueue({
+                             event: "content_block_delta",
+                             data: {
+                                 type: "content_block_delta",
+                                 index: blockIndex,
+                                 delta: { type: "text_delta", text: part.text },
+                             },
+                         });
+                         break;
+                     }
+                     case "text-end": {
+                         controller.enqueue({
+                             event: "content_block_stop",
+                             data: { type: "content_block_stop", index: blockIndex },
+                         });
+                         blockIndex++;
+                         break;
+                     }
+                     case "tool-input-start": {
+                         currentToolCallId = part.id;
+                         controller.enqueue({
+                             event: "content_block_start",
+                             data: {
+                                 type: "content_block_start",
+                                 index: blockIndex,
+                                 content_block: {
+                                     type: "tool_use",
+                                     id: part.id,
+                                     name: normalizeToolName(part.toolName),
+                                     input: {},
+                                 },
+                             },
+                         });
+                         break;
+                     }
+                     case "tool-input-delta": {
+                         controller.enqueue({
+                             event: "content_block_delta",
+                             data: {
+                                 type: "content_block_delta",
+                                 index: blockIndex,
+                                 delta: { type: "input_json_delta", partial_json: part.delta },
+                             },
+                         });
+                         break;
+                     }
+                     case "tool-call": {
+                         // If we had streaming tool input, close the block
+                         if (currentToolCallId === part.toolCallId) {
+                             controller.enqueue({
+                                 event: "content_block_stop",
+                                 data: { type: "content_block_stop", index: blockIndex },
+                             });
+                             blockIndex++;
+                             currentToolCallId = undefined;
+                         }
+                         else {
+                             // Non-streaming tool call: emit start + stop
+                             const contentBlock = {
+                                 type: "tool_use",
+                                 id: part.toolCallId,
+                                 name: normalizeToolName(part.toolName),
+                                 input: {},
+                             };
+                             if (part.providerMetadata)
+                                 contentBlock.extra_content = part.providerMetadata;
+                             controller.enqueue({
+                                 event: "content_block_start",
+                                 data: {
+                                     type: "content_block_start",
+                                     index: blockIndex,
+                                     content_block: contentBlock,
+                                 },
+                             });
+                             const inputStr = typeof part.input === "string"
+                                 ? part.input
+                                 : JSON.stringify(stripEmptyKeys(part.input));
+                             if (inputStr) {
+                                 controller.enqueue({
+                                     event: "content_block_delta",
+                                     data: {
+                                         type: "content_block_delta",
+                                         index: blockIndex,
+                                         delta: { type: "input_json_delta", partial_json: inputStr },
+                                     },
+                                 });
+                             }
+                             controller.enqueue({
+                                 event: "content_block_stop",
+                                 data: { type: "content_block_stop", index: blockIndex },
+                             });
+                             blockIndex++;
+                         }
+                         break;
+                     }
+                     case "finish-step": {
+                         // No-op for messages; metadata is not surfaced in Anthropic stream format
+                         break;
+                     }
+                     case "finish": {
+                         const stopReason = mapStopReason(part.finishReason);
+                         const totalOutputTokens = part.totalUsage?.outputTokens ?? 0;
+                         const totalInputTokens = part.totalUsage?.inputTokens ?? 0;
+                         controller.enqueue({
+                             event: "message_delta",
+                             data: {
+                                 type: "message_delta",
+                                 delta: { stop_reason: stopReason, stop_sequence: null },
+                                 usage: { output_tokens: totalOutputTokens, input_tokens: totalInputTokens },
+                             },
+                         });
+                         controller.enqueue({
+                             event: "message_stop",
+                             data: { type: "message_stop" },
+                         });
+                         break;
+                     }
+                     case "error": {
+                         const message = part.error instanceof Error ? part.error.message : String(part.error);
+                         controller.enqueue({
+                             event: "error",
+                             data: {
+                                 type: "error",
+                                 error: { type: "api_error", message },
+                             },
+                         });
+                         break;
+                     }
+                     default:
+                         break;
+                 }
+             },
+         });
+     }
+ }
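
Editor's note: read end to end, MessagesTransformStream reproduces the Anthropic Messages SSE lifecycle. For a response carrying one thinking block and one text block, the emitted event sequence (inferred from the transform above) is:

    message_start        { message: { content: [], usage: { input_tokens: 0, output_tokens: 0 } } }
    content_block_start  index 0, content_block { type: "thinking", thinking: "" }
    content_block_delta  index 0, thinking_delta ... (then signature_delta, if metadata carries one)
    content_block_stop   index 0
    content_block_start  index 1, content_block { type: "text", text: "" }
    content_block_delta  index 1, text_delta ...
    content_block_stop   index 1
    message_delta        { delta: { stop_reason, stop_sequence: null }, usage: { input_tokens, output_tokens } }
    message_stop
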
@@ -0,0 +1,2 @@
+ import type { GatewayConfig, Endpoint } from "../../types";
+ export declare const messages: (config: GatewayConfig) => Endpoint;
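
Editor's note: the declaration file above only types the endpoint factory. A minimal consumption sketch, assuming messages is re-exported from the package root and that the Endpoint it returns wraps the converters in the first file (the wiring described in the comments is a guess, not taken from the package):

    import { messages } from "@hebo-ai/gateway"; // assumed export path
    const endpoint = messages(config); // config: GatewayConfig
    // Presumably the endpoint accepts an Anthropic /v1/messages request body,
    // builds call options via convertToTextCallOptions, invokes the model through
    // the AI SDK, and shapes the reply with toMessages or toMessagesStream.
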