@hebo-ai/gateway 0.6.2-rc0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +26 -21
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -3
- package/dist/models/amazon/middleware.js +8 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/catalog.js +5 -1
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +13 -9
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +36 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -364
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
package/README.md
CHANGED
|
@@ -536,14 +536,14 @@ Normalization rules:
|
|
|
536
536
|
|
|
537
537
|
- `enabled` -> fall-back to model default if none provided
|
|
538
538
|
- `max_tokens`: fall-back to model default if model supports
|
|
539
|
-
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh
|
|
539
|
+
- `effort` supports: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`
|
|
540
540
|
- Generic `effort` -> budget = percentage of `max_tokens`
|
|
541
541
|
- `none`: 0%
|
|
542
542
|
- `minimal`: 10%
|
|
543
543
|
- `low`: 20%
|
|
544
544
|
- `medium`: 50% (default)
|
|
545
545
|
- `high`: 80%
|
|
546
|
-
- `xhigh
|
|
546
|
+
- `xhigh`: 95%
|
|
547
547
|
|
|
548
548
|
Reasoning output is surfaced as extension to the `completion` object.
|
|
549
549
|
|
|
@@ -665,7 +665,7 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
|
|
|
665
665
|
|
|
666
666
|
> [!TIP]
|
|
667
667
|
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
668
|
-
> For `/chat/completions`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
668
|
+
> For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
669
669
|
|
|
670
670
|
For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
671
671
|
|
|
@@ -8,7 +8,12 @@ import { parseDataUrl } from "../../utils/url";
|
|
|
8
8
|
export function convertToTextCallOptions(params) {
|
|
9
9
|
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
10
10
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
11
|
-
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention,
|
|
11
|
+
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control));
|
|
12
|
+
if (extra_body) {
|
|
13
|
+
for (const v of Object.values(extra_body)) {
|
|
14
|
+
Object.assign(rest, v);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
12
17
|
const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
|
|
13
18
|
return {
|
|
14
19
|
messages: convertToModelMessages(messages),
|
|
@@ -192,7 +197,7 @@ export function fromChatCompletionsContent(content) {
|
|
|
192
197
|
return fromFilePart(part.file.data, part.file.media_type, part.file.filename, part.cache_control);
|
|
193
198
|
case "input_audio":
|
|
194
199
|
return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`, undefined, part.cache_control);
|
|
195
|
-
|
|
200
|
+
case "text": {
|
|
196
201
|
const out = {
|
|
197
202
|
type: "text",
|
|
198
203
|
text: part.text,
|
|
@@ -204,6 +209,8 @@ export function fromChatCompletionsContent(content) {
|
|
|
204
209
|
}
|
|
205
210
|
return out;
|
|
206
211
|
}
|
|
212
|
+
default:
|
|
213
|
+
throw new Error(`Unhandled content part type: ${part.type}`);
|
|
207
214
|
}
|
|
208
215
|
});
|
|
209
216
|
}
|
|
@@ -305,6 +312,7 @@ function parseToolResult(content) {
|
|
|
305
312
|
}
|
|
306
313
|
function parseJsonOrText(content) {
|
|
307
314
|
try {
|
|
315
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
308
316
|
return { type: "json", value: JSON.parse(content) };
|
|
309
317
|
}
|
|
310
318
|
catch {
|
|
@@ -334,29 +342,25 @@ function parseReasoningOptions(reasoning_effort, reasoning) {
|
|
|
334
342
|
}
|
|
335
343
|
return out;
|
|
336
344
|
}
|
|
337
|
-
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention,
|
|
345
|
+
function parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cache_control) {
|
|
338
346
|
const out = {};
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if (!syncedCacheControl && syncedCacheRetention) {
|
|
347
|
-
syncedCacheControl = {
|
|
347
|
+
let retention = prompt_cache_retention;
|
|
348
|
+
if (!retention && cache_control?.ttl) {
|
|
349
|
+
retention = cache_control.ttl === "24h" ? "24h" : "in_memory";
|
|
350
|
+
}
|
|
351
|
+
let control = cache_control;
|
|
352
|
+
if (!control && retention) {
|
|
353
|
+
control = {
|
|
348
354
|
type: "ephemeral",
|
|
349
|
-
ttl:
|
|
355
|
+
ttl: retention === "24h" ? "24h" : "5m",
|
|
350
356
|
};
|
|
351
357
|
}
|
|
352
|
-
if (
|
|
353
|
-
out["prompt_cache_key"] =
|
|
354
|
-
if (
|
|
355
|
-
out["prompt_cache_retention"] =
|
|
356
|
-
if (
|
|
357
|
-
out["
|
|
358
|
-
if (syncedCacheControl)
|
|
359
|
-
out["cache_control"] = syncedCacheControl;
|
|
358
|
+
if (prompt_cache_key)
|
|
359
|
+
out["prompt_cache_key"] = prompt_cache_key;
|
|
360
|
+
if (retention)
|
|
361
|
+
out["prompt_cache_retention"] = retention;
|
|
362
|
+
if (control)
|
|
363
|
+
out["cache_control"] = control;
|
|
360
364
|
return out;
|
|
361
365
|
}
|
|
362
366
|
// --- Response Flow ---
|
|
@@ -414,6 +418,7 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
414
418
|
};
|
|
415
419
|
super({
|
|
416
420
|
transform(part, controller) {
|
|
421
|
+
// oxlint-disable-next-line switch-exhaustiveness-check
|
|
417
422
|
switch (part.type) {
|
|
418
423
|
case "text-delta": {
|
|
419
424
|
controller.enqueue(createChunk({ role: "assistant", content: part.text }, part.providerMetadata));
|
|
@@ -23,6 +23,7 @@ export const chatCompletions = (config) => {
|
|
|
23
23
|
}
|
|
24
24
|
// Parse + validate input.
|
|
25
25
|
try {
|
|
26
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
26
27
|
ctx.body = await ctx.request.json();
|
|
27
28
|
}
|
|
28
29
|
catch {
|
|
@@ -66,6 +67,7 @@ export const chatCompletions = (config) => {
|
|
|
66
67
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
68
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
69
|
// Convert inputs to AI SDK call options.
|
|
70
|
+
// oxlint-disable-next-line no-unsafe-argument
|
|
69
71
|
const textOptions = convertToTextCallOptions(inputs);
|
|
70
72
|
logger.trace({
|
|
71
73
|
requestId: ctx.requestId,
|
|
@@ -96,7 +96,7 @@ const toMessageParts = (message) => {
|
|
|
96
96
|
case "system":
|
|
97
97
|
return toTextParts(message.content);
|
|
98
98
|
default:
|
|
99
|
-
|
|
99
|
+
throw new Error(`Unhandled content part type: ${message.role}`);
|
|
100
100
|
}
|
|
101
101
|
};
|
|
102
102
|
export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
@@ -427,7 +427,6 @@ export declare const ChatCompletionsReasoningEffortSchema: z.ZodEnum<{
|
|
|
427
427
|
minimal: "minimal";
|
|
428
428
|
medium: "medium";
|
|
429
429
|
xhigh: "xhigh";
|
|
430
|
-
max: "max";
|
|
431
430
|
}>;
|
|
432
431
|
export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
|
|
433
432
|
export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
@@ -439,7 +438,6 @@ export declare const ChatCompletionsReasoningConfigSchema: z.ZodObject<{
|
|
|
439
438
|
minimal: "minimal";
|
|
440
439
|
medium: "medium";
|
|
441
440
|
xhigh: "xhigh";
|
|
442
|
-
max: "max";
|
|
443
441
|
}>>;
|
|
444
442
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
445
443
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -651,18 +649,12 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
651
649
|
minimal: "minimal";
|
|
652
650
|
medium: "medium";
|
|
653
651
|
xhigh: "xhigh";
|
|
654
|
-
max: "max";
|
|
655
652
|
}>>;
|
|
656
653
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
657
654
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
658
655
|
in_memory: "in_memory";
|
|
659
656
|
"24h": "24h";
|
|
660
657
|
}>>;
|
|
661
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
662
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
663
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
664
|
-
}, z.core.$strip>>;
|
|
665
|
-
}, z.core.$strip>>;
|
|
666
658
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
667
659
|
type: z.ZodLiteral<"ephemeral">;
|
|
668
660
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -676,11 +668,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
676
668
|
minimal: "minimal";
|
|
677
669
|
medium: "medium";
|
|
678
670
|
xhigh: "xhigh";
|
|
679
|
-
max: "max";
|
|
680
671
|
}>>;
|
|
681
672
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
682
673
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
683
674
|
}, z.core.$strip>>;
|
|
675
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
684
676
|
}, z.core.$strip>;
|
|
685
677
|
export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
|
|
686
678
|
export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
@@ -863,18 +855,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
863
855
|
minimal: "minimal";
|
|
864
856
|
medium: "medium";
|
|
865
857
|
xhigh: "xhigh";
|
|
866
|
-
max: "max";
|
|
867
858
|
}>>;
|
|
868
859
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
869
860
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
870
861
|
in_memory: "in_memory";
|
|
871
862
|
"24h": "24h";
|
|
872
863
|
}>>;
|
|
873
|
-
extra_body: z.ZodOptional<z.ZodObject<{
|
|
874
|
-
google: z.ZodOptional<z.ZodObject<{
|
|
875
|
-
cached_content: z.ZodOptional<z.ZodString>;
|
|
876
|
-
}, z.core.$strip>>;
|
|
877
|
-
}, z.core.$strip>>;
|
|
878
864
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
879
865
|
type: z.ZodLiteral<"ephemeral">;
|
|
880
866
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -888,11 +874,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
888
874
|
minimal: "minimal";
|
|
889
875
|
medium: "medium";
|
|
890
876
|
xhigh: "xhigh";
|
|
891
|
-
max: "max";
|
|
892
877
|
}>>;
|
|
893
878
|
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
894
879
|
exclude: z.ZodOptional<z.ZodBoolean>;
|
|
895
880
|
}, z.core.$strip>>;
|
|
881
|
+
extra_body: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
896
882
|
model: z.ZodString;
|
|
897
883
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
898
884
|
}, z.core.$loose>;
|
|
@@ -1029,7 +1015,7 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1029
1015
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1030
1016
|
}, z.core.$strip>>;
|
|
1031
1017
|
}, z.core.$strip>>;
|
|
1032
|
-
provider_metadata: z.ZodOptional<z.ZodUnknown
|
|
1018
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1033
1019
|
}, z.core.$strip>;
|
|
1034
1020
|
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
1035
1021
|
export declare const ChatCompletionsToolCallDeltaSchema: z.ZodObject<{
|
|
@@ -1196,7 +1182,7 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1196
1182
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1197
1183
|
}, z.core.$strip>>;
|
|
1198
1184
|
}, z.core.$strip>>;
|
|
1199
|
-
provider_metadata: z.ZodOptional<z.ZodUnknown
|
|
1185
|
+
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1200
1186
|
}, z.core.$strip>;
|
|
1201
1187
|
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
1202
1188
|
export {};
|
|
@@ -161,14 +161,11 @@ export const ChatCompletionsToolChoiceSchema = z.union([
|
|
|
161
161
|
]);
|
|
162
162
|
export const ChatCompletionsReasoningEffortSchema = z.enum([
|
|
163
163
|
"none",
|
|
164
|
-
// Extension origin: Gemini
|
|
165
164
|
"minimal",
|
|
166
165
|
"low",
|
|
167
166
|
"medium",
|
|
168
167
|
"high",
|
|
169
168
|
"xhigh",
|
|
170
|
-
// Extension origin: Anthropic
|
|
171
|
-
"max",
|
|
172
169
|
]);
|
|
173
170
|
export const ChatCompletionsReasoningConfigSchema = z.object({
|
|
174
171
|
enabled: z.optional(z.boolean()),
|
|
@@ -212,22 +209,16 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
212
209
|
reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
|
|
213
210
|
prompt_cache_key: z.string().optional(),
|
|
214
211
|
prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
|
|
215
|
-
// Extension origin: Gemini explicit cache handle
|
|
216
|
-
// FUTURE: generalize extra_body handling
|
|
217
|
-
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
218
|
-
extra_body: z
|
|
219
|
-
.object({
|
|
220
|
-
google: z
|
|
221
|
-
.object({
|
|
222
|
-
cached_content: z.string().optional().meta({ extension: true }),
|
|
223
|
-
})
|
|
224
|
-
.optional(),
|
|
225
|
-
})
|
|
226
|
-
.optional(),
|
|
227
212
|
// Extension origin: OpenRouter/Vercel/Anthropic
|
|
228
213
|
cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
|
|
229
214
|
// Extension origin: OpenRouter
|
|
230
215
|
reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
|
|
216
|
+
// Extension origin: Gemini extra_body
|
|
217
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview#extra_body
|
|
218
|
+
extra_body: z
|
|
219
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
220
|
+
.optional()
|
|
221
|
+
.meta({ extension: true }),
|
|
231
222
|
});
|
|
232
223
|
export const ChatCompletionsBodySchema = z.looseObject({
|
|
233
224
|
model: z.string(),
|
|
@@ -274,7 +265,10 @@ export const ChatCompletionsSchema = z.object({
|
|
|
274
265
|
choices: z.array(ChatCompletionsChoiceSchema),
|
|
275
266
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
276
267
|
// Extension origin: Vercel AI Gateway
|
|
277
|
-
provider_metadata: z
|
|
268
|
+
provider_metadata: z
|
|
269
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
270
|
+
.optional()
|
|
271
|
+
.meta({ extension: true }),
|
|
278
272
|
});
|
|
279
273
|
export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
|
|
280
274
|
index: z.int().nonnegative(),
|
|
@@ -297,5 +291,8 @@ export const ChatCompletionsChunkSchema = z.object({
|
|
|
297
291
|
choices: z.array(ChatCompletionsChoiceDeltaSchema),
|
|
298
292
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
299
293
|
// Extension origin: Vercel AI Gateway
|
|
300
|
-
provider_metadata: z
|
|
294
|
+
provider_metadata: z
|
|
295
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
296
|
+
.optional()
|
|
297
|
+
.meta({ extension: true }),
|
|
301
298
|
});
|
|
@@ -23,6 +23,7 @@ export const embeddings = (config) => {
|
|
|
23
23
|
}
|
|
24
24
|
// Parse + validate input.
|
|
25
25
|
try {
|
|
26
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
26
27
|
ctx.body = await ctx.request.json();
|
|
27
28
|
}
|
|
28
29
|
catch {
|
|
@@ -65,6 +66,7 @@ export const embeddings = (config) => {
|
|
|
65
66
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
66
67
|
setSpanAttributes(genAiGeneralAttrs);
|
|
67
68
|
// Convert inputs to AI SDK call options.
|
|
69
|
+
// oxlint-disable-next-line no-unsafe-argument
|
|
68
70
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
69
71
|
logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
70
72
|
addSpanEvent("hebo.options.prepared");
|
|
@@ -7,6 +7,11 @@ export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
|
|
|
7
7
|
Object.assign(attrs, {
|
|
8
8
|
"gen_ai.embeddings.dimension.count": inputs.dimensions,
|
|
9
9
|
});
|
|
10
|
+
if (inputs.metadata) {
|
|
11
|
+
for (const key in inputs.metadata) {
|
|
12
|
+
attrs[`gen_ai.request.metadata.${key}`] = inputs.metadata[key];
|
|
13
|
+
}
|
|
14
|
+
}
|
|
10
15
|
}
|
|
11
16
|
return attrs;
|
|
12
17
|
};
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export declare const EmbeddingsDimensionsSchema: z.ZodInt;
|
|
3
|
+
export type EmbeddingsDimensions = z.infer<typeof EmbeddingsDimensionsSchema>;
|
|
4
|
+
export declare const EmbeddingsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
5
|
+
export type EmbeddingsMetadata = z.infer<typeof EmbeddingsMetadataSchema>;
|
|
2
6
|
export declare const EmbeddingsInputsSchema: z.ZodObject<{
|
|
3
7
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
4
8
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
9
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
5
10
|
}, z.core.$strip>;
|
|
6
11
|
export type EmbeddingsInputs = z.infer<typeof EmbeddingsInputsSchema>;
|
|
7
12
|
export declare const EmbeddingsBodySchema: z.ZodObject<{
|
|
8
13
|
input: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
|
|
9
14
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
15
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
10
16
|
model: z.ZodString;
|
|
11
17
|
}, z.core.$loose>;
|
|
12
18
|
export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
|
|
3
|
+
export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
2
4
|
export const EmbeddingsInputsSchema = z.object({
|
|
3
5
|
input: z.union([z.string(), z.array(z.string())]),
|
|
4
|
-
dimensions:
|
|
6
|
+
dimensions: EmbeddingsDimensionsSchema.optional(),
|
|
7
|
+
metadata: EmbeddingsMetadataSchema.optional(),
|
|
5
8
|
});
|
|
6
9
|
export const EmbeddingsBodySchema = z.looseObject({
|
|
7
10
|
model: z.string(),
|
|
@@ -12,13 +12,13 @@ export function toModel(id, catalogModel) {
|
|
|
12
12
|
id,
|
|
13
13
|
object: "model",
|
|
14
14
|
created: createdTimestamp,
|
|
15
|
-
owned_by: id.split("/")[0]
|
|
15
|
+
owned_by: id.split("/")[0] ?? "system",
|
|
16
16
|
architecture: {
|
|
17
|
-
input_modalities: modalities?.input
|
|
17
|
+
input_modalities: modalities?.input ?? [],
|
|
18
18
|
modality: modalities?.input &&
|
|
19
19
|
modalities?.output &&
|
|
20
20
|
`${modalities.input?.[0]}->${modalities.output?.[0]}`,
|
|
21
|
-
output_modalities: modalities?.output
|
|
21
|
+
output_modalities: modalities?.output ?? [],
|
|
22
22
|
},
|
|
23
23
|
endpoints: providers?.map((provider) => ({
|
|
24
24
|
tag: provider,
|
package/dist/lifecycle.js
CHANGED
|
@@ -6,7 +6,7 @@ import { getBaggageAttributes } from "./telemetry/baggage";
|
|
|
6
6
|
import { instrumentFetch } from "./telemetry/fetch";
|
|
7
7
|
import { recordRequestDuration } from "./telemetry/gen-ai";
|
|
8
8
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
9
|
-
import {
|
|
9
|
+
import { observeV8jsMemoryMetrics } from "./telemetry/memory";
|
|
10
10
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
11
11
|
import { wrapStream } from "./telemetry/stream";
|
|
12
12
|
import { resolveOrCreateRequestId } from "./utils/request";
|
|
@@ -17,6 +17,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
17
17
|
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
18
18
|
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
19
19
|
instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
20
|
+
observeV8jsMemoryMetrics(parsedConfig.telemetry?.signals?.hebo);
|
|
20
21
|
}
|
|
21
22
|
return async (request, state) => {
|
|
22
23
|
const start = performance.now();
|
|
@@ -57,7 +58,6 @@ export const winterCgHandler = (run, config) => {
|
|
|
57
58
|
if (ctx.operation === "chat" || ctx.operation === "embeddings") {
|
|
58
59
|
recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
|
|
59
60
|
}
|
|
60
|
-
recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
|
|
61
61
|
span.finish();
|
|
62
62
|
};
|
|
63
63
|
try {
|
package/dist/logger/default.js
CHANGED
|
@@ -37,8 +37,6 @@ function serializeError(err, _seen) {
|
|
|
37
37
|
return out;
|
|
38
38
|
}
|
|
39
39
|
const buildLogObject = (level, args) => {
|
|
40
|
-
if (args.length === 0)
|
|
41
|
-
return {};
|
|
42
40
|
const [first, second] = args;
|
|
43
41
|
let obj;
|
|
44
42
|
let err;
|
|
@@ -70,7 +68,9 @@ const buildLogObject = (level, args) => {
|
|
|
70
68
|
...obj,
|
|
71
69
|
};
|
|
72
70
|
};
|
|
73
|
-
const makeLogFn = (level, write) => (...args) =>
|
|
71
|
+
const makeLogFn = (level, write) => (...args) => {
|
|
72
|
+
write(JSON.stringify(buildLogObject(level, args)));
|
|
73
|
+
};
|
|
74
74
|
export const createDefaultLogger = (config) => {
|
|
75
75
|
if (config.level === "silent" || getDefaultLogLevel() === "silent") {
|
|
76
76
|
return { trace: noop, debug: noop, info: noop, warn: noop, error: noop };
|
package/dist/logger/index.d.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
export type
|
|
2
|
-
|
|
3
|
-
(obj: Record<string, unknown>, msg?: string): void;
|
|
4
|
-
(err: Error, msg?: string): void;
|
|
5
|
-
};
|
|
1
|
+
export type LogArgs = [msg: string] | [obj: Record<string, unknown>, msg?: string] | [err: Error, msg?: string];
|
|
2
|
+
export type LogFn = (...args: LogArgs) => void;
|
|
6
3
|
export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
|
|
7
4
|
export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
|
|
8
5
|
export type LoggerConfig = {
|
package/dist/middleware/utils.js
CHANGED
|
@@ -17,11 +17,8 @@ export function calculateReasoningBudgetFromEffort(effort, maxTokens, minTokens
|
|
|
17
17
|
percentage = 0.8;
|
|
18
18
|
break;
|
|
19
19
|
case "xhigh":
|
|
20
|
-
case "max":
|
|
21
20
|
percentage = 0.95;
|
|
22
21
|
break;
|
|
23
|
-
default:
|
|
24
|
-
return 0;
|
|
25
22
|
}
|
|
26
23
|
return Math.max(minTokens, Math.floor(maxTokens * percentage));
|
|
27
24
|
}
|
|
@@ -10,13 +10,17 @@ export const novaDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["nova"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["nova"] ??= {});
|
|
14
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
15
|
+
target.embeddingDimension = dimensions;
|
|
14
16
|
delete unknown["dimensions"];
|
|
15
17
|
return params;
|
|
16
18
|
},
|
|
17
19
|
};
|
|
18
20
|
function mapNovaEffort(effort) {
|
|
19
21
|
switch (effort) {
|
|
22
|
+
case "none":
|
|
23
|
+
return;
|
|
20
24
|
case "minimal":
|
|
21
25
|
case "low":
|
|
22
26
|
return "low";
|
|
@@ -24,7 +28,6 @@ function mapNovaEffort(effort) {
|
|
|
24
28
|
return "medium";
|
|
25
29
|
case "high":
|
|
26
30
|
case "xhigh":
|
|
27
|
-
case "max":
|
|
28
31
|
return "high";
|
|
29
32
|
}
|
|
30
33
|
}
|
|
@@ -40,18 +43,18 @@ export const novaReasoningMiddleware = {
|
|
|
40
43
|
return params;
|
|
41
44
|
const target = (params.providerOptions["amazon"] ??= {});
|
|
42
45
|
if (!reasoning.enabled) {
|
|
43
|
-
target
|
|
46
|
+
target.reasoningConfig = { type: "disabled" };
|
|
44
47
|
}
|
|
45
48
|
else if (reasoning.effort) {
|
|
46
49
|
// FUTURE: warn if mapNovaEffort modified the effort
|
|
47
|
-
target
|
|
50
|
+
target.reasoningConfig = {
|
|
48
51
|
type: "enabled",
|
|
49
52
|
maxReasoningEffort: mapNovaEffort(reasoning.effort),
|
|
50
53
|
};
|
|
51
54
|
}
|
|
52
55
|
else {
|
|
53
56
|
// FUTURE: warn if reasoning.max_tokens (unsupported) was ignored
|
|
54
|
-
target
|
|
57
|
+
target.reasoningConfig = { type: "enabled" };
|
|
55
58
|
}
|
|
56
59
|
delete unknown["reasoning"];
|
|
57
60
|
return params;
|
|
@@ -22,7 +22,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
22
22
|
case "high":
|
|
23
23
|
return "high";
|
|
24
24
|
case "xhigh":
|
|
25
|
-
case "max":
|
|
26
25
|
return "max";
|
|
27
26
|
}
|
|
28
27
|
}
|
|
@@ -35,7 +34,6 @@ export function mapClaudeReasoningEffort(effort, modelId) {
|
|
|
35
34
|
return "medium";
|
|
36
35
|
case "high":
|
|
37
36
|
case "xhigh":
|
|
38
|
-
case "max":
|
|
39
37
|
return "high";
|
|
40
38
|
}
|
|
41
39
|
}
|
|
@@ -66,41 +64,42 @@ export const claudeReasoningMiddleware = {
|
|
|
66
64
|
const modelId = model.modelId;
|
|
67
65
|
const clampedMaxTokens = reasoning.max_tokens && Math.min(reasoning.max_tokens, getMaxOutputTokens(modelId));
|
|
68
66
|
if (!reasoning.enabled) {
|
|
69
|
-
target
|
|
67
|
+
target.thinking = { type: "disabled" };
|
|
70
68
|
}
|
|
71
69
|
else if (reasoning.effort) {
|
|
72
70
|
if (isClaude4(modelId)) {
|
|
73
|
-
target
|
|
71
|
+
target.effort = mapClaudeReasoningEffort(reasoning.effort, modelId);
|
|
74
72
|
}
|
|
75
73
|
if (isOpus46(modelId)) {
|
|
76
|
-
target
|
|
77
|
-
?
|
|
74
|
+
target.thinking = clampedMaxTokens
|
|
75
|
+
? // @ts-expect-error AI SDK type missing type:adaptive with budgetToken
|
|
76
|
+
{ type: "adaptive", budgetTokens: clampedMaxTokens }
|
|
78
77
|
: { type: "adaptive" };
|
|
79
78
|
}
|
|
80
79
|
else if (isSonnet46(modelId)) {
|
|
81
|
-
target
|
|
80
|
+
target.thinking = clampedMaxTokens
|
|
82
81
|
? { type: "enabled", budgetTokens: clampedMaxTokens }
|
|
83
82
|
: { type: "adaptive" };
|
|
84
83
|
}
|
|
85
84
|
else {
|
|
86
|
-
target
|
|
85
|
+
target.thinking = { type: "enabled" };
|
|
87
86
|
if (clampedMaxTokens) {
|
|
88
|
-
target
|
|
87
|
+
target.thinking.budgetTokens = clampedMaxTokens;
|
|
89
88
|
}
|
|
90
89
|
else {
|
|
91
90
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
92
|
-
target
|
|
91
|
+
target.thinking.budgetTokens = calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? getMaxOutputTokens(modelId), 1024);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
94
|
}
|
|
96
95
|
else if (clampedMaxTokens) {
|
|
97
|
-
target
|
|
96
|
+
target.thinking = {
|
|
98
97
|
type: "enabled",
|
|
99
98
|
budgetTokens: clampedMaxTokens,
|
|
100
99
|
};
|
|
101
100
|
}
|
|
102
101
|
else {
|
|
103
|
-
target
|
|
102
|
+
target.thinking = { type: "enabled" };
|
|
104
103
|
}
|
|
105
104
|
delete unknown["reasoning"];
|
|
106
105
|
return params;
|
|
@@ -116,7 +115,8 @@ export const claudePromptCachingMiddleware = {
|
|
|
116
115
|
return params;
|
|
117
116
|
const cacheControl = unknown["cache_control"];
|
|
118
117
|
if (cacheControl) {
|
|
119
|
-
(params.providerOptions["anthropic"] ??= {})
|
|
118
|
+
(params.providerOptions["anthropic"] ??= {}).cacheControl =
|
|
119
|
+
cacheControl;
|
|
120
120
|
}
|
|
121
121
|
delete unknown["cache_control"];
|
|
122
122
|
return params;
|
package/dist/models/catalog.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
export function defineModelCatalog(...inputs) {
|
|
2
2
|
const catalogs = inputs.flat().map((input) => (typeof input === "function" ? input() : input));
|
|
3
|
-
|
|
3
|
+
const out = {};
|
|
4
|
+
for (const catalog of catalogs) {
|
|
5
|
+
Object.assign(out, catalog);
|
|
6
|
+
}
|
|
7
|
+
return out;
|
|
4
8
|
}
|
|
@@ -17,7 +17,9 @@ export const cohereDimensionsMiddleware = {
|
|
|
17
17
|
const dimensions = unknown["dimensions"];
|
|
18
18
|
if (!dimensions)
|
|
19
19
|
return params;
|
|
20
|
-
(params.providerOptions["cohere"] ??= {})
|
|
20
|
+
const target = (params.providerOptions["cohere"] ??= {});
|
|
21
|
+
// @ts-expect-error AI SDK does the value checking for us
|
|
22
|
+
target.outputDimension = dimensions;
|
|
21
23
|
delete unknown["dimensions"];
|
|
22
24
|
return params;
|
|
23
25
|
},
|
|
@@ -35,20 +37,20 @@ export const cohereReasoningMiddleware = {
|
|
|
35
37
|
return params;
|
|
36
38
|
const target = (params.providerOptions["cohere"] ??= {});
|
|
37
39
|
if (!reasoning.enabled) {
|
|
38
|
-
target
|
|
40
|
+
target.thinking = { type: "disabled" };
|
|
39
41
|
}
|
|
40
42
|
else if (reasoning.max_tokens) {
|
|
41
|
-
target
|
|
43
|
+
target.thinking = { type: "enabled", tokenBudget: reasoning.max_tokens };
|
|
42
44
|
}
|
|
43
45
|
else if (reasoning.effort) {
|
|
44
46
|
// FUTURE: warn that reasoning.max_tokens was computed
|
|
45
|
-
target
|
|
47
|
+
target.thinking = {
|
|
46
48
|
type: "enabled",
|
|
47
49
|
tokenBudget: calculateReasoningBudgetFromEffort(reasoning.effort, params.maxOutputTokens ?? COHERE_MAX_OUTPUT_TOKENS, 1024),
|
|
48
50
|
};
|
|
49
51
|
}
|
|
50
52
|
else {
|
|
51
|
-
target
|
|
53
|
+
target.thinking = { type: "enabled" };
|
|
52
54
|
}
|
|
53
55
|
delete unknown["reasoning"];
|
|
54
56
|
return params;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
|
|
2
2
|
import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
|
|
3
3
|
export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
|
|
4
|
-
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string):
|
|
4
|
+
export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
|
|
5
5
|
export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
6
6
|
export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
7
7
|
export declare const geminiReasoningMiddleware: LanguageModelMiddleware;
|