@hebo-ai/gateway 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -67
- package/dist/config.js +2 -12
- package/dist/endpoints/chat-completions/converters.d.ts +28 -24
- package/dist/endpoints/chat-completions/converters.js +99 -73
- package/dist/endpoints/chat-completions/handler.js +36 -30
- package/dist/endpoints/chat-completions/schema.d.ts +394 -272
- package/dist/endpoints/chat-completions/schema.js +124 -57
- package/dist/endpoints/embeddings/converters.d.ts +4 -4
- package/dist/endpoints/embeddings/converters.js +8 -9
- package/dist/endpoints/embeddings/handler.js +32 -26
- package/dist/endpoints/embeddings/schema.d.ts +28 -38
- package/dist/endpoints/embeddings/schema.js +10 -10
- package/dist/endpoints/models/converters.d.ts +2 -2
- package/dist/endpoints/models/converters.js +9 -12
- package/dist/endpoints/models/handler.js +8 -9
- package/dist/endpoints/models/schema.d.ts +37 -31
- package/dist/endpoints/models/schema.js +23 -12
- package/dist/gateway.d.ts +8 -9
- package/dist/gateway.js +7 -10
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/lifecycle.d.ts +2 -0
- package/dist/{utils/hooks.js → lifecycle.js} +16 -8
- package/dist/middleware/common.d.ts +4 -0
- package/dist/middleware/common.js +44 -0
- package/dist/middleware/matcher.d.ts +18 -0
- package/dist/middleware/matcher.js +83 -0
- package/dist/models/amazon/index.d.ts +2 -0
- package/dist/models/amazon/index.js +2 -0
- package/dist/models/amazon/middleware.d.ts +2 -0
- package/dist/models/amazon/middleware.js +20 -0
- package/dist/models/amazon/presets.d.ts +2390 -0
- package/dist/models/amazon/presets.js +80 -0
- package/dist/models/anthropic/index.d.ts +2 -0
- package/dist/models/anthropic/index.js +2 -0
- package/dist/models/anthropic/middleware.d.ts +5 -0
- package/dist/models/anthropic/middleware.js +67 -0
- package/dist/models/anthropic/presets.d.ts +4106 -0
- package/dist/models/anthropic/presets.js +113 -0
- package/dist/models/catalog.d.ts +3 -1
- package/dist/models/catalog.js +3 -2
- package/dist/models/cohere/index.d.ts +2 -0
- package/dist/models/cohere/index.js +2 -0
- package/dist/models/cohere/middleware.d.ts +2 -0
- package/dist/models/cohere/middleware.js +18 -0
- package/dist/models/cohere/presets.d.ts +2918 -0
- package/dist/models/cohere/presets.js +129 -0
- package/dist/models/google/index.d.ts +2 -0
- package/dist/models/google/index.js +2 -0
- package/dist/models/google/middleware.d.ts +2 -0
- package/dist/models/google/middleware.js +20 -0
- package/dist/models/{presets/gemini.d.ts → google/presets.d.ts} +400 -174
- package/dist/models/{presets/gemini.js → google/presets.js} +20 -5
- package/dist/models/meta/index.d.ts +1 -0
- package/dist/models/meta/index.js +1 -0
- package/dist/models/meta/presets.d.ts +3254 -0
- package/dist/models/{presets/llama.js → meta/presets.js} +44 -7
- package/dist/models/openai/index.d.ts +2 -0
- package/dist/models/openai/index.js +2 -0
- package/dist/models/openai/middleware.d.ts +2 -0
- package/dist/models/openai/middleware.js +20 -0
- package/dist/models/openai/presets.d.ts +6252 -0
- package/dist/models/openai/presets.js +206 -0
- package/dist/models/types.d.ts +3 -3
- package/dist/models/types.js +27 -0
- package/dist/models/voyage/index.d.ts +2 -0
- package/dist/models/voyage/index.js +2 -0
- package/dist/models/voyage/middleware.d.ts +2 -0
- package/dist/models/voyage/middleware.js +18 -0
- package/dist/models/{presets/voyage.d.ts → voyage/presets.d.ts} +322 -323
- package/dist/providers/anthropic/canonical.d.ts +3 -0
- package/dist/providers/anthropic/canonical.js +9 -0
- package/dist/providers/anthropic/index.d.ts +1 -0
- package/dist/providers/anthropic/index.js +1 -0
- package/dist/providers/bedrock/canonical.d.ts +15 -0
- package/dist/providers/{canonical/bedrock.js → bedrock/canonical.js} +13 -15
- package/dist/providers/bedrock/index.d.ts +1 -0
- package/dist/providers/bedrock/index.js +1 -0
- package/dist/providers/cohere/canonical.d.ts +3 -0
- package/dist/providers/{canonical/cohere.js → cohere/canonical.js} +6 -6
- package/dist/providers/cohere/index.d.ts +1 -0
- package/dist/providers/cohere/index.js +1 -0
- package/dist/providers/groq/canonical.d.ts +3 -0
- package/dist/providers/groq/canonical.js +12 -0
- package/dist/providers/groq/index.d.ts +1 -0
- package/dist/providers/groq/index.js +1 -0
- package/dist/providers/openai/canonical.d.ts +3 -0
- package/dist/providers/openai/canonical.js +8 -0
- package/dist/providers/openai/index.d.ts +1 -0
- package/dist/providers/openai/index.js +1 -0
- package/dist/providers/registry.d.ts +16 -26
- package/dist/providers/registry.js +19 -26
- package/dist/providers/types.d.ts +1 -1
- package/dist/providers/types.js +1 -0
- package/dist/providers/vertex/canonical.d.ts +3 -0
- package/dist/providers/vertex/canonical.js +8 -0
- package/dist/providers/vertex/index.d.ts +1 -0
- package/dist/providers/vertex/index.js +1 -0
- package/dist/providers/voyage/canonical.d.ts +3 -0
- package/dist/providers/voyage/canonical.js +7 -0
- package/dist/providers/voyage/index.d.ts +1 -0
- package/dist/providers/voyage/index.js +1 -0
- package/dist/types.d.ts +60 -30
- package/dist/utils/errors.js +2 -0
- package/dist/utils/preset.d.ts +1 -7
- package/dist/utils/preset.js +1 -1
- package/dist/utils/response.d.ts +1 -0
- package/dist/utils/response.js +10 -0
- package/package.json +79 -70
- package/src/config.ts +2 -18
- package/src/endpoints/chat-completions/converters.test.ts +39 -0
- package/src/endpoints/chat-completions/converters.ts +191 -112
- package/src/endpoints/chat-completions/handler.test.ts +47 -18
- package/src/endpoints/chat-completions/handler.ts +40 -34
- package/src/endpoints/chat-completions/schema.ts +161 -88
- package/src/endpoints/embeddings/converters.ts +15 -11
- package/src/endpoints/embeddings/handler.test.ts +27 -30
- package/src/endpoints/embeddings/handler.ts +34 -28
- package/src/endpoints/embeddings/schema.ts +10 -10
- package/src/endpoints/models/converters.ts +22 -14
- package/src/endpoints/models/handler.test.ts +26 -29
- package/src/endpoints/models/handler.ts +10 -12
- package/src/endpoints/models/schema.ts +26 -20
- package/src/gateway.ts +10 -24
- package/src/index.ts +3 -0
- package/src/{utils/hooks.ts → lifecycle.ts} +21 -11
- package/src/middleware/common.ts +68 -0
- package/src/middleware/matcher.ts +117 -0
- package/src/models/amazon/index.ts +2 -0
- package/src/models/amazon/middleware.ts +25 -0
- package/src/models/amazon/presets.ts +104 -0
- package/src/models/anthropic/index.ts +2 -0
- package/src/models/anthropic/middleware.test.ts +184 -0
- package/src/models/anthropic/middleware.ts +75 -0
- package/src/models/anthropic/presets.ts +161 -0
- package/src/models/catalog.ts +10 -2
- package/src/models/cohere/index.ts +2 -0
- package/src/models/cohere/middleware.ts +23 -0
- package/src/models/cohere/presets.ts +181 -0
- package/src/models/google/index.ts +2 -0
- package/src/models/google/middleware.ts +25 -0
- package/src/models/{presets/gemini.ts → google/presets.ts} +25 -5
- package/src/models/meta/index.ts +1 -0
- package/src/models/{presets/llama.ts → meta/presets.ts} +68 -7
- package/src/models/openai/index.ts +2 -0
- package/src/models/openai/middleware.ts +25 -0
- package/src/models/openai/presets.ts +269 -0
- package/src/models/types.ts +29 -2
- package/src/models/voyage/index.ts +2 -0
- package/src/models/voyage/middleware.ts +23 -0
- package/src/providers/anthropic/canonical.ts +17 -0
- package/src/providers/anthropic/index.ts +1 -0
- package/src/providers/{canonical/bedrock.ts → bedrock/canonical.ts} +22 -32
- package/src/providers/bedrock/index.ts +1 -0
- package/src/providers/cohere/canonical.ts +26 -0
- package/src/providers/cohere/index.ts +1 -0
- package/src/providers/groq/canonical.ts +21 -0
- package/src/providers/groq/index.ts +1 -0
- package/src/providers/openai/canonical.ts +16 -0
- package/src/providers/openai/index.ts +1 -0
- package/src/providers/registry.test.ts +12 -10
- package/src/providers/registry.ts +43 -43
- package/src/providers/types.ts +1 -0
- package/src/providers/vertex/canonical.ts +17 -0
- package/src/providers/vertex/index.ts +1 -0
- package/src/providers/voyage/canonical.ts +16 -0
- package/src/providers/voyage/index.ts +1 -0
- package/src/types.ts +64 -28
- package/src/utils/errors.ts +2 -0
- package/src/utils/preset.ts +2 -6
- package/src/utils/response.ts +15 -0
- package/dist/models/presets/claude.d.ts +0 -1165
- package/dist/models/presets/claude.js +0 -40
- package/dist/models/presets/cohere.d.ts +0 -383
- package/dist/models/presets/cohere.js +0 -26
- package/dist/models/presets/gpt-oss.d.ts +0 -779
- package/dist/models/presets/gpt-oss.js +0 -40
- package/dist/models/presets/llama.d.ts +0 -1400
- package/dist/providers/canonical/anthropic.d.ts +0 -25
- package/dist/providers/canonical/anthropic.js +0 -14
- package/dist/providers/canonical/bedrock.d.ts +0 -26
- package/dist/providers/canonical/cohere.d.ts +0 -17
- package/dist/providers/canonical/groq.d.ts +0 -17
- package/dist/providers/canonical/groq.js +0 -10
- package/dist/providers/canonical/openai.d.ts +0 -17
- package/dist/providers/canonical/openai.js +0 -8
- package/dist/providers/canonical/vertex.d.ts +0 -17
- package/dist/providers/canonical/vertex.js +0 -10
- package/dist/providers/canonical/voyage.d.ts +0 -17
- package/dist/providers/canonical/voyage.js +0 -8
- package/dist/utils/hooks.d.ts +0 -2
- package/src/models/presets/claude.ts +0 -59
- package/src/models/presets/cohere.ts +0 -37
- package/src/models/presets/gpt-oss.ts +0 -55
- package/src/providers/canonical/anthropic.ts +0 -32
- package/src/providers/canonical/cohere.ts +0 -36
- package/src/providers/canonical/groq.ts +0 -25
- package/src/providers/canonical/openai.ts +0 -16
- package/src/providers/canonical/vertex.ts +0 -18
- package/src/providers/canonical/voyage.ts +0 -16
- package/dist/models/{presets/voyage.js → voyage/presets.js} +10 -10
- package/src/models/{presets/voyage.ts → voyage/presets.ts} +10 -10
|
@@ -1,36 +1,35 @@
|
|
|
1
|
-
import { generateText, streamText } from "ai";
|
|
1
|
+
import { generateText, streamText, wrapLanguageModel } from "ai";
|
|
2
2
|
import * as z from "zod/mini";
|
|
3
3
|
|
|
4
|
-
import type { GatewayConfig, Endpoint } from "../../types";
|
|
4
|
+
import type { GatewayConfig, Endpoint, GatewayContext } from "../../types";
|
|
5
5
|
|
|
6
|
-
import {
|
|
6
|
+
import { withLifecycle } from "../../lifecycle";
|
|
7
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
8
|
import { resolveProvider } from "../../providers/registry";
|
|
8
9
|
import { createErrorResponse } from "../../utils/errors";
|
|
9
|
-
import { withHooks } from "../../utils/hooks";
|
|
10
10
|
import {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
convertToTextCallOptions,
|
|
12
|
+
toChatCompletionsResponse,
|
|
13
|
+
toChatCompletionsStreamResponse,
|
|
14
14
|
} from "./converters";
|
|
15
|
-
import {
|
|
15
|
+
import { ChatCompletionsBodySchema } from "./schema";
|
|
16
16
|
|
|
17
17
|
export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
18
|
-
const
|
|
18
|
+
const hooks = config.hooks;
|
|
19
19
|
|
|
20
|
-
const handler = async (
|
|
21
|
-
if (
|
|
20
|
+
const handler = async (ctx: GatewayContext): Promise<Response> => {
|
|
21
|
+
if (!ctx.request || ctx.request.method !== "POST") {
|
|
22
22
|
return createErrorResponse("METHOD_NOT_ALLOWED", "Method Not Allowed", 405);
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
let body;
|
|
26
26
|
try {
|
|
27
|
-
body = await
|
|
27
|
+
body = await ctx.request.json();
|
|
28
28
|
} catch {
|
|
29
29
|
return createErrorResponse("BAD_REQUEST", "Invalid JSON", 400);
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
-
const parsed =
|
|
33
|
-
|
|
32
|
+
const parsed = ChatCompletionsBodySchema.safeParse(body);
|
|
34
33
|
if (!parsed.success) {
|
|
35
34
|
return createErrorResponse(
|
|
36
35
|
"UNPROCESSABLE_ENTITY",
|
|
@@ -39,65 +38,72 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
39
38
|
z.prettifyError(parsed.error),
|
|
40
39
|
);
|
|
41
40
|
}
|
|
41
|
+
ctx.body = parsed.data;
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
let stream, inputs;
|
|
44
|
+
({ model: ctx.modelId, stream, ...inputs } = parsed.data);
|
|
44
45
|
|
|
45
|
-
let resolvedModelId;
|
|
46
46
|
try {
|
|
47
|
-
resolvedModelId = (await hooks?.resolveModelId?.(
|
|
47
|
+
ctx.resolvedModelId = (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
48
48
|
} catch (error) {
|
|
49
49
|
return createErrorResponse("BAD_REQUEST", error, 400);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
ctx.operation = "text";
|
|
53
53
|
try {
|
|
54
|
-
|
|
54
|
+
const override = await hooks?.resolveProvider?.(ctx);
|
|
55
|
+
ctx.provider =
|
|
56
|
+
override ??
|
|
57
|
+
resolveProvider({
|
|
58
|
+
providers: ctx.providers,
|
|
59
|
+
models: ctx.models,
|
|
60
|
+
modelId: ctx.resolvedModelId,
|
|
61
|
+
operation: ctx.operation,
|
|
62
|
+
});
|
|
55
63
|
} catch (error) {
|
|
56
64
|
return createErrorResponse("BAD_REQUEST", error, 400);
|
|
57
65
|
}
|
|
58
66
|
|
|
59
|
-
|
|
67
|
+
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
68
|
+
|
|
69
|
+
let textOptions;
|
|
60
70
|
try {
|
|
61
|
-
|
|
62
|
-
providers,
|
|
63
|
-
models,
|
|
64
|
-
modelId: resolvedModelId,
|
|
65
|
-
operation: "text" as const,
|
|
66
|
-
};
|
|
67
|
-
const override = await hooks?.resolveProvider?.(args);
|
|
68
|
-
provider = override ?? resolveProvider(args);
|
|
71
|
+
textOptions = convertToTextCallOptions(inputs);
|
|
69
72
|
} catch (error) {
|
|
70
73
|
return createErrorResponse("BAD_REQUEST", error, 400);
|
|
71
74
|
}
|
|
72
75
|
|
|
73
|
-
const
|
|
76
|
+
const languageModelWithMiddleware = wrapLanguageModel({
|
|
77
|
+
model: languageModel,
|
|
78
|
+
middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
|
|
79
|
+
});
|
|
74
80
|
|
|
75
81
|
if (stream) {
|
|
76
82
|
let result;
|
|
77
83
|
try {
|
|
78
84
|
result = streamText({
|
|
79
|
-
model:
|
|
85
|
+
model: languageModelWithMiddleware,
|
|
80
86
|
...textOptions,
|
|
81
87
|
});
|
|
82
88
|
} catch (error) {
|
|
83
89
|
return createErrorResponse("INTERNAL_SERVER_ERROR", error, 500);
|
|
84
90
|
}
|
|
85
91
|
|
|
86
|
-
return
|
|
92
|
+
return toChatCompletionsStreamResponse(result, ctx.modelId);
|
|
87
93
|
}
|
|
88
94
|
|
|
89
95
|
let result;
|
|
90
96
|
try {
|
|
91
97
|
result = await generateText({
|
|
92
|
-
model:
|
|
98
|
+
model: languageModelWithMiddleware,
|
|
93
99
|
...textOptions,
|
|
94
100
|
});
|
|
95
101
|
} catch (error) {
|
|
96
102
|
return createErrorResponse("INTERNAL_SERVER_ERROR", error, 500);
|
|
97
103
|
}
|
|
98
104
|
|
|
99
|
-
return
|
|
105
|
+
return toChatCompletionsResponse(result, ctx.modelId);
|
|
100
106
|
};
|
|
101
107
|
|
|
102
|
-
return { handler:
|
|
108
|
+
return { handler: withLifecycle(handler, config) };
|
|
103
109
|
};
|
|
@@ -1,33 +1,34 @@
|
|
|
1
|
-
import * as z from "zod
|
|
1
|
+
import * as z from "zod";
|
|
2
2
|
|
|
3
|
-
export const
|
|
3
|
+
export const ChatCompletionsContentPartTextSchema = z.object({
|
|
4
4
|
type: z.literal("text"),
|
|
5
5
|
text: z.string(),
|
|
6
6
|
});
|
|
7
7
|
|
|
8
|
-
export const
|
|
8
|
+
export const ChatCompletionsContentPartImageSchema = z.object({
|
|
9
9
|
type: z.literal("image_url"),
|
|
10
10
|
image_url: z.object({
|
|
11
11
|
url: z.string(),
|
|
12
|
-
detail: z.
|
|
12
|
+
detail: z.union([z.literal("low"), z.literal("high"), z.literal("auto")]).optional(),
|
|
13
13
|
}),
|
|
14
14
|
});
|
|
15
15
|
|
|
16
|
-
export const
|
|
16
|
+
export const ChatCompletionsContentPartFileSchema = z.object({
|
|
17
17
|
type: z.literal("file"),
|
|
18
18
|
file: z.object({
|
|
19
19
|
data: z.string(),
|
|
20
20
|
media_type: z.string(),
|
|
21
|
-
filename: z.string(),
|
|
21
|
+
filename: z.string().optional(),
|
|
22
22
|
}),
|
|
23
23
|
});
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
| z.infer<typeof
|
|
28
|
-
| z.infer<typeof
|
|
25
|
+
// FUTURE: missing ContentPartAudio
|
|
26
|
+
export type ChatCompletionsContentPart =
|
|
27
|
+
| z.infer<typeof ChatCompletionsContentPartTextSchema>
|
|
28
|
+
| z.infer<typeof ChatCompletionsContentPartImageSchema>
|
|
29
|
+
| z.infer<typeof ChatCompletionsContentPartFileSchema>;
|
|
29
30
|
|
|
30
|
-
export const
|
|
31
|
+
export const ChatCompletionsToolCallSchema = z.object({
|
|
31
32
|
type: z.literal("function"),
|
|
32
33
|
id: z.string(),
|
|
33
34
|
function: z.object({
|
|
@@ -35,67 +36,75 @@ export const CompletionsToolCallSchema = z.object({
|
|
|
35
36
|
name: z.string(),
|
|
36
37
|
}),
|
|
37
38
|
});
|
|
38
|
-
export type
|
|
39
|
+
export type ChatCompletionsToolCall = z.infer<typeof ChatCompletionsToolCallSchema>;
|
|
39
40
|
|
|
40
|
-
export const
|
|
41
|
+
export const ChatCompletionsSystemMessageSchema = z.object({
|
|
41
42
|
role: z.literal("system"),
|
|
42
43
|
content: z.string(),
|
|
44
|
+
name: z.string().optional(),
|
|
43
45
|
});
|
|
44
|
-
export type
|
|
46
|
+
export type ChatCompletionsSystemMessage = z.infer<typeof ChatCompletionsSystemMessageSchema>;
|
|
45
47
|
|
|
46
|
-
export const
|
|
48
|
+
export const ChatCompletionsUserMessageSchema = z.object({
|
|
47
49
|
role: z.literal("user"),
|
|
48
50
|
content: z.union([
|
|
49
51
|
z.string(),
|
|
50
52
|
z.array(
|
|
51
53
|
z.union([
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
ChatCompletionsContentPartTextSchema,
|
|
55
|
+
ChatCompletionsContentPartImageSchema,
|
|
56
|
+
ChatCompletionsContentPartFileSchema,
|
|
55
57
|
]),
|
|
56
58
|
),
|
|
57
59
|
]),
|
|
60
|
+
name: z.string().optional(),
|
|
58
61
|
});
|
|
59
|
-
export type
|
|
62
|
+
export type ChatCompletionsUserMessage = z.infer<typeof ChatCompletionsUserMessageSchema>;
|
|
60
63
|
|
|
61
|
-
export const
|
|
64
|
+
export const ChatCompletionsAssistantMessageSchema = z.object({
|
|
62
65
|
role: z.literal("assistant"),
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
66
|
+
// FUTURE: this should support arrays of TextContentPart and RefusalContentPart
|
|
67
|
+
content: z.union([z.string(), z.null()]).optional(),
|
|
68
|
+
name: z.string().optional(),
|
|
69
|
+
// FUTURE: This should also support Custom Tool Calls
|
|
70
|
+
tool_calls: z.array(ChatCompletionsToolCallSchema).optional(),
|
|
71
|
+
// Extensions
|
|
72
|
+
reasoning_content: z.string().optional().meta({ extension: true }),
|
|
67
73
|
});
|
|
68
|
-
export type
|
|
74
|
+
export type ChatCompletionsAssistantMessage = z.infer<typeof ChatCompletionsAssistantMessageSchema>;
|
|
69
75
|
|
|
70
|
-
export const
|
|
76
|
+
export const ChatCompletionsToolMessageSchema = z.object({
|
|
71
77
|
role: z.literal("tool"),
|
|
78
|
+
// FUTURE: this should also support arrays of TextContentParts
|
|
72
79
|
content: z.string(),
|
|
73
80
|
tool_call_id: z.string(),
|
|
74
81
|
});
|
|
75
|
-
export type
|
|
82
|
+
export type ChatCompletionsToolMessage = z.infer<typeof ChatCompletionsToolMessageSchema>;
|
|
76
83
|
|
|
77
|
-
export const
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
84
|
+
export const ChatCompletionsMessageSchema = z.union([
|
|
85
|
+
ChatCompletionsSystemMessageSchema,
|
|
86
|
+
ChatCompletionsUserMessageSchema,
|
|
87
|
+
ChatCompletionsAssistantMessageSchema,
|
|
88
|
+
ChatCompletionsToolMessageSchema,
|
|
82
89
|
]);
|
|
83
|
-
export type
|
|
90
|
+
export type ChatCompletionsMessage = z.infer<typeof ChatCompletionsMessageSchema>;
|
|
84
91
|
|
|
85
|
-
export const
|
|
92
|
+
export const ChatCompletionsToolSchema = z.object({
|
|
86
93
|
type: z.literal("function"),
|
|
87
94
|
function: z.object({
|
|
88
95
|
name: z.string(),
|
|
89
|
-
description: z.
|
|
96
|
+
description: z.string().optional(),
|
|
90
97
|
parameters: z.record(z.string(), z.any()),
|
|
98
|
+
// Missing strict parameter
|
|
91
99
|
}),
|
|
92
100
|
});
|
|
93
|
-
export type
|
|
101
|
+
export type ChatCompletionsTool = z.infer<typeof ChatCompletionsToolSchema>;
|
|
94
102
|
|
|
95
|
-
export const
|
|
103
|
+
export const ChatCompletionsToolChoiceSchema = z.union([
|
|
96
104
|
z.literal("none"),
|
|
97
105
|
z.literal("auto"),
|
|
98
106
|
z.literal("required"),
|
|
107
|
+
// FUTURE: missing AllowedTools and CustomToolChoice
|
|
99
108
|
z.object({
|
|
100
109
|
type: z.literal("function"),
|
|
101
110
|
function: z.object({
|
|
@@ -103,70 +112,134 @@ export const CompletionsToolChoiceSchema = z.union([
|
|
|
103
112
|
}),
|
|
104
113
|
}),
|
|
105
114
|
]);
|
|
106
|
-
export type
|
|
115
|
+
export type ChatCompletionsToolChoice = z.infer<typeof ChatCompletionsToolChoiceSchema>;
|
|
107
116
|
|
|
108
|
-
export const
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
117
|
+
export const ChatCompletionsReasoningEffortSchema = z.union([
|
|
118
|
+
z.literal("none"),
|
|
119
|
+
z.literal("minimal"),
|
|
120
|
+
z.literal("low"),
|
|
121
|
+
z.literal("medium"),
|
|
122
|
+
z.literal("high"),
|
|
123
|
+
z.literal("xhigh"),
|
|
124
|
+
]);
|
|
125
|
+
export type ChatCompletionsReasoningEffort = z.infer<typeof ChatCompletionsReasoningEffortSchema>;
|
|
126
|
+
|
|
127
|
+
export const ChatCompletionsReasoningConfigSchema = z.object({
|
|
128
|
+
enabled: z.optional(z.boolean()),
|
|
129
|
+
effort: z.optional(ChatCompletionsReasoningEffortSchema),
|
|
130
|
+
max_tokens: z.optional(z.number()),
|
|
131
|
+
exclude: z.optional(z.boolean()),
|
|
132
|
+
});
|
|
133
|
+
export type ChatCompletionsReasoningConfig = z.infer<typeof ChatCompletionsReasoningConfigSchema>;
|
|
134
|
+
|
|
135
|
+
const ChatCompletionsInputsSchema = z.object({
|
|
136
|
+
messages: z.array(ChatCompletionsMessageSchema),
|
|
137
|
+
tools: z
|
|
138
|
+
.array(
|
|
139
|
+
// FUTURE: Missing CustomTool
|
|
140
|
+
ChatCompletionsToolSchema,
|
|
141
|
+
)
|
|
142
|
+
.optional(),
|
|
143
|
+
tool_choice: ChatCompletionsToolChoiceSchema.optional(),
|
|
144
|
+
temperature: z.number().min(0).max(2).optional(),
|
|
145
|
+
max_tokens: z.int().nonnegative().optional(),
|
|
146
|
+
max_completion_tokens: z.int().nonnegative().optional(),
|
|
147
|
+
frequency_penalty: z.number().min(-2.0).max(2.0).optional(),
|
|
148
|
+
presence_penalty: z.number().min(-2.0).max(2.0).optional(),
|
|
149
|
+
seed: z.int().optional(),
|
|
150
|
+
stop: z.union([z.string(), z.array(z.string())]).optional(),
|
|
151
|
+
top_p: z.number().min(0).max(1.0).optional(),
|
|
152
|
+
reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
|
|
153
|
+
// Extensions
|
|
154
|
+
reasoning: ChatCompletionsReasoningConfigSchema.optional().meta({ extension: true }),
|
|
113
155
|
});
|
|
114
|
-
export type
|
|
156
|
+
export type ChatCompletionsInputs = z.infer<typeof ChatCompletionsInputsSchema>;
|
|
115
157
|
|
|
116
|
-
export const
|
|
158
|
+
export const ChatCompletionsBodySchema = z.looseObject({
|
|
117
159
|
model: z.string(),
|
|
118
|
-
stream: z.
|
|
160
|
+
stream: z.boolean().optional(),
|
|
161
|
+
...ChatCompletionsInputsSchema.shape,
|
|
119
162
|
});
|
|
120
|
-
export type
|
|
163
|
+
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
121
164
|
|
|
122
|
-
export const
|
|
165
|
+
export const ChatCompletionsFinishReasonSchema = z.union([
|
|
123
166
|
z.literal("stop"),
|
|
124
167
|
z.literal("length"),
|
|
125
168
|
z.literal("content_filter"),
|
|
126
169
|
z.literal("tool_calls"),
|
|
127
170
|
]);
|
|
128
|
-
export type
|
|
129
|
-
|
|
130
|
-
export const
|
|
131
|
-
index: z.
|
|
132
|
-
message:
|
|
133
|
-
finish_reason:
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
export type CompletionsChoice = z.infer<typeof CompletionsChoiceSchema>;
|
|
137
|
-
|
|
138
|
-
export const CompletionsUsageSchema = z.object({
|
|
139
|
-
prompt_tokens: z.number(),
|
|
140
|
-
completion_tokens: z.number(),
|
|
141
|
-
total_tokens: z.number(),
|
|
142
|
-
completion_tokens_details: z.optional(
|
|
143
|
-
z.object({
|
|
144
|
-
reasoning_tokens: z.optional(z.number()),
|
|
145
|
-
}),
|
|
146
|
-
),
|
|
147
|
-
prompt_tokens_details: z.optional(
|
|
148
|
-
z.object({
|
|
149
|
-
cached_tokens: z.optional(z.number()),
|
|
150
|
-
}),
|
|
151
|
-
),
|
|
171
|
+
export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
|
|
172
|
+
|
|
173
|
+
export const ChatCompletionsChoiceSchema = z.object({
|
|
174
|
+
index: z.int().nonnegative(),
|
|
175
|
+
message: ChatCompletionsAssistantMessageSchema,
|
|
176
|
+
finish_reason: ChatCompletionsFinishReasonSchema,
|
|
177
|
+
// FUTURE: model this out
|
|
178
|
+
logprobs: z.any().optional(),
|
|
152
179
|
});
|
|
153
|
-
export type
|
|
180
|
+
export type ChatCompletionsChoice = z.infer<typeof ChatCompletionsChoiceSchema>;
|
|
181
|
+
|
|
182
|
+
export const ChatCompletionsUsageSchema = z.object({
|
|
183
|
+
prompt_tokens: z.int().nonnegative().optional(),
|
|
184
|
+
completion_tokens: z.int().nonnegative().optional(),
|
|
185
|
+
total_tokens: z.int().nonnegative().optional(),
|
|
186
|
+
completion_tokens_details: z
|
|
187
|
+
.object({
|
|
188
|
+
// FUTURE: add missing properties
|
|
189
|
+
reasoning_tokens: z.int().nonnegative().optional(),
|
|
190
|
+
})
|
|
191
|
+
.optional(),
|
|
192
|
+
prompt_tokens_details: z
|
|
193
|
+
.object({
|
|
194
|
+
// FUTURE: add missing properties
|
|
195
|
+
cached_tokens: z.int().nonnegative().optional(),
|
|
196
|
+
})
|
|
197
|
+
.optional(),
|
|
198
|
+
});
|
|
199
|
+
export type ChatCompletionsUsage = z.infer<typeof ChatCompletionsUsageSchema>;
|
|
154
200
|
|
|
155
|
-
export const
|
|
201
|
+
export const ChatCompletionsSchema = z.object({
|
|
156
202
|
id: z.string(),
|
|
157
203
|
object: z.literal("chat.completion"),
|
|
158
|
-
created: z.
|
|
204
|
+
created: z.int().nonnegative(),
|
|
159
205
|
model: z.string(),
|
|
160
|
-
choices: z.array(
|
|
161
|
-
usage:
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
});
|
|
165
|
-
export type
|
|
166
|
-
|
|
167
|
-
export
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
206
|
+
choices: z.array(ChatCompletionsChoiceSchema),
|
|
207
|
+
usage: ChatCompletionsUsageSchema.nullable(),
|
|
208
|
+
// Extensions
|
|
209
|
+
provider_metadata: z.any().optional().meta({ extension: true }),
|
|
210
|
+
});
|
|
211
|
+
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
212
|
+
|
|
213
|
+
export const ChatCompletionsToolCallDeltaSchema = ChatCompletionsToolCallSchema.partial().extend({
|
|
214
|
+
index: z.int().nonnegative(),
|
|
215
|
+
});
|
|
216
|
+
export type ChatCompletionsToolCallDelta = z.infer<typeof ChatCompletionsToolCallDeltaSchema>;
|
|
217
|
+
|
|
218
|
+
export const ChatCompletionsAssistantMessageDeltaSchema =
|
|
219
|
+
ChatCompletionsAssistantMessageSchema.partial().extend({
|
|
220
|
+
tool_calls: z.array(ChatCompletionsToolCallDeltaSchema).optional(),
|
|
221
|
+
});
|
|
222
|
+
export type ChatCompletionsAssistantMessageDelta = z.infer<
|
|
223
|
+
typeof ChatCompletionsAssistantMessageDeltaSchema
|
|
224
|
+
>;
|
|
225
|
+
|
|
226
|
+
export const ChatCompletionsChoiceDeltaSchema = z.object({
|
|
227
|
+
index: z.int().nonnegative(),
|
|
228
|
+
delta: ChatCompletionsAssistantMessageDeltaSchema,
|
|
229
|
+
finish_reason: ChatCompletionsFinishReasonSchema.nullable(),
|
|
230
|
+
// FUTURE: model this out
|
|
231
|
+
logprobs: z.any().optional(),
|
|
232
|
+
});
|
|
233
|
+
export type ChatCompletionsChoiceDelta = z.infer<typeof ChatCompletionsChoiceDeltaSchema>;
|
|
234
|
+
|
|
235
|
+
export const ChatCompletionsChunkSchema = z.object({
|
|
236
|
+
id: z.string(),
|
|
237
|
+
object: z.literal("chat.completion.chunk"),
|
|
238
|
+
created: z.int().nonnegative(),
|
|
239
|
+
model: z.string(),
|
|
240
|
+
choices: z.array(ChatCompletionsChoiceDeltaSchema),
|
|
241
|
+
usage: ChatCompletionsUsageSchema.nullable(),
|
|
242
|
+
// Extensions
|
|
243
|
+
provider_metadata: z.any().optional().meta({ extension: true }),
|
|
244
|
+
});
|
|
245
|
+
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { JSONObject, SharedV3ProviderOptions } from "@ai-sdk/provider";
|
|
2
2
|
import type { EmbedManyResult } from "ai";
|
|
3
3
|
|
|
4
|
-
import type { EmbeddingsInputs,
|
|
4
|
+
import type { EmbeddingsInputs, EmbeddingsData, EmbeddingsUsage, Embeddings } from "./schema";
|
|
5
|
+
|
|
6
|
+
import { mergeResponseInit } from "../../utils/response";
|
|
5
7
|
|
|
6
8
|
export type EmbedCallOptions = {
|
|
7
9
|
values: string[];
|
|
8
|
-
providerOptions:
|
|
10
|
+
providerOptions: SharedV3ProviderOptions;
|
|
9
11
|
};
|
|
10
12
|
|
|
11
|
-
export function
|
|
13
|
+
export function convertToEmbedCallOptions(params: EmbeddingsInputs): EmbedCallOptions {
|
|
12
14
|
const { input, ...rest } = params;
|
|
13
15
|
|
|
14
16
|
return {
|
|
15
17
|
values: Array.isArray(input) ? input : [input],
|
|
16
18
|
providerOptions: {
|
|
17
|
-
|
|
19
|
+
unknown: rest as JSONObject,
|
|
18
20
|
},
|
|
19
21
|
};
|
|
20
22
|
}
|
|
@@ -27,8 +29,8 @@ export function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string):
|
|
|
27
29
|
}));
|
|
28
30
|
|
|
29
31
|
const usage: EmbeddingsUsage = {
|
|
30
|
-
prompt_tokens: embedManyResult.usage
|
|
31
|
-
total_tokens: embedManyResult.usage
|
|
32
|
+
prompt_tokens: embedManyResult.usage.tokens,
|
|
33
|
+
total_tokens: embedManyResult.usage.tokens,
|
|
32
34
|
};
|
|
33
35
|
|
|
34
36
|
return {
|
|
@@ -36,15 +38,17 @@ export function toEmbeddings(embedManyResult: EmbedManyResult, modelId: string):
|
|
|
36
38
|
data,
|
|
37
39
|
model: modelId,
|
|
38
40
|
usage,
|
|
39
|
-
|
|
41
|
+
provider_metadata: embedManyResult.providerMetadata,
|
|
40
42
|
};
|
|
41
43
|
}
|
|
42
44
|
|
|
43
45
|
export function createEmbeddingsResponse(
|
|
44
46
|
embedManyResult: EmbedManyResult,
|
|
45
47
|
modelId: string,
|
|
48
|
+
responseInit?: ResponseInit,
|
|
46
49
|
): Response {
|
|
47
|
-
return new Response(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
+
return new Response(
|
|
51
|
+
JSON.stringify(toEmbeddings(embedManyResult, modelId)),
|
|
52
|
+
mergeResponseInit({ "Content-Type": "application/json" }, responseInit),
|
|
53
|
+
);
|
|
50
54
|
}
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import { createProviderRegistry } from "ai";
|
|
2
1
|
import { MockEmbeddingModelV3, MockProviderV3 } from "ai/test";
|
|
3
2
|
import { describe, expect, test } from "bun:test";
|
|
4
3
|
|
|
5
4
|
import { parseResponse, postJson } from "../../../test/helpers/http";
|
|
6
|
-
import { createModelCatalog } from "../../models/catalog";
|
|
7
5
|
import { embeddings } from "./handler";
|
|
8
6
|
|
|
9
7
|
const baseUrl = "http://localhost/embeddings";
|
|
@@ -20,45 +18,44 @@ const expectedEmbeddingResponse = (count: number) => ({
|
|
|
20
18
|
prompt_tokens: count * 10,
|
|
21
19
|
total_tokens: count * 10,
|
|
22
20
|
},
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
provider_metadata: {
|
|
22
|
+
provider: {
|
|
25
23
|
key: "value",
|
|
26
24
|
},
|
|
27
25
|
},
|
|
28
26
|
});
|
|
29
27
|
|
|
30
28
|
describe("Embeddings Handler", () => {
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
29
|
+
const endpoint = embeddings({
|
|
30
|
+
providers: {
|
|
31
|
+
openai: new MockProviderV3({
|
|
32
|
+
embeddingModels: {
|
|
33
|
+
"text-embedding-3-small": new MockEmbeddingModelV3({
|
|
34
|
+
// eslint-disable-next-line require-await
|
|
35
|
+
doEmbed: async (options) => ({
|
|
36
|
+
embeddings: options.values.map(() => [0.1, 0.2, 0.3]),
|
|
37
|
+
usage: { tokens: 10 },
|
|
38
|
+
providerMetadata: { provider: { key: "value" } },
|
|
39
|
+
warnings: [],
|
|
40
|
+
}),
|
|
41
41
|
}),
|
|
42
|
-
}
|
|
43
|
-
},
|
|
44
|
-
}),
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
const catalog = createModelCatalog({
|
|
48
|
-
"text-embedding-3-small": {
|
|
49
|
-
name: "OpenAI Embedding Model",
|
|
50
|
-
modalities: { input: ["text"], output: ["embeddings"] },
|
|
51
|
-
providers: ["openai"],
|
|
42
|
+
},
|
|
43
|
+
}),
|
|
52
44
|
},
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
45
|
+
models: {
|
|
46
|
+
"text-embedding-3-small": {
|
|
47
|
+
name: "OpenAI Embedding Model",
|
|
48
|
+
modalities: { input: ["text"], output: ["embeddings"] },
|
|
49
|
+
providers: ["openai"],
|
|
50
|
+
},
|
|
51
|
+
"gpt-oss-20b": {
|
|
52
|
+
name: "GPT-OSS 20B",
|
|
53
|
+
modalities: { input: ["text"], output: ["text"] },
|
|
54
|
+
providers: ["openai"],
|
|
55
|
+
},
|
|
57
56
|
},
|
|
58
57
|
});
|
|
59
58
|
|
|
60
|
-
const endpoint = embeddings({ providers: registry, models: catalog });
|
|
61
|
-
|
|
62
59
|
test("should return 400 if model does not support embeddings", async () => {
|
|
63
60
|
const request = postJson(baseUrl, {
|
|
64
61
|
model: "gpt-oss-20b",
|