@hebo-ai/gateway 0.6.2-rc0 → 0.6.2
This diff shows the changes between publicly available package versions as released to one of the supported registries, and is provided for informational purposes only.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +26 -21
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -3
- package/dist/models/amazon/middleware.js +8 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/catalog.js +5 -1
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +13 -9
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +36 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -364
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
package/src/endpoints/chat-completions/handler.test.ts
@@ -1,391 +0,0 @@
-import { simulateReadableStream } from "ai";
-import { MockLanguageModelV3, MockProviderV3 } from "ai/test";
-import { describe, expect, test } from "bun:test";
-
-import { parseResponse, postJson } from "../../../test/helpers/http";
-import { defineModelCatalog } from "../../models/catalog";
-import { chatCompletions } from "./handler";
-
-const baseUrl = "http://localhost/chat/completions";
-
-describe("Chat Completions Handler", () => {
-  const mockLanguageModel = new MockLanguageModelV3({
-    // oxlint-disable-next-line require-await
-    doGenerate: async (options) => {
-      const isStructuredOutput = options.responseFormat?.type === "json";
-      const isToolCall = options.tools && options.tools.length > 0;
-
-      if (isToolCall) {
-        return {
-          finishReason: { unified: "tool-calls", raw: "tool-calls" },
-          usage: {
-            inputTokens: { total: 15, noCache: 15, cacheRead: 20, cacheWrite: 0 },
-            outputTokens: { total: 25, text: 0, reasoning: 10 },
-          },
-          content: [
-            {
-              type: "tool-call",
-              toolCallId: "call_123",
-              toolName: "get_current_weather",
-              input: '{"location":"San Francisco, CA"}',
-            },
-          ],
-          providerMetadata: { provider: { key: "value" } },
-          warnings: [],
-        };
-      }
-
-      if (isStructuredOutput) {
-        return {
-          finishReason: { unified: "stop", raw: "stop" },
-          usage: {
-            inputTokens: { total: 10, noCache: 10, cacheRead: 20, cacheWrite: 0 },
-            outputTokens: { total: 20, text: 20, reasoning: 10 },
-          },
-          content: [
-            {
-              type: "text",
-              text: '{"city":"San Francisco","temp_c":18}',
-            },
-          ],
-          providerMetadata: { provider: { key: "value" } },
-          warnings: [],
-        };
-      }
-
-      return {
-        finishReason: { unified: "stop", raw: "stop" },
-        usage: {
-          inputTokens: { total: 10, noCache: 10, cacheRead: 20, cacheWrite: 0 },
-          outputTokens: { total: 20, text: 20, reasoning: 10 },
-        },
-        content: [
-          {
-            type: "text",
-            text: "Hello from AI",
-          },
-        ],
-        providerMetadata: { provider: { key: "value" } },
-        warnings: [],
-      };
-    },
-    // oxlint-disable-next-line require-await
-    doStream: async () => ({
-      stream: simulateReadableStream({
-        chunks: [
-          { type: "text-start", id: "1" },
-          { type: "text-delta", delta: "Hello", id: "1" },
-          { type: "text-delta", delta: " world", id: "1" },
-          { type: "text-end", id: "1" },
-          {
-            type: "finish",
-            finishReason: { unified: "stop", raw: "stop" },
-            usage: {
-              inputTokens: { total: 5, noCache: 5, cacheRead: 20, cacheWrite: 0 },
-              outputTokens: { total: 5, text: 5, reasoning: 10 },
-            },
-          },
-        ],
-      }),
-    }),
-  });
-
-  const endpoint = chatCompletions({
-    providers: {
-      groq: new MockProviderV3({
-        languageModels: {
-          "openai/gpt-oss-20b": mockLanguageModel,
-        },
-      }),
-    },
-    models: defineModelCatalog({
-      "openai/gpt-oss-20b": {
-        name: "GPT-OSS 20B",
-        modalities: { input: ["text", "file"], output: ["text"] },
-        providers: ["groq"],
-      },
-    }),
-  });
-
-  test("should return 405 for non-POST requests", async () => {
-    const request = new Request(baseUrl, { method: "GET" });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toMatchObject({
-      error: {
-        code: "method_not_allowed",
-        message: "Method Not Allowed",
-        type: "invalid_request_error",
-      },
-    });
-  });
-
-  test("should return 400 for invalid JSON", async () => {
-    const request = new Request(baseUrl, {
-      method: "POST",
-      body: "invalid-json",
-    });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toMatchObject({
-      error: {
-        code: "bad_request",
-        message: "Invalid JSON",
-        type: "invalid_request_error",
-      },
-    });
-  });
-
-  test("should return 400 for validation errors (missing messages)", async () => {
-    const request = postJson(baseUrl, { model: "openai/gpt-oss-20b" });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toMatchObject({
-      error: {
-        code: "bad_request",
-        message: "✖ Invalid input: expected array, received undefined\n → at messages",
-        type: "invalid_request_error",
-        param: "",
-      },
-    });
-  });
-
-  test("should return 422 for non-existent model", async () => {
-    const request = postJson(baseUrl, {
-      model: "non-existent",
-      messages: [{ role: "user", content: "hi" }],
-    });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toMatchObject({
-      error: {
-        code: "model_not_found",
-        message: "Model 'non-existent' not found in catalog",
-        type: "invalid_request_error",
-      },
-    });
-  });
-
-  test("should generate non-streaming completion successfully", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "hi" }],
-    });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toEqual({
-      id: expect.stringMatching(/^chatcmpl-/),
-      object: "chat.completion",
-      created: expect.any(Number),
-      model: "openai/gpt-oss-20b",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant",
-            content: "Hello from AI",
-          },
-          finish_reason: "stop",
-        },
-      ],
-      usage: {
-        prompt_tokens: 10,
-        completion_tokens: 20,
-        total_tokens: 30,
-        completion_tokens_details: {
-          reasoning_tokens: 10,
-        },
-        prompt_tokens_details: {
-          cached_tokens: 20,
-          cache_write_tokens: 0,
-        },
-      },
-      provider_metadata: { provider: { key: "value" } },
-    });
-  });
-
-  test("should accept input_audio content parts", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [
-        {
-          role: "user",
-          content: [
-            {
-              type: "input_audio",
-              input_audio: {
-                data: "aGVsbG8=",
-                format: "wav",
-              },
-            },
-          ],
-        },
-      ],
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    const data = await parseResponse(res);
-    expect(data.model).toBe("openai/gpt-oss-20b");
-  });
-
-  test("should generate completion with tool calls successfully", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "What is the weather in SF?" }],
-      tools: [
-        {
-          type: "function",
-          function: {
-            name: "get_current_weather",
-            description: "Get the current weather",
-            parameters: {
-              type: "object",
-              properties: {
-                location: { type: "string" },
-              },
-            },
-          },
-        },
-      ],
-    });
-    const res = await endpoint.handler(request);
-    const data = await parseResponse(res);
-    expect(data).toEqual({
-      id: expect.stringMatching(/^chatcmpl-/),
-      object: "chat.completion",
-      created: expect.any(Number),
-      model: "openai/gpt-oss-20b",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant",
-            content: null,
-            tool_calls: [
-              {
-                id: "call_123",
-                type: "function",
-                function: {
-                  name: "get_current_weather",
-                  arguments: '{"location":"San Francisco, CA"}',
-                },
-              },
-            ],
-          },
-          finish_reason: "tool_calls",
-        },
-      ],
-      usage: {
-        prompt_tokens: 15,
-        completion_tokens: 25,
-        total_tokens: 40,
-        completion_tokens_details: {
-          reasoning_tokens: 10,
-        },
-        prompt_tokens_details: {
-          cached_tokens: 20,
-          cache_write_tokens: 0,
-        },
-      },
-      provider_metadata: { provider: { key: "value" } },
-    });
-  });
-
-  test("should generate streaming completion successfully", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "hi" }],
-      stream: true,
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    expect(res.headers.get("Content-Type")).toBe("text/event-stream");
-
-    const decoder = new TextDecoder();
-    let result = "";
-    for await (const chunk of res.body!) {
-      result += decoder.decode(chunk);
-    }
-
-    expect(result).toContain('data: {"id":"chatcmpl-');
-    expect(result).toContain('"content":"Hello');
-    expect(result).toContain('"content":" world');
-    expect(result).toContain('"finish_reason":"stop');
-    expect(result).toContain("data: [DONE]");
-  });
-
-  test("should accept reasoning and reasoning_effort parameters", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "hi" }],
-      reasoning: {
-        effort: "high",
-        max_tokens: 1000,
-      },
-      reasoning_effort: "medium",
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    const data = await parseResponse(res);
-    expect(data.model).toBe("openai/gpt-oss-20b");
-  });
-
-  test("should accept max_completion_tokens parameter", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "hi" }],
-      max_completion_tokens: 100,
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    const data = await parseResponse(res);
-    expect(data.model).toBe("openai/gpt-oss-20b");
-  });
-
-  test("should generate non-streaming structured output", async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "Return weather as JSON" }],
-      response_format: {
-        type: "json_schema",
-        json_schema: {
-          name: "weather",
-          schema: {
-            type: "object",
-            properties: {
-              city: { type: "string" },
-              temp_c: { type: "number" },
-            },
-            required: ["city", "temp_c"],
-            additionalProperties: false,
-          },
-          strict: true,
-        },
-      },
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    const data = await parseResponse(res);
-    expect(data.choices[0].message.content).toBe('{"city":"San Francisco","temp_c":18}');
-  });
-
-  test('should accept response_format type "text"', async () => {
-    const request = postJson(baseUrl, {
-      model: "openai/gpt-oss-20b",
-      messages: [{ role: "user", content: "Say hi" }],
-      response_format: {
-        type: "text",
-      },
-    });
-
-    const res = await endpoint.handler(request);
-    expect(res.status).toBe(200);
-    const data = await parseResponse(res);
-    expect(data.choices[0].message.content).toBe("Hello from AI");
-  });
-});

package/src/endpoints/chat-completions/handler.ts
@@ -1,201 +0,0 @@
-import {
-  generateText,
-  Output,
-  streamText,
-  wrapLanguageModel,
-  type GenerateTextResult,
-  type ToolSet,
-} from "ai";
-import * as z from "zod/mini";
-
-import type {
-  AfterHookContext,
-  BeforeHookContext,
-  GatewayConfig,
-  Endpoint,
-  GatewayContext,
-  ResolveProviderHookContext,
-  ResolveModelHookContext,
-} from "../../types";
-
-import { GatewayError } from "../../errors/gateway";
-import { winterCgHandler } from "../../lifecycle";
-import { logger } from "../../logger";
-import { modelMiddlewareMatcher } from "../../middleware/matcher";
-import { resolveProvider } from "../../providers/registry";
-import {
-  getGenAiGeneralAttributes,
-  recordTimePerOutputToken,
-  recordTokenUsage,
-} from "../../telemetry/gen-ai";
-import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
-import { prepareForwardHeaders } from "../../utils/request";
-import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
-import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
-import { ChatCompletionsBodySchema, type ChatCompletionsBody } from "./schema";
-
-export const chatCompletions = (config: GatewayConfig): Endpoint => {
-  const hooks = config.hooks;
-
-  const handler = async (ctx: GatewayContext) => {
-    const start = performance.now();
-    ctx.operation = "chat";
-    addSpanEvent("hebo.handler.started");
-
-    // Guard: enforce HTTP method early.
-    if (!ctx.request || ctx.request.method !== "POST") {
-      throw new GatewayError("Method Not Allowed", 405);
-    }
-
-    // Parse + validate input.
-    try {
-      ctx.body = await ctx.request.json();
-    } catch {
-      throw new GatewayError("Invalid JSON", 400);
-    }
-    logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
-    addSpanEvent("hebo.request.deserialized");
-
-    const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
-    if (!parsed.success) {
-      // FUTURE: consider adding body shape to metadata
-      throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
-    }
-    ctx.body = parsed.data;
-    addSpanEvent("hebo.request.parsed");
-
-    if (hooks?.before) {
-      ctx.body =
-        ((await hooks.before(ctx as BeforeHookContext)) as ChatCompletionsBody) ?? ctx.body;
-      addSpanEvent("hebo.hooks.before.completed");
-    }
-
-    // Resolve model + provider (hooks may override defaults).
-    let inputs, stream;
-    ({ model: ctx.modelId, stream, ...inputs } = ctx.body);
-
-    ctx.resolvedModelId =
-      (await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
-    logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
-    addSpanEvent("hebo.model.resolved");
-
-    const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
-    ctx.provider =
-      override ??
-      resolveProvider({
-        providers: ctx.providers,
-        models: ctx.models,
-        modelId: ctx.resolvedModelId,
-        operation: ctx.operation,
-      });
-
-    const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
-    ctx.resolvedProviderId = languageModel.provider;
-    logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
-    addSpanEvent("hebo.provider.resolved");
-
-    const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
-    const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
-    setSpanAttributes(genAiGeneralAttrs);
-
-    // Convert inputs to AI SDK call options.
-    const textOptions = convertToTextCallOptions(inputs);
-    logger.trace(
-      {
-        requestId: ctx.requestId,
-        options: textOptions,
-      },
-      "[chat] AI SDK options",
-    );
-    addSpanEvent("hebo.options.prepared");
-    setSpanAttributes(getChatRequestAttributes(ctx.body, genAiSignalLevel));
-
-    // Build middleware chain (model -> forward params -> provider).
-    const languageModelWithMiddleware = wrapLanguageModel({
-      model: languageModel,
-      middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
-    });
-
-    // Execute request (streaming vs. non-streaming).
-    if (stream) {
-      addSpanEvent("hebo.ai-sdk.started");
-      const result = streamText({
-        model: languageModelWithMiddleware,
-        headers: prepareForwardHeaders(ctx.request),
-        abortSignal: ctx.request.signal,
-        timeout: {
-          totalMs: 5 * 60 * 1000,
-        },
-        onAbort: () => {
-          throw new DOMException("The operation was aborted.", "AbortError");
-        },
-        onError: () => {},
-        onFinish: (res) => {
-          addSpanEvent("hebo.ai-sdk.completed");
-          const streamResult = toChatCompletions(
-            res as unknown as GenerateTextResult<ToolSet, Output.Output>,
-            ctx.resolvedModelId!,
-          );
-          logger.trace(
-            { requestId: ctx.requestId, result: streamResult },
-            "[chat] ChatCompletions",
-          );
-          addSpanEvent("hebo.result.transformed");
-
-          const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
-          setSpanAttributes(genAiResponseAttrs);
-          recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
-          recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
-        },
-        experimental_include: {
-          requestBody: false,
-        },
-        includeRawChunks: false,
-        ...textOptions,
-      });
-
-      ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
-
-      if (hooks?.after) {
-        ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
-        addSpanEvent("hebo.hooks.after.completed");
-      }
-
-      return ctx.result;
-    }
-
-    addSpanEvent("hebo.ai-sdk.started");
-    const result = await generateText({
-      model: languageModelWithMiddleware,
-      headers: prepareForwardHeaders(ctx.request),
-      abortSignal: ctx.request.signal,
-      timeout: 5 * 60 * 1000,
-      experimental_include: {
-        requestBody: false,
-        responseBody: false,
-      },
-      ...textOptions,
-    });
-    logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
-    addSpanEvent("hebo.ai-sdk.completed");
-
-    // Transform result.
-    ctx.result = toChatCompletions(result, ctx.resolvedModelId);
-    logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
-    addSpanEvent("hebo.result.transformed");
-
-    const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
-    setSpanAttributes(genAiResponseAttrs);
-    recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
-
-    if (hooks?.after) {
-      ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
-      addSpanEvent("hebo.hooks.after.completed");
-    }
-
-    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
-    return ctx.result;
-  };
-
-  return { handler: winterCgHandler(handler, config) };
-};