@hebo-ai/gateway 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -4
- package/dist/config.js +14 -0
- package/dist/endpoints/chat-completions/converters.d.ts +1 -1
- package/dist/endpoints/chat-completions/converters.js +4 -3
- package/dist/endpoints/chat-completions/handler.js +14 -11
- package/dist/endpoints/chat-completions/otel.d.ts +1 -1
- package/dist/endpoints/chat-completions/otel.js +4 -1
- package/dist/endpoints/conversations/handler.js +5 -22
- package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -3
- package/dist/endpoints/conversations/storage/dialects/postgres.js +4 -4
- package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -3
- package/dist/endpoints/conversations/storage/sql.d.ts +1 -1
- package/dist/endpoints/conversations/storage/sql.js +8 -10
- package/dist/endpoints/embeddings/handler.js +4 -9
- package/dist/endpoints/embeddings/otel.d.ts +1 -1
- package/dist/endpoints/responses/converters.d.ts +1 -1
- package/dist/endpoints/responses/converters.js +5 -2
- package/dist/endpoints/responses/handler.js +14 -10
- package/dist/endpoints/responses/otel.d.ts +1 -1
- package/dist/endpoints/responses/otel.js +6 -1
- package/dist/endpoints/shared/converters.js +3 -2
- package/dist/errors/utils.d.ts +3 -1
- package/dist/errors/utils.js +2 -0
- package/dist/lifecycle.js +1 -0
- package/dist/logger/default.js +3 -3
- package/dist/models/amazon/middleware.js +2 -1
- package/dist/models/anthropic/middleware.d.ts +1 -1
- package/dist/models/anthropic/middleware.js +1 -0
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +1 -0
- package/dist/models/google/presets.d.ts +412 -0
- package/dist/models/google/presets.js +96 -0
- package/dist/models/openai/middleware.js +2 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +8 -0
- package/dist/providers/bedrock/canonical.js +3 -0
- package/dist/providers/bedrock/middleware.js +4 -8
- package/dist/providers/groq/middleware.js +1 -2
- package/dist/telemetry/gen-ai.d.ts +2 -1
- package/dist/telemetry/gen-ai.js +41 -11
- package/dist/types.d.ts +19 -4
- package/dist/utils/body.d.ts +19 -0
- package/dist/utils/body.js +99 -0
- package/dist/utils/env.js +2 -2
- package/dist/utils/stream.js +1 -1
- package/package.json +31 -31
package/README.md
CHANGED
|
@@ -42,7 +42,7 @@ bun install @hebo-ai/gateway
|
|
|
42
42
|
- Endpoints
|
|
43
43
|
- [/chat/completions](#chatcompletions) | [/embeddings](#embeddings) | [/models](#models) | [/responses](#responses) | [/conversations](#conversations)
|
|
44
44
|
- OpenAI Extensions
|
|
45
|
-
- [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
|
|
45
|
+
- [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching) | [Compressed Requests](#compressed-requests)
|
|
46
46
|
- Advanced Usage
|
|
47
47
|
- [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
|
|
48
48
|
|
|
@@ -792,6 +792,36 @@ Provider behavior:
|
|
|
792
792
|
- **Google Gemini**: maps `cached_content` to Gemini `cachedContent`.
|
|
793
793
|
- **Amazon Nova (Bedrock)**: maps `cache_control` to Bedrock `cachePoints` and inserts an automatic cache point on a stable prefix when none is provided.
|
|
794
794
|
|
|
795
|
+
|
|
796
|
+
### Compressed Requests
|
|
797
|
+
|
|
798
|
+
The gateway supports gzip- and deflate-compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum *decompressed* body size for these compressed requests, protecting against gzip bombs and oversized payloads.
|
|
799
|
+
|
|
800
|
+
```ts
|
|
801
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
802
|
+
|
|
803
|
+
const gw = gateway({
|
|
804
|
+
// ...
|
|
805
|
+
// Maximum decompressed body size in bytes (default: 10 MB).
|
|
806
|
+
// Set to 0 to disable the decompressed size limit.
|
|
807
|
+
maxBodySize: 10 * 1024 * 1024,
|
|
808
|
+
});
|
|
809
|
+
```
|
|
810
|
+
|
|
811
|
+
Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
|
|
812
|
+
|
|
813
|
+
> [!IMPORTANT]
|
|
814
|
+
> **Plain (uncompressed) request body size limits** are *not* enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
|
|
815
|
+
>
|
|
816
|
+
> Framework-level configuration examples:
|
|
817
|
+
>
|
|
818
|
+
> - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
|
|
819
|
+
> - **Elysia** — inherits from Bun's `maxRequestBodySize`
|
|
820
|
+
> - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
|
|
821
|
+
> - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
|
|
822
|
+
> - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
|
|
823
|
+
> - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
|
|
824
|
+
|
|
795
825
|
## 🧪 Advanced Usage
|
|
796
826
|
|
|
797
827
|
### Logger Settings
|
|
@@ -863,19 +893,37 @@ Attribute names and span & metrics semantics follow OpenTelemetry GenAI semantic
|
|
|
863
893
|
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
|
|
864
894
|
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
|
|
865
895
|
|
|
896
|
+
For observability integrations that are not OpenTelemetry-compliant, you can disable the built-in telemetry and instrument requests manually in the `before` / `after` hooks.
|
|
897
|
+
|
|
898
|
+
#### Custom Telemetry Attributes
|
|
899
|
+
|
|
900
|
+
Use `ctx.otel` in any hook to attach attributes to both spans and metrics:
|
|
901
|
+
|
|
902
|
+
```ts
|
|
903
|
+
hooks: {
|
|
904
|
+
onRequest: (ctx) => {
|
|
905
|
+
ctx.otel["app.tenant.id"] = tenantId;
|
|
906
|
+
ctx.otel["app.user.id"] = userId;
|
|
907
|
+
},
|
|
908
|
+
}
|
|
909
|
+
```
|
|
910
|
+
|
|
911
|
+
These attributes appear on the active span and on all metric instruments: request duration, token usage, time per output token (TPOT), and time to first token (TTFT).
|
|
912
|
+
|
|
866
913
|
> [!TIP]
|
|
867
914
|
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
868
915
|
> For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
869
916
|
|
|
870
|
-
For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
871
|
-
|
|
872
917
|
#### Metrics
|
|
873
918
|
|
|
874
919
|
The Gateway also emits `gen_ai` metrics:
|
|
875
920
|
|
|
876
921
|
- `gen_ai.server.request.duration` (histogram, seconds)
|
|
877
922
|
- `gen_ai.server.time_per_output_token` (histogram, seconds)
|
|
878
|
-
- `gen_ai.
|
|
923
|
+
- `gen_ai.server.time_to_first_token` (histogram, seconds)
|
|
924
|
+
- `gen_ai.client.token.usage` (histogram, tokens; tagged with `gen_ai.token.type=input|output|cached|reasoning`)
|
|
925
|
+
|
|
926
|
+
Metric names and attributes follow OpenTelemetry GenAI semantic conventions. Histogram bucket boundaries are tuned for practical dashboards and alerting rather than copied verbatim from upstream recommendations.
|
|
879
927
|
|
|
880
928
|
To capture them, configure a global `MeterProvider` before creating the gateway:
|
|
881
929
|
|
|
@@ -1073,3 +1121,33 @@ Non-streaming versions are available via `toChatCompletionsResponse`. Equivalent
|
|
|
1073
1121
|
|
|
1074
1122
|
> [!TIP]
|
|
1075
1123
|
> Since Zod v4.3 you can generate a JSON Schema from any zod object by calling `z.toJSONSchema(...)`. This is useful for producing OpenAPI documentation from the same source of truth.
|
|
1124
|
+
|
|
1125
|
+
|
|
1126
|
+
### Request Body Size
|
|
1127
|
+
|
|
1128
|
+
The gateway supports gzip- and deflate-compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum *decompressed* body size for these compressed requests, protecting against gzip bombs and oversized payloads.
|
|
1129
|
+
|
|
1130
|
+
```ts
|
|
1131
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
1132
|
+
|
|
1133
|
+
const gw = gateway({
|
|
1134
|
+
// ...
|
|
1135
|
+
// Maximum decompressed body size in bytes (default: 10 MB).
|
|
1136
|
+
// Set to 0 to disable the decompressed size limit.
|
|
1137
|
+
maxBodySize: 10 * 1024 * 1024,
|
|
1138
|
+
});
|
|
1139
|
+
```
|
|
1140
|
+
|
|
1141
|
+
Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
|
|
1142
|
+
|
|
1143
|
+
> [!IMPORTANT]
|
|
1144
|
+
> **Plain (uncompressed) request body size limits** are *not* enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
|
|
1145
|
+
>
|
|
1146
|
+
> Framework-level configuration examples:
|
|
1147
|
+
>
|
|
1148
|
+
> - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
|
|
1149
|
+
> - **Elysia** — inherits from Bun's `maxRequestBodySize`
|
|
1150
|
+
> - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
|
|
1151
|
+
> - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
|
|
1152
|
+
> - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
|
|
1153
|
+
> - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
|
package/dist/config.js
CHANGED
|
@@ -3,6 +3,7 @@ import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
|
3
3
|
import { createDefaultLogger } from "./logger/default";
|
|
4
4
|
import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
|
|
5
5
|
import { DEFAULT_CHAT_TIMEOUT_MS, kParsed, } from "./types";
|
|
6
|
+
import { DEFAULT_MAX_BODY_SIZE } from "./utils/body";
|
|
6
7
|
export const parseConfig = (config) => {
|
|
7
8
|
// If it has been parsed before, just return.
|
|
8
9
|
if (kParsed in config)
|
|
@@ -94,10 +95,23 @@ export const parseConfig = (config) => {
|
|
|
94
95
|
flex = t.flex;
|
|
95
96
|
}
|
|
96
97
|
const parsedTimeouts = { normal, flex };
|
|
98
|
+
// Body size limit
|
|
99
|
+
const rawMax = config.maxBodySize;
|
|
100
|
+
let maxBodySize;
|
|
101
|
+
if (typeof rawMax === "number" && Number.isFinite(rawMax) && rawMax >= 0) {
|
|
102
|
+
maxBodySize = rawMax;
|
|
103
|
+
}
|
|
104
|
+
else {
|
|
105
|
+
maxBodySize = DEFAULT_MAX_BODY_SIZE;
|
|
106
|
+
if (rawMax !== undefined) {
|
|
107
|
+
logger.warn(`[config] invalid maxBodySize (${rawMax}), using default ${DEFAULT_MAX_BODY_SIZE}`);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
97
110
|
// Return parsed config.
|
|
98
111
|
return {
|
|
99
112
|
...config,
|
|
100
113
|
timeouts: parsedTimeouts,
|
|
114
|
+
maxBodySize,
|
|
101
115
|
telemetry: {
|
|
102
116
|
...config.telemetry,
|
|
103
117
|
enabled: telemetryEnabled,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import type { SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
2
|
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
3
|
import { Output } from "ai";
|
|
4
|
-
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
5
4
|
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
6
5
|
import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
|
|
6
|
+
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
7
7
|
export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
|
|
8
8
|
export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
|
|
9
9
|
export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Output, jsonSchema, tool } from "ai";
|
|
2
|
+
import { GatewayError } from "../../errors/gateway";
|
|
2
3
|
import { toResponse } from "../../utils/response";
|
|
3
4
|
import { parseJsonOrText, parseReasoningOptions, parsePromptCachingOptions, resolveResponseServiceTier, normalizeToolName, stripEmptyKeys, parseBase64, parseImageInput, extractReasoningMetadata, } from "../shared/converters";
|
|
4
5
|
// --- Request Flow ---
|
|
@@ -32,7 +33,7 @@ export function convertToTextCallOptions(params) {
|
|
|
32
33
|
}
|
|
33
34
|
function convertToOutput(responseFormat) {
|
|
34
35
|
if (!responseFormat || responseFormat.type === "text") {
|
|
35
|
-
return;
|
|
36
|
+
return undefined;
|
|
36
37
|
}
|
|
37
38
|
const { name, description, schema } = responseFormat.json_schema;
|
|
38
39
|
return Output.object({
|
|
@@ -207,7 +208,7 @@ export function fromChatCompletionsContent(content) {
|
|
|
207
208
|
return out;
|
|
208
209
|
}
|
|
209
210
|
default:
|
|
210
|
-
throw new
|
|
211
|
+
throw new GatewayError(`Unsupported content part type: ${part.type}`, 400);
|
|
211
212
|
}
|
|
212
213
|
});
|
|
213
214
|
}
|
|
@@ -257,7 +258,7 @@ function fromFilePart(base64Data, mediaType, filename, cacheControl) {
|
|
|
257
258
|
}
|
|
258
259
|
export const convertToToolSet = (tools) => {
|
|
259
260
|
if (!tools) {
|
|
260
|
-
return;
|
|
261
|
+
return undefined;
|
|
261
262
|
}
|
|
262
263
|
const toolSet = {};
|
|
263
264
|
for (const t of tools) {
|
|
@@ -5,8 +5,9 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
8
|
+
import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTimeToFirstToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
+
import { parseRequestBody } from "../../utils/body";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
12
13
|
import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
|
|
@@ -22,14 +23,8 @@ export const chatCompletions = (config) => {
|
|
|
22
23
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
24
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
25
|
}
|
|
25
|
-
// Parse + validate input.
|
|
26
|
-
|
|
27
|
-
// oxlint-disable-next-line no-unsafe-assignment
|
|
28
|
-
ctx.body = await ctx.request.json();
|
|
29
|
-
}
|
|
30
|
-
catch {
|
|
31
|
-
throw new GatewayError("Invalid JSON", 400);
|
|
32
|
-
}
|
|
26
|
+
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
27
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
33
28
|
logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
|
|
34
29
|
addSpanEvent("hebo.request.deserialized");
|
|
35
30
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
@@ -83,6 +78,7 @@ export const chatCompletions = (config) => {
|
|
|
83
78
|
// Execute request (streaming vs. non-streaming).
|
|
84
79
|
if (stream) {
|
|
85
80
|
addSpanEvent("hebo.ai-sdk.started");
|
|
81
|
+
let ttft = 0;
|
|
86
82
|
const result = streamText({
|
|
87
83
|
model: languageModelWithMiddleware,
|
|
88
84
|
headers: prepareForwardHeaders(ctx.request),
|
|
@@ -94,6 +90,12 @@ export const chatCompletions = (config) => {
|
|
|
94
90
|
throw new DOMException("The operation was aborted.", "AbortError");
|
|
95
91
|
},
|
|
96
92
|
onError: () => { },
|
|
93
|
+
onChunk: () => {
|
|
94
|
+
if (!ttft) {
|
|
95
|
+
ttft = performance.now() - start;
|
|
96
|
+
recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
|
|
97
|
+
}
|
|
98
|
+
},
|
|
97
99
|
onFinish: (res) => {
|
|
98
100
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
99
101
|
const streamResult = toChatCompletions(res, ctx.resolvedModelId);
|
|
@@ -102,7 +104,7 @@ export const chatCompletions = (config) => {
|
|
|
102
104
|
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
103
105
|
setSpanAttributes(genAiResponseAttrs);
|
|
104
106
|
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
105
|
-
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
107
|
+
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
106
108
|
},
|
|
107
109
|
experimental_include: {
|
|
108
110
|
requestBody: false,
|
|
@@ -131,6 +133,7 @@ export const chatCompletions = (config) => {
|
|
|
131
133
|
});
|
|
132
134
|
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
133
135
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
136
|
+
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
134
137
|
// Transform result.
|
|
135
138
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
136
139
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
|
|
@@ -142,7 +145,7 @@ export const chatCompletions = (config) => {
|
|
|
142
145
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
143
146
|
addSpanEvent("hebo.hooks.after.completed");
|
|
144
147
|
}
|
|
145
|
-
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
148
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
146
149
|
return ctx.result;
|
|
147
150
|
};
|
|
148
151
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
-
import type { ChatCompletions, ChatCompletionsBody } from "./schema";
|
|
3
2
|
import { type TelemetrySignalLevel } from "../../types";
|
|
3
|
+
import type { ChatCompletions, ChatCompletionsBody } from "./schema";
|
|
4
4
|
export declare const getChatRequestAttributes: (body: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
5
|
export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -79,6 +79,9 @@ const toUserParts = (content) => {
|
|
|
79
79
|
parts.push(filePart);
|
|
80
80
|
break;
|
|
81
81
|
}
|
|
82
|
+
default:
|
|
83
|
+
parts.push({ type: part.type, content: "[UNHANDLED_CONTENT_PART]" });
|
|
84
|
+
break;
|
|
82
85
|
}
|
|
83
86
|
}
|
|
84
87
|
return parts;
|
|
@@ -96,7 +99,7 @@ const toMessageParts = (message) => {
|
|
|
96
99
|
case "system":
|
|
97
100
|
return toTextParts(message.content);
|
|
98
101
|
default:
|
|
99
|
-
|
|
102
|
+
return [{ type: message.role, content: "[UNHANDLED_ROLE]" }];
|
|
100
103
|
}
|
|
101
104
|
};
|
|
102
105
|
export const getChatRequestAttributes = (body, signalLevel) => {
|
|
@@ -4,8 +4,9 @@ import { GatewayError } from "../../errors/gateway";
|
|
|
4
4
|
import { winterCgHandler } from "../../lifecycle";
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { addSpanEvent } from "../../telemetry/span";
|
|
7
|
-
import {
|
|
7
|
+
import { parseRequestBody } from "../../utils/body";
|
|
8
8
|
import { toConversation, toConversationItem, toConversationDeleted } from "./converters";
|
|
9
|
+
import { ConversationCreateParamsSchema, ConversationItemsAddBodySchema, ConversationUpdateBodySchema, ConversationItemListParamsSchema, ConversationListParamsSchema, } from "./schema";
|
|
9
10
|
export const conversations = (config) => {
|
|
10
11
|
const parsedConfig = parseConfig(config);
|
|
11
12
|
const storage = parsedConfig.storage;
|
|
@@ -42,13 +43,7 @@ export const conversations = (config) => {
|
|
|
42
43
|
};
|
|
43
44
|
}
|
|
44
45
|
async function create(ctx) {
|
|
45
|
-
|
|
46
|
-
try {
|
|
47
|
-
body = await ctx.request.json();
|
|
48
|
-
}
|
|
49
|
-
catch {
|
|
50
|
-
throw new GatewayError("Invalid JSON", 400);
|
|
51
|
-
}
|
|
46
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
52
47
|
addSpanEvent("hebo.request.deserialized");
|
|
53
48
|
const parsed = ConversationCreateParamsSchema.safeParse(body);
|
|
54
49
|
if (!parsed.success) {
|
|
@@ -72,13 +67,7 @@ export const conversations = (config) => {
|
|
|
72
67
|
return toConversation(entity);
|
|
73
68
|
}
|
|
74
69
|
async function update(ctx, conversationId) {
|
|
75
|
-
|
|
76
|
-
try {
|
|
77
|
-
body = await ctx.request.json();
|
|
78
|
-
}
|
|
79
|
-
catch {
|
|
80
|
-
throw new GatewayError("Invalid JSON", 400);
|
|
81
|
-
}
|
|
70
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
82
71
|
addSpanEvent("hebo.request.deserialized");
|
|
83
72
|
const parsed = ConversationUpdateBodySchema.safeParse(body);
|
|
84
73
|
if (!parsed.success) {
|
|
@@ -150,13 +139,7 @@ export const conversations = (config) => {
|
|
|
150
139
|
};
|
|
151
140
|
}
|
|
152
141
|
async function addItems(ctx, conversationId) {
|
|
153
|
-
|
|
154
|
-
try {
|
|
155
|
-
body = await ctx.request.json();
|
|
156
|
-
}
|
|
157
|
-
catch {
|
|
158
|
-
throw new GatewayError("Invalid JSON", 400);
|
|
159
|
-
}
|
|
142
|
+
const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
|
|
160
143
|
addSpanEvent("hebo.request.deserialized");
|
|
161
144
|
const parsed = ConversationItemsAddBodySchema.safeParse(body);
|
|
162
145
|
if (!parsed.success) {
|
|
@@ -36,7 +36,7 @@ function createMysql2Executor(pool) {
|
|
|
36
36
|
async run(sql, params) {
|
|
37
37
|
const [res] = await pool.execute(sql, mapParams(params));
|
|
38
38
|
const header = res;
|
|
39
|
-
return { changes:
|
|
39
|
+
return { changes: header.affectedRows ?? 0 };
|
|
40
40
|
},
|
|
41
41
|
async transaction(fn) {
|
|
42
42
|
const conn = await pool.getConnection();
|
|
@@ -53,7 +53,7 @@ function createMysql2Executor(pool) {
|
|
|
53
53
|
async run(sql, params) {
|
|
54
54
|
const [res] = await conn.execute(sql, mapParams(params));
|
|
55
55
|
const header = res;
|
|
56
|
-
return { changes:
|
|
56
|
+
return { changes: header.affectedRows ?? 0 };
|
|
57
57
|
},
|
|
58
58
|
transaction(txCallback) {
|
|
59
59
|
return txCallback(txExecutor);
|
|
@@ -87,7 +87,7 @@ function createBunMysqlExecutor(sql) {
|
|
|
87
87
|
async run(query, params) {
|
|
88
88
|
const res = (await sql.unsafe(query, mapParams(params)));
|
|
89
89
|
const result = res;
|
|
90
|
-
return { changes:
|
|
90
|
+
return { changes: result.affectedRows ?? result.count ?? 0 };
|
|
91
91
|
},
|
|
92
92
|
transaction(fn) {
|
|
93
93
|
return sql.transaction((tx) => {
|
|
@@ -59,7 +59,7 @@ function createPgExecutor(pool, mapParams) {
|
|
|
59
59
|
async run(sql, params) {
|
|
60
60
|
const p = mapParams(params);
|
|
61
61
|
const res = await pool.query(getQuery(sql, p?.length > 0 ? p : undefined));
|
|
62
|
-
return { changes:
|
|
62
|
+
return { changes: res.rowCount ?? 0 };
|
|
63
63
|
},
|
|
64
64
|
async transaction(fn) {
|
|
65
65
|
const client = await pool.connect();
|
|
@@ -78,7 +78,7 @@ function createPgExecutor(pool, mapParams) {
|
|
|
78
78
|
async run(sql, params) {
|
|
79
79
|
const p = mapParams(params);
|
|
80
80
|
const res = await client.query(getQuery(sql, p?.length > 0 ? p : undefined));
|
|
81
|
-
return { changes:
|
|
81
|
+
return { changes: res.rowCount ?? 0 };
|
|
82
82
|
},
|
|
83
83
|
transaction(txCallback) {
|
|
84
84
|
return txCallback(txExecutor);
|
|
@@ -115,7 +115,7 @@ function createPostgresJsExecutor(sql, mapParams) {
|
|
|
115
115
|
const p = mapParams(params);
|
|
116
116
|
const res = await sql.unsafe(query, (p?.length > 0 ? p : undefined), { prepare: true });
|
|
117
117
|
const result = res;
|
|
118
|
-
return { changes:
|
|
118
|
+
return { changes: result.count ?? 0 };
|
|
119
119
|
},
|
|
120
120
|
async transaction(fn) {
|
|
121
121
|
return (await sql.begin((tx) => {
|
|
@@ -151,7 +151,7 @@ function createBunPostgresExecutor(sql, mapParams) {
|
|
|
151
151
|
if (!isNaN(parsed))
|
|
152
152
|
changes = parsed;
|
|
153
153
|
}
|
|
154
|
-
return { changes
|
|
154
|
+
return { changes };
|
|
155
155
|
},
|
|
156
156
|
transaction(fn) {
|
|
157
157
|
return sql.transaction((tx) => {
|
|
@@ -97,7 +97,7 @@ function createLibsqlExecutor(client) {
|
|
|
97
97
|
},
|
|
98
98
|
async run(sql, params) {
|
|
99
99
|
const rs = await client.execute({ sql, args: mapParams(params) ?? [] });
|
|
100
|
-
return { changes:
|
|
100
|
+
return { changes: rs.rowsAffected };
|
|
101
101
|
},
|
|
102
102
|
async transaction(fn) {
|
|
103
103
|
const tx = await client.transaction("deferred");
|
|
@@ -112,7 +112,7 @@ function createLibsqlExecutor(client) {
|
|
|
112
112
|
},
|
|
113
113
|
async run(sql, params) {
|
|
114
114
|
const rs = await tx.execute({ sql, args: mapParams(params) ?? [] });
|
|
115
|
-
return { changes:
|
|
115
|
+
return { changes: rs.rowsAffected };
|
|
116
116
|
},
|
|
117
117
|
transaction(txCallback) {
|
|
118
118
|
return txCallback(txExecutor);
|
|
@@ -143,7 +143,7 @@ function createBunSqliteExecutor(sql) {
|
|
|
143
143
|
async run(query, params) {
|
|
144
144
|
const res = (await sql.unsafe(query, mapParams(params)));
|
|
145
145
|
const result = res;
|
|
146
|
-
return { changes:
|
|
146
|
+
return { changes: result.affectedRows ?? result.count ?? 0 };
|
|
147
147
|
},
|
|
148
148
|
transaction(fn) {
|
|
149
149
|
return sql.transaction((tx) => {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { ConversationStorage, ConversationEntity, ConversationItemEntity, ConversationMetadata, ConversationItemInput, ConversationQueryOptions } from "./types";
|
|
2
1
|
import type { SqlDialect } from "./dialects/types";
|
|
2
|
+
import type { ConversationStorage, ConversationEntity, ConversationItemEntity, ConversationMetadata, ConversationItemInput, ConversationQueryOptions } from "./types";
|
|
3
3
|
export declare class SqlStorage implements ConversationStorage {
|
|
4
4
|
readonly dialect: SqlDialect;
|
|
5
5
|
constructor(options: SqlDialect | {
|
|
@@ -138,14 +138,13 @@ export class SqlStorage {
|
|
|
138
138
|
args.push(after);
|
|
139
139
|
}
|
|
140
140
|
sqlParts.push(`ORDER BY c.${q("created_at")} ${dir}, c.${q("id")} ${dir}`);
|
|
141
|
-
|
|
142
|
-
if (!isNaN(limitVal)) {
|
|
141
|
+
if (!Number.isNaN(limit)) {
|
|
143
142
|
if (limitAsLiteral) {
|
|
144
|
-
sqlParts.push(`LIMIT ${
|
|
143
|
+
sqlParts.push(`LIMIT ${limit}`);
|
|
145
144
|
}
|
|
146
145
|
else {
|
|
147
146
|
sqlParts.push(`LIMIT ${p(nextIdx++)}`);
|
|
148
|
-
args.push(
|
|
147
|
+
args.push(limit);
|
|
149
148
|
}
|
|
150
149
|
}
|
|
151
150
|
const query = sqlParts.join(" ");
|
|
@@ -168,7 +167,7 @@ export class SqlStorage {
|
|
|
168
167
|
// to deduplicate the row.
|
|
169
168
|
const conversation = await this.getConversationInternal(id, tx);
|
|
170
169
|
if (!conversation)
|
|
171
|
-
return;
|
|
170
|
+
return conversation;
|
|
172
171
|
const createdAt = conversation.created_at;
|
|
173
172
|
const pk = ["id"];
|
|
174
173
|
const updateCols = ["metadata"];
|
|
@@ -195,7 +194,7 @@ export class SqlStorage {
|
|
|
195
194
|
if (!skipCheck) {
|
|
196
195
|
const conversation = await this.getConversationInternal(conversationId, tx);
|
|
197
196
|
if (!conversation)
|
|
198
|
-
return;
|
|
197
|
+
return conversation;
|
|
199
198
|
}
|
|
200
199
|
const { placeholder: p, quote: q } = this.config;
|
|
201
200
|
const columns = ["id", "conversation_id", "type", "data", "created_at"];
|
|
@@ -253,14 +252,13 @@ export class SqlStorage {
|
|
|
253
252
|
args.push(after, conversationId);
|
|
254
253
|
}
|
|
255
254
|
sqlParts.push(`ORDER BY c.${q("created_at")} ${dir}, c.${q("id")} ${dir}`);
|
|
256
|
-
|
|
257
|
-
if (!isNaN(limitVal)) {
|
|
255
|
+
if (!Number.isNaN(limit)) {
|
|
258
256
|
if (limitAsLiteral) {
|
|
259
|
-
sqlParts.push(`LIMIT ${
|
|
257
|
+
sqlParts.push(`LIMIT ${limit}`);
|
|
260
258
|
}
|
|
261
259
|
else {
|
|
262
260
|
sqlParts.push(`LIMIT ${p(nextIdx++)}`);
|
|
263
|
-
args.push(
|
|
261
|
+
args.push(limit);
|
|
264
262
|
}
|
|
265
263
|
}
|
|
266
264
|
const query = sqlParts.join(" ");
|
|
@@ -7,6 +7,7 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
8
|
import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
+
import { parseRequestBody } from "../../utils/body";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
12
13
|
import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from "./otel";
|
|
@@ -22,14 +23,8 @@ export const embeddings = (config) => {
|
|
|
22
23
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
24
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
25
|
}
|
|
25
|
-
// Parse + validate input.
|
|
26
|
-
|
|
27
|
-
// oxlint-disable-next-line no-unsafe-assignment
|
|
28
|
-
ctx.body = await ctx.request.json();
|
|
29
|
-
}
|
|
30
|
-
catch {
|
|
31
|
-
throw new GatewayError("Invalid JSON", 400);
|
|
32
|
-
}
|
|
26
|
+
// Parse + validate input (handles Content-Encoding decompression + body size limits).
|
|
27
|
+
ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
|
|
33
28
|
logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
|
|
34
29
|
addSpanEvent("hebo.request.deserialized");
|
|
35
30
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
@@ -97,7 +92,7 @@ export const embeddings = (config) => {
|
|
|
97
92
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
98
93
|
addSpanEvent("hebo.hooks.after.completed");
|
|
99
94
|
}
|
|
100
|
-
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
95
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
101
96
|
return ctx.result;
|
|
102
97
|
};
|
|
103
98
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
-
import type { Embeddings, EmbeddingsBody } from "./schema";
|
|
3
2
|
import { type TelemetrySignalLevel } from "../../types";
|
|
3
|
+
import type { Embeddings, EmbeddingsBody } from "./schema";
|
|
4
4
|
export declare const getEmbeddingsRequestAttributes: (body: EmbeddingsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
5
|
export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { GenerateTextResult, StreamTextResult, ToolSet, ModelMessage, LanguageModelUsage, TextStreamPart } from "ai";
|
|
2
2
|
import { Output } from "ai";
|
|
3
|
-
import type { ResponsesInputItem, ResponsesInputs, Responses, ResponsesUsage, ResponsesStream, ResponsesStreamEvent, ResponsesToolChoice, ResponsesTool } from "./schema";
|
|
4
3
|
import type { SseErrorFrame } from "../../utils/stream";
|
|
5
4
|
import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
|
|
5
|
+
import type { ResponsesInputItem, ResponsesInputs, Responses, ResponsesUsage, ResponsesStream, ResponsesStreamEvent, ResponsesToolChoice, ResponsesTool } from "./schema";
|
|
6
6
|
export declare function convertToTextCallOptions(params: ResponsesInputs): TextCallOptions;
|
|
7
7
|
export declare function convertToModelMessages(input: string | ResponsesInputItem[], instructions?: string): ModelMessage[];
|
|
8
8
|
export declare const convertToToolSet: (tools: ResponsesTool[] | undefined) => ToolSet | undefined;
|
|
@@ -43,7 +43,7 @@ export function convertToTextCallOptions(params) {
|
|
|
43
43
|
function convertToOutput(text) {
|
|
44
44
|
if (!text?.format || text.format.type === "text") {
|
|
45
45
|
// FUTURE: Support text.verbosity when AI SDK adds top-level support
|
|
46
|
-
return;
|
|
46
|
+
return undefined;
|
|
47
47
|
}
|
|
48
48
|
const { name, description, schema } = text.format;
|
|
49
49
|
return Output.object({
|
|
@@ -147,6 +147,7 @@ function fromMessageItem(item) {
|
|
|
147
147
|
case "assistant":
|
|
148
148
|
return fromAssistantMessageItem(item);
|
|
149
149
|
}
|
|
150
|
+
throw new GatewayError("Unsupported message role", 400);
|
|
150
151
|
}
|
|
151
152
|
function fromUserMessageItem(item) {
|
|
152
153
|
const out = {
|
|
@@ -236,6 +237,8 @@ function fromInputContent(content) {
|
|
|
236
237
|
result.push(out);
|
|
237
238
|
break;
|
|
238
239
|
}
|
|
240
|
+
default:
|
|
241
|
+
throw new GatewayError(`Unsupported content part type: ${part.type}`, 400);
|
|
239
242
|
}
|
|
240
243
|
}
|
|
241
244
|
return result;
|
|
@@ -359,7 +362,7 @@ function fromFunctionCallOutputItem(item, toolOutputByCallId) {
|
|
|
359
362
|
}
|
|
360
363
|
export const convertToToolSet = (tools) => {
|
|
361
364
|
if (!tools) {
|
|
362
|
-
return;
|
|
365
|
+
return undefined;
|
|
363
366
|
}
|
|
364
367
|
const toolSet = {};
|
|
365
368
|
for (const t of tools) {
|