@hebo-ai/gateway 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +82 -4
  2. package/dist/config.js +14 -0
  3. package/dist/endpoints/chat-completions/converters.d.ts +1 -1
  4. package/dist/endpoints/chat-completions/converters.js +4 -3
  5. package/dist/endpoints/chat-completions/handler.js +14 -11
  6. package/dist/endpoints/chat-completions/otel.d.ts +1 -1
  7. package/dist/endpoints/chat-completions/otel.js +4 -1
  8. package/dist/endpoints/conversations/handler.js +5 -22
  9. package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -3
  10. package/dist/endpoints/conversations/storage/dialects/postgres.js +4 -4
  11. package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -3
  12. package/dist/endpoints/conversations/storage/sql.d.ts +1 -1
  13. package/dist/endpoints/conversations/storage/sql.js +8 -10
  14. package/dist/endpoints/embeddings/handler.js +4 -9
  15. package/dist/endpoints/embeddings/otel.d.ts +1 -1
  16. package/dist/endpoints/responses/converters.d.ts +1 -1
  17. package/dist/endpoints/responses/converters.js +5 -2
  18. package/dist/endpoints/responses/handler.js +14 -10
  19. package/dist/endpoints/responses/otel.d.ts +1 -1
  20. package/dist/endpoints/responses/otel.js +6 -1
  21. package/dist/endpoints/shared/converters.js +3 -2
  22. package/dist/errors/utils.d.ts +3 -1
  23. package/dist/errors/utils.js +2 -0
  24. package/dist/lifecycle.js +1 -0
  25. package/dist/logger/default.js +3 -3
  26. package/dist/models/amazon/middleware.js +2 -1
  27. package/dist/models/anthropic/middleware.d.ts +1 -1
  28. package/dist/models/anthropic/middleware.js +1 -0
  29. package/dist/models/google/middleware.d.ts +1 -1
  30. package/dist/models/google/middleware.js +1 -0
  31. package/dist/models/google/presets.d.ts +412 -0
  32. package/dist/models/google/presets.js +96 -0
  33. package/dist/models/openai/middleware.js +2 -1
  34. package/dist/models/types.d.ts +1 -1
  35. package/dist/models/types.js +8 -0
  36. package/dist/providers/bedrock/canonical.js +3 -0
  37. package/dist/providers/bedrock/middleware.js +4 -8
  38. package/dist/providers/groq/middleware.js +1 -2
  39. package/dist/telemetry/gen-ai.d.ts +2 -1
  40. package/dist/telemetry/gen-ai.js +41 -11
  41. package/dist/types.d.ts +19 -4
  42. package/dist/utils/body.d.ts +19 -0
  43. package/dist/utils/body.js +99 -0
  44. package/dist/utils/env.js +2 -2
  45. package/dist/utils/stream.js +1 -1
  46. package/package.json +31 -31
package/README.md CHANGED
@@ -42,7 +42,7 @@ bun install @hebo-ai/gateway
42
42
  - Endpoints
43
43
  - [/chat/completions](#chatcompletions) | [/embeddings](#embeddings) | [/models](#models) | [/responses](#responses) | [/conversations](#conversations)
44
44
  - OpenAI Extensions
45
- - [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
45
+ - [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching) | [Compressed Requests](#compressed-requests)
46
46
  - Advanced Usage
47
47
  - [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
48
48
 
@@ -792,6 +792,36 @@ Provider behavior:
792
792
  - **Google Gemini**: maps `cached_content` to Gemini `cachedContent`.
793
793
  - **Amazon Nova (Bedrock)**: maps `cache_control` to Bedrock `cachePoints` and inserts an automatic cache point on a stable prefix when none is provided.
794
794
 
795
+
796
+ ### Compressed Requests
797
+
798
+ The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum *decompressed* body size for these compressed requests, protecting against gzip bombs and oversized payloads.
799
+
800
+ ```ts
801
+ import { gateway } from "@hebo-ai/gateway";
802
+
803
+ const gw = gateway({
804
+ // ...
805
+ // Maximum decompressed body size in bytes (default: 10 MB).
806
+ // Set to 0 to disable the decompressed size limit.
807
+ maxBodySize: 10 * 1024 * 1024,
808
+ });
809
+ ```
810
+
811
+ Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
812
+
813
+ > [!IMPORTANT]
814
+ > **Plain (uncompressed) request body size limits** are *not* enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
815
+ >
816
+ > Framework-level configuration examples:
817
+ >
818
+ > - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
819
+ > - **Elysia** — inherits from Bun's `maxRequestBodySize`
820
+ > - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
821
+ > - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
822
+ > - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
823
+ > - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
824
+
795
825
  ## 🧪 Advanced Usage
796
826
 
797
827
  ### Logger Settings
@@ -863,19 +893,37 @@ Attribute names and span & metrics semantics follow OpenTelemetry GenAI semantic
863
893
  https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
864
894
  https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/
865
895
 
896
+ For observability integrations that are not OTel-compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
897
+
898
+ #### Custom Telemetry Attributes
899
+
900
+ Use `ctx.otel` in any hook to attach attributes to both spans and metrics:
901
+
902
+ ```ts
903
+ hooks: {
904
+ onRequest: (ctx) => {
905
+ ctx.otel["app.tenant.id"] = tenantId;
906
+ ctx.otel["app.user.id"] = userId;
907
+ },
908
+ }
909
+ ```
910
+
911
+ These attributes appear on the active span and on all metric instruments (request duration, token usage, TPOT, TTFT).
912
+
866
913
  > [!TIP]
867
914
  > To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
868
915
  > For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
869
916
 
870
- For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
871
-
872
917
  #### Metrics
873
918
 
874
919
  The Gateway also emits `gen_ai` metrics:
875
920
 
876
921
  - `gen_ai.server.request.duration` (histogram, seconds)
877
922
  - `gen_ai.server.time_per_output_token` (histogram, seconds)
878
- - `gen_ai.client.token.usage` (histogram, tokens; tagged with `gen_ai.token.type=input|output`)
923
+ - `gen_ai.server.time_to_first_token` (histogram, seconds)
924
+ - `gen_ai.client.token.usage` (histogram, tokens; tagged with `gen_ai.token.type=input|output|cached|reasoning`)
925
+
926
+ Metric names and attributes follow OpenTelemetry GenAI semantic conventions. Histogram bucket boundaries are tuned for practical dashboards and alerting rather than copied verbatim from upstream recommendations.
879
927
 
880
928
  To capture them, configure a global `MeterProvider` before creating the gateway:
881
929
 
@@ -1073,3 +1121,33 @@ Non-streaming versions are available via `toChatCompletionsResponse`. Equivalent
1073
1121
 
1074
1122
  > [!TIP]
1075
1123
  > Since Zod v4.3 you can generate a JSON Schema from any zod object by calling `z.toJSONSchema(...)`. This is useful for producing OpenAPI documentation from the same source of truth.
1124
+
1125
+
1126
+ ### Request Body Size
1127
+
1128
+ The gateway supports gzip and deflate compressed request bodies via the Web Compression Streams API. The `maxBodySize` option controls the maximum *decompressed* body size for these compressed requests, protecting against gzip bombs and oversized payloads.
1129
+
1130
+ ```ts
1131
+ import { gateway } from "@hebo-ai/gateway";
1132
+
1133
+ const gw = gateway({
1134
+ // ...
1135
+ // Maximum decompressed body size in bytes (default: 10 MB).
1136
+ // Set to 0 to disable the decompressed size limit.
1137
+ maxBodySize: 10 * 1024 * 1024,
1138
+ });
1139
+ ```
1140
+
1141
+ Compressed requests that exceed this limit after decompression receive an HTTP `413 Payload Too Large` response. Unsupported `Content-Encoding` values return HTTP `415 Unsupported Media Type`.
1142
+
1143
+ > [!IMPORTANT]
1144
+ > **Plain (uncompressed) request body size limits** are *not* enforced by the gateway — they should be configured at the framework or server level. The gateway only enforces `maxBodySize` on decompressed output, since the framework cannot know the decompressed size ahead of time.
1145
+ >
1146
+ > Framework-level configuration examples:
1147
+ >
1148
+ > - **Bun** — [`Bun.serve({ maxRequestBodySize: 10_485_760 })`](https://bun.sh/docs/api/http#bun-serve)
1149
+ > - **Elysia** — inherits from Bun's `maxRequestBodySize`
1150
+ > - **Hono** — [`bodyLimit` middleware](https://hono.dev/docs/middleware/builtin/body-limit): `app.use(bodyLimit({ maxSize: 10 * 1024 * 1024 }))`
1151
+ > - **Express** — [`express.json({ limit: '10mb' })`](https://expressjs.com/en/api.html#express.json)
1152
+ > - **Fastify** — [`fastify({ bodyLimit: 10485760 })`](https://fastify.dev/docs/latest/Reference/Server/#bodylimit)
1153
+ > - **Node.js `http`** — [`server.maxRequestSize`](https://nodejs.org/api/http.html) (v22.6+), or use a reverse proxy like nginx (`client_max_body_size 10m`)
package/dist/config.js CHANGED
@@ -3,6 +3,7 @@ import { isLogger, logger, setLoggerInstance } from "./logger";
3
3
  import { createDefaultLogger } from "./logger/default";
4
4
  import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
5
5
  import { DEFAULT_CHAT_TIMEOUT_MS, kParsed, } from "./types";
6
+ import { DEFAULT_MAX_BODY_SIZE } from "./utils/body";
6
7
  export const parseConfig = (config) => {
7
8
  // If it has been parsed before, just return.
8
9
  if (kParsed in config)
@@ -94,10 +95,23 @@ export const parseConfig = (config) => {
94
95
  flex = t.flex;
95
96
  }
96
97
  const parsedTimeouts = { normal, flex };
98
+ // Body size limit
99
+ const rawMax = config.maxBodySize;
100
+ let maxBodySize;
101
+ if (typeof rawMax === "number" && Number.isFinite(rawMax) && rawMax >= 0) {
102
+ maxBodySize = rawMax;
103
+ }
104
+ else {
105
+ maxBodySize = DEFAULT_MAX_BODY_SIZE;
106
+ if (rawMax !== undefined) {
107
+ logger.warn(`[config] invalid maxBodySize (${rawMax}), using default ${DEFAULT_MAX_BODY_SIZE}`);
108
+ }
109
+ }
97
110
  // Return parsed config.
98
111
  return {
99
112
  ...config,
100
113
  timeouts: parsedTimeouts,
114
+ maxBodySize,
101
115
  telemetry: {
102
116
  ...config.telemetry,
103
117
  enabled: telemetryEnabled,
@@ -1,9 +1,9 @@
1
1
  import type { SharedV3ProviderMetadata } from "@ai-sdk/provider";
2
2
  import type { GenerateTextResult, StreamTextResult, FinishReason, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
3
3
  import { Output } from "ai";
4
- import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
5
4
  import type { SseErrorFrame, SseFrame } from "../../utils/stream";
6
5
  import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
6
+ import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
7
7
  export declare function convertToTextCallOptions(params: ChatCompletionsInputs): TextCallOptions;
8
8
  export declare function convertToModelMessages(messages: ChatCompletionsMessage[]): ModelMessage[];
9
9
  export declare function fromChatCompletionsUserMessage(message: ChatCompletionsUserMessage): UserModelMessage;
@@ -1,4 +1,5 @@
1
1
  import { Output, jsonSchema, tool } from "ai";
2
+ import { GatewayError } from "../../errors/gateway";
2
3
  import { toResponse } from "../../utils/response";
3
4
  import { parseJsonOrText, parseReasoningOptions, parsePromptCachingOptions, resolveResponseServiceTier, normalizeToolName, stripEmptyKeys, parseBase64, parseImageInput, extractReasoningMetadata, } from "../shared/converters";
4
5
  // --- Request Flow ---
@@ -32,7 +33,7 @@ export function convertToTextCallOptions(params) {
32
33
  }
33
34
  function convertToOutput(responseFormat) {
34
35
  if (!responseFormat || responseFormat.type === "text") {
35
- return;
36
+ return undefined;
36
37
  }
37
38
  const { name, description, schema } = responseFormat.json_schema;
38
39
  return Output.object({
@@ -207,7 +208,7 @@ export function fromChatCompletionsContent(content) {
207
208
  return out;
208
209
  }
209
210
  default:
210
- throw new Error(`Unhandled content part type: ${part.type}`);
211
+ throw new GatewayError(`Unsupported content part type: ${part.type}`, 400);
211
212
  }
212
213
  });
213
214
  }
@@ -257,7 +258,7 @@ function fromFilePart(base64Data, mediaType, filename, cacheControl) {
257
258
  }
258
259
  export const convertToToolSet = (tools) => {
259
260
  if (!tools) {
260
- return;
261
+ return undefined;
261
262
  }
262
263
  const toolSet = {};
263
264
  for (const t of tools) {
@@ -5,8 +5,9 @@ import { winterCgHandler } from "../../lifecycle";
5
5
  import { logger } from "../../logger";
6
6
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
7
  import { resolveProvider } from "../../providers/registry";
8
- import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
8
+ import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTimeToFirstToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
9
  import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
+ import { parseRequestBody } from "../../utils/body";
10
11
  import { prepareForwardHeaders } from "../../utils/request";
11
12
  import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
12
13
  import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
@@ -22,14 +23,8 @@ export const chatCompletions = (config) => {
22
23
  if (!ctx.request || ctx.request.method !== "POST") {
23
24
  throw new GatewayError("Method Not Allowed", 405);
24
25
  }
25
- // Parse + validate input.
26
- try {
27
- // oxlint-disable-next-line no-unsafe-assignment
28
- ctx.body = await ctx.request.json();
29
- }
30
- catch {
31
- throw new GatewayError("Invalid JSON", 400);
32
- }
26
+ // Parse + validate input (handles Content-Encoding decompression + body size limits).
27
+ ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
33
28
  logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[chat] ChatCompletionsBody");
34
29
  addSpanEvent("hebo.request.deserialized");
35
30
  const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
@@ -83,6 +78,7 @@ export const chatCompletions = (config) => {
83
78
  // Execute request (streaming vs. non-streaming).
84
79
  if (stream) {
85
80
  addSpanEvent("hebo.ai-sdk.started");
81
+ let ttft = 0;
86
82
  const result = streamText({
87
83
  model: languageModelWithMiddleware,
88
84
  headers: prepareForwardHeaders(ctx.request),
@@ -94,6 +90,12 @@ export const chatCompletions = (config) => {
94
90
  throw new DOMException("The operation was aborted.", "AbortError");
95
91
  },
96
92
  onError: () => { },
93
+ onChunk: () => {
94
+ if (!ttft) {
95
+ ttft = performance.now() - start;
96
+ recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
97
+ }
98
+ },
97
99
  onFinish: (res) => {
98
100
  addSpanEvent("hebo.ai-sdk.completed");
99
101
  const streamResult = toChatCompletions(res, ctx.resolvedModelId);
@@ -102,7 +104,7 @@ export const chatCompletions = (config) => {
102
104
  const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
103
105
  setSpanAttributes(genAiResponseAttrs);
104
106
  recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
105
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
107
+ recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
106
108
  },
107
109
  experimental_include: {
108
110
  requestBody: false,
@@ -131,6 +133,7 @@ export const chatCompletions = (config) => {
131
133
  });
132
134
  logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
133
135
  addSpanEvent("hebo.ai-sdk.completed");
136
+ recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
134
137
  // Transform result.
135
138
  ctx.result = toChatCompletions(result, ctx.resolvedModelId);
136
139
  logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
@@ -142,7 +145,7 @@ export const chatCompletions = (config) => {
142
145
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
143
146
  addSpanEvent("hebo.hooks.after.completed");
144
147
  }
145
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
148
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
146
149
  return ctx.result;
147
150
  };
148
151
  return { handler: winterCgHandler(handler, config) };
@@ -1,5 +1,5 @@
1
1
  import type { Attributes } from "@opentelemetry/api";
2
- import type { ChatCompletions, ChatCompletionsBody } from "./schema";
3
2
  import { type TelemetrySignalLevel } from "../../types";
3
+ import type { ChatCompletions, ChatCompletionsBody } from "./schema";
4
4
  export declare const getChatRequestAttributes: (body: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
5
5
  export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
@@ -79,6 +79,9 @@ const toUserParts = (content) => {
79
79
  parts.push(filePart);
80
80
  break;
81
81
  }
82
+ default:
83
+ parts.push({ type: part.type, content: "[UNHANDLED_CONTENT_PART]" });
84
+ break;
82
85
  }
83
86
  }
84
87
  return parts;
@@ -96,7 +99,7 @@ const toMessageParts = (message) => {
96
99
  case "system":
97
100
  return toTextParts(message.content);
98
101
  default:
99
- throw new Error(`Unhandled content part type: ${message.role}`);
102
+ return [{ type: message.role, content: "[UNHANDLED_ROLE]" }];
100
103
  }
101
104
  };
102
105
  export const getChatRequestAttributes = (body, signalLevel) => {
@@ -4,8 +4,9 @@ import { GatewayError } from "../../errors/gateway";
4
4
  import { winterCgHandler } from "../../lifecycle";
5
5
  import { logger } from "../../logger";
6
6
  import { addSpanEvent } from "../../telemetry/span";
7
- import { ConversationCreateParamsSchema, ConversationItemsAddBodySchema, ConversationUpdateBodySchema, ConversationItemListParamsSchema, ConversationListParamsSchema, } from "./schema";
7
+ import { parseRequestBody } from "../../utils/body";
8
8
  import { toConversation, toConversationItem, toConversationDeleted } from "./converters";
9
+ import { ConversationCreateParamsSchema, ConversationItemsAddBodySchema, ConversationUpdateBodySchema, ConversationItemListParamsSchema, ConversationListParamsSchema, } from "./schema";
9
10
  export const conversations = (config) => {
10
11
  const parsedConfig = parseConfig(config);
11
12
  const storage = parsedConfig.storage;
@@ -42,13 +43,7 @@ export const conversations = (config) => {
42
43
  };
43
44
  }
44
45
  async function create(ctx) {
45
- let body;
46
- try {
47
- body = await ctx.request.json();
48
- }
49
- catch {
50
- throw new GatewayError("Invalid JSON", 400);
51
- }
46
+ const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
52
47
  addSpanEvent("hebo.request.deserialized");
53
48
  const parsed = ConversationCreateParamsSchema.safeParse(body);
54
49
  if (!parsed.success) {
@@ -72,13 +67,7 @@ export const conversations = (config) => {
72
67
  return toConversation(entity);
73
68
  }
74
69
  async function update(ctx, conversationId) {
75
- let body;
76
- try {
77
- body = await ctx.request.json();
78
- }
79
- catch {
80
- throw new GatewayError("Invalid JSON", 400);
81
- }
70
+ const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
82
71
  addSpanEvent("hebo.request.deserialized");
83
72
  const parsed = ConversationUpdateBodySchema.safeParse(body);
84
73
  if (!parsed.success) {
@@ -150,13 +139,7 @@ export const conversations = (config) => {
150
139
  };
151
140
  }
152
141
  async function addItems(ctx, conversationId) {
153
- let body;
154
- try {
155
- body = await ctx.request.json();
156
- }
157
- catch {
158
- throw new GatewayError("Invalid JSON", 400);
159
- }
142
+ const body = await parseRequestBody(ctx.request, parsedConfig.maxBodySize);
160
143
  addSpanEvent("hebo.request.deserialized");
161
144
  const parsed = ConversationItemsAddBodySchema.safeParse(body);
162
145
  if (!parsed.success) {
@@ -36,7 +36,7 @@ function createMysql2Executor(pool) {
36
36
  async run(sql, params) {
37
37
  const [res] = await pool.execute(sql, mapParams(params));
38
38
  const header = res;
39
- return { changes: Number(header.affectedRows ?? 0) };
39
+ return { changes: header.affectedRows ?? 0 };
40
40
  },
41
41
  async transaction(fn) {
42
42
  const conn = await pool.getConnection();
@@ -53,7 +53,7 @@ function createMysql2Executor(pool) {
53
53
  async run(sql, params) {
54
54
  const [res] = await conn.execute(sql, mapParams(params));
55
55
  const header = res;
56
- return { changes: Number(header.affectedRows ?? 0) };
56
+ return { changes: header.affectedRows ?? 0 };
57
57
  },
58
58
  transaction(txCallback) {
59
59
  return txCallback(txExecutor);
@@ -87,7 +87,7 @@ function createBunMysqlExecutor(sql) {
87
87
  async run(query, params) {
88
88
  const res = (await sql.unsafe(query, mapParams(params)));
89
89
  const result = res;
90
- return { changes: Number(result.affectedRows ?? result.count ?? 0) };
90
+ return { changes: result.affectedRows ?? result.count ?? 0 };
91
91
  },
92
92
  transaction(fn) {
93
93
  return sql.transaction((tx) => {
@@ -59,7 +59,7 @@ function createPgExecutor(pool, mapParams) {
59
59
  async run(sql, params) {
60
60
  const p = mapParams(params);
61
61
  const res = await pool.query(getQuery(sql, p?.length > 0 ? p : undefined));
62
- return { changes: Number(res.rowCount ?? 0) };
62
+ return { changes: res.rowCount ?? 0 };
63
63
  },
64
64
  async transaction(fn) {
65
65
  const client = await pool.connect();
@@ -78,7 +78,7 @@ function createPgExecutor(pool, mapParams) {
78
78
  async run(sql, params) {
79
79
  const p = mapParams(params);
80
80
  const res = await client.query(getQuery(sql, p?.length > 0 ? p : undefined));
81
- return { changes: Number(res.rowCount ?? 0) };
81
+ return { changes: res.rowCount ?? 0 };
82
82
  },
83
83
  transaction(txCallback) {
84
84
  return txCallback(txExecutor);
@@ -115,7 +115,7 @@ function createPostgresJsExecutor(sql, mapParams) {
115
115
  const p = mapParams(params);
116
116
  const res = await sql.unsafe(query, (p?.length > 0 ? p : undefined), { prepare: true });
117
117
  const result = res;
118
- return { changes: Number(result.count ?? 0) };
118
+ return { changes: result.count ?? 0 };
119
119
  },
120
120
  async transaction(fn) {
121
121
  return (await sql.begin((tx) => {
@@ -151,7 +151,7 @@ function createBunPostgresExecutor(sql, mapParams) {
151
151
  if (!isNaN(parsed))
152
152
  changes = parsed;
153
153
  }
154
- return { changes: Number(changes) };
154
+ return { changes };
155
155
  },
156
156
  transaction(fn) {
157
157
  return sql.transaction((tx) => {
@@ -97,7 +97,7 @@ function createLibsqlExecutor(client) {
97
97
  },
98
98
  async run(sql, params) {
99
99
  const rs = await client.execute({ sql, args: mapParams(params) ?? [] });
100
- return { changes: Number(rs.rowsAffected) };
100
+ return { changes: rs.rowsAffected };
101
101
  },
102
102
  async transaction(fn) {
103
103
  const tx = await client.transaction("deferred");
@@ -112,7 +112,7 @@ function createLibsqlExecutor(client) {
112
112
  },
113
113
  async run(sql, params) {
114
114
  const rs = await tx.execute({ sql, args: mapParams(params) ?? [] });
115
- return { changes: Number(rs.rowsAffected) };
115
+ return { changes: rs.rowsAffected };
116
116
  },
117
117
  transaction(txCallback) {
118
118
  return txCallback(txExecutor);
@@ -143,7 +143,7 @@ function createBunSqliteExecutor(sql) {
143
143
  async run(query, params) {
144
144
  const res = (await sql.unsafe(query, mapParams(params)));
145
145
  const result = res;
146
- return { changes: Number(result.affectedRows ?? result.count ?? 0) };
146
+ return { changes: result.affectedRows ?? result.count ?? 0 };
147
147
  },
148
148
  transaction(fn) {
149
149
  return sql.transaction((tx) => {
@@ -1,5 +1,5 @@
1
- import type { ConversationStorage, ConversationEntity, ConversationItemEntity, ConversationMetadata, ConversationItemInput, ConversationQueryOptions } from "./types";
2
1
  import type { SqlDialect } from "./dialects/types";
2
+ import type { ConversationStorage, ConversationEntity, ConversationItemEntity, ConversationMetadata, ConversationItemInput, ConversationQueryOptions } from "./types";
3
3
  export declare class SqlStorage implements ConversationStorage {
4
4
  readonly dialect: SqlDialect;
5
5
  constructor(options: SqlDialect | {
@@ -138,14 +138,13 @@ export class SqlStorage {
138
138
  args.push(after);
139
139
  }
140
140
  sqlParts.push(`ORDER BY c.${q("created_at")} ${dir}, c.${q("id")} ${dir}`);
141
- const limitVal = Number(limit);
142
- if (!isNaN(limitVal)) {
141
+ if (!Number.isNaN(limit)) {
143
142
  if (limitAsLiteral) {
144
- sqlParts.push(`LIMIT ${limitVal}`);
143
+ sqlParts.push(`LIMIT ${limit}`);
145
144
  }
146
145
  else {
147
146
  sqlParts.push(`LIMIT ${p(nextIdx++)}`);
148
- args.push(limitVal);
147
+ args.push(limit);
149
148
  }
150
149
  }
151
150
  const query = sqlParts.join(" ");
@@ -168,7 +167,7 @@ export class SqlStorage {
168
167
  // to deduplicate the row.
169
168
  const conversation = await this.getConversationInternal(id, tx);
170
169
  if (!conversation)
171
- return;
170
+ return conversation;
172
171
  const createdAt = conversation.created_at;
173
172
  const pk = ["id"];
174
173
  const updateCols = ["metadata"];
@@ -195,7 +194,7 @@ export class SqlStorage {
195
194
  if (!skipCheck) {
196
195
  const conversation = await this.getConversationInternal(conversationId, tx);
197
196
  if (!conversation)
198
- return;
197
+ return conversation;
199
198
  }
200
199
  const { placeholder: p, quote: q } = this.config;
201
200
  const columns = ["id", "conversation_id", "type", "data", "created_at"];
@@ -253,14 +252,13 @@ export class SqlStorage {
253
252
  args.push(after, conversationId);
254
253
  }
255
254
  sqlParts.push(`ORDER BY c.${q("created_at")} ${dir}, c.${q("id")} ${dir}`);
256
- const limitVal = Number(limit);
257
- if (!isNaN(limitVal)) {
255
+ if (!Number.isNaN(limit)) {
258
256
  if (limitAsLiteral) {
259
- sqlParts.push(`LIMIT ${limitVal}`);
257
+ sqlParts.push(`LIMIT ${limit}`);
260
258
  }
261
259
  else {
262
260
  sqlParts.push(`LIMIT ${p(nextIdx++)}`);
263
- args.push(limitVal);
261
+ args.push(limit);
264
262
  }
265
263
  }
266
264
  const query = sqlParts.join(" ");
@@ -7,6 +7,7 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
7
  import { resolveProvider } from "../../providers/registry";
8
8
  import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
9
  import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
+ import { parseRequestBody } from "../../utils/body";
10
11
  import { prepareForwardHeaders } from "../../utils/request";
11
12
  import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
12
13
  import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from "./otel";
@@ -22,14 +23,8 @@ export const embeddings = (config) => {
22
23
  if (!ctx.request || ctx.request.method !== "POST") {
23
24
  throw new GatewayError("Method Not Allowed", 405);
24
25
  }
25
- // Parse + validate input.
26
- try {
27
- // oxlint-disable-next-line no-unsafe-assignment
28
- ctx.body = await ctx.request.json();
29
- }
30
- catch {
31
- throw new GatewayError("Invalid JSON", 400);
32
- }
26
+ // Parse + validate input (handles Content-Encoding decompression + body size limits).
27
+ ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
33
28
  logger.trace({ requestId: ctx.requestId, result: ctx.body }, "[chat] EmbeddingsBody");
34
29
  addSpanEvent("hebo.request.deserialized");
35
30
  const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
@@ -97,7 +92,7 @@ export const embeddings = (config) => {
97
92
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
98
93
  addSpanEvent("hebo.hooks.after.completed");
99
94
  }
100
- recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
95
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
101
96
  return ctx.result;
102
97
  };
103
98
  return { handler: winterCgHandler(handler, config) };
@@ -1,5 +1,5 @@
1
1
  import type { Attributes } from "@opentelemetry/api";
2
- import type { Embeddings, EmbeddingsBody } from "./schema";
3
2
  import { type TelemetrySignalLevel } from "../../types";
3
+ import type { Embeddings, EmbeddingsBody } from "./schema";
4
4
  export declare const getEmbeddingsRequestAttributes: (body: EmbeddingsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
5
5
  export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
@@ -1,8 +1,8 @@
1
1
  import type { GenerateTextResult, StreamTextResult, ToolSet, ModelMessage, LanguageModelUsage, TextStreamPart } from "ai";
2
2
  import { Output } from "ai";
3
- import type { ResponsesInputItem, ResponsesInputs, Responses, ResponsesUsage, ResponsesStream, ResponsesStreamEvent, ResponsesToolChoice, ResponsesTool } from "./schema";
4
3
  import type { SseErrorFrame } from "../../utils/stream";
5
4
  import { type TextCallOptions, type ToolChoiceOptions } from "../shared/converters";
5
+ import type { ResponsesInputItem, ResponsesInputs, Responses, ResponsesUsage, ResponsesStream, ResponsesStreamEvent, ResponsesToolChoice, ResponsesTool } from "./schema";
6
6
  export declare function convertToTextCallOptions(params: ResponsesInputs): TextCallOptions;
7
7
  export declare function convertToModelMessages(input: string | ResponsesInputItem[], instructions?: string): ModelMessage[];
8
8
  export declare const convertToToolSet: (tools: ResponsesTool[] | undefined) => ToolSet | undefined;
@@ -43,7 +43,7 @@ export function convertToTextCallOptions(params) {
43
43
  function convertToOutput(text) {
44
44
  if (!text?.format || text.format.type === "text") {
45
45
  // FUTURE: Support text.verbosity when AI SDK adds top-level support
46
- return;
46
+ return undefined;
47
47
  }
48
48
  const { name, description, schema } = text.format;
49
49
  return Output.object({
@@ -147,6 +147,7 @@ function fromMessageItem(item) {
147
147
  case "assistant":
148
148
  return fromAssistantMessageItem(item);
149
149
  }
150
+ throw new GatewayError("Unsupported message role", 400);
150
151
  }
151
152
  function fromUserMessageItem(item) {
152
153
  const out = {
@@ -236,6 +237,8 @@ function fromInputContent(content) {
236
237
  result.push(out);
237
238
  break;
238
239
  }
240
+ default:
241
+ throw new GatewayError(`Unsupported content part type: ${part.type}`, 400);
239
242
  }
240
243
  }
241
244
  return result;
@@ -359,7 +362,7 @@ function fromFunctionCallOutputItem(item, toolOutputByCallId) {
359
362
  }
360
363
  export const convertToToolSet = (tools) => {
361
364
  if (!tools) {
362
- return;
365
+ return undefined;
363
366
  }
364
367
  const toolSet = {};
365
368
  for (const t of tools) {