@hebo-ai/gateway 0.8.2 → 0.9.1

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (44)
  1. package/README.md +131 -32
  2. package/dist/endpoints/chat-completions/converters.d.ts +4 -21
  3. package/dist/endpoints/chat-completions/converters.js +23 -160
  4. package/dist/endpoints/chat-completions/handler.js +2 -2
  5. package/dist/endpoints/chat-completions/schema.d.ts +45 -101
  6. package/dist/endpoints/chat-completions/schema.js +13 -69
  7. package/dist/endpoints/conversations/converters.js +2 -3
  8. package/dist/endpoints/conversations/schema.d.ts +506 -644
  9. package/dist/endpoints/conversations/schema.js +8 -159
  10. package/dist/endpoints/conversations/storage/dialects/greptime.js +20 -6
  11. package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
  12. package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
  13. package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
  14. package/dist/endpoints/conversations/storage/sql.js +11 -6
  15. package/dist/endpoints/embeddings/handler.js +1 -1
  16. package/dist/endpoints/responses/converters.d.ts +17 -0
  17. package/dist/endpoints/responses/converters.js +1034 -0
  18. package/dist/endpoints/responses/handler.d.ts +2 -0
  19. package/dist/endpoints/responses/handler.js +137 -0
  20. package/dist/endpoints/responses/index.d.ts +4 -0
  21. package/dist/endpoints/responses/index.js +4 -0
  22. package/dist/endpoints/responses/otel.d.ts +6 -0
  23. package/dist/endpoints/responses/otel.js +221 -0
  24. package/dist/endpoints/responses/schema.d.ts +2109 -0
  25. package/dist/endpoints/responses/schema.js +314 -0
  26. package/dist/endpoints/shared/converters.d.ts +56 -0
  27. package/dist/endpoints/shared/converters.js +179 -0
  28. package/dist/endpoints/shared/schema.d.ts +70 -0
  29. package/dist/endpoints/shared/schema.js +46 -0
  30. package/dist/gateway.d.ts +1 -0
  31. package/dist/gateway.js +2 -0
  32. package/dist/index.d.ts +0 -4
  33. package/dist/index.js +0 -4
  34. package/dist/lifecycle.js +46 -29
  35. package/dist/models/anthropic/middleware.d.ts +1 -1
  36. package/dist/models/anthropic/presets.js +6 -1
  37. package/dist/models/google/middleware.d.ts +1 -1
  38. package/dist/models/google/middleware.js +9 -3
  39. package/dist/models/meta/presets.js +12 -2
  40. package/dist/providers/registry.d.ts +1 -1
  41. package/dist/types.d.ts +18 -6
  42. package/dist/utils/env.js +1 -1
  43. package/dist/utils/preset.js +0 -1
  44. package/package.json +8 -4
package/dist/index.d.ts CHANGED
@@ -5,10 +5,6 @@ export * from "./errors/openai";
  export * from "./logger";
  export * from "./middleware/common";
  export * from "./middleware/matcher";
- export * from "./endpoints/chat-completions";
- export * from "./endpoints/conversations";
- export * from "./endpoints/embeddings";
- export * from "./endpoints/models";
  export * from "./models/catalog";
  export * from "./models/types";
  export * from "./providers/registry";
package/dist/index.js CHANGED
@@ -4,10 +4,6 @@ export * from "./errors/openai";
  export * from "./logger";
  export * from "./middleware/common";
  export * from "./middleware/matcher";
- export * from "./endpoints/chat-completions";
- export * from "./endpoints/conversations";
- export * from "./endpoints/embeddings";
- export * from "./endpoints/models";
  export * from "./models/catalog";
  export * from "./models/types";
  export * from "./providers/registry";
package/dist/lifecycle.js CHANGED
@@ -54,43 +54,60 @@ export const winterCgHandler = (run, config) => {
              span.recordError(reason);
          }
          span.setAttributes({ "http.response.status_code_effective": realStatus });
-         if (ctx.operation === "chat" || ctx.operation === "embeddings") {
+         if (ctx.operation === "chat" ||
+             ctx.operation === "embeddings" ||
+             ctx.operation === "responses") {
              recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
          }
          span.finish();
      };
-     try {
-         if (parsedConfig.hooks?.onRequest) {
-             const onRequest = await parsedConfig.hooks.onRequest(ctx);
-             addSpanEvent("hebo.hooks.on_request.completed");
-             if (onRequest instanceof Response) {
-                 ctx.response = onRequest;
+     await span.runWithContext(async () => {
+         try {
+             if (parsedConfig.hooks?.onRequest) {
+                 const onRequest = await parsedConfig.hooks.onRequest(ctx);
+                 addSpanEvent("hebo.hooks.on_request.completed");
+                 if (onRequest instanceof Response) {
+                     ctx.response = onRequest;
+                 }
              }
-         }
-         if (!ctx.response) {
-             ctx.result = (await span.runWithContext(() => run(ctx, parsedConfig)));
-             ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
-                 onDone: finalize,
-             });
-         }
-         if (parsedConfig.hooks?.onResponse) {
-             const onResponse = await parsedConfig.hooks.onResponse(ctx);
-             addSpanEvent("hebo.hooks.on_response.completed");
-             if (onResponse) {
-                 ctx.response = onResponse;
+             if (!ctx.response) {
+                 ctx.result = (await run(ctx, parsedConfig));
+                 ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId), {
+                     onDone: finalize,
+                 });
+             }
+             if (parsedConfig.hooks?.onResponse) {
+                 const onResponse = await parsedConfig.hooks.onResponse(ctx);
+                 addSpanEvent("hebo.hooks.on_response.completed");
+                 if (onResponse) {
+                     ctx.response = onResponse;
+                 }
+             }
+             // FUTURE: this can leak if onResponse removed wrapper from response.body
+             if (!(ctx.result instanceof ReadableStream)) {
+                 finalize(ctx.response.status);
              }
          }
-         // FUTURE: this can leak if onResponse removed wrapper from response.body
-         if (!(ctx.result instanceof ReadableStream)) {
-             finalize(ctx.response.status);
+         catch (error) {
+             if (parsedConfig.hooks?.onError) {
+                 try {
+                     ctx.error = error;
+                     const onError = await parsedConfig.hooks.onError(ctx);
+                     addSpanEvent("hebo.hooks.on_error.completed");
+                     if (onError) {
+                         ctx.response = onError;
+                     }
+                 }
+                 catch {
+                     logger.debug("[lifecycle] onError hook threw");
+                 }
+             }
+             ctx.response ??= toOpenAIErrorResponse(ctx.request.signal.aborted
+                 ? new GatewayError(error ?? ctx.request.signal.reason, 499)
+                 : error, prepareResponseInit(ctx.requestId));
+             finalize(ctx.response.status, error);
          }
-     }
-     catch (error) {
-         ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
-             ? new GatewayError(error ?? ctx.request.signal.reason, 499)
-             : error, prepareResponseInit(ctx.requestId));
-         finalize(ctx.response.status, error);
-     }
+     });
      return ctx.response ?? new Response("Internal Server Error", { status: 500 });
  };
  };
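
The lifecycle now runs inside span.runWithContext and gains an onError hook: the hook may return a Response to replace the default OpenAI-style error response, a throwing hook is swallowed and logged, and aborted requests still map to a 499 GatewayError. A minimal usage sketch, assuming a gateway config object shaped like the GatewayHooks type shown later in this diff; only the hook shape itself comes from the diff:

const config = {
  hooks: {
    onError: async (ctx) => {
      // ctx.error carries the thrown value (the new `error` field on GatewayContext).
      console.error("gateway request failed", ctx.error);
      // Return a Response to override the default error response,
      // or undefined to keep toOpenAIErrorResponse's output.
      return undefined;
    },
  },
};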
package/dist/models/anthropic/middleware.d.ts CHANGED
@@ -1,5 +1,5 @@
  import type { LanguageModelMiddleware } from "ai";
  import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
- export declare function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "medium" | "max";
+ export declare function mapClaudeReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "medium" | "high" | "max";
  export declare const claudeReasoningMiddleware: LanguageModelMiddleware;
  export declare const claudePromptCachingMiddleware: LanguageModelMiddleware;
package/dist/models/anthropic/presets.js CHANGED
@@ -6,7 +6,12 @@ const CLAUDE_BASE = {
      },
      capabilities: ["attachments", "tool_call", "structured_output", "temperature"],
      context: 200000,
-     providers: ["anthropic", "bedrock", "vertex", "azure"],
+     providers: [
+         "anthropic",
+         "bedrock",
+         "vertex",
+         "azure",
+     ],
  };
  const CLAUDE_PDF_MODALITIES = {
      modalities: {
package/dist/models/google/middleware.d.ts CHANGED
@@ -1,7 +1,7 @@
  import type { EmbeddingModelMiddleware, LanguageModelMiddleware } from "ai";
  import type { ChatCompletionsReasoningEffort } from "../../endpoints/chat-completions/schema";
  export declare const geminiDimensionsMiddleware: EmbeddingModelMiddleware;
- export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "low" | "high" | "minimal" | "medium";
+ export declare function mapGeminiReasoningEffort(effort: ChatCompletionsReasoningEffort, modelId: string): "minimal" | "low" | "medium" | "high";
  export declare const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
  export declare const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
  export declare const geminiReasoningMiddleware: LanguageModelMiddleware;
package/dist/models/google/middleware.js CHANGED
@@ -71,9 +71,15 @@ export const geminiReasoningMiddleware = {
          };
      }
      else if (modelId.includes("gemini-3") && reasoning.effort) {
-         target.thinkingConfig = {
-             thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
-         };
+         if (reasoning.effort === "none") {
+             // thinkingBudget: 0 fully disables thinking (thinkingLevel: "minimal" still allows some)
+             target.thinkingConfig = { thinkingBudget: 0 };
+         }
+         else {
+             target.thinkingConfig = {
+                 thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
+             };
+         }
          // FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
      }
      const thinkingConfig = (target.thinkingConfig ??= {});
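
Behavioral change: for gemini-3 models, an effort of "none" now sets thinkingBudget: 0, which fully disables thinking, instead of mapping to a thinkingLevel ("minimal" still allows some thinking). A hedged sketch of what that implies for a request, assuming the OpenAI-style reasoning_effort field feeds reasoning.effort here; the model ID is a placeholder, not confirmed by this diff:

// Request side (field name per the chat-completions schema; model id hypothetical):
const body = {
  model: "google/gemini-3-flash",
  reasoning_effort: "none", // middleware now emits thinkingConfig: { thinkingBudget: 0 }
  messages: [{ role: "user", content: "ping" }],
};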
package/dist/models/meta/presets.js CHANGED
@@ -6,7 +6,12 @@ const LLAMA_3_BASE = {
      },
      capabilities: ["attachments", "tool_call", "temperature"],
      context: 128000,
-     providers: ["groq", "bedrock", "vertex", "azure"],
+     providers: [
+         "groq",
+         "bedrock",
+         "vertex",
+         "azure",
+     ],
  };
  export const llama31_8b = presetFor()("meta/llama-3.1-8b", {
      ...LLAMA_3_BASE,
@@ -63,7 +68,12 @@ const LLAMA_4_BASE = {
      },
      capabilities: ["attachments", "tool_call", "temperature"],
      context: 1000000,
-     providers: ["groq", "bedrock", "vertex", "azure"],
+     providers: [
+         "groq",
+         "bedrock",
+         "vertex",
+         "azure",
+     ],
  };
  export const llama4Scout = presetFor()("meta/llama-4-scout", {
      ...LLAMA_4_BASE,
package/dist/providers/registry.d.ts CHANGED
@@ -5,7 +5,7 @@ export declare const resolveProvider: (args: {
      providers: ProviderRegistry;
      models: ModelCatalog;
      modelId: ModelId;
-     operation: "chat" | "embeddings";
+     operation: "chat" | "embeddings" | "responses";
  }) => ProviderV3;
  export type CanonicalIdsOptions = {
      mapping?: Partial<Record<ModelId, string>>;
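
resolveProvider now accepts "responses" as an operation, matching the new endpoint. Illustrative call only; the argument values are placeholders and the registry/catalog construction is not part of this diff:

const provider = resolveProvider({
  providers,               // a ProviderRegistry instance
  models,                  // a ModelCatalog instance
  modelId: "openai/gpt-5", // hypothetical model id
  operation: "responses",  // newly allowed alongside "chat" and "embeddings"
});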
package/dist/types.d.ts CHANGED
@@ -3,10 +3,12 @@ import type { Tracer } from "@opentelemetry/api";
  import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsStream } from "./endpoints/chat-completions/schema";
  import type { ConversationStorage } from "./endpoints/conversations/storage/types";
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
+ import type { Responses, ResponsesBody, ResponsesStream } from "./endpoints/responses/schema";
  import type { Model, ModelList } from "./endpoints/models";
  import type { Logger, LoggerConfig } from "./logger";
  import type { ModelCatalog, ModelId } from "./models/types";
  import type { ProviderId, ProviderRegistry } from "./providers/types";
+ export type GatewayOperation = "chat" | "embeddings" | "responses" | "models" | "conversations";
  /**
   * Per-request context shared across handlers and hooks.
   */
@@ -34,7 +36,7 @@ export type GatewayContext = {
      /**
       * Parsed body from the request.
       */
-     body?: ChatCompletionsBody | EmbeddingsBody;
+     body?: ChatCompletionsBody | EmbeddingsBody | ResponsesBody;
      /**
       * Incoming model ID.
       */
@@ -46,7 +48,7 @@ export type GatewayContext = {
      /**
       * Operation type.
       */
-     operation?: "chat" | "embeddings" | "models" | "conversations";
+     operation?: GatewayOperation;
      /**
       * Resolved provider instance.
       */
@@ -58,11 +60,15 @@ export type GatewayContext = {
      /**
       * Result returned by the handler (pre-response).
       */
-     result?: ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList;
+     result?: ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList | Responses | ResponsesStream;
      /**
       * Response object returned by the handler.
       */
      response?: Response;
+     /**
+      * Error thrown during execution.
+      */
+     error?: unknown;
  };
  /**
   * Hook context: all fields readonly except `state`.
@@ -77,6 +83,7 @@ export type ResolveModelHookContext = RequiredHookContext<"request" | "operation
  export type ResolveProviderHookContext = RequiredHookContext<"request" | "operation" | "body" | "modelId" | "resolvedModelId">;
  export type AfterHookContext = RequiredHookContext<"request" | "operation" | "body" | "modelId" | "resolvedModelId" | "provider" | "resolvedProviderId" | "result">;
  export type OnResponseHookContext = RequiredHookContext<"request" | "response">;
+ export type OnErrorHookContext = RequiredHookContext<"error">;
  /**
   * Hooks to plugin to the gateway lifecycle.
   */
@@ -87,10 +94,10 @@ export type GatewayHooks = {
       */
      onRequest?: (ctx: OnRequestHookContext) => void | Response | Promise<void | Response>;
      /**
-      * Runs after request JSON is parsed and validated for chat completions / embeddings.
+      * Runs after request JSON is parsed and validated for chat completions / embeddings / responses.
       * @returns Replacement parsed body, or undefined to keep original.
       */
-     before?: (ctx: BeforeHookContext) => void | ChatCompletionsBody | EmbeddingsBody | Promise<void | ChatCompletionsBody | EmbeddingsBody>;
+     before?: (ctx: BeforeHookContext) => void | ChatCompletionsBody | EmbeddingsBody | ResponsesBody | Promise<void | ChatCompletionsBody | EmbeddingsBody | ResponsesBody>;
      /**
       * Maps a user-provided model ID or alias to a canonical ID.
       * @returns Canonical model ID or undefined to keep original.
@@ -105,12 +112,17 @@ export type GatewayHooks = {
       * Runs after the endpoint handler.
       * @returns Result to replace, or undefined to keep original.
       */
-     after?: (ctx: AfterHookContext) => void | ChatCompletions | ChatCompletionsStream | Embeddings | Promise<void | ChatCompletions | ChatCompletionsStream | Embeddings>;
+     after?: (ctx: AfterHookContext) => void | ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList | Responses | ResponsesStream | Promise<void | ChatCompletions | ChatCompletionsStream | Embeddings | Model | ModelList | Responses | ResponsesStream>;
      /**
       * Runs after the lifecycle has produced the final Response.
       * @returns Replacement Response, or undefined to keep original.
       */
      onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
+     /**
+      * Runs when the lifecycle catches an error.
+      * @returns Optional Response to replace the default error response.
+      */
+     onError?: (ctx: OnErrorHookContext) => void | Response | Promise<void | Response>;
  };
  export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
  export declare const DEFAULT_CHAT_TIMEOUT_MS: number;
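
The widened GatewayOperation union and the ResponsesBody member of `body` let hooks discriminate on the new endpoint. A sketch, assuming BeforeHookContext exposes `operation` the way the other hook contexts above do:

const hooks = {
  before: (ctx) => {
    if (ctx.operation === "responses") {
      // ctx.body should be a ResponsesBody here; return a replacement
      // body, or undefined to keep the original.
    }
  },
};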
package/dist/utils/env.js CHANGED
@@ -2,6 +2,6 @@ const NODE_ENV = typeof process === "undefined"
      ? // oxlint-disable-next-line no-unsafe-member-access
          (globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
      : // oxlint-disable-next-line no-unsafe-assignment
-         process.env?.NODE_ENV;
+         process.env?.["NODE_ENV"];
  export const isProduction = () => NODE_ENV === "production";
  export const isTest = () => NODE_ENV === "test";
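
The switch to bracket notation is the usual accommodation for TypeScript's noPropertyAccessFromIndexSignature option (a plausible motivation; the tsconfig is not part of this diff), which forces bracket access for properties coming from an index signature:

// Standalone illustration of the compiler rule, not package code:
declare const env: Record<string, string | undefined>;
const ok = env["NODE_ENV"];  // allowed
// const bad = env.NODE_ENV; // error TS4111 when the flag is enabled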
package/dist/utils/preset.js CHANGED
@@ -1,7 +1,6 @@
  function isPlainObject(v) {
      if (!v || typeof v !== "object" || Array.isArray(v))
          return false;
-     // oxlint-disable-next-line no-unsafe-assignment
      const proto = Object.getPrototypeOf(v);
      return proto === Object.prototype || proto === null;
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@hebo-ai/gateway",
-   "version": "0.8.2",
+   "version": "0.9.1",
    "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
    "keywords": [
      "ai",
@@ -66,6 +66,10 @@
        "types": "./dist/endpoints/models/index.d.ts",
        "import": "./dist/endpoints/models/index.js"
      },
+     "./endpoints/responses": {
+       "types": "./dist/endpoints/responses/index.d.ts",
+       "import": "./dist/endpoints/responses/index.js"
+     },
      "./storage/memory": {
        "types": "./dist/endpoints/conversations/storage/memory.d.ts",
        "import": "./dist/endpoints/conversations/storage/memory.js",
@@ -156,7 +160,7 @@
      "typecheck": "oxlint --type-check",
      "test": "bun test",
      "check": "bun lint && bun typecheck",
-     "fix": "bun lint:staged && bun fmt:staged"
+     "fix": "bun lint:staged && bun format:staged"
    },
    "dependencies": {
      "@ai-sdk/provider": "^3.0.8",
@@ -199,7 +203,7 @@
      "pg": "^8.19.0",
      "pino": "^10.3.1",
      "postgres": "^3.4.8",
-     "typescript": "^5.9.3",
+     "typescript": "^6.0.2",
      "vite": "^7.3.1",
      "vite-tsconfig-paths": "^6.1.1",
      "voyage-ai-provider": "^3.0.0"
@@ -218,7 +222,7 @@
      "mysql2": "^3.11.0",
      "pg": "^8.13.0",
      "postgres": "^3.4.0",
-     "typescript": "^5.9.3",
+     "typescript": ">=5.9.3",
      "voyage-ai-provider": "^3.0.0"
    },
    "peerDependenciesMeta": {