@hebo-ai/gateway 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -5
- package/dist/config.js +28 -1
- package/dist/endpoints/chat-completions/converters.d.ts +5 -5
- package/dist/endpoints/chat-completions/converters.js +65 -29
- package/dist/endpoints/chat-completions/handler.js +4 -4
- package/dist/endpoints/chat-completions/otel.d.ts +1 -1
- package/dist/endpoints/chat-completions/otel.js +20 -18
- package/dist/endpoints/chat-completions/schema.d.ts +43 -5
- package/dist/endpoints/chat-completions/schema.js +10 -0
- package/dist/endpoints/embeddings/handler.js +2 -2
- package/dist/endpoints/embeddings/otel.d.ts +2 -2
- package/dist/endpoints/embeddings/otel.js +5 -5
- package/dist/endpoints/models/handler.js +2 -2
- package/dist/errors/openai.d.ts +1 -6
- package/dist/lifecycle.d.ts +3 -2
- package/dist/lifecycle.js +4 -6
- package/dist/models/google/presets.d.ts +28 -0
- package/dist/models/google/presets.js +7 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/middleware.d.ts +1 -0
- package/dist/providers/bedrock/middleware.js +33 -0
- package/dist/providers/groq/index.d.ts +1 -0
- package/dist/providers/groq/index.js +1 -0
- package/dist/providers/groq/middleware.d.ts +2 -0
- package/dist/providers/groq/middleware.js +31 -0
- package/dist/providers/vertex/index.d.ts +1 -0
- package/dist/providers/vertex/index.js +1 -0
- package/dist/providers/vertex/middleware.d.ts +2 -0
- package/dist/providers/vertex/middleware.js +47 -0
- package/dist/types.d.ts +25 -4
- package/dist/types.js +1 -0
- package/dist/utils/response.d.ts +4 -1
- package/dist/utils/response.js +5 -20
- package/dist/utils/stream.d.ts +9 -0
- package/dist/utils/stream.js +100 -0
- package/package.json +1 -1
- package/dist/telemetry/stream.d.ts +0 -3
- package/dist/telemetry/stream.js +0 -58
package/README.md
CHANGED
|
@@ -32,13 +32,13 @@ bun install @hebo-ai/gateway
|
|
|
32
32
|
- Quickstart
|
|
33
33
|
- [Setup A Gateway Instance](#setup-a-gateway-instance) | [Mount Route Handlers](#mount-route-handlers) | [Call the Gateway](#call-the-gateway)
|
|
34
34
|
- Configuration Reference
|
|
35
|
-
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [Observability](#observability)
|
|
35
|
+
- [Providers](#providers) | [Models](#models) | [Hooks](#hooks) | [Logger](#logger-settings) | [Observability](#observability) | [Timeouts](#timeout-settings)
|
|
36
36
|
- Framework Support
|
|
37
37
|
- [ElysiaJS](#elysiajs) | [Hono](#hono) | [Next.js](#nextjs) | [TanStack Start](#tanstack-start)
|
|
38
38
|
- Runtime Support
|
|
39
39
|
- [Vercel Edge](#vercel-edge) | [Cloudflare Workers](#cloudflare-workers) | [Deno Deploy](#deno-deploy) | [AWS Lambda](#aws-lambda)
|
|
40
40
|
- OpenAI Extensions
|
|
41
|
-
- [Reasoning](#reasoning) | [Prompt Caching](#prompt-caching)
|
|
41
|
+
- [Reasoning](#reasoning) | [Service Tier](#service-tier) | [Prompt Caching](#prompt-caching)
|
|
42
42
|
- Advanced Usage
|
|
43
43
|
- [Passing Framework State to Hooks](#passing-framework-state-to-hooks) | [Selective Route Mounting](#selective-route-mounting) | [Low-level Schemas & Converters](#low-level-schemas--converters)
|
|
44
44
|
|
|
@@ -342,9 +342,9 @@ const gw = gateway({
|
|
|
342
342
|
* @returns Modified result, or undefined to keep original.
|
|
343
343
|
*/
|
|
344
344
|
after: async (ctx: {
|
|
345
|
-
result: ChatCompletions |
|
|
345
|
+
result: ChatCompletions | ChatCompletionsStream | Embeddings;
|
|
346
346
|
}): Promise<
|
|
347
|
-
ChatCompletions |
|
|
347
|
+
ChatCompletions | ChatCompletionsStream | Embeddings | void
|
|
348
348
|
> => {
|
|
349
349
|
// Example Use Cases:
|
|
350
350
|
// - Transform result
|
|
@@ -561,6 +561,25 @@ Advanced models (like Anthropic Claude 3.7 or Gemini 3) surface structured reaso
|
|
|
561
561
|
|
|
562
562
|
For **Gemini 3** models, returning the thought signature via `extra_content` is mandatory to resume the chain-of-thought; failing to do so may result in errors or degraded performance.
|
|
563
563
|
|
|
564
|
+
### Service Tier
|
|
565
|
+
|
|
566
|
+
The chat completions endpoint accepts a provider-agnostic `service_tier` extension:
|
|
567
|
+
|
|
568
|
+
- `auto`, `default`, `flex`, `priority`, `scale`
|
|
569
|
+
|
|
570
|
+
Provider-specific mapping:
|
|
571
|
+
|
|
572
|
+
- **OpenAI**: forwards as OpenAI `serviceTier` (no middleware remap).
|
|
573
|
+
- **Groq**: maps to Groq `serviceTier` (`default` -> `on_demand`, `scale`/`priority` -> `performance`).
|
|
574
|
+
- **Google Vertex**: maps to request headers via middleware:
|
|
575
|
+
- `default` -> `x-vertex-ai-llm-request-type: shared`
|
|
576
|
+
- `flex` -> `x-vertex-ai-llm-request-type: shared` + `x-vertex-ai-llm-shared-request-type: flex`
|
|
577
|
+
- `priority` -> `x-vertex-ai-llm-request-type: shared` + `x-vertex-ai-llm-shared-request-type: priority`
|
|
578
|
+
- `scale` -> `x-vertex-ai-llm-request-type: dedicated`
|
|
579
|
+
- **Amazon Bedrock**: maps to Bedrock `serviceTier.type` (`default`, `flex`, `priority`, `reserved`; `scale` -> `reserved`, `auto` -> omitted/default).
|
|
580
|
+
|
|
581
|
+
When available, the resolved value is echoed back on the response as `service_tier`.
|
|
582
|
+
|
|
564
583
|
### Prompt Caching
|
|
565
584
|
|
|
566
585
|
The chat completions endpoint supports both implicit (provider-managed) and explicit prompt caching across OpenAI-compatible providers.
|
|
@@ -737,6 +756,37 @@ const gw = gateway({
|
|
|
737
756
|
|
|
738
757
|
Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
|
|
739
758
|
|
|
759
|
+
### Timeout Settings
|
|
760
|
+
|
|
761
|
+
You can configure request timeouts via the `timeouts` field:
|
|
762
|
+
|
|
763
|
+
```ts
|
|
764
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
765
|
+
|
|
766
|
+
const gw = gateway({
|
|
767
|
+
// ...
|
|
768
|
+
// default timeout is 300_000 (5 minutes).
|
|
769
|
+
// You can set one timeout for all tiers...
|
|
770
|
+
timeouts: 60_000,
|
|
771
|
+
// ...disable timeouts completely:
|
|
772
|
+
// timeouts: null,
|
|
773
|
+
// ...or split by service tier:
|
|
774
|
+
// - normal: all non-flex tiers (set null to disable)
|
|
775
|
+
// - flex: defaults to 3x normal when omitted (set null to disable)
|
|
776
|
+
// timeouts: { normal: 30_000, flex: null },
|
|
777
|
+
});
|
|
778
|
+
```
|
|
779
|
+
|
|
780
|
+
> [!NOTE]
|
|
781
|
+
> **Runtime/engine timeout limits**
|
|
782
|
+
> Runtime-level `fetch()` clients may enforce their own timeouts. Configure those runtime/platform limits in addition to gateway `timeouts`.
|
|
783
|
+
>
|
|
784
|
+
> - Node.js runtimes use Undici: https://github.com/nodejs/undici/issues/1373 (Node.js, Vercel Serverless Functions, AWS Lambda)
|
|
785
|
+
> - Bun context: https://github.com/oven-sh/bun/issues/16682
|
|
786
|
+
>
|
|
787
|
+
> **Provider/service timeout limits**
|
|
788
|
+
> Serverless platforms (e.g. Cloudflare Workers, Vercel Edge/Serverless, AWS Lambda) also enforce their own execution time limits (roughly 25-100s on edge paths, ~300s for streaming, and up to ~900s where configurable).
|
|
789
|
+
|
|
740
790
|
### Passing Framework State to Hooks
|
|
741
791
|
|
|
742
792
|
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
|
@@ -838,7 +888,7 @@ export async function handler(req: Request): Promise<Response> {
|
|
|
838
888
|
}
|
|
839
889
|
```
|
|
840
890
|
|
|
841
|
-
Non-streaming versions are available via `
|
|
891
|
+
Non-streaming versions are available via `toChatCompletionsResponse`. Equivalent schemas and helpers are available in the `embeddings` and `models` endpoints.
|
|
842
892
|
|
|
843
893
|
> [!TIP]
|
|
844
894
|
> Since Zod v4.3 you can generate a JSON Schema from any zod object by calling `z.toJSONSchema(...)`. This is useful for producing OpenAPI documentation from the same source of truth.
|
package/dist/config.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
2
|
import { createDefaultLogger } from "./logger/default";
|
|
3
3
|
import { installAiSdkWarningLogger } from "./telemetry/ai-sdk";
|
|
4
|
-
import { kParsed, } from "./types";
|
|
4
|
+
import { DEFAULT_CHAT_TIMEOUT_MS, kParsed, } from "./types";
|
|
5
5
|
export const parseConfig = (config) => {
|
|
6
6
|
// If it has been parsed before, just return.
|
|
7
7
|
if (kParsed in config)
|
|
@@ -66,9 +66,36 @@ export const parseConfig = (config) => {
|
|
|
66
66
|
hebo: "off",
|
|
67
67
|
};
|
|
68
68
|
installAiSdkWarningLogger(telemetrySignals.gen_ai);
|
|
69
|
+
// Default timeouts
|
|
70
|
+
let normal;
|
|
71
|
+
let flex;
|
|
72
|
+
const t = config.timeouts;
|
|
73
|
+
if (t === null) {
|
|
74
|
+
normal = flex = undefined;
|
|
75
|
+
}
|
|
76
|
+
else if (typeof t === "number") {
|
|
77
|
+
normal = t;
|
|
78
|
+
flex = t * 3;
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
if (t?.normal === null)
|
|
82
|
+
normal = undefined;
|
|
83
|
+
else if (t?.normal === undefined)
|
|
84
|
+
normal = DEFAULT_CHAT_TIMEOUT_MS;
|
|
85
|
+
else
|
|
86
|
+
normal = t.normal;
|
|
87
|
+
if (t?.flex === null)
|
|
88
|
+
flex = undefined;
|
|
89
|
+
else if (t?.flex === undefined)
|
|
90
|
+
flex = normal === undefined ? undefined : normal * 3;
|
|
91
|
+
else
|
|
92
|
+
flex = t.flex;
|
|
93
|
+
}
|
|
94
|
+
const parsedTimeouts = { normal, flex };
|
|
69
95
|
// Return parsed config.
|
|
70
96
|
return {
|
|
71
97
|
...config,
|
|
98
|
+
timeouts: parsedTimeouts,
|
|
72
99
|
telemetry: {
|
|
73
100
|
...config.telemetry,
|
|
74
101
|
enabled: telemetryEnabled,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { SharedV3ProviderOptions, SharedV3ProviderMetadata } from "@ai-sdk/provider";
|
|
2
2
|
import type { GenerateTextResult, StreamTextResult, FinishReason, ToolChoice, ToolSet, ModelMessage, UserContent, LanguageModelUsage, TextStreamPart, ReasoningOutput, AssistantModelMessage, ToolModelMessage, UserModelMessage } from "ai";
|
|
3
3
|
import { Output } from "ai";
|
|
4
|
-
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
5
|
-
import {
|
|
4
|
+
import type { ChatCompletionsToolCall, ChatCompletionsTool, ChatCompletionsToolChoice, ChatCompletionsStream, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsUserMessage, ChatCompletionsAssistantMessage, ChatCompletionsToolMessage, ChatCompletionsFinishReason, ChatCompletionsUsage, ChatCompletionsInputs, ChatCompletions, ChatCompletionsChunk, ChatCompletionsReasoningDetail } from "./schema";
|
|
5
|
+
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
6
6
|
export type TextCallOptions = {
|
|
7
7
|
messages: ModelMessage[];
|
|
8
8
|
tools?: ToolSet;
|
|
@@ -31,10 +31,10 @@ export declare const convertToToolChoiceOptions: (toolChoice: ChatCompletionsToo
|
|
|
31
31
|
};
|
|
32
32
|
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
33
33
|
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
34
|
-
export declare function toChatCompletionsStream
|
|
34
|
+
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ChatCompletionsStream;
|
|
35
35
|
export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
36
|
-
export declare class
|
|
37
|
-
constructor(model: string
|
|
36
|
+
export declare class ChatCompletionsTransformStream extends TransformStream<TextStreamPart<ToolSet>, SseFrame<ChatCompletionsChunk> | SseErrorFrame> {
|
|
37
|
+
constructor(model: string);
|
|
38
38
|
}
|
|
39
39
|
export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
|
|
40
40
|
export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { Output, jsonSchema, tool } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { GatewayError } from "../../errors/gateway";
|
|
4
|
-
import { OpenAIError, toOpenAIError } from "../../errors/openai";
|
|
5
4
|
import { toResponse } from "../../utils/response";
|
|
6
5
|
import { parseDataUrl } from "../../utils/url";
|
|
7
6
|
// --- Request Flow ---
|
|
@@ -379,19 +378,20 @@ export function toChatCompletions(result, model) {
|
|
|
379
378
|
],
|
|
380
379
|
usage: result.totalUsage ? toChatCompletionsUsage(result.totalUsage) : null,
|
|
381
380
|
provider_metadata: result.providerMetadata,
|
|
381
|
+
service_tier: resolveResponseServiceTier(result.providerMetadata),
|
|
382
382
|
};
|
|
383
383
|
}
|
|
384
384
|
export function toChatCompletionsResponse(result, model, responseInit) {
|
|
385
385
|
return toResponse(toChatCompletions(result, model), responseInit);
|
|
386
386
|
}
|
|
387
|
-
export function toChatCompletionsStream(result, model
|
|
388
|
-
return result.fullStream.pipeThrough(new
|
|
387
|
+
export function toChatCompletionsStream(result, model) {
|
|
388
|
+
return result.fullStream.pipeThrough(new ChatCompletionsTransformStream(model));
|
|
389
389
|
}
|
|
390
390
|
export function toChatCompletionsStreamResponse(result, model, responseInit) {
|
|
391
|
-
return toResponse(toChatCompletionsStream(result, model
|
|
391
|
+
return toResponse(toChatCompletionsStream(result, model), responseInit);
|
|
392
392
|
}
|
|
393
|
-
export class
|
|
394
|
-
constructor(model
|
|
393
|
+
export class ChatCompletionsTransformStream extends TransformStream {
|
|
394
|
+
constructor(model) {
|
|
395
395
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
396
396
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
397
397
|
let toolCallIndexCounter = 0;
|
|
@@ -402,18 +402,21 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
402
402
|
delta.extra_content = provider_metadata;
|
|
403
403
|
}
|
|
404
404
|
return {
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
405
|
+
data: {
|
|
406
|
+
id: streamId,
|
|
407
|
+
object: "chat.completion.chunk",
|
|
408
|
+
created: creationTime,
|
|
409
|
+
model,
|
|
410
|
+
choices: [
|
|
411
|
+
{
|
|
412
|
+
index: 0,
|
|
413
|
+
delta,
|
|
414
|
+
finish_reason: finish_reason ?? null,
|
|
415
|
+
},
|
|
416
|
+
],
|
|
417
|
+
usage: usage ?? null,
|
|
418
|
+
service_tier: resolveResponseServiceTier(provider_metadata),
|
|
419
|
+
},
|
|
417
420
|
};
|
|
418
421
|
};
|
|
419
422
|
super({
|
|
@@ -459,23 +462,56 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
459
462
|
break;
|
|
460
463
|
}
|
|
461
464
|
case "error": {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
}
|
|
466
|
-
else if (part.error instanceof Error) {
|
|
467
|
-
err = part.error;
|
|
468
|
-
}
|
|
469
|
-
else {
|
|
470
|
-
err = new Error(String(part.error));
|
|
471
|
-
}
|
|
472
|
-
controller.enqueue(err);
|
|
465
|
+
controller.enqueue({
|
|
466
|
+
data: part.error instanceof Error ? part.error : new Error(String(part.error)),
|
|
467
|
+
});
|
|
473
468
|
}
|
|
474
469
|
}
|
|
475
470
|
},
|
|
476
471
|
});
|
|
477
472
|
}
|
|
478
473
|
}
|
|
474
|
+
/**
 * Lookup table from provider-reported tier / traffic-type strings (already
 * trimmed and lower-cased) to the gateway's provider-agnostic `service_tier`
 * values: "auto", "default", "flex", "priority", "scale".
 *
 * A Map is used rather than a plain object so that inherited keys such as
 * "constructor" or "toString" can never be mistaken for a valid tier.
 */
const SERVICE_TIER_ALIASES = new Map([
    ["traffic_type_unspecified", "auto"],
    ["auto", "auto"],
    ["default", "default"],
    ["on_demand", "default"],
    ["on-demand", "default"],
    ["shared", "default"],
    ["on_demand_flex", "flex"],
    ["flex", "flex"],
    ["on_demand_priority", "priority"],
    ["priority", "priority"],
    ["performance", "priority"],
    ["provisioned_throughput", "scale"],
    ["scale", "scale"],
    ["reserved", "scale"],
    ["dedicated", "scale"],
    ["provisioned", "scale"],
    ["throughput", "scale"],
]);
/**
 * Derives the response-level `service_tier` from provider metadata.
 *
 * Each provider entry is inspected in iteration order. For every entry the
 * `service_tier` field is tried first, then `usage_metadata.traffic_type`.
 * The first value that maps to a known tier wins.
 *
 * @param providerMetadata Record of provider id -> metadata record; may be
 *   undefined or null.
 * @returns The normalized tier, or undefined when nothing recognisable is present.
 */
function resolveResponseServiceTier(providerMetadata) {
    if (!providerMetadata)
        return undefined;
    for (const metadata of Object.values(providerMetadata)) {
        // Skip malformed entries instead of throwing on property access.
        if (metadata === null || typeof metadata !== "object")
            continue;
        // Parse each candidate separately so that an unrecognised `service_tier`
        // does not mask a usable `usage_metadata.traffic_type`.
        const tier = parseReturnedServiceTier(metadata["service_tier"]) ??
            parseReturnedServiceTier(metadata["usage_metadata"]?.["traffic_type"]);
        if (tier)
            return tier;
    }
    return undefined;
}
/**
 * Normalizes a single provider-reported tier value.
 *
 * @param value Raw value taken from provider metadata (any type).
 * @returns One of "auto" | "default" | "flex" | "priority" | "scale", or
 *   undefined when the value is not a string or is not a known alias.
 */
function parseReturnedServiceTier(value) {
    if (typeof value !== "string")
        return undefined;
    // Matching is case-insensitive and ignores surrounding whitespace.
    return SERVICE_TIER_ALIASES.get(value.trim().toLowerCase());
}
|
|
479
515
|
export const toChatCompletionsAssistantMessage = (result) => {
|
|
480
516
|
const message = {
|
|
481
517
|
role: "assistant",
|
|
@@ -13,7 +13,7 @@ import { getChatRequestAttributes, getChatResponseAttributes } from "./otel";
|
|
|
13
13
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
14
14
|
export const chatCompletions = (config) => {
|
|
15
15
|
const hooks = config.hooks;
|
|
16
|
-
const handler = async (ctx) => {
|
|
16
|
+
const handler = async (ctx, cfg) => {
|
|
17
17
|
const start = performance.now();
|
|
18
18
|
ctx.operation = "chat";
|
|
19
19
|
addSpanEvent("hebo.handler.started");
|
|
@@ -63,7 +63,7 @@ export const chatCompletions = (config) => {
|
|
|
63
63
|
ctx.resolvedProviderId = languageModel.provider;
|
|
64
64
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
65
65
|
addSpanEvent("hebo.provider.resolved");
|
|
66
|
-
const genAiSignalLevel =
|
|
66
|
+
const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
|
|
67
67
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
68
68
|
setSpanAttributes(genAiGeneralAttrs);
|
|
69
69
|
// Convert inputs to AI SDK call options.
|
|
@@ -88,7 +88,7 @@ export const chatCompletions = (config) => {
|
|
|
88
88
|
headers: prepareForwardHeaders(ctx.request),
|
|
89
89
|
abortSignal: ctx.request.signal,
|
|
90
90
|
timeout: {
|
|
91
|
-
totalMs:
|
|
91
|
+
totalMs: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
|
|
92
92
|
},
|
|
93
93
|
onAbort: () => {
|
|
94
94
|
throw new DOMException("The operation was aborted.", "AbortError");
|
|
@@ -122,7 +122,7 @@ export const chatCompletions = (config) => {
|
|
|
122
122
|
model: languageModelWithMiddleware,
|
|
123
123
|
headers: prepareForwardHeaders(ctx.request),
|
|
124
124
|
abortSignal: ctx.request.signal,
|
|
125
|
-
timeout:
|
|
125
|
+
timeout: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
|
|
126
126
|
experimental_include: {
|
|
127
127
|
requestBody: false,
|
|
128
128
|
responseBody: false,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Attributes } from "@opentelemetry/api";
|
|
2
2
|
import type { ChatCompletions, ChatCompletionsBody } from "./schema";
|
|
3
3
|
import { type TelemetrySignalLevel } from "../../types";
|
|
4
|
-
export declare const getChatRequestAttributes: (
|
|
4
|
+
export declare const getChatRequestAttributes: (body: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
5
|
export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -99,31 +99,32 @@ const toMessageParts = (message) => {
|
|
|
99
99
|
throw new Error(`Unhandled content part type: ${message.role}`);
|
|
100
100
|
}
|
|
101
101
|
};
|
|
102
|
-
export const getChatRequestAttributes = (
|
|
102
|
+
export const getChatRequestAttributes = (body, signalLevel) => {
|
|
103
103
|
if (!signalLevel || signalLevel === "off")
|
|
104
104
|
return {};
|
|
105
105
|
const attrs = {};
|
|
106
|
-
if (
|
|
107
|
-
Object.assign(attrs, { "gen_ai.request.seed":
|
|
106
|
+
if (body.seed !== undefined) {
|
|
107
|
+
Object.assign(attrs, { "gen_ai.request.seed": body.seed });
|
|
108
108
|
}
|
|
109
109
|
if (signalLevel !== "required") {
|
|
110
110
|
Object.assign(attrs, {
|
|
111
111
|
// FUTURE: add reasoning info
|
|
112
|
-
"gen_ai.request.stream":
|
|
113
|
-
"gen_ai.request.
|
|
114
|
-
"gen_ai.request.
|
|
115
|
-
"gen_ai.request.
|
|
116
|
-
"gen_ai.request.
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
112
|
+
"gen_ai.request.stream": body.stream,
|
|
113
|
+
"gen_ai.request.service_tier": body.service_tier,
|
|
114
|
+
"gen_ai.request.frequency_penalty": body.frequency_penalty,
|
|
115
|
+
"gen_ai.request.max_tokens": body.max_completion_tokens,
|
|
116
|
+
"gen_ai.request.presence_penalty": body.presence_penalty,
|
|
117
|
+
"gen_ai.request.stop_sequences": body.stop
|
|
118
|
+
? Array.isArray(body.stop)
|
|
119
|
+
? body.stop
|
|
120
|
+
: [body.stop]
|
|
120
121
|
: undefined,
|
|
121
|
-
"gen_ai.request.temperature":
|
|
122
|
-
"gen_ai.request.top_p":
|
|
122
|
+
"gen_ai.request.temperature": body.temperature,
|
|
123
|
+
"gen_ai.request.top_p": body.top_p,
|
|
123
124
|
});
|
|
124
|
-
if (
|
|
125
|
-
for (const key in
|
|
126
|
-
attrs[`gen_ai.request.metadata.${key}`] =
|
|
125
|
+
if (body.metadata) {
|
|
126
|
+
for (const key in body.metadata) {
|
|
127
|
+
attrs[`gen_ai.request.metadata.${key}`] = body.metadata[key];
|
|
127
128
|
}
|
|
128
129
|
}
|
|
129
130
|
}
|
|
@@ -134,10 +135,10 @@ export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
|
134
135
|
// "gen_ai.system_instructions": inputs.messages
|
|
135
136
|
// .filter((m) => m.role === "system")
|
|
136
137
|
// .map((m) => JSON.stringify(toTextPart(m.content))),
|
|
137
|
-
"gen_ai.input.messages":
|
|
138
|
+
"gen_ai.input.messages": body.messages
|
|
138
139
|
//.filter((m) => m.role !== "system")
|
|
139
140
|
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
140
|
-
"gen_ai.tool.definitions":
|
|
141
|
+
"gen_ai.tool.definitions": body.tools?.map((toolDefinition) => JSON.stringify(toolDefinition)),
|
|
141
142
|
});
|
|
142
143
|
}
|
|
143
144
|
return attrs;
|
|
@@ -151,6 +152,7 @@ export const getChatResponseAttributes = (completions, signalLevel) => {
|
|
|
151
152
|
if (signalLevel !== "required") {
|
|
152
153
|
Object.assign(attrs, {
|
|
153
154
|
"gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
|
|
155
|
+
"gen_ai.response.service_tier": completions.service_tier,
|
|
154
156
|
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
155
157
|
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
156
158
|
"gen_ai.usage.cache_read.input_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
import type { SseErrorFrame, SseFrame } from "../../utils/stream";
|
|
2
3
|
export declare const ChatCompletionsCacheControlSchema: z.ZodObject<{
|
|
3
4
|
type: z.ZodLiteral<"ephemeral">;
|
|
4
5
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -469,6 +470,14 @@ export declare const ChatCompletionsResponseFormatSchema: z.ZodDiscriminatedUnio
|
|
|
469
470
|
export type ChatCompletionsResponseFormat = z.infer<typeof ChatCompletionsResponseFormatSchema>;
|
|
470
471
|
export declare const ChatCompletionsMetadataSchema: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
471
472
|
export type ChatCompletionsMetadata = z.infer<typeof ChatCompletionsMetadataSchema>;
|
|
473
|
+
export declare const ChatCompletionsServiceTierSchema: z.ZodEnum<{
|
|
474
|
+
auto: "auto";
|
|
475
|
+
default: "default";
|
|
476
|
+
flex: "flex";
|
|
477
|
+
scale: "scale";
|
|
478
|
+
priority: "priority";
|
|
479
|
+
}>;
|
|
480
|
+
export type ChatCompletionsServiceTier = z.infer<typeof ChatCompletionsServiceTierSchema>;
|
|
472
481
|
declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
473
482
|
messages: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
474
483
|
role: z.ZodLiteral<"system">;
|
|
@@ -650,6 +659,13 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
650
659
|
medium: "medium";
|
|
651
660
|
xhigh: "xhigh";
|
|
652
661
|
}>>;
|
|
662
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
663
|
+
auto: "auto";
|
|
664
|
+
default: "default";
|
|
665
|
+
flex: "flex";
|
|
666
|
+
scale: "scale";
|
|
667
|
+
priority: "priority";
|
|
668
|
+
}>>;
|
|
653
669
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
654
670
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
655
671
|
in_memory: "in_memory";
|
|
@@ -856,6 +872,13 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
856
872
|
medium: "medium";
|
|
857
873
|
xhigh: "xhigh";
|
|
858
874
|
}>>;
|
|
875
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
876
|
+
auto: "auto";
|
|
877
|
+
default: "default";
|
|
878
|
+
flex: "flex";
|
|
879
|
+
scale: "scale";
|
|
880
|
+
priority: "priority";
|
|
881
|
+
}>>;
|
|
859
882
|
prompt_cache_key: z.ZodOptional<z.ZodString>;
|
|
860
883
|
prompt_cache_retention: z.ZodOptional<z.ZodEnum<{
|
|
861
884
|
in_memory: "in_memory";
|
|
@@ -884,9 +907,9 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
884
907
|
}, z.core.$loose>;
|
|
885
908
|
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
886
909
|
export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
|
|
887
|
-
tool_calls: "tool_calls";
|
|
888
910
|
stop: "stop";
|
|
889
911
|
length: "length";
|
|
912
|
+
tool_calls: "tool_calls";
|
|
890
913
|
content_filter: "content_filter";
|
|
891
914
|
}>;
|
|
892
915
|
export type ChatCompletionsFinishReason = z.infer<typeof ChatCompletionsFinishReasonSchema>;
|
|
@@ -930,9 +953,9 @@ export declare const ChatCompletionsChoiceSchema: z.ZodObject<{
|
|
|
930
953
|
}, z.core.$strip>>;
|
|
931
954
|
}, z.core.$strip>;
|
|
932
955
|
finish_reason: z.ZodEnum<{
|
|
933
|
-
tool_calls: "tool_calls";
|
|
934
956
|
stop: "stop";
|
|
935
957
|
length: "length";
|
|
958
|
+
tool_calls: "tool_calls";
|
|
936
959
|
content_filter: "content_filter";
|
|
937
960
|
}>;
|
|
938
961
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -996,9 +1019,9 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
996
1019
|
}, z.core.$strip>>;
|
|
997
1020
|
}, z.core.$strip>;
|
|
998
1021
|
finish_reason: z.ZodEnum<{
|
|
999
|
-
tool_calls: "tool_calls";
|
|
1000
1022
|
stop: "stop";
|
|
1001
1023
|
length: "length";
|
|
1024
|
+
tool_calls: "tool_calls";
|
|
1002
1025
|
content_filter: "content_filter";
|
|
1003
1026
|
}>;
|
|
1004
1027
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1015,6 +1038,13 @@ export declare const ChatCompletionsSchema: z.ZodObject<{
|
|
|
1015
1038
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1016
1039
|
}, z.core.$strip>>;
|
|
1017
1040
|
}, z.core.$strip>>;
|
|
1041
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
1042
|
+
auto: "auto";
|
|
1043
|
+
default: "default";
|
|
1044
|
+
flex: "flex";
|
|
1045
|
+
scale: "scale";
|
|
1046
|
+
priority: "priority";
|
|
1047
|
+
}>>;
|
|
1018
1048
|
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1019
1049
|
}, z.core.$strip>;
|
|
1020
1050
|
export type ChatCompletions = z.infer<typeof ChatCompletionsSchema>;
|
|
@@ -1109,9 +1139,9 @@ export declare const ChatCompletionsChoiceDeltaSchema: z.ZodObject<{
|
|
|
1109
1139
|
}, z.core.$strip>>>;
|
|
1110
1140
|
}, z.core.$strip>;
|
|
1111
1141
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1112
|
-
tool_calls: "tool_calls";
|
|
1113
1142
|
stop: "stop";
|
|
1114
1143
|
length: "length";
|
|
1144
|
+
tool_calls: "tool_calls";
|
|
1115
1145
|
content_filter: "content_filter";
|
|
1116
1146
|
}>>;
|
|
1117
1147
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1163,9 +1193,9 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1163
1193
|
}, z.core.$strip>>>;
|
|
1164
1194
|
}, z.core.$strip>;
|
|
1165
1195
|
finish_reason: z.ZodNullable<z.ZodEnum<{
|
|
1166
|
-
tool_calls: "tool_calls";
|
|
1167
1196
|
stop: "stop";
|
|
1168
1197
|
length: "length";
|
|
1198
|
+
tool_calls: "tool_calls";
|
|
1169
1199
|
content_filter: "content_filter";
|
|
1170
1200
|
}>>;
|
|
1171
1201
|
logprobs: z.ZodOptional<z.ZodUnknown>;
|
|
@@ -1182,7 +1212,15 @@ export declare const ChatCompletionsChunkSchema: z.ZodObject<{
|
|
|
1182
1212
|
cache_write_tokens: z.ZodOptional<z.ZodInt>;
|
|
1183
1213
|
}, z.core.$strip>>;
|
|
1184
1214
|
}, z.core.$strip>>;
|
|
1215
|
+
service_tier: z.ZodOptional<z.ZodEnum<{
|
|
1216
|
+
auto: "auto";
|
|
1217
|
+
default: "default";
|
|
1218
|
+
flex: "flex";
|
|
1219
|
+
scale: "scale";
|
|
1220
|
+
priority: "priority";
|
|
1221
|
+
}>>;
|
|
1185
1222
|
provider_metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1186
1223
|
}, z.core.$strip>;
|
|
1187
1224
|
export type ChatCompletionsChunk = z.infer<typeof ChatCompletionsChunkSchema>;
|
|
1225
|
+
export type ChatCompletionsStream = ReadableStream<SseFrame<ChatCompletionsChunk> | SseErrorFrame>;
|
|
1188
1226
|
export {};
|
|
@@ -192,6 +192,13 @@ export const ChatCompletionsResponseFormatSchema = z.discriminatedUnion("type",
|
|
|
192
192
|
ChatCompletionsResponseFormatTextSchema,
|
|
193
193
|
]);
|
|
194
194
|
export const ChatCompletionsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
195
|
+
export const ChatCompletionsServiceTierSchema = z.enum([
|
|
196
|
+
"auto",
|
|
197
|
+
"default",
|
|
198
|
+
"flex",
|
|
199
|
+
"scale",
|
|
200
|
+
"priority",
|
|
201
|
+
]);
|
|
195
202
|
const ChatCompletionsInputsSchema = z.object({
|
|
196
203
|
messages: z.array(ChatCompletionsMessageSchema),
|
|
197
204
|
tools: z.array(ChatCompletionsToolSchema).optional(),
|
|
@@ -207,6 +214,7 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
207
214
|
metadata: ChatCompletionsMetadataSchema.optional(),
|
|
208
215
|
response_format: ChatCompletionsResponseFormatSchema.optional(),
|
|
209
216
|
reasoning_effort: ChatCompletionsReasoningEffortSchema.optional(),
|
|
217
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
210
218
|
prompt_cache_key: z.string().optional(),
|
|
211
219
|
prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
|
|
212
220
|
// Extension origin: OpenRouter/Vercel/Anthropic
|
|
@@ -264,6 +272,7 @@ export const ChatCompletionsSchema = z.object({
|
|
|
264
272
|
model: z.string(),
|
|
265
273
|
choices: z.array(ChatCompletionsChoiceSchema),
|
|
266
274
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
275
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
267
276
|
// Extension origin: Vercel AI Gateway
|
|
268
277
|
provider_metadata: z
|
|
269
278
|
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
@@ -290,6 +299,7 @@ export const ChatCompletionsChunkSchema = z.object({
|
|
|
290
299
|
model: z.string(),
|
|
291
300
|
choices: z.array(ChatCompletionsChoiceDeltaSchema),
|
|
292
301
|
usage: ChatCompletionsUsageSchema.nullable(),
|
|
302
|
+
service_tier: ChatCompletionsServiceTierSchema.optional(),
|
|
293
303
|
// Extension origin: Vercel AI Gateway
|
|
294
304
|
provider_metadata: z
|
|
295
305
|
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
@@ -13,7 +13,7 @@ import { getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes } from
|
|
|
13
13
|
import { EmbeddingsBodySchema } from "./schema";
|
|
14
14
|
export const embeddings = (config) => {
|
|
15
15
|
const hooks = config.hooks;
|
|
16
|
-
const handler = async (ctx) => {
|
|
16
|
+
const handler = async (ctx, cfg) => {
|
|
17
17
|
const start = performance.now();
|
|
18
18
|
ctx.operation = "embeddings";
|
|
19
19
|
addSpanEvent("hebo.handler.started");
|
|
@@ -62,7 +62,7 @@ export const embeddings = (config) => {
|
|
|
62
62
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
63
63
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
64
|
addSpanEvent("hebo.provider.resolved");
|
|
65
|
-
const genAiSignalLevel =
|
|
65
|
+
const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
|
|
66
66
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
67
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
68
|
// Convert inputs to AI SDK call options.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
-
import type { Embeddings,
|
|
2
|
+
import type { Embeddings, EmbeddingsBody } from "./schema";
|
|
3
3
|
import { type TelemetrySignalLevel } from "../../types";
|
|
4
|
-
export declare const getEmbeddingsRequestAttributes: (
|
|
4
|
+
export declare const getEmbeddingsRequestAttributes: (body: EmbeddingsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
5
|
export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import {} from "../../types";
|
|
2
|
-
export const getEmbeddingsRequestAttributes = (
|
|
2
|
+
export const getEmbeddingsRequestAttributes = (body, signalLevel) => {
|
|
3
3
|
if (!signalLevel || signalLevel === "off")
|
|
4
4
|
return {};
|
|
5
5
|
const attrs = {};
|
|
6
6
|
if (signalLevel !== "required") {
|
|
7
7
|
Object.assign(attrs, {
|
|
8
|
-
"gen_ai.embeddings.dimension.count":
|
|
8
|
+
"gen_ai.embeddings.dimension.count": body.dimensions,
|
|
9
9
|
});
|
|
10
|
-
if (
|
|
11
|
-
for (const key in
|
|
12
|
-
attrs[`gen_ai.request.metadata.${key}`] =
|
|
10
|
+
if (body.metadata) {
|
|
11
|
+
for (const key in body.metadata) {
|
|
12
|
+
attrs[`gen_ai.request.metadata.${key}`] = body.metadata[key];
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -2,8 +2,8 @@ import { GatewayError } from "../../errors/gateway";
|
|
|
2
2
|
import { winterCgHandler } from "../../lifecycle";
|
|
3
3
|
import { toModels, toModel } from "./converters";
|
|
4
4
|
export const models = (config) => {
|
|
5
|
-
//
|
|
6
|
-
const handler = async (ctx) => {
|
|
5
|
+
// eslint-disable-next-line require-await
|
|
6
|
+
const handler = async (ctx, _cfg) => {
|
|
7
7
|
ctx.operation = "models";
|
|
8
8
|
if (!ctx.request || ctx.request.method !== "GET") {
|
|
9
9
|
throw new GatewayError("Method Not Allowed", 405);
|