@hebo-ai/gateway 0.6.1 → 0.6.2-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/README.md +1 -1
  2. package/dist/endpoints/chat-completions/converters.js +7 -3
  3. package/dist/endpoints/chat-completions/handler.js +2 -0
  4. package/dist/endpoints/chat-completions/otel.js +1 -1
  5. package/dist/endpoints/chat-completions/schema.d.ts +10 -2
  6. package/dist/endpoints/chat-completions/schema.js +11 -1
  7. package/dist/endpoints/embeddings/handler.js +2 -0
  8. package/dist/endpoints/models/converters.js +3 -3
  9. package/dist/lifecycle.js +2 -2
  10. package/dist/logger/default.js +3 -3
  11. package/dist/logger/index.d.ts +2 -5
  12. package/dist/middleware/common.js +1 -0
  13. package/dist/middleware/utils.js +0 -2
  14. package/dist/models/amazon/middleware.js +2 -0
  15. package/dist/models/catalog.js +5 -1
  16. package/dist/models/openai/middleware.js +6 -2
  17. package/dist/providers/registry.js +3 -0
  18. package/dist/telemetry/fetch.js +7 -2
  19. package/dist/telemetry/gen-ai.js +15 -12
  20. package/dist/telemetry/memory.d.ts +1 -1
  21. package/dist/telemetry/memory.js +30 -14
  22. package/dist/telemetry/span.js +1 -1
  23. package/dist/telemetry/stream.js +30 -23
  24. package/dist/utils/env.js +4 -2
  25. package/dist/utils/preset.js +1 -0
  26. package/dist/utils/response.js +3 -1
  27. package/package.json +32 -50
  28. package/src/config.ts +0 -98
  29. package/src/endpoints/chat-completions/converters.test.ts +0 -631
  30. package/src/endpoints/chat-completions/converters.ts +0 -899
  31. package/src/endpoints/chat-completions/handler.test.ts +0 -391
  32. package/src/endpoints/chat-completions/handler.ts +0 -201
  33. package/src/endpoints/chat-completions/index.ts +0 -4
  34. package/src/endpoints/chat-completions/otel.test.ts +0 -315
  35. package/src/endpoints/chat-completions/otel.ts +0 -214
  36. package/src/endpoints/chat-completions/schema.ts +0 -354
  37. package/src/endpoints/embeddings/converters.ts +0 -51
  38. package/src/endpoints/embeddings/handler.test.ts +0 -133
  39. package/src/endpoints/embeddings/handler.ts +0 -137
  40. package/src/endpoints/embeddings/index.ts +0 -4
  41. package/src/endpoints/embeddings/otel.ts +0 -40
  42. package/src/endpoints/embeddings/schema.ts +0 -36
  43. package/src/endpoints/models/converters.ts +0 -56
  44. package/src/endpoints/models/handler.test.ts +0 -122
  45. package/src/endpoints/models/handler.ts +0 -37
  46. package/src/endpoints/models/index.ts +0 -3
  47. package/src/endpoints/models/schema.ts +0 -37
  48. package/src/errors/ai-sdk.ts +0 -99
  49. package/src/errors/gateway.ts +0 -17
  50. package/src/errors/openai.ts +0 -57
  51. package/src/errors/utils.ts +0 -47
  52. package/src/gateway.ts +0 -50
  53. package/src/index.ts +0 -19
  54. package/src/lifecycle.ts +0 -135
  55. package/src/logger/default.ts +0 -105
  56. package/src/logger/index.ts +0 -42
  57. package/src/middleware/common.test.ts +0 -215
  58. package/src/middleware/common.ts +0 -163
  59. package/src/middleware/debug.ts +0 -37
  60. package/src/middleware/matcher.ts +0 -161
  61. package/src/middleware/utils.ts +0 -34
  62. package/src/models/amazon/index.ts +0 -2
  63. package/src/models/amazon/middleware.test.ts +0 -133
  64. package/src/models/amazon/middleware.ts +0 -79
  65. package/src/models/amazon/presets.ts +0 -104
  66. package/src/models/anthropic/index.ts +0 -2
  67. package/src/models/anthropic/middleware.test.ts +0 -643
  68. package/src/models/anthropic/middleware.ts +0 -148
  69. package/src/models/anthropic/presets.ts +0 -191
  70. package/src/models/catalog.ts +0 -13
  71. package/src/models/cohere/index.ts +0 -2
  72. package/src/models/cohere/middleware.test.ts +0 -138
  73. package/src/models/cohere/middleware.ts +0 -76
  74. package/src/models/cohere/presets.ts +0 -186
  75. package/src/models/google/index.ts +0 -2
  76. package/src/models/google/middleware.test.ts +0 -298
  77. package/src/models/google/middleware.ts +0 -137
  78. package/src/models/google/presets.ts +0 -118
  79. package/src/models/meta/index.ts +0 -1
  80. package/src/models/meta/presets.ts +0 -143
  81. package/src/models/openai/index.ts +0 -2
  82. package/src/models/openai/middleware.test.ts +0 -189
  83. package/src/models/openai/middleware.ts +0 -103
  84. package/src/models/openai/presets.ts +0 -280
  85. package/src/models/types.ts +0 -114
  86. package/src/models/voyage/index.ts +0 -2
  87. package/src/models/voyage/middleware.test.ts +0 -28
  88. package/src/models/voyage/middleware.ts +0 -23
  89. package/src/models/voyage/presets.ts +0 -126
  90. package/src/providers/anthropic/canonical.ts +0 -17
  91. package/src/providers/anthropic/index.ts +0 -1
  92. package/src/providers/bedrock/canonical.ts +0 -87
  93. package/src/providers/bedrock/index.ts +0 -2
  94. package/src/providers/bedrock/middleware.test.ts +0 -303
  95. package/src/providers/bedrock/middleware.ts +0 -128
  96. package/src/providers/cohere/canonical.ts +0 -26
  97. package/src/providers/cohere/index.ts +0 -1
  98. package/src/providers/groq/canonical.ts +0 -21
  99. package/src/providers/groq/index.ts +0 -1
  100. package/src/providers/openai/canonical.ts +0 -16
  101. package/src/providers/openai/index.ts +0 -1
  102. package/src/providers/registry.test.ts +0 -44
  103. package/src/providers/registry.ts +0 -165
  104. package/src/providers/types.ts +0 -20
  105. package/src/providers/vertex/canonical.ts +0 -17
  106. package/src/providers/vertex/index.ts +0 -1
  107. package/src/providers/voyage/canonical.ts +0 -16
  108. package/src/providers/voyage/index.ts +0 -1
  109. package/src/telemetry/ai-sdk.ts +0 -46
  110. package/src/telemetry/baggage.ts +0 -27
  111. package/src/telemetry/fetch.ts +0 -62
  112. package/src/telemetry/gen-ai.ts +0 -113
  113. package/src/telemetry/http.ts +0 -62
  114. package/src/telemetry/index.ts +0 -1
  115. package/src/telemetry/memory.ts +0 -36
  116. package/src/telemetry/span.ts +0 -85
  117. package/src/telemetry/stream.ts +0 -64
  118. package/src/types.ts +0 -223
  119. package/src/utils/env.ts +0 -7
  120. package/src/utils/headers.ts +0 -27
  121. package/src/utils/preset.ts +0 -65
  122. package/src/utils/request.test.ts +0 -75
  123. package/src/utils/request.ts +0 -52
  124. package/src/utils/response.ts +0 -84
  125. package/src/utils/url.ts +0 -26
package/README.md CHANGED
@@ -569,7 +569,7 @@ Accepted request fields:
569
569
 
570
570
  - `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
571
571
  - `cache_control` (OpenRouter / Vercel / Claude style)
572
- - `cached_content` (Gemini style)
572
+ - `extra_body { google: { cached_content } }` (Gemini style)
573
573
 
574
574
  ```json
575
575
  {
@@ -6,9 +6,9 @@ import { toResponse } from "../../utils/response";
6
6
  import { parseDataUrl } from "../../utils/url";
7
7
  // --- Request Flow ---
8
8
  export function convertToTextCallOptions(params) {
9
- const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, cached_content, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
9
+ const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
10
10
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
11
- Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control));
11
+ Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, extra_body?.google?.cached_content, cache_control));
12
12
  const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
13
13
  return {
14
14
  messages: convertToModelMessages(messages),
@@ -192,7 +192,7 @@ export function fromChatCompletionsContent(content) {
192
192
  return fromFilePart(part.file.data, part.file.media_type, part.file.filename, part.cache_control);
193
193
  case "input_audio":
194
194
  return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`, undefined, part.cache_control);
195
- default: {
195
+ case "text": {
196
196
  const out = {
197
197
  type: "text",
198
198
  text: part.text,
@@ -204,6 +204,8 @@ export function fromChatCompletionsContent(content) {
204
204
  }
205
205
  return out;
206
206
  }
207
+ default:
208
+ throw new Error(`Unhandled content part type: ${part.type}`);
207
209
  }
208
210
  });
209
211
  }
@@ -305,6 +307,7 @@ function parseToolResult(content) {
305
307
  }
306
308
  function parseJsonOrText(content) {
307
309
  try {
310
+ // oxlint-disable-next-line no-unsafe-assignment
308
311
  return { type: "json", value: JSON.parse(content) };
309
312
  }
310
313
  catch {
@@ -414,6 +417,7 @@ export class ChatCompletionsStream extends TransformStream {
414
417
  };
415
418
  super({
416
419
  transform(part, controller) {
420
+ // oxlint-disable-next-line switch-exhaustiveness-check
417
421
  switch (part.type) {
418
422
  case "text-delta": {
419
423
  controller.enqueue(createChunk({ role: "assistant", content: part.text }, part.providerMetadata));
@@ -23,6 +23,7 @@ export const chatCompletions = (config) => {
23
23
  }
24
24
  // Parse + validate input.
25
25
  try {
26
+ // oxlint-disable-next-line no-unsafe-assignment
26
27
  ctx.body = await ctx.request.json();
27
28
  }
28
29
  catch {
@@ -66,6 +67,7 @@ export const chatCompletions = (config) => {
66
67
  const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
67
68
  setSpanAttributes(genAiGeneralAttrs);
68
69
  // Convert inputs to AI SDK call options.
70
+ // oxlint-disable-next-line no-unsafe-argument
69
71
  const textOptions = convertToTextCallOptions(inputs);
70
72
  logger.trace({
71
73
  requestId: ctx.requestId,
@@ -96,7 +96,7 @@ const toMessageParts = (message) => {
96
96
  case "system":
97
97
  return toTextParts(message.content);
98
98
  default:
99
- return [];
99
+ throw new Error(`Unhandled content part type: ${message.role}`);
100
100
  }
101
101
  };
102
102
  export const getChatRequestAttributes = (inputs, signalLevel) => {
@@ -658,7 +658,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
658
658
  in_memory: "in_memory";
659
659
  "24h": "24h";
660
660
  }>>;
661
- cached_content: z.ZodOptional<z.ZodString>;
661
+ extra_body: z.ZodOptional<z.ZodObject<{
662
+ google: z.ZodOptional<z.ZodObject<{
663
+ cached_content: z.ZodOptional<z.ZodString>;
664
+ }, z.core.$strip>>;
665
+ }, z.core.$strip>>;
662
666
  cache_control: z.ZodOptional<z.ZodObject<{
663
667
  type: z.ZodLiteral<"ephemeral">;
664
668
  ttl: z.ZodOptional<z.ZodString>;
@@ -866,7 +870,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
866
870
  in_memory: "in_memory";
867
871
  "24h": "24h";
868
872
  }>>;
869
- cached_content: z.ZodOptional<z.ZodString>;
873
+ extra_body: z.ZodOptional<z.ZodObject<{
874
+ google: z.ZodOptional<z.ZodObject<{
875
+ cached_content: z.ZodOptional<z.ZodString>;
876
+ }, z.core.$strip>>;
877
+ }, z.core.$strip>>;
870
878
  cache_control: z.ZodOptional<z.ZodObject<{
871
879
  type: z.ZodLiteral<"ephemeral">;
872
880
  ttl: z.ZodOptional<z.ZodString>;
@@ -213,7 +213,17 @@ const ChatCompletionsInputsSchema = z.object({
213
213
  prompt_cache_key: z.string().optional(),
214
214
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
215
215
  // Extension origin: Gemini explicit cache handle
216
- cached_content: z.string().optional().meta({ extension: true }),
216
+ // FUTURE: generalize extra_body handling
217
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
218
+ extra_body: z
219
+ .object({
220
+ google: z
221
+ .object({
222
+ cached_content: z.string().optional().meta({ extension: true }),
223
+ })
224
+ .optional(),
225
+ })
226
+ .optional(),
217
227
  // Extension origin: OpenRouter/Vercel/Anthropic
218
228
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
219
229
  // Extension origin: OpenRouter
@@ -23,6 +23,7 @@ export const embeddings = (config) => {
23
23
  }
24
24
  // Parse + validate input.
25
25
  try {
26
+ // oxlint-disable-next-line no-unsafe-assignment
26
27
  ctx.body = await ctx.request.json();
27
28
  }
28
29
  catch {
@@ -65,6 +66,7 @@ export const embeddings = (config) => {
65
66
  const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
66
67
  setSpanAttributes(genAiGeneralAttrs);
67
68
  // Convert inputs to AI SDK call options.
69
+ // oxlint-disable-next-line no-unsafe-argument
68
70
  const embedOptions = convertToEmbedCallOptions(inputs);
69
71
  logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
70
72
  addSpanEvent("hebo.options.prepared");
@@ -12,13 +12,13 @@ export function toModel(id, catalogModel) {
12
12
  id,
13
13
  object: "model",
14
14
  created: createdTimestamp,
15
- owned_by: id.split("/")[0] || "system",
15
+ owned_by: id.split("/")[0] ?? "system",
16
16
  architecture: {
17
- input_modalities: modalities?.input || [],
17
+ input_modalities: modalities?.input ?? [],
18
18
  modality: modalities?.input &&
19
19
  modalities?.output &&
20
20
  `${modalities.input?.[0]}->${modalities.output?.[0]}`,
21
- output_modalities: modalities?.output || [],
21
+ output_modalities: modalities?.output ?? [],
22
22
  },
23
23
  endpoints: providers?.map((provider) => ({
24
24
  tag: provider,
package/dist/lifecycle.js CHANGED
@@ -6,7 +6,7 @@ import { getBaggageAttributes } from "./telemetry/baggage";
6
6
  import { instrumentFetch } from "./telemetry/fetch";
7
7
  import { recordRequestDuration } from "./telemetry/gen-ai";
8
8
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
9
- import { recordV8jsMemory } from "./telemetry/memory";
9
+ import { observeV8jsMemoryMetrics } from "./telemetry/memory";
10
10
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
11
11
  import { wrapStream } from "./telemetry/stream";
12
12
  import { resolveOrCreateRequestId } from "./utils/request";
@@ -17,6 +17,7 @@ export const winterCgHandler = (run, config) => {
17
17
  setSpanTracer(parsedConfig.telemetry?.tracer);
18
18
  setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
19
19
  instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
20
+ observeV8jsMemoryMetrics(parsedConfig.telemetry?.signals?.hebo);
20
21
  }
21
22
  return async (request, state) => {
22
23
  const start = performance.now();
@@ -57,7 +58,6 @@ export const winterCgHandler = (run, config) => {
57
58
  if (ctx.operation === "chat" || ctx.operation === "embeddings") {
58
59
  recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
59
60
  }
60
- recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
61
61
  span.finish();
62
62
  };
63
63
  try {
@@ -37,8 +37,6 @@ function serializeError(err, _seen) {
37
37
  return out;
38
38
  }
39
39
  const buildLogObject = (level, args) => {
40
- if (args.length === 0)
41
- return {};
42
40
  const [first, second] = args;
43
41
  let obj;
44
42
  let err;
@@ -70,7 +68,9 @@ const buildLogObject = (level, args) => {
70
68
  ...obj,
71
69
  };
72
70
  };
73
- const makeLogFn = (level, write) => (...args) => write(JSON.stringify(buildLogObject(level, args)));
71
+ const makeLogFn = (level, write) => (...args) => {
72
+ write(JSON.stringify(buildLogObject(level, args)));
73
+ };
74
74
  export const createDefaultLogger = (config) => {
75
75
  if (config.level === "silent" || getDefaultLogLevel() === "silent") {
76
76
  return { trace: noop, debug: noop, info: noop, warn: noop, error: noop };
@@ -1,8 +1,5 @@
1
- export type LogFn = {
2
- (msg: string): void;
3
- (obj: Record<string, unknown>, msg?: string): void;
4
- (err: Error, msg?: string): void;
5
- };
1
+ export type LogArgs = [msg: string] | [obj: Record<string, unknown>, msg?: string] | [err: Error, msg?: string];
2
+ export type LogFn = (...args: LogArgs) => void;
6
3
  export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
7
4
  export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
8
5
  export type LoggerConfig = {
@@ -1,4 +1,5 @@
1
1
  function snakeToCamel(key) {
2
+ // oxlint-disable-next-line prefer-includes
2
3
  if (key.indexOf("_") === -1)
3
4
  return key;
4
5
  let out = "";
@@ -20,8 +20,6 @@ export function calculateReasoningBudgetFromEffort(effort, maxTokens, minTokens
20
20
  case "max":
21
21
  percentage = 0.95;
22
22
  break;
23
- default:
24
- return 0;
25
23
  }
26
24
  return Math.max(minTokens, Math.floor(maxTokens * percentage));
27
25
  }
@@ -17,6 +17,8 @@ export const novaDimensionsMiddleware = {
17
17
  };
18
18
  function mapNovaEffort(effort) {
19
19
  switch (effort) {
20
+ case "none":
21
+ return;
20
22
  case "minimal":
21
23
  case "low":
22
24
  return "low";
@@ -1,4 +1,8 @@
1
1
  export function defineModelCatalog(...inputs) {
2
2
  const catalogs = inputs.flat().map((input) => (typeof input === "function" ? input() : input));
3
- return Object.assign({}, ...catalogs);
3
+ const out = {};
4
+ for (const catalog of catalogs) {
5
+ Object.assign(out, catalog);
6
+ }
7
+ return out;
4
8
  }
@@ -17,14 +17,18 @@ export const openAIDimensionsMiddleware = {
17
17
  };
18
18
  function mapGptOssReasoningEffort(effort) {
19
19
  switch (effort) {
20
+ case undefined:
21
+ case "none":
22
+ return;
23
+ case "minimal":
24
+ case "low":
25
+ return "low";
20
26
  case "medium":
21
27
  return "medium";
22
28
  case "high":
23
29
  case "xhigh":
24
30
  case "max":
25
31
  return "high";
26
- default:
27
- return "low";
28
32
  }
29
33
  }
30
34
  export const openAIReasoningMiddleware = {
@@ -55,7 +55,9 @@ export const withCanonicalIds = (provider, config = {}) => {
55
55
  };
56
56
  const needsFallbackWrap = stripNamespace || normalizeDelimiters || namespaceSeparator !== "/" || !!prefix || !!postfix;
57
57
  // FUTURE: use embeddingModel instead of textEmbeddingModel once voyage supports it
58
+ // oxlint-disable-next-line unbound-method
58
59
  const languageModel = provider.languageModel;
60
+ // oxlint-disable-next-line unbound-method, no-deprecated
59
61
  const embeddingModel = provider.textEmbeddingModel;
60
62
  const fallbackProvider = needsFallbackWrap
61
63
  ? {
@@ -69,6 +71,7 @@ export const withCanonicalIds = (provider, config = {}) => {
69
71
  embeddingModel: (id) => {
70
72
  const mapped = applyFallbackAffixes(normalizeId(id));
71
73
  logger.debug(`[canonical] mapped ${id} to ${mapped}`);
74
+ // oxlint-disable-next-line no-deprecated
72
75
  return embeddingModel(mapped);
73
76
  },
74
77
  }
@@ -16,8 +16,13 @@ const getRequestAttributes = (input, init) => {
16
16
  attrs["url.full"] = input.url;
17
17
  return attrs;
18
18
  };
19
- const shouldTraceFetch = (init) => typeof init?.headers?.["user-agent"] === "string" &&
20
- init.headers["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
19
+ const shouldTraceFetch = (init) => {
20
+ const h = init?.headers;
21
+ if (!h || typeof h !== "object" || Array.isArray(h) || h instanceof Headers)
22
+ return false;
23
+ const ua = h["user-agent"];
24
+ return typeof ua === "string" && ua.includes("ai-sdk/provider-utils");
25
+ };
21
26
  const otelFetch = (input, init) => {
22
27
  const original = g[ORIGINAL_FETCH_KEY];
23
28
  if (!fetchTracingEnabled)
@@ -1,7 +1,10 @@
1
1
  import { metrics } from "@opentelemetry/api";
2
2
  import { STATUS_CODE } from "../errors/utils";
3
- const meter = metrics.getMeter("@hebo/gateway");
4
- const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
3
+ const getMeter = () => metrics.getMeter("@hebo/gateway");
4
+ let requestDurationHistogram;
5
+ let timePerOutputTokenHistogram;
6
+ let tokenUsageHistogram;
7
+ const getRequestDurationHistogram = () => (requestDurationHistogram ??= getMeter().createHistogram("gen_ai.server.request.duration", {
5
8
  description: "End-to-end gateway request duration",
6
9
  unit: "s",
7
10
  advice: {
@@ -9,8 +12,8 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
9
12
  0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
10
13
  ],
11
14
  },
12
- });
13
- const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
15
+ }));
16
+ const getTimePerOutputTokenHistogram = () => (timePerOutputTokenHistogram ??= getMeter().createHistogram("gen_ai.server.time_per_output_token", {
14
17
  description: "End-to-end gateway request duration per output token",
15
18
  unit: "s",
16
19
  advice: {
@@ -18,17 +21,17 @@ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_pe
18
21
  0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
19
22
  ],
20
23
  },
21
- });
22
- const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
24
+ }));
25
+ const getTokenUsageHistogram = () => (tokenUsageHistogram ??= getMeter().createHistogram("gen_ai.client.token.usage", {
23
26
  description: "Token usage reported by upstream model responses",
24
27
  unit: "{token}",
25
28
  advice: {
26
29
  explicitBucketBoundaries: [
27
- 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
28
- 524288, 1048576,
30
+ 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
31
+ 262144, 524288, 1048576,
29
32
  ],
30
33
  },
31
- });
34
+ }));
32
35
  export const getGenAiGeneralAttributes = (ctx, signalLevel) => {
33
36
  if (!signalLevel || signalLevel === "off")
34
37
  return {};
@@ -47,7 +50,7 @@ export const recordRequestDuration = (duration, status, ctx, signalLevel) => {
47
50
  if (status !== 200) {
48
51
  attrs["error.type"] = `${status} ${STATUS_CODE(status).toLowerCase()}`;
49
52
  }
50
- requestDurationHistogram.record(duration / 1000, attrs);
53
+ getRequestDurationHistogram().record(duration / 1000, attrs);
51
54
  };
52
55
  // FUTURE: record unsuccessful calls
53
56
  export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
@@ -56,7 +59,7 @@ export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalL
56
59
  const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
57
60
  if (typeof outputTokens !== "number" || outputTokens <= 0)
58
61
  return;
59
- timePerOutputTokenHistogram.record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
62
+ getTimePerOutputTokenHistogram().record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
60
63
  };
61
64
  // FUTURE: record unsuccessful calls
62
65
  export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
@@ -65,7 +68,7 @@ export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
65
68
  const record = (value, tokenType) => {
66
69
  if (typeof value !== "number")
67
70
  return;
68
- tokenUsageHistogram.record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
71
+ getTokenUsageHistogram().record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
69
72
  };
70
73
  record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
71
74
  record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
@@ -1,2 +1,2 @@
1
1
  import type { TelemetrySignalLevel } from "../types";
2
- export declare const recordV8jsMemory: (level?: TelemetrySignalLevel) => void;
2
+ export declare const observeV8jsMemoryMetrics: (level?: TelemetrySignalLevel) => void;
@@ -1,18 +1,9 @@
1
1
  import { metrics } from "@opentelemetry/api";
2
- const meter = metrics.getMeter("@hebo/gateway");
2
+ const getMeter = () => metrics.getMeter("@hebo/gateway");
3
3
  const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
4
- const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
5
- description: "Used bytes in the V8 heap",
6
- unit: "By",
7
- });
8
- const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
9
- description: "Physical bytes allocated for the V8 heap space",
10
- unit: "By",
11
- });
4
+ let registered = false;
12
5
  const isEnabled = (level) => level === "recommended" || level === "full";
13
- export const recordV8jsMemory = (level) => {
14
- if (!isEnabled(level))
15
- return;
6
+ const observeMemory = (observe) => {
16
7
  let usage;
17
8
  try {
18
9
  usage = globalThis.process?.memoryUsage?.();
@@ -22,6 +13,31 @@ export const recordV8jsMemory = (level) => {
22
13
  }
23
14
  if (!usage)
24
15
  return;
25
- heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
26
- heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
16
+ observe(usage.heapUsed, usage.rss);
17
+ };
18
+ export const observeV8jsMemoryMetrics = (level) => {
19
+ if (!isEnabled(level) || registered)
20
+ return;
21
+ registered = true;
22
+ const meter = getMeter();
23
+ meter
24
+ .createObservableGauge("v8js.memory.heap.used", {
25
+ description: "Used bytes in the V8 heap",
26
+ unit: "By",
27
+ })
28
+ .addCallback((result) => {
29
+ observeMemory((heapUsed) => {
30
+ result.observe(heapUsed, defaultHeapSpaceAttrs);
31
+ });
32
+ });
33
+ meter
34
+ .createObservableGauge("v8js.memory.heap.space.physical_size", {
35
+ description: "Physical bytes allocated for the V8 heap space",
36
+ unit: "By",
37
+ })
38
+ .addCallback((result) => {
39
+ observeMemory((_, rss) => {
40
+ result.observe(rss, defaultHeapSpaceAttrs);
41
+ });
42
+ });
27
43
  };
@@ -34,7 +34,7 @@ export const startSpan = (name, options) => {
34
34
  };
35
35
  export const withSpan = async (name, run, options) => {
36
36
  if (!spanTracer) {
37
- return await run();
37
+ return run();
38
38
  }
39
39
  const started = startSpan(name, options);
40
40
  try {
@@ -1,51 +1,58 @@
1
1
  import { toOpenAIError } from "../errors/openai";
2
- const isErrorChunk = (v) => v instanceof Error || !!v?.error;
2
+ const isErrorChunk = (v) => v instanceof Error || (typeof v === "object" && v !== null && "error" in v);
3
3
  export const wrapStream = (src, hooks) => {
4
4
  let finished = false;
5
- const done = (reader, controller, status, reason) => {
6
- if (!finished) {
7
- finished = true;
8
- hooks.onDone?.(status, reason);
5
+ let reader;
6
+ const done = (controller, status, reason) => {
7
+ if (finished)
8
+ return;
9
+ finished = true;
10
+ hooks.onDone?.(status, reason);
11
+ if (status !== 200) {
12
+ reader?.cancel(reason).catch(() => { });
13
+ }
14
+ try {
15
+ controller.close();
9
16
  }
10
- reader.cancel(reason).catch(() => { });
11
- controller.close();
17
+ catch { }
12
18
  };
13
19
  return new ReadableStream({
14
20
  async start(controller) {
15
- const reader = src.getReader();
21
+ reader = src.getReader();
16
22
  try {
17
23
  for (;;) {
18
- // oxlint-disable-next-line no-await-in-loop
24
+ // oxlint-disable-next-line no-await-in-loop, no-unsafe-assignment
19
25
  const { value, done: eof } = await reader.read();
20
26
  if (eof)
21
27
  break;
22
- const out = isErrorChunk(value) ? toOpenAIError(value) : value;
23
- controller.enqueue(out);
24
- if (out !== value) {
25
- const status = out.error?.type === "invalid_request_error" ? 422 : 502;
26
- done(reader, controller, status, value);
28
+ controller.enqueue(value);
29
+ if (isErrorChunk(value)) {
30
+ done(controller, toOpenAIError(value).error.type === "invalid_request_error" ? 422 : 502, value);
27
31
  return;
28
32
  }
29
33
  }
30
- done(reader, controller, 200);
34
+ done(controller, 200);
31
35
  }
32
36
  catch (err) {
33
- controller.enqueue(toOpenAIError(err));
34
- done(reader, controller, 502, err);
37
+ try {
38
+ controller.enqueue(toOpenAIError(err));
39
+ }
40
+ catch { }
41
+ done(controller, 502, err);
35
42
  }
36
43
  finally {
37
44
  try {
38
- reader.releaseLock();
45
+ reader?.releaseLock();
39
46
  }
40
47
  catch { }
41
48
  }
42
49
  },
43
50
  cancel(reason) {
44
- if (!finished) {
45
- finished = true;
46
- hooks.onDone?.(499, reason);
47
- }
48
- src.cancel(reason).catch(() => { });
51
+ if (finished)
52
+ return;
53
+ finished = true;
54
+ hooks.onDone?.(499, reason);
55
+ reader?.cancel(reason).catch(() => { });
49
56
  },
50
57
  });
51
58
  };
package/dist/utils/env.js CHANGED
@@ -1,5 +1,7 @@
1
1
  const NODE_ENV = typeof process === "undefined"
2
- ? (globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
3
- : process.env?.NODE_ENV;
2
+ ? // oxlint-disable-next-line no-unsafe-member-access
3
+ (globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
4
+ : // oxlint-disable-next-line no-unsafe-assignment
5
+ process.env?.NODE_ENV;
4
6
  export const isProduction = () => NODE_ENV === "production";
5
7
  export const isTest = () => NODE_ENV === "test";
@@ -1,6 +1,7 @@
1
1
  function isPlainObject(v) {
2
2
  if (!v || typeof v !== "object" || Array.isArray(v))
3
3
  return false;
4
+ // oxlint-disable-next-line no-unsafe-assignment
4
5
  const proto = Object.getPrototypeOf(v);
5
6
  return proto === Object.prototype || proto === null;
6
7
  }
@@ -19,7 +19,9 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
19
19
  const headers = new Headers(defaultHeaders);
20
20
  const override = responseInit?.headers;
21
21
  if (override) {
22
- new Headers(override).forEach((value, key) => headers.set(key, value));
22
+ new Headers(override).forEach((value, key) => {
23
+ headers.set(key, value);
24
+ });
23
25
  }
24
26
  if (!responseInit)
25
27
  return { headers };