@hebo-ai/gateway 0.6.2-rc0 → 0.6.2

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +3 -3
  2. package/dist/endpoints/chat-completions/converters.js +26 -21
  3. package/dist/endpoints/chat-completions/handler.js +2 -0
  4. package/dist/endpoints/chat-completions/otel.js +1 -1
  5. package/dist/endpoints/chat-completions/schema.d.ts +4 -18
  6. package/dist/endpoints/chat-completions/schema.js +14 -17
  7. package/dist/endpoints/embeddings/handler.js +2 -0
  8. package/dist/endpoints/embeddings/otel.js +5 -0
  9. package/dist/endpoints/embeddings/schema.d.ts +6 -0
  10. package/dist/endpoints/embeddings/schema.js +4 -1
  11. package/dist/endpoints/models/converters.js +3 -3
  12. package/dist/lifecycle.js +2 -2
  13. package/dist/logger/default.js +3 -3
  14. package/dist/logger/index.d.ts +2 -5
  15. package/dist/middleware/common.js +1 -0
  16. package/dist/middleware/utils.js +0 -3
  17. package/dist/models/amazon/middleware.js +8 -5
  18. package/dist/models/anthropic/middleware.js +13 -13
  19. package/dist/models/catalog.js +5 -1
  20. package/dist/models/cohere/middleware.js +7 -5
  21. package/dist/models/google/middleware.d.ts +1 -1
  22. package/dist/models/google/middleware.js +29 -25
  23. package/dist/models/openai/middleware.js +13 -9
  24. package/dist/models/voyage/middleware.js +2 -1
  25. package/dist/providers/bedrock/middleware.js +21 -23
  26. package/dist/providers/registry.js +3 -0
  27. package/dist/telemetry/fetch.js +7 -2
  28. package/dist/telemetry/gen-ai.js +15 -12
  29. package/dist/telemetry/memory.d.ts +1 -1
  30. package/dist/telemetry/memory.js +30 -14
  31. package/dist/telemetry/span.js +1 -1
  32. package/dist/telemetry/stream.js +30 -23
  33. package/dist/utils/env.js +4 -2
  34. package/dist/utils/preset.js +1 -0
  35. package/dist/utils/response.js +3 -1
  36. package/package.json +36 -50
  37. package/src/config.ts +0 -98
  38. package/src/endpoints/chat-completions/converters.test.ts +0 -631
  39. package/src/endpoints/chat-completions/converters.ts +0 -899
  40. package/src/endpoints/chat-completions/handler.test.ts +0 -391
  41. package/src/endpoints/chat-completions/handler.ts +0 -201
  42. package/src/endpoints/chat-completions/index.ts +0 -4
  43. package/src/endpoints/chat-completions/otel.test.ts +0 -315
  44. package/src/endpoints/chat-completions/otel.ts +0 -214
  45. package/src/endpoints/chat-completions/schema.ts +0 -364
  46. package/src/endpoints/embeddings/converters.ts +0 -51
  47. package/src/endpoints/embeddings/handler.test.ts +0 -133
  48. package/src/endpoints/embeddings/handler.ts +0 -137
  49. package/src/endpoints/embeddings/index.ts +0 -4
  50. package/src/endpoints/embeddings/otel.ts +0 -40
  51. package/src/endpoints/embeddings/schema.ts +0 -36
  52. package/src/endpoints/models/converters.ts +0 -56
  53. package/src/endpoints/models/handler.test.ts +0 -122
  54. package/src/endpoints/models/handler.ts +0 -37
  55. package/src/endpoints/models/index.ts +0 -3
  56. package/src/endpoints/models/schema.ts +0 -37
  57. package/src/errors/ai-sdk.ts +0 -99
  58. package/src/errors/gateway.ts +0 -17
  59. package/src/errors/openai.ts +0 -57
  60. package/src/errors/utils.ts +0 -47
  61. package/src/gateway.ts +0 -50
  62. package/src/index.ts +0 -19
  63. package/src/lifecycle.ts +0 -135
  64. package/src/logger/default.ts +0 -105
  65. package/src/logger/index.ts +0 -42
  66. package/src/middleware/common.test.ts +0 -215
  67. package/src/middleware/common.ts +0 -163
  68. package/src/middleware/debug.ts +0 -37
  69. package/src/middleware/matcher.ts +0 -161
  70. package/src/middleware/utils.ts +0 -34
  71. package/src/models/amazon/index.ts +0 -2
  72. package/src/models/amazon/middleware.test.ts +0 -133
  73. package/src/models/amazon/middleware.ts +0 -79
  74. package/src/models/amazon/presets.ts +0 -104
  75. package/src/models/anthropic/index.ts +0 -2
  76. package/src/models/anthropic/middleware.test.ts +0 -643
  77. package/src/models/anthropic/middleware.ts +0 -148
  78. package/src/models/anthropic/presets.ts +0 -191
  79. package/src/models/catalog.ts +0 -13
  80. package/src/models/cohere/index.ts +0 -2
  81. package/src/models/cohere/middleware.test.ts +0 -138
  82. package/src/models/cohere/middleware.ts +0 -76
  83. package/src/models/cohere/presets.ts +0 -186
  84. package/src/models/google/index.ts +0 -2
  85. package/src/models/google/middleware.test.ts +0 -298
  86. package/src/models/google/middleware.ts +0 -137
  87. package/src/models/google/presets.ts +0 -118
  88. package/src/models/meta/index.ts +0 -1
  89. package/src/models/meta/presets.ts +0 -143
  90. package/src/models/openai/index.ts +0 -2
  91. package/src/models/openai/middleware.test.ts +0 -189
  92. package/src/models/openai/middleware.ts +0 -103
  93. package/src/models/openai/presets.ts +0 -280
  94. package/src/models/types.ts +0 -114
  95. package/src/models/voyage/index.ts +0 -2
  96. package/src/models/voyage/middleware.test.ts +0 -28
  97. package/src/models/voyage/middleware.ts +0 -23
  98. package/src/models/voyage/presets.ts +0 -126
  99. package/src/providers/anthropic/canonical.ts +0 -17
  100. package/src/providers/anthropic/index.ts +0 -1
  101. package/src/providers/bedrock/canonical.ts +0 -87
  102. package/src/providers/bedrock/index.ts +0 -2
  103. package/src/providers/bedrock/middleware.test.ts +0 -303
  104. package/src/providers/bedrock/middleware.ts +0 -128
  105. package/src/providers/cohere/canonical.ts +0 -26
  106. package/src/providers/cohere/index.ts +0 -1
  107. package/src/providers/groq/canonical.ts +0 -21
  108. package/src/providers/groq/index.ts +0 -1
  109. package/src/providers/openai/canonical.ts +0 -16
  110. package/src/providers/openai/index.ts +0 -1
  111. package/src/providers/registry.test.ts +0 -44
  112. package/src/providers/registry.ts +0 -165
  113. package/src/providers/types.ts +0 -20
  114. package/src/providers/vertex/canonical.ts +0 -17
  115. package/src/providers/vertex/index.ts +0 -1
  116. package/src/providers/voyage/canonical.ts +0 -16
  117. package/src/providers/voyage/index.ts +0 -1
  118. package/src/telemetry/ai-sdk.ts +0 -46
  119. package/src/telemetry/baggage.ts +0 -27
  120. package/src/telemetry/fetch.ts +0 -62
  121. package/src/telemetry/gen-ai.ts +0 -113
  122. package/src/telemetry/http.ts +0 -62
  123. package/src/telemetry/index.ts +0 -1
  124. package/src/telemetry/memory.ts +0 -36
  125. package/src/telemetry/span.ts +0 -85
  126. package/src/telemetry/stream.ts +0 -64
  127. package/src/types.ts +0 -223
  128. package/src/utils/env.ts +0 -7
  129. package/src/utils/headers.ts +0 -27
  130. package/src/utils/preset.ts +0 -65
  131. package/src/utils/request.test.ts +0 -75
  132. package/src/utils/request.ts +0 -52
  133. package/src/utils/response.ts +0 -84
  134. package/src/utils/url.ts +0 -26
@@ -11,14 +11,15 @@ export const geminiDimensionsMiddleware = {
11
11
  const dimensions = unknown["dimensions"];
12
12
  if (!dimensions)
13
13
  return params;
14
- (params.providerOptions["google"] ??= {})["outputDimensionality"] = dimensions;
14
+ const target = (params.providerOptions["google"] ??= {});
15
+ target.outputDimensionality = dimensions;
15
16
  delete unknown["dimensions"];
16
17
  return params;
17
18
  },
18
19
  };
19
20
  // https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
20
21
  export function mapGeminiReasoningEffort(effort, modelId) {
21
- if (modelId.includes("gemini-3.1-pro")) {
22
+ if (modelId.includes("pro")) {
22
23
  switch (effort) {
23
24
  case "none":
24
25
  case "minimal":
@@ -28,26 +29,22 @@ export function mapGeminiReasoningEffort(effort, modelId) {
28
29
  return "medium";
29
30
  case "high":
30
31
  case "xhigh":
31
- case "max":
32
32
  return "high";
33
33
  }
34
34
  }
35
- if (modelId.includes("gemini-3-flash") || modelId.includes("gemini-3.1-flash")) {
36
- switch (effort) {
37
- case "none":
38
- case "minimal":
39
- return "minimal";
40
- case "low":
41
- return "low";
42
- case "medium":
43
- return "medium";
44
- case "high":
45
- case "xhigh":
46
- case "max":
47
- return "high";
48
- }
35
+ // Flash
36
+ switch (effort) {
37
+ case "none":
38
+ case "minimal":
39
+ return "minimal";
40
+ case "low":
41
+ return "low";
42
+ case "medium":
43
+ return "medium";
44
+ case "high":
45
+ case "xhigh":
46
+ return "high";
49
47
  }
50
- return effort;
51
48
  }
52
49
  export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
53
50
  export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
@@ -58,6 +55,9 @@ export const geminiReasoningMiddleware = {
58
55
  const unknown = params.providerOptions?.["unknown"];
59
56
  if (!unknown)
60
57
  return params;
58
+ // If thinking options exist, just pass through
59
+ if (unknown["thinking_config"])
60
+ return params;
61
61
  const reasoning = unknown["reasoning"];
62
62
  if (!reasoning)
63
63
  return params;
@@ -65,19 +65,19 @@ export const geminiReasoningMiddleware = {
65
65
  const modelId = model.modelId;
66
66
  if (modelId.includes("gemini-2")) {
67
67
  const is25Pro = modelId.includes("gemini-2.5-pro");
68
- target["thinkingConfig"] = {
68
+ target.thinkingConfig = {
69
69
  thinkingBudget: reasoning.max_tokens ??
70
70
  calculateReasoningBudgetFromEffort(reasoning.effort ?? "none", params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0),
71
71
  };
72
72
  }
73
73
  else if (modelId.includes("gemini-3") && reasoning.effort) {
74
- target["thinkingConfig"] = {
74
+ target.thinkingConfig = {
75
75
  thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
76
76
  };
77
77
  // FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
78
78
  }
79
- (target["thinkingConfig"] ??= {})["includeThoughts"] =
80
- reasoning.enabled ? !reasoning.exclude : false;
79
+ const thinkingConfig = (target.thinkingConfig ??= {});
80
+ thinkingConfig.includeThoughts = reasoning.enabled ? !reasoning.exclude : false;
81
81
  delete unknown["reasoning"];
82
82
  return params;
83
83
  },
@@ -91,9 +91,13 @@ export const geminiPromptCachingMiddleware = {
91
91
  const unknown = params.providerOptions?.["unknown"];
92
92
  if (!unknown)
93
93
  return params;
94
- const cachedContent = unknown["cached_content"];
95
- if (cachedContent) {
96
- (params.providerOptions["google"] ??= {})["cachedContent"] = cachedContent;
94
+ // If cached_content options exist, just pass through
95
+ if (unknown["cached_content"])
96
+ return params;
97
+ const promptCacheKey = unknown["prompt_cache_key"];
98
+ if (promptCacheKey) {
99
+ (params.providerOptions["google"] ??= {}).cachedContent =
100
+ promptCacheKey;
97
101
  }
98
102
  delete unknown["cached_content"];
99
103
  return params;
@@ -10,21 +10,25 @@ export const openAIDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["openai"] ??= {})["dimensions"] = dimensions;
13
+ const target = (params.providerOptions["openai"] ??= {});
14
+ target.dimensions = dimensions;
14
15
  delete unknown["dimensions"];
15
16
  return params;
16
17
  },
17
18
  };
18
19
  function mapGptOssReasoningEffort(effort) {
19
20
  switch (effort) {
21
+ case undefined:
22
+ case "none":
23
+ return;
24
+ case "minimal":
25
+ case "low":
26
+ return "low";
20
27
  case "medium":
21
28
  return "medium";
22
29
  case "high":
23
30
  case "xhigh":
24
- case "max":
25
31
  return "high";
26
- default:
27
- return "low";
28
32
  }
29
33
  }
30
34
  export const openAIReasoningMiddleware = {
@@ -41,13 +45,13 @@ export const openAIReasoningMiddleware = {
41
45
  const isGptOss = model.modelId.includes("gpt-oss");
42
46
  if (isGptOss) {
43
47
  // FUTURE: warn that unable to disable reasoning for gpt-oss models
44
- target["reasoningEffort"] = mapGptOssReasoningEffort(reasoning.effort);
48
+ target.reasoningEffort = mapGptOssReasoningEffort(reasoning.effort);
45
49
  }
46
50
  else if (reasoning.enabled === false) {
47
- target["reasoningEffort"] = "none";
51
+ target.reasoningEffort = "none";
48
52
  }
49
53
  else if (reasoning.effort) {
50
- target["reasoningEffort"] = reasoning.effort;
54
+ target.reasoningEffort = reasoning.effort;
51
55
  }
52
56
  // FUTURE: warn that reasoning.max_tokens (not supported) was ignored
53
57
  delete unknown["reasoning"];
@@ -67,9 +71,9 @@ export const openAIPromptCachingMiddleware = {
67
71
  if (key || retention) {
68
72
  const target = (params.providerOptions["openai"] ??= {});
69
73
  if (key)
70
- target["promptCacheKey"] = key;
74
+ target.promptCacheKey = key;
71
75
  if (retention)
72
- target["promptCacheRetention"] = retention;
76
+ target.promptCacheRetention = retention;
73
77
  }
74
78
  delete unknown["prompt_cache_key"];
75
79
  delete unknown["prompt_cache_retention"];
@@ -10,7 +10,8 @@ export const voyageDimensionsMiddleware = {
10
10
  const dimensions = unknown["dimensions"];
11
11
  if (!dimensions)
12
12
  return params;
13
- (params.providerOptions["voyage"] ??= {})["outputDimension"] = dimensions;
13
+ const target = (params.providerOptions["voyage"] ??= {});
14
+ target.outputDimension = dimensions;
14
15
  delete unknown["dimensions"];
15
16
  return params;
16
17
  },
@@ -7,14 +7,15 @@ export const bedrockGptReasoningMiddleware = {
7
7
  if (!model.modelId.includes("gpt"))
8
8
  return params;
9
9
  const bedrock = params.providerOptions?.["bedrock"];
10
- if (!bedrock || typeof bedrock !== "object")
10
+ if (!bedrock)
11
11
  return params;
12
- const effort = bedrock["reasoningEffort"];
12
+ const effort = bedrock.reasoningEffort;
13
13
  if (effort === undefined)
14
14
  return params;
15
- const target = (bedrock["reasoningConfig"] ??= {});
16
- target["maxReasoningEffort"] = effort;
17
- delete bedrock["reasoningEffort"];
15
+ const target = (bedrock.reasoningConfig ??= {});
16
+ // @ts-expect-error AI SDK does accept this
17
+ target.maxReasoningEffort = effort;
18
+ delete bedrock.reasoningEffort;
18
19
  return params;
19
20
  },
20
21
  };
@@ -25,28 +26,25 @@ export const bedrockClaudeReasoningMiddleware = {
25
26
  if (!model.modelId.includes("claude"))
26
27
  return params;
27
28
  const bedrock = params.providerOptions?.["bedrock"];
28
- if (!bedrock || typeof bedrock !== "object")
29
+ if (!bedrock)
29
30
  return params;
30
- const thinking = bedrock["thinking"];
31
- const effort = bedrock["effort"];
31
+ const thinking = bedrock.thinking;
32
+ const effort = bedrock.effort;
32
33
  if (!thinking && effort === undefined)
33
34
  return params;
34
- const target = (bedrock["reasoningConfig"] ??= {});
35
+ const target = (bedrock.reasoningConfig ??= {});
35
36
  if (thinking && typeof thinking === "object") {
36
- const thinkingOptions = thinking;
37
- if (thinkingOptions["type"] !== undefined) {
38
- target["type"] = thinkingOptions["type"];
39
- }
40
- if (thinkingOptions["budgetTokens"] !== undefined) {
41
- target["budgetTokens"] = thinkingOptions["budgetTokens"];
37
+ target.type = thinking.type;
38
+ if ("budgetTokens" in thinking && thinking.budgetTokens !== undefined) {
39
+ target.budgetTokens = thinking.budgetTokens;
42
40
  }
43
41
  }
44
42
  // FUTURE: bedrock currently does not support "effort" for other 4.x models
45
43
  if (effort !== undefined && isClaude46(model.modelId)) {
46
- target["maxReasoningEffort"] = effort;
44
+ target.maxReasoningEffort = effort;
47
45
  }
48
- delete bedrock["thinking"];
49
- delete bedrock["effort"];
46
+ delete bedrock.thinking;
47
+ delete bedrock.effort;
50
48
  return params;
51
49
  },
52
50
  };
@@ -79,18 +77,18 @@ export const bedrockPromptCachingMiddleware = {
79
77
  delete entryBedrock["cacheControl"];
80
78
  };
81
79
  for (const message of params.prompt) {
82
- processCacheControl(message["providerOptions"]);
83
- if (!Array.isArray(message["content"]))
80
+ processCacheControl(message.providerOptions);
81
+ if (!Array.isArray(message.content))
84
82
  continue;
85
- for (const part of message["content"]) {
86
- processCacheControl(part["providerOptions"]);
83
+ for (const part of message.content) {
84
+ processCacheControl(part.providerOptions);
87
85
  }
88
86
  lastCacheableBlock = message;
89
87
  }
90
88
  const bedrock = params.providerOptions?.["bedrock"];
91
89
  const cacheControl = bedrock?.["cacheControl"];
92
90
  if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
93
- ((lastCacheableBlock["providerOptions"] ??= {})["bedrock"] ??= {})["cachePoint"] =
91
+ ((lastCacheableBlock.providerOptions ??= {})["bedrock"] ??= {})["cachePoint"] =
94
92
  toBedrockCachePoint(model.modelId, cacheControl);
95
93
  }
96
94
  delete bedrock?.["cacheControl"];
@@ -55,7 +55,9 @@ export const withCanonicalIds = (provider, config = {}) => {
55
55
  };
56
56
  const needsFallbackWrap = stripNamespace || normalizeDelimiters || namespaceSeparator !== "/" || !!prefix || !!postfix;
57
57
  // FUTURE: use embeddingModel instead of textEmbeddingModel once voyage supports it
58
+ // oxlint-disable-next-line unbound-method
58
59
  const languageModel = provider.languageModel;
60
+ // oxlint-disable-next-line unbound-method, no-deprecated
59
61
  const embeddingModel = provider.textEmbeddingModel;
60
62
  const fallbackProvider = needsFallbackWrap
61
63
  ? {
@@ -69,6 +71,7 @@ export const withCanonicalIds = (provider, config = {}) => {
69
71
  embeddingModel: (id) => {
70
72
  const mapped = applyFallbackAffixes(normalizeId(id));
71
73
  logger.debug(`[canonical] mapped ${id} to ${mapped}`);
74
+ // oxlint-disable-next-line no-deprecated
72
75
  return embeddingModel(mapped);
73
76
  },
74
77
  }
@@ -16,8 +16,13 @@ const getRequestAttributes = (input, init) => {
16
16
  attrs["url.full"] = input.url;
17
17
  return attrs;
18
18
  };
19
- const shouldTraceFetch = (init) => typeof init?.headers?.["user-agent"] === "string" &&
20
- init.headers["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
19
+ const shouldTraceFetch = (init) => {
20
+ const h = init?.headers;
21
+ if (!h || typeof h !== "object" || Array.isArray(h) || h instanceof Headers)
22
+ return false;
23
+ const ua = h["user-agent"];
24
+ return typeof ua === "string" && ua.includes("ai-sdk/provider-utils");
25
+ };
21
26
  const otelFetch = (input, init) => {
22
27
  const original = g[ORIGINAL_FETCH_KEY];
23
28
  if (!fetchTracingEnabled)
@@ -1,7 +1,10 @@
1
1
  import { metrics } from "@opentelemetry/api";
2
2
  import { STATUS_CODE } from "../errors/utils";
3
- const meter = metrics.getMeter("@hebo/gateway");
4
- const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
3
+ const getMeter = () => metrics.getMeter("@hebo/gateway");
4
+ let requestDurationHistogram;
5
+ let timePerOutputTokenHistogram;
6
+ let tokenUsageHistogram;
7
+ const getRequestDurationHistogram = () => (requestDurationHistogram ??= getMeter().createHistogram("gen_ai.server.request.duration", {
5
8
  description: "End-to-end gateway request duration",
6
9
  unit: "s",
7
10
  advice: {
@@ -9,8 +12,8 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
9
12
  0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
10
13
  ],
11
14
  },
12
- });
13
- const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
15
+ }));
16
+ const getTimePerOutputTokenHistogram = () => (timePerOutputTokenHistogram ??= getMeter().createHistogram("gen_ai.server.time_per_output_token", {
14
17
  description: "End-to-end gateway request duration per output token",
15
18
  unit: "s",
16
19
  advice: {
@@ -18,17 +21,17 @@ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_pe
18
21
  0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
19
22
  ],
20
23
  },
21
- });
22
- const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
24
+ }));
25
+ const getTokenUsageHistogram = () => (tokenUsageHistogram ??= getMeter().createHistogram("gen_ai.client.token.usage", {
23
26
  description: "Token usage reported by upstream model responses",
24
27
  unit: "{token}",
25
28
  advice: {
26
29
  explicitBucketBoundaries: [
27
- 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
28
- 524288, 1048576,
30
+ 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
31
+ 262144, 524288, 1048576,
29
32
  ],
30
33
  },
31
- });
34
+ }));
32
35
  export const getGenAiGeneralAttributes = (ctx, signalLevel) => {
33
36
  if (!signalLevel || signalLevel === "off")
34
37
  return {};
@@ -47,7 +50,7 @@ export const recordRequestDuration = (duration, status, ctx, signalLevel) => {
47
50
  if (status !== 200) {
48
51
  attrs["error.type"] = `${status} ${STATUS_CODE(status).toLowerCase()}`;
49
52
  }
50
- requestDurationHistogram.record(duration / 1000, attrs);
53
+ getRequestDurationHistogram().record(duration / 1000, attrs);
51
54
  };
52
55
  // FUTURE: record unsuccessful calls
53
56
  export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
@@ -56,7 +59,7 @@ export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalL
56
59
  const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
57
60
  if (typeof outputTokens !== "number" || outputTokens <= 0)
58
61
  return;
59
- timePerOutputTokenHistogram.record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
62
+ getTimePerOutputTokenHistogram().record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
60
63
  };
61
64
  // FUTURE: record unsuccessful calls
62
65
  export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
@@ -65,7 +68,7 @@ export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
65
68
  const record = (value, tokenType) => {
66
69
  if (typeof value !== "number")
67
70
  return;
68
- tokenUsageHistogram.record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
71
+ getTokenUsageHistogram().record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
69
72
  };
70
73
  record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
71
74
  record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
@@ -1,2 +1,2 @@
1
1
  import type { TelemetrySignalLevel } from "../types";
2
- export declare const recordV8jsMemory: (level?: TelemetrySignalLevel) => void;
2
+ export declare const observeV8jsMemoryMetrics: (level?: TelemetrySignalLevel) => void;
@@ -1,18 +1,9 @@
1
1
  import { metrics } from "@opentelemetry/api";
2
- const meter = metrics.getMeter("@hebo/gateway");
2
+ const getMeter = () => metrics.getMeter("@hebo/gateway");
3
3
  const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
4
- const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
5
- description: "Used bytes in the V8 heap",
6
- unit: "By",
7
- });
8
- const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
9
- description: "Physical bytes allocated for the V8 heap space",
10
- unit: "By",
11
- });
4
+ let registered = false;
12
5
  const isEnabled = (level) => level === "recommended" || level === "full";
13
- export const recordV8jsMemory = (level) => {
14
- if (!isEnabled(level))
15
- return;
6
+ const observeMemory = (observe) => {
16
7
  let usage;
17
8
  try {
18
9
  usage = globalThis.process?.memoryUsage?.();
@@ -22,6 +13,31 @@ export const recordV8jsMemory = (level) => {
22
13
  }
23
14
  if (!usage)
24
15
  return;
25
- heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
26
- heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
16
+ observe(usage.heapUsed, usage.rss);
17
+ };
18
+ export const observeV8jsMemoryMetrics = (level) => {
19
+ if (!isEnabled(level) || registered)
20
+ return;
21
+ registered = true;
22
+ const meter = getMeter();
23
+ meter
24
+ .createObservableGauge("v8js.memory.heap.used", {
25
+ description: "Used bytes in the V8 heap",
26
+ unit: "By",
27
+ })
28
+ .addCallback((result) => {
29
+ observeMemory((heapUsed) => {
30
+ result.observe(heapUsed, defaultHeapSpaceAttrs);
31
+ });
32
+ });
33
+ meter
34
+ .createObservableGauge("v8js.memory.heap.space.physical_size", {
35
+ description: "Physical bytes allocated for the V8 heap space",
36
+ unit: "By",
37
+ })
38
+ .addCallback((result) => {
39
+ observeMemory((_, rss) => {
40
+ result.observe(rss, defaultHeapSpaceAttrs);
41
+ });
42
+ });
27
43
  };
@@ -34,7 +34,7 @@ export const startSpan = (name, options) => {
34
34
  };
35
35
  export const withSpan = async (name, run, options) => {
36
36
  if (!spanTracer) {
37
- return await run();
37
+ return run();
38
38
  }
39
39
  const started = startSpan(name, options);
40
40
  try {
@@ -1,51 +1,58 @@
1
1
  import { toOpenAIError } from "../errors/openai";
2
- const isErrorChunk = (v) => v instanceof Error || !!v?.error;
2
+ const isErrorChunk = (v) => v instanceof Error || (typeof v === "object" && v !== null && "error" in v);
3
3
  export const wrapStream = (src, hooks) => {
4
4
  let finished = false;
5
- const done = (reader, controller, status, reason) => {
6
- if (!finished) {
7
- finished = true;
8
- hooks.onDone?.(status, reason);
5
+ let reader;
6
+ const done = (controller, status, reason) => {
7
+ if (finished)
8
+ return;
9
+ finished = true;
10
+ hooks.onDone?.(status, reason);
11
+ if (status !== 200) {
12
+ reader?.cancel(reason).catch(() => { });
13
+ }
14
+ try {
15
+ controller.close();
9
16
  }
10
- reader.cancel(reason).catch(() => { });
11
- controller.close();
17
+ catch { }
12
18
  };
13
19
  return new ReadableStream({
14
20
  async start(controller) {
15
- const reader = src.getReader();
21
+ reader = src.getReader();
16
22
  try {
17
23
  for (;;) {
18
- // oxlint-disable-next-line no-await-in-loop
24
+ // oxlint-disable-next-line no-await-in-loop, no-unsafe-assignment
19
25
  const { value, done: eof } = await reader.read();
20
26
  if (eof)
21
27
  break;
22
- const out = isErrorChunk(value) ? toOpenAIError(value) : value;
23
- controller.enqueue(out);
24
- if (out !== value) {
25
- const status = out.error?.type === "invalid_request_error" ? 422 : 502;
26
- done(reader, controller, status, value);
28
+ controller.enqueue(value);
29
+ if (isErrorChunk(value)) {
30
+ done(controller, toOpenAIError(value).error.type === "invalid_request_error" ? 422 : 502, value);
27
31
  return;
28
32
  }
29
33
  }
30
- done(reader, controller, 200);
34
+ done(controller, 200);
31
35
  }
32
36
  catch (err) {
33
- controller.enqueue(toOpenAIError(err));
34
- done(reader, controller, 502, err);
37
+ try {
38
+ controller.enqueue(toOpenAIError(err));
39
+ }
40
+ catch { }
41
+ done(controller, 502, err);
35
42
  }
36
43
  finally {
37
44
  try {
38
- reader.releaseLock();
45
+ reader?.releaseLock();
39
46
  }
40
47
  catch { }
41
48
  }
42
49
  },
43
50
  cancel(reason) {
44
- if (!finished) {
45
- finished = true;
46
- hooks.onDone?.(499, reason);
47
- }
48
- src.cancel(reason).catch(() => { });
51
+ if (finished)
52
+ return;
53
+ finished = true;
54
+ hooks.onDone?.(499, reason);
55
+ reader?.cancel(reason).catch(() => { });
49
56
  },
50
57
  });
51
58
  };
package/dist/utils/env.js CHANGED
@@ -1,5 +1,7 @@
1
1
  const NODE_ENV = typeof process === "undefined"
2
- ? (globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
3
- : process.env?.NODE_ENV;
2
+ ? // oxlint-disable-next-line no-unsafe-member-access
3
+ (globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
4
+ : // oxlint-disable-next-line no-unsafe-assignment
5
+ process.env?.NODE_ENV;
4
6
  export const isProduction = () => NODE_ENV === "production";
5
7
  export const isTest = () => NODE_ENV === "test";
@@ -1,6 +1,7 @@
1
1
  function isPlainObject(v) {
2
2
  if (!v || typeof v !== "object" || Array.isArray(v))
3
3
  return false;
4
+ // oxlint-disable-next-line no-unsafe-assignment
4
5
  const proto = Object.getPrototypeOf(v);
5
6
  return proto === Object.prototype || proto === null;
6
7
  }
@@ -19,7 +19,9 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
19
19
  const headers = new Headers(defaultHeaders);
20
20
  const override = responseInit?.headers;
21
21
  if (override) {
22
- new Headers(override).forEach((value, key) => headers.set(key, value));
22
+ new Headers(override).forEach((value, key) => {
23
+ headers.set(key, value);
24
+ });
23
25
  }
24
26
  if (!responseInit)
25
27
  return { headers };