autotel 2.26.1 → 2.26.3

This diff shows the contents of publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "autotel",
- "version": "2.26.1",
+ "version": "2.26.3",
  "description": "Write Once, Observe Anywhere",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
@@ -250,30 +250,30 @@
  "license": "MIT",
  "dependencies": {
  "@opentelemetry/api": "^1.9.1",
- "@opentelemetry/api-logs": "^0.214.0",
- "@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
- "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0",
- "@opentelemetry/exporter-trace-otlp-http": "^0.214.0",
- "@opentelemetry/instrumentation": "^0.214.0",
- "@opentelemetry/resources": "^2.6.1",
- "@opentelemetry/sdk-logs": "^0.214.0",
- "@opentelemetry/sdk-metrics": "^2.6.1",
- "@opentelemetry/sdk-node": "^0.214.0",
- "@opentelemetry/sdk-trace-base": "^2.6.1",
+ "@opentelemetry/api-logs": "^0.215.0",
+ "@opentelemetry/exporter-logs-otlp-http": "^0.215.0",
+ "@opentelemetry/exporter-metrics-otlp-http": "^0.215.0",
+ "@opentelemetry/exporter-trace-otlp-http": "^0.215.0",
+ "@opentelemetry/instrumentation": "^0.215.0",
+ "@opentelemetry/resources": "^2.7.0",
+ "@opentelemetry/sdk-logs": "^0.215.0",
+ "@opentelemetry/sdk-metrics": "^2.7.0",
+ "@opentelemetry/sdk-node": "^0.215.0",
+ "@opentelemetry/sdk-trace-base": "^2.7.0",
  "@opentelemetry/semantic-conventions": "^1.40.0",
  "import-in-the-middle": "^3.0.1",
- "@tanstack/intent": "^0.0.29"
+ "@tanstack/intent": "^0.0.36"
  },
  "peerDependencies": {
- "@opentelemetry/auto-instrumentations-node": "^0.72.0",
- "@opentelemetry/exporter-logs-otlp-grpc": "^0.214.0",
- "@opentelemetry/exporter-metrics-otlp-grpc": "^0.214.0",
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.214.0",
- "@opentelemetry/resource-detector-aws": "^2.14.0",
- "@opentelemetry/resource-detector-container": "^0.8.5",
- "@opentelemetry/resource-detector-gcp": "^0.49.0",
- "@opentelemetry/sdk-trace-node": "^2.6.1",
- "@traceloop/node-server-sdk": "^0.24.0",
+ "@opentelemetry/auto-instrumentations-node": "^0.73.0",
+ "@opentelemetry/exporter-logs-otlp-grpc": "^0.215.0",
+ "@opentelemetry/exporter-metrics-otlp-grpc": "^0.215.0",
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
+ "@opentelemetry/resource-detector-aws": "^2.15.0",
+ "@opentelemetry/resource-detector-container": "^0.8.6",
+ "@opentelemetry/resource-detector-gcp": "^0.50.0",
+ "@opentelemetry/sdk-trace-node": "^2.7.0",
+ "@traceloop/node-server-sdk": "^0.26.0",
  "pino": "^10.3.1",
  "pino-pretty": "^13.1.3",
  "yaml": "^2.8.3"
@@ -319,34 +319,34 @@
  "devDependencies": {
  "@arethetypeswrong/cli": "^0.18.2",
  "@edge-runtime/vm": "^5.0.0",
- "@opentelemetry/auto-instrumentations-node": "^0.72.0",
- "@opentelemetry/context-async-hooks": "^2.6.1",
- "@opentelemetry/exporter-logs-otlp-grpc": "^0.214.0",
- "@opentelemetry/exporter-metrics-otlp-grpc": "^0.214.0",
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.214.0",
- "@opentelemetry/resource-detector-aws": "^2.14.0",
- "@opentelemetry/resource-detector-container": "^0.8.5",
- "@opentelemetry/resource-detector-gcp": "^0.49.0",
- "@opentelemetry/sdk-trace-node": "^2.6.1",
- "@swc/core": "^1.15.24",
+ "@opentelemetry/auto-instrumentations-node": "^0.73.0",
+ "@opentelemetry/context-async-hooks": "^2.7.0",
+ "@opentelemetry/exporter-logs-otlp-grpc": "^0.215.0",
+ "@opentelemetry/exporter-metrics-otlp-grpc": "^0.215.0",
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
+ "@opentelemetry/resource-detector-aws": "^2.15.0",
+ "@opentelemetry/resource-detector-container": "^0.8.6",
+ "@opentelemetry/resource-detector-gcp": "^0.50.0",
+ "@opentelemetry/sdk-trace-node": "^2.7.0",
+ "@swc/core": "^1.15.30",
  "@total-typescript/ts-reset": "^0.6.1",
  "@total-typescript/tsconfig": "^1.0.4",
  "@types/eslint-config-prettier": "^6.11.3",
- "@types/node": "^25.5.2",
- "@typescript-eslint/eslint-plugin": "^8.58.1",
- "@typescript-eslint/parser": "^8.58.1",
+ "@types/node": "^25.6.0",
+ "@typescript-eslint/eslint-plugin": "^8.59.0",
+ "@typescript-eslint/parser": "^8.59.0",
  "eslint-config-prettier": "^10.1.8",
  "eslint-plugin-unicorn": "^64.0.0",
  "pino": "^10.3.1",
- "prettier": "^3.8.1",
+ "prettier": "^3.8.3",
  "rimraf": "^6.1.3",
  "tsup": "^8.5.1",
  "tsx": "^4.21.0",
- "typescript": "^6.0.2",
- "typescript-eslint": "^8.58.1",
+ "typescript": "^6.0.3",
+ "typescript-eslint": "^8.59.0",
  "unplugin-swc": "^1.5.9",
  "vite-tsconfig-paths": "^6.1.1",
- "vitest": "^4.1.3",
+ "vitest": "^4.1.5",
  "vitest-mock-extended": "^4.0.0",
  "winston": "^3.19.0",
  "yaml": "^2.8.3"

gen-ai-events.test.ts ADDED
@@ -0,0 +1,135 @@
+ import { describe, expect, it } from 'vitest';
+ import type { TraceContext } from './trace-context';
+ import {
+   recordPromptSent,
+   recordResponseReceived,
+   recordRetry,
+   recordStreamFirstToken,
+   recordToolCall,
+ } from './gen-ai-events';
+
+ type CapturedEvent = { name: string; attrs?: Record<string, unknown> };
+
+ function captureCtx(): {
+   ctx: TraceContext;
+   events: CapturedEvent[];
+ } {
+   const events: CapturedEvent[] = [];
+   const ctx = {
+     addEvent: (name: string, attrs?: Record<string, unknown>) => {
+       events.push({ name, attrs });
+     },
+     setAttribute: () => {},
+     setAttributes: () => {},
+     setStatus: () => {},
+     recordException: () => {},
+     addLink: () => {},
+     addLinks: () => {},
+     updateName: () => {},
+     isRecording: () => true,
+     end: () => {},
+   } as unknown as TraceContext;
+   return { ctx, events };
+ }
+
+ describe('GenAI span event helpers', () => {
+   it('recordPromptSent emits gen_ai.prompt.sent with canonical attrs', () => {
+     const { ctx, events } = captureCtx();
+     recordPromptSent(ctx, {
+       model: 'gpt-4o',
+       promptTokens: 1200,
+       messageCount: 3,
+       operation: 'chat',
+     });
+     expect(events).toHaveLength(1);
+     expect(events[0]).toEqual({
+       name: 'gen_ai.prompt.sent',
+       attrs: {
+         'gen_ai.request.model': 'gpt-4o',
+         'gen_ai.usage.input_tokens': 1200,
+         'gen_ai.request.message_count': 3,
+         'gen_ai.operation.name': 'chat',
+       },
+     });
+   });
+
+   it('recordPromptSent omits unset fields rather than writing undefined', () => {
+     const { ctx, events } = captureCtx();
+     recordPromptSent(ctx);
+     expect(events[0]?.attrs).toEqual({});
+   });
+
+   it('recordResponseReceived joins finish reasons into a CSV for attribute compat', () => {
+     const { ctx, events } = captureCtx();
+     recordResponseReceived(ctx, {
+       model: 'gpt-4o-2024-11-20',
+       promptTokens: 1200,
+       completionTokens: 400,
+       totalTokens: 1600,
+       finishReasons: ['stop', 'tool_calls'],
+     });
+     expect(events[0]).toEqual({
+       name: 'gen_ai.response.received',
+       attrs: {
+         'gen_ai.response.model': 'gpt-4o-2024-11-20',
+         'gen_ai.usage.input_tokens': 1200,
+         'gen_ai.usage.output_tokens': 400,
+         'gen_ai.usage.total_tokens': 1600,
+         'gen_ai.response.finish_reasons': 'stop,tool_calls',
+       },
+     });
+   });
+
+   it('recordResponseReceived omits finish_reasons when empty', () => {
+     const { ctx, events } = captureCtx();
+     recordResponseReceived(ctx, { model: 'claude-sonnet-4-6' });
+     expect(events[0]?.attrs).not.toHaveProperty(
+       'gen_ai.response.finish_reasons',
+     );
+   });
+
+   it('recordRetry captures attempt, reason, delay, and status code', () => {
+     const { ctx, events } = captureCtx();
+     recordRetry(ctx, {
+       attempt: 2,
+       reason: 'rate_limit',
+       delayMs: 1000,
+       statusCode: 429,
+     });
+     expect(events[0]).toEqual({
+       name: 'gen_ai.retry',
+       attrs: {
+         'retry.attempt': 2,
+         'retry.reason': 'rate_limit',
+         'retry.delay_ms': 1000,
+         'http.response.status_code': 429,
+       },
+     });
+   });
+
+   it('recordToolCall writes canonical gen_ai.tool.* keys', () => {
+     const { ctx, events } = captureCtx();
+     recordToolCall(ctx, {
+       toolName: 'search_traces',
+       toolCallId: 'call-123',
+       arguments: '{"serviceName":"api"}',
+     });
+     expect(events[0]).toEqual({
+       name: 'gen_ai.tool.call',
+       attrs: {
+         'gen_ai.tool.name': 'search_traces',
+         'gen_ai.tool.call.id': 'call-123',
+         'gen_ai.tool.arguments': '{"serviceName":"api"}',
+       },
+     });
+   });
+
+   it('recordStreamFirstToken is the bare marker for TTFT', () => {
+     const { ctx, events } = captureCtx();
+     recordStreamFirstToken(ctx, { tokensSoFar: 1 });
+     expect(events[0]).toEqual({
+       name: 'gen_ai.stream.first_token',
+       attrs: { 'gen_ai.stream.tokens_so_far': 1 },
+     });
+   });
+ });

gen-ai-events.ts ADDED
@@ -0,0 +1,199 @@
+ /**
+  * Span event helpers for LLM lifecycle, aligned with the OpenTelemetry
+  * GenAI semantic conventions.
+  *
+  * Span events are timestamped points within a span — they render as dots
+  * on the trace timeline in Jaeger / Tempo / Langfuse / Arize. Use them
+  * to mark lifecycle moments the span attributes alone can't express:
+  *
+  * - When the prompt was sent (vs. when the first token arrived)
+  * - When each retry attempt started, and why
+  * - When a streaming response produced its first token (TTFT)
+  * - When a tool was invoked
+  *
+  * Every helper pins the event name + attribute keys to the published
+  * spec so downstream tooling (autotel-mcp, Langfuse, vendor UIs) can
+  * render them consistently.
+  *
+  * @example
+  * ```typescript
+  * import { trace, recordPromptSent, recordResponseReceived, recordRetry } from 'autotel';
+  *
+  * export const chat = trace('chat', ctx => async (prompt: string) => {
+  *   recordPromptSent(ctx, { model: 'gpt-4o', messageCount: 1 });
+  *
+  *   for (let attempt = 1; attempt <= 3; attempt++) {
+  *     try {
+  *       const res = await openai.chat.completions.create({...});
+  *       recordResponseReceived(ctx, {
+  *         model: res.model,
+  *         promptTokens: res.usage?.prompt_tokens,
+  *         completionTokens: res.usage?.completion_tokens,
+  *         finishReasons: res.choices.map(c => c.finish_reason),
+  *       });
+  *       return res;
+  *     } catch (err) {
+  *       recordRetry(ctx, { attempt, reason: 'rate_limit', delayMs: 500 });
+  *       await sleep(500 * attempt);
+  *     }
+  *   }
+  * });
+  * ```
+  */
+
+ import type { TraceContext } from './trace-context';
+
+ type EventAttrs = Record<string, string | number | boolean>;
+
+ /** Attributes expected on a `gen_ai.prompt.sent` event. */
+ export interface PromptSentEvent {
+   /** Model the caller intends to invoke (may differ from response model). */
+   model?: string;
+   /** Estimated input token count, when known before the call. */
+   promptTokens?: number;
+   /** Number of messages in a chat request (system + user + assistant). */
+   messageCount?: number;
+   /** Free-form operation kind — `chat` / `completion` / `embedding`. */
+   operation?: string;
+ }
+
+ /** Attributes expected on a `gen_ai.response.received` event. */
+ export interface ResponseReceivedEvent {
+   /** Model the provider actually served (may be more specific than requested). */
+   model?: string;
+   promptTokens?: number;
+   completionTokens?: number;
+   totalTokens?: number;
+   /** `stop`, `length`, `content_filter`, `tool_calls`, etc. */
+   finishReasons?: string[];
+ }
+
+ /** Attributes expected on a `gen_ai.retry` event. */
+ export interface RetryEvent {
+   attempt: number;
+   /** `rate_limit` | `timeout` | `provider_error` | custom label. */
+   reason?: string;
+   /** How long we'll wait before the next attempt. */
+   delayMs?: number;
+   /** HTTP status that triggered the retry, when applicable. */
+   statusCode?: number;
+ }
+
+ /** Attributes expected on a `gen_ai.tool.call` event. */
+ export interface ToolCallEvent {
+   toolName: string;
+   /** Call identifier so responses can be correlated back to calls. */
+   toolCallId?: string;
+   /** Pre-serialised tool arguments; omit if sensitive. */
+   arguments?: string;
+ }
+
+ /** Attributes expected on a `gen_ai.stream.first_token` event. */
+ export interface StreamFirstTokenEvent {
+   /** Tokens streamed so far, if the caller tracks that. */
+   tokensSoFar?: number;
+ }
+
+ /**
+  * Record that a prompt was dispatched to the provider. Typically called
+  * before `await provider.chat.completions.create(...)`.
+  */
+ export function recordPromptSent(
+   ctx: TraceContext,
+   event: PromptSentEvent = {},
+ ): void {
+   ctx.addEvent('gen_ai.prompt.sent', buildPromptSentAttrs(event));
+ }
+
+ /**
+  * Record a successful provider response. Call after the response arrives
+  * (for non-streaming) or after the stream completes.
+  */
+ export function recordResponseReceived(
+   ctx: TraceContext,
+   event: ResponseReceivedEvent = {},
+ ): void {
+   ctx.addEvent('gen_ai.response.received', buildResponseAttrs(event));
+ }
+
+ /**
+  * Record a retry attempt on an LLM call. Call *before* sleeping for
+  * `delayMs` so the event timestamp accurately marks when the retry
+  * decision was made.
+  */
+ export function recordRetry(ctx: TraceContext, event: RetryEvent): void {
+   ctx.addEvent('gen_ai.retry', buildRetryAttrs(event));
+ }
+
+ /**
+  * Record a tool / function call made in the course of an agent step.
+  * Emits an event rather than a child span because many frameworks fire
+  * several tool calls within a single provider response.
+  */
+ export function recordToolCall(ctx: TraceContext, event: ToolCallEvent): void {
+   ctx.addEvent('gen_ai.tool.call', buildToolCallAttrs(event));
+ }
+
+ /**
+  * Record the time-to-first-token for a streaming response. Pair with
+  * `recordResponseReceived` at the end so the span carries both the TTFT
+  * marker and the final usage numbers.
+  */
+ export function recordStreamFirstToken(
+   ctx: TraceContext,
+   event: StreamFirstTokenEvent = {},
+ ): void {
+   ctx.addEvent('gen_ai.stream.first_token', buildStreamFirstTokenAttrs(event));
+ }
+
+ // ---- Attribute builders -------------------------------------------------
+
+ function buildPromptSentAttrs(event: PromptSentEvent): EventAttrs {
+   const attrs: EventAttrs = {};
+   if (event.model) attrs['gen_ai.request.model'] = event.model;
+   if (event.promptTokens !== undefined)
+     attrs['gen_ai.usage.input_tokens'] = event.promptTokens;
+   if (event.messageCount !== undefined)
+     attrs['gen_ai.request.message_count'] = event.messageCount;
+   if (event.operation) attrs['gen_ai.operation.name'] = event.operation;
+   return attrs;
+ }
+
+ function buildResponseAttrs(event: ResponseReceivedEvent): EventAttrs {
+   const attrs: EventAttrs = {};
+   if (event.model) attrs['gen_ai.response.model'] = event.model;
+   if (event.promptTokens !== undefined)
+     attrs['gen_ai.usage.input_tokens'] = event.promptTokens;
+   if (event.completionTokens !== undefined)
+     attrs['gen_ai.usage.output_tokens'] = event.completionTokens;
+   if (event.totalTokens !== undefined)
+     attrs['gen_ai.usage.total_tokens'] = event.totalTokens;
+   if (event.finishReasons && event.finishReasons.length > 0) {
+     // Arrays aren't primitive AttributeValues on this context, so join.
+     attrs['gen_ai.response.finish_reasons'] = event.finishReasons.join(',');
+   }
+   return attrs;
+ }
+
+ function buildRetryAttrs(event: RetryEvent): EventAttrs {
+   const attrs: EventAttrs = { 'retry.attempt': event.attempt };
+   if (event.reason) attrs['retry.reason'] = event.reason;
+   if (event.delayMs !== undefined) attrs['retry.delay_ms'] = event.delayMs;
+   if (event.statusCode !== undefined)
+     attrs['http.response.status_code'] = event.statusCode;
+   return attrs;
+ }
+
+ function buildToolCallAttrs(event: ToolCallEvent): EventAttrs {
+   const attrs: EventAttrs = { 'gen_ai.tool.name': event.toolName };
+   if (event.toolCallId) attrs['gen_ai.tool.call.id'] = event.toolCallId;
+   if (event.arguments) attrs['gen_ai.tool.arguments'] = event.arguments;
+   return attrs;
+ }
+
+ function buildStreamFirstTokenAttrs(event: StreamFirstTokenEvent): EventAttrs {
+   const attrs: EventAttrs = {};
+   if (event.tokensSoFar !== undefined)
+     attrs['gen_ai.stream.tokens_so_far'] = event.tokensSoFar;
+   return attrs;
+ }
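
The JSDoc above pairs `recordStreamFirstToken` with `recordResponseReceived` for streaming, but the in-file example only covers the retry path. Here is a minimal sketch of the streaming pairing, assuming the `trace(name, ctx => fn)` wrapper shown in the module's own example and an OpenAI-style streaming client; the `openai` client, model name, and finish reason are illustrative, not part of this package:

```typescript
import OpenAI from 'openai';
import {
  trace,
  recordPromptSent,
  recordStreamFirstToken,
  recordResponseReceived,
} from 'autotel';

const openai = new OpenAI();

export const streamChat = trace('stream-chat', ctx => async (prompt: string) => {
  recordPromptSent(ctx, { model: 'gpt-4o', messageCount: 1, operation: 'chat' });

  const stream = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: prompt }],
    stream: true,
  });

  let sawFirstToken = false;
  let text = '';
  for await (const chunk of stream) {
    if (!sawFirstToken) {
      sawFirstToken = true;
      // TTFT marker: a dot on the span timeline at the first streamed chunk.
      recordStreamFirstToken(ctx);
    }
    text += chunk.choices[0]?.delta?.content ?? '';
  }

  // Close the loop once the stream finishes so the same span carries
  // both the TTFT marker and the final response event.
  recordResponseReceived(ctx, { model: 'gpt-4o', finishReasons: ['stop'] });
  return text;
});
```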

gen-ai-metrics.test.ts ADDED
@@ -0,0 +1,96 @@
+ import { describe, expect, it } from 'vitest';
+ import { AggregationType } from '@opentelemetry/sdk-metrics';
+ import {
+   GEN_AI_COST_USD_BUCKETS,
+   GEN_AI_DURATION_BUCKETS_SECONDS,
+   GEN_AI_TOKEN_USAGE_BUCKETS,
+   genAiMetricViews,
+   llmHistogramAdvice,
+ } from './gen-ai-metrics';
+
+ describe('gen-ai-metrics', () => {
+   it('bucket arrays are strictly ascending (required by Prometheus + OTel)', () => {
+     for (const buckets of [
+       GEN_AI_DURATION_BUCKETS_SECONDS,
+       GEN_AI_TOKEN_USAGE_BUCKETS,
+       GEN_AI_COST_USD_BUCKETS,
+     ]) {
+       for (let i = 1; i < buckets.length; i++) {
+         expect(
+           buckets[i]! > buckets[i - 1]!,
+           `index ${i} not ascending: ${buckets[i - 1]} → ${buckets[i]}`,
+         ).toBe(true);
+       }
+     }
+   });
+
+   it('duration buckets cover tail through 5 minutes for reasoning models', () => {
+     expect(GEN_AI_DURATION_BUCKETS_SECONDS[0]).toBeLessThanOrEqual(0.05);
+     expect(
+       GEN_AI_DURATION_BUCKETS_SECONDS[
+         GEN_AI_DURATION_BUCKETS_SECONDS.length - 1
+       ],
+     ).toBeGreaterThanOrEqual(300);
+   });
+
+   it('token buckets cover up to a million-token context window', () => {
+     expect(
+       GEN_AI_TOKEN_USAGE_BUCKETS[GEN_AI_TOKEN_USAGE_BUCKETS.length - 1],
+     ).toBeGreaterThanOrEqual(1_000_000);
+   });
+
+   it('cost buckets resolve sub-cent spend', () => {
+     expect(GEN_AI_COST_USD_BUCKETS[0]).toBeLessThan(0.001);
+   });
+
+   it('bucket arrays are frozen — consumers cannot mutate shared state', () => {
+     expect(() => {
+       (GEN_AI_DURATION_BUCKETS_SECONDS as number[]).push(999);
+     }).toThrow();
+   });
+
+   it('llmHistogramAdvice returns explicitBucketBoundaries advice shape', () => {
+     const advice = llmHistogramAdvice('duration');
+     expect(advice.advice.explicitBucketBoundaries).toEqual([
+       ...GEN_AI_DURATION_BUCKETS_SECONDS,
+     ]);
+     // The returned array is a fresh copy so callers can mutate without
+     // affecting the shared constant.
+     advice.advice.explicitBucketBoundaries.push(0);
+     expect([...GEN_AI_DURATION_BUCKETS_SECONDS]).not.toContain(0);
+   });
+
+   it('genAiMetricViews targets the OTel GenAI instrument names with the right buckets', () => {
+     const views = genAiMetricViews();
+     expect(views).toHaveLength(3);
+
+     const byInstrument = Object.fromEntries(
+       views.map((v) => [v.instrumentName, v]),
+     );
+     expect(
+       byInstrument['gen_ai.client.operation.duration']?.aggregation,
+     ).toEqual({
+       type: AggregationType.EXPLICIT_BUCKET_HISTOGRAM,
+       options: { boundaries: [...GEN_AI_DURATION_BUCKETS_SECONDS] },
+     });
+     expect(byInstrument['gen_ai.client.token.usage']?.aggregation).toEqual({
+       type: AggregationType.EXPLICIT_BUCKET_HISTOGRAM,
+       options: { boundaries: [...GEN_AI_TOKEN_USAGE_BUCKETS] },
+     });
+     expect(byInstrument['gen_ai.client.cost.usd']?.aggregation).toEqual({
+       type: AggregationType.EXPLICIT_BUCKET_HISTOGRAM,
+       options: { boundaries: [...GEN_AI_COST_USD_BUCKETS] },
+     });
+   });
+
+   it('genAiMetricViews accepts extra instruments', () => {
+     const views = genAiMetricViews([
+       { instrumentName: 'custom.llm.prompt_tokens', kind: 'tokens' },
+     ]);
+     expect(views).toHaveLength(4);
+     const custom = views.find(
+       (v) => v.instrumentName === 'custom.llm.prompt_tokens',
+     );
+     expect(custom).toBeDefined();
+   });
+ });

gen-ai-metrics.ts ADDED
@@ -0,0 +1,128 @@
+ /**
+  * LLM-tuned histogram buckets.
+  *
+  * Default OpenTelemetry histogram buckets target HTTP latency (0ms–10s)
+  * and small counter values. LLM workloads have very different shapes:
+  *
+  * - **Duration**: single-token prompts can be fast (50ms), long
+  *   generations and reasoning models can run for minutes. Default buckets
+  *   crush everything above 10s into one bucket.
+  * - **Token usage**: heavily right-skewed. A single request can range
+  *   from tens of tokens to million-token context windows.
+  * - **Cost (USD)**: per-request values are tiny (fractions of a cent),
+  *   so linear buckets waste resolution at the low end.
+  *
+  * This module exposes empirically chosen bucket arrays and a View helper
+  * so users can apply them to their `MeterProvider` without knowing the
+  * exact instrument names emitted by OpenAI/Anthropic/Traceloop plugins.
+  *
+  * @example
+  * ```typescript
+  * import { NodeSDK } from '@opentelemetry/sdk-node';
+  * import { genAiMetricViews } from 'autotel';
+  *
+  * const sdk = new NodeSDK({
+  *   serviceName: 'my-agent',
+  *   views: [...genAiMetricViews()],
+  * });
+  * sdk.start();
+  * ```
+  */
+
+ import { AggregationType, type ViewOptions } from '@opentelemetry/sdk-metrics';
+
+ /**
+  * Duration buckets for LLM operations, in **seconds**. Covers fast
+  * completions (50ms) through long-running reasoning jobs (5 min).
+  *
+  * Aligns with the OTel GenAI semantic conventions' published advice for
+  * `gen_ai.client.operation.duration`.
+  */
+ export const GEN_AI_DURATION_BUCKETS_SECONDS: readonly number[] = Object.freeze(
+   [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 20, 30, 60, 120, 300],
+ );
+
+ /**
+  * Token-count buckets for prompt, completion, and total token histograms.
+  * Ranges from tiny prompts to million-token context windows.
+  *
+  * Aligns with the OTel GenAI semantic conventions' published advice for
+  * `gen_ai.client.token.usage`.
+  */
+ export const GEN_AI_TOKEN_USAGE_BUCKETS: readonly number[] = Object.freeze([
+   1, 4, 16, 64, 256, 1_024, 4_096, 16_384, 65_536, 262_144, 1_048_576,
+   4_194_304,
+ ]);
+
+ /**
+  * USD cost buckets. Sub-cent resolution at the low end (fractions of a
+  * cent per small call) up to tens of dollars (batch jobs, Opus/o1 runs).
+  */
+ export const GEN_AI_COST_USD_BUCKETS: readonly number[] = Object.freeze([
+   0.000_01, 0.000_1, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50,
+ ]);
+
+ /**
+  * Instrument-level advice object to spread into `createHistogram`'s options.
+  * Use when you control the instrument creation (e.g. custom business
+  * LLM metrics); `genAiMetricViews()` is better when the metric comes
+  * from a third-party plugin.
+  */
+ export function llmHistogramAdvice(kind: 'duration' | 'tokens' | 'cost'): {
+   advice: { explicitBucketBoundaries: number[] };
+ } {
+   const boundaries =
+     kind === 'duration'
+       ? GEN_AI_DURATION_BUCKETS_SECONDS
+       : kind === 'tokens'
+         ? GEN_AI_TOKEN_USAGE_BUCKETS
+         : GEN_AI_COST_USD_BUCKETS;
+   return { advice: { explicitBucketBoundaries: [...boundaries] } };
+ }
+
+ /**
+  * Returns `View`s that re-bucket the standard OTel GenAI histograms. Pass
+  * the result to your `MeterProvider`'s `views` option.
+  *
+  * Matches instrument names emitted by:
+  * - OpenTelemetry GenAI autoinstrumentation
+  * - OpenInference / OpenLLMetry (traceloop)
+  * - Arize Phoenix, LangSmith, etc. that follow the OTel spec
+  *
+  * Add more instrument patterns via the `extra` argument if you emit
+  * custom LLM metrics.
+  */
+ export function genAiMetricViews(
+   extra: {
+     instrumentName: string;
+     kind: 'duration' | 'tokens' | 'cost';
+   }[] = [],
+ ): ViewOptions[] {
+   const defaults: Array<{
+     instrumentName: string;
+     kind: 'duration' | 'tokens' | 'cost';
+   }> = [
+     { instrumentName: 'gen_ai.client.operation.duration', kind: 'duration' },
+     { instrumentName: 'gen_ai.client.token.usage', kind: 'tokens' },
+     // Autotel-emitted cost metric. No-op if you don't emit it.
+     { instrumentName: 'gen_ai.client.cost.usd', kind: 'cost' },
+   ];
+
+   return [...defaults, ...extra].map(
+     ({ instrumentName, kind }) =>
+       ({
+         instrumentName,
+         aggregation: {
+           type: AggregationType.EXPLICIT_BUCKET_HISTOGRAM,
+           options: {
+             boundaries:
+               kind === 'duration'
+                 ? [...GEN_AI_DURATION_BUCKETS_SECONDS]
+                 : kind === 'tokens'
+                   ? [...GEN_AI_TOKEN_USAGE_BUCKETS]
+                   : [...GEN_AI_COST_USD_BUCKETS],
+           },
+         },
+       }) satisfies ViewOptions,
+   );
+ }
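
The advice path that `llmHistogramAdvice` supports is tested above but never shown against a real instrument. A minimal sketch, assuming the helper is re-exported from the package root the way `genAiMetricViews` is in the module example; the meter name and `myapp.llm.request_cost` instrument are illustrative:

```typescript
import { metrics } from '@opentelemetry/api';
import { llmHistogramAdvice } from 'autotel';

const meter = metrics.getMeter('my-agent');

// Spread the { advice: { explicitBucketBoundaries } } shape into the
// instrument options; a View on the MeterProvider can still override it.
const requestCost = meter.createHistogram('myapp.llm.request_cost', {
  description: 'Per-request LLM spend in USD',
  ...llmHistogramAdvice('cost'),
});

requestCost.record(0.0034, { 'gen_ai.request.model': 'gpt-4o' });
```

Advice suits instruments you create yourself; for histograms emitted by third-party instrumentation, the `genAiMetricViews()` route shown in the module's own example is the one that applies.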