@openclaw/diagnostics-otel 2026.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts ADDED
@@ -0,0 +1,16 @@
1
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
2
+ import { emptyPluginConfigSchema } from "openclaw/plugin-sdk";
3
+
4
+ import { createDiagnosticsOtelService } from "./src/service.js";
5
+
6
/**
 * Plugin definition for the `diagnostics-otel` extension.
 *
 * The plugin takes no configuration of its own (empty config schema); its only
 * job at registration time is to hand the OpenTelemetry diagnostics service to
 * the host through `api.registerService`.
 */
const plugin = {
  id: "diagnostics-otel",
  name: "Diagnostics OpenTelemetry",
  description: "Export diagnostics events to OpenTelemetry",
  configSchema: emptyPluginConfigSchema(),
  register(api: OpenClawPluginApi) {
    // A fresh service instance is created for every registration call.
    const otelService = createDiagnosticsOtelService();
    api.registerService(otelService);
  },
};

export default plugin;
@@ -0,0 +1,8 @@
1
+ {
2
+ "id": "diagnostics-otel",
3
+ "configSchema": {
4
+ "type": "object",
5
+ "additionalProperties": false,
6
+ "properties": {}
7
+ }
8
+ }
package/package.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "@openclaw/diagnostics-otel",
3
+ "version": "2026.1.29",
4
+ "type": "module",
5
+ "description": "OpenClaw diagnostics OpenTelemetry exporter",
6
+ "openclaw": {
7
+ "extensions": [
8
+ "./index.ts"
9
+ ]
10
+ },
11
+ "dependencies": {
12
+ "@opentelemetry/api": "^1.9.0",
13
+ "@opentelemetry/api-logs": "^0.211.0",
14
+ "@opentelemetry/exporter-logs-otlp-http": "^0.211.0",
15
+ "@opentelemetry/exporter-metrics-otlp-http": "^0.211.0",
16
+ "@opentelemetry/exporter-trace-otlp-http": "^0.211.0",
17
+ "@opentelemetry/resources": "^2.5.0",
18
+ "@opentelemetry/sdk-logs": "^0.211.0",
19
+ "@opentelemetry/sdk-metrics": "^2.5.0",
20
+ "@opentelemetry/sdk-node": "^0.211.0",
21
+ "@opentelemetry/sdk-trace-base": "^2.5.0",
22
+ "@opentelemetry/semantic-conventions": "^1.39.0"
23
+ }
24
+ }
@@ -0,0 +1,220 @@
1
+ import { beforeEach, describe, expect, test, vi } from "vitest";
2
+
3
// Everything below is created through `vi.hoisted` so that the `vi.mock`
// factories in this file can reference these objects.

/** Stand-in for `registerLogTransport` from the plugin SDK. */
const registerLogTransportMock = vi.hoisted(() => vi.fn());

/**
 * Fake meter/tracer pair. Every instrument created through the fake meter is
 * stored by name in `counters` / `histograms` so tests can look it up later.
 */
const telemetryState = vi.hoisted(() => {
  type MockFn = ReturnType<typeof vi.fn>;

  const counters = new Map<string, { add: MockFn }>();
  const histograms = new Map<string, { record: MockFn }>();

  const startSpan = vi.fn((_name: string, _opts?: unknown) => ({
    end: vi.fn(),
    setStatus: vi.fn(),
  }));

  const createCounter = vi.fn((name: string) => {
    const instrument = { add: vi.fn() };
    counters.set(name, instrument);
    return instrument;
  });

  const createHistogram = vi.fn((name: string) => {
    const instrument = { record: vi.fn() };
    histograms.set(name, instrument);
    return instrument;
  });

  return {
    counters,
    histograms,
    tracer: { startSpan },
    meter: { createCounter, createHistogram },
  };
});

// Lifecycle spies shared by the mocked NodeSDK and LoggerProvider classes.
const sdkStart = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
const sdkShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
const logEmit = vi.hoisted(() => vi.fn());
const logShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
33
+
34
// --- Module mocks ----------------------------------------------------------
// Each OpenTelemetry package the service imports is replaced by a minimal fake
// so the test never opens a network connection.

// API entry points return the shared fake meter/tracer from `telemetryState`.
vi.mock("@opentelemetry/api", () => ({
  metrics: {
    getMeter: () => telemetryState.meter,
  },
  trace: {
    getTracer: () => telemetryState.tracer,
  },
  SpanStatusCode: {
    ERROR: 2,
  },
}));

// NodeSDK only exposes the lifecycle spies.
vi.mock("@opentelemetry/sdk-node", () => ({
  NodeSDK: class {
    start = sdkStart;
    shutdown = sdkShutdown;
  },
}));

vi.mock("@opentelemetry/exporter-metrics-otlp-http", () => ({
  OTLPMetricExporter: class {},
}));

vi.mock("@opentelemetry/exporter-trace-otlp-http", () => ({
  OTLPTraceExporter: class {},
}));

vi.mock("@opentelemetry/exporter-logs-otlp-http", () => ({
  OTLPLogExporter: class {},
}));

// The fake LoggerProvider ignores its constructor arguments, so it works both
// when processors are passed to the constructor and when they are attached
// through `addLogRecordProcessor`.
vi.mock("@opentelemetry/sdk-logs", () => ({
  BatchLogRecordProcessor: class {},
  LoggerProvider: class {
    addLogRecordProcessor = vi.fn();
    getLogger = vi.fn(() => ({
      emit: logEmit,
    }));
    shutdown = logShutdown;
  },
}));

vi.mock("@opentelemetry/sdk-metrics", () => ({
  PeriodicExportingMetricReader: class {},
}));

vi.mock("@opentelemetry/sdk-trace-base", () => ({
  ParentBasedSampler: class {},
  TraceIdRatioBasedSampler: class {},
}));

// The package version pinned in package.json (2.x) builds resources with the
// `resourceFromAttributes` factory; the legacy `Resource` class is kept in the
// mock as well so the test works with either construction style.
vi.mock("@opentelemetry/resources", () => ({
  Resource: class {
    // eslint-disable-next-line @typescript-eslint/no-useless-constructor
    constructor(_value?: unknown) {}
  },
  resourceFromAttributes: (attributes?: Record<string, unknown>) => ({
    attributes: attributes ?? {},
  }),
}));

vi.mock("@opentelemetry/semantic-conventions", () => ({
  SemanticResourceAttributes: {
    SERVICE_NAME: "service.name",
  },
}));

// Keep the real plugin SDK (the test needs the real `emitDiagnosticEvent`) and
// override only the log-transport registration.
vi.mock("openclaw/plugin-sdk", async () => {
  const actual = await vi.importActual<typeof import("openclaw/plugin-sdk")>("openclaw/plugin-sdk");
  return {
    ...actual,
    registerLogTransport: registerLogTransportMock,
  };
});
105
+
106
+ import { createDiagnosticsOtelService } from "./service.js";
107
+ import { emitDiagnosticEvent } from "openclaw/plugin-sdk";
108
+
109
describe("diagnostics-otel service", () => {
  beforeEach(() => {
    // Drop instruments and call history left over from a previous test.
    telemetryState.counters.clear();
    telemetryState.histograms.clear();
    telemetryState.tracer.startSpan.mockClear();
    telemetryState.meter.createCounter.mockClear();
    telemetryState.meter.createHistogram.mockClear();
    sdkStart.mockClear();
    sdkShutdown.mockClear();
    logEmit.mockClear();
    logShutdown.mockClear();
    registerLogTransportMock.mockReset();
  });

  test("records message-flow metrics and spans", async () => {
    // Capture the transport the service registers so a log record can be
    // pushed through it by hand further down.
    const registeredTransports: Array<(logObj: Record<string, unknown>) => void> = [];
    const stopTransport = vi.fn();
    registerLogTransportMock.mockImplementation((transport) => {
      registeredTransports.push(transport);
      return stopTransport;
    });

    const service = createDiagnosticsOtelService();
    await service.start({
      config: {
        diagnostics: {
          enabled: true,
          otel: {
            enabled: true,
            endpoint: "http://otel-collector:4318",
            protocol: "http/protobuf",
            traces: true,
            metrics: true,
            logs: true,
          },
        },
      },
      logger: {
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
        debug: vi.fn(),
      },
    });

    // `finally` guarantees the diagnostic-event subscription is released even
    // when one of the assertions below throws.
    try {
      expect(sdkStart).toHaveBeenCalledTimes(1);

      emitDiagnosticEvent({
        type: "webhook.received",
        channel: "telegram",
        updateType: "telegram-post",
      });
      emitDiagnosticEvent({
        type: "webhook.processed",
        channel: "telegram",
        updateType: "telegram-post",
        durationMs: 120,
      });
      emitDiagnosticEvent({
        type: "message.queued",
        channel: "telegram",
        source: "telegram",
        queueDepth: 2,
      });
      emitDiagnosticEvent({
        type: "message.processed",
        channel: "telegram",
        outcome: "completed",
        durationMs: 55,
      });
      emitDiagnosticEvent({
        type: "queue.lane.dequeue",
        lane: "main",
        queueSize: 3,
        waitMs: 10,
      });
      emitDiagnosticEvent({
        type: "session.stuck",
        state: "processing",
        ageMs: 125_000,
      });
      emitDiagnosticEvent({
        type: "run.attempt",
        runId: "run-1",
        attempt: 2,
      });

      expect(telemetryState.counters.get("openclaw.webhook.received")?.add).toHaveBeenCalled();
      expect(telemetryState.histograms.get("openclaw.webhook.duration_ms")?.record).toHaveBeenCalled();
      expect(telemetryState.counters.get("openclaw.message.queued")?.add).toHaveBeenCalled();
      expect(telemetryState.counters.get("openclaw.message.processed")?.add).toHaveBeenCalled();
      expect(telemetryState.histograms.get("openclaw.message.duration_ms")?.record).toHaveBeenCalled();
      expect(telemetryState.histograms.get("openclaw.queue.wait_ms")?.record).toHaveBeenCalled();
      expect(telemetryState.counters.get("openclaw.session.stuck")?.add).toHaveBeenCalled();
      expect(telemetryState.histograms.get("openclaw.session.stuck_age_ms")?.record).toHaveBeenCalled();
      expect(telemetryState.counters.get("openclaw.run.attempt")?.add).toHaveBeenCalled();

      const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
      expect(spanNames).toContain("openclaw.webhook.processed");
      expect(spanNames).toContain("openclaw.message.processed");
      expect(spanNames).toContain("openclaw.session.stuck");

      expect(registerLogTransportMock).toHaveBeenCalledTimes(1);
      expect(registeredTransports).toHaveLength(1);
      // Positional args: a JSON bindings string first, the message last.
      registeredTransports[0]?.({
        0: "{\"subsystem\":\"diagnostic\"}",
        1: "hello",
        _meta: { logLevelName: "INFO", date: new Date() },
      });
      expect(logEmit).toHaveBeenCalled();
    } finally {
      await service.stop?.();
    }

    // Stopping must detach the log transport and shut down both pipelines.
    expect(stopTransport).toHaveBeenCalledTimes(1);
    expect(logShutdown).toHaveBeenCalledTimes(1);
    expect(sdkShutdown).toHaveBeenCalledTimes(1);
  });
});
package/src/service.ts ADDED
@@ -0,0 +1,566 @@
1
+ import { metrics, trace, SpanStatusCode } from "@opentelemetry/api";
2
+ import type { SeverityNumber } from "@opentelemetry/api-logs";
3
+ import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
4
+ import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
5
+ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
6
+ import { Resource } from "@opentelemetry/resources";
7
+ import { BatchLogRecordProcessor, LoggerProvider } from "@opentelemetry/sdk-logs";
8
+ import { PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
9
+ import { NodeSDK } from "@opentelemetry/sdk-node";
10
+ import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base";
11
+ import { SemanticResourceAttributes } from "@opentelemetry/semantic-conventions";
12
+
13
+ import type { DiagnosticEventPayload, OpenClawPluginService } from "openclaw/plugin-sdk";
14
+ import { onDiagnosticEvent, registerLogTransport } from "openclaw/plugin-sdk";
15
+
16
+ const DEFAULT_SERVICE_NAME = "openclaw";
17
+
18
/**
 * Cleans up a user-supplied collector endpoint.
 *
 * Surrounding whitespace is removed and every trailing `/` is stripped so a
 * path can later be appended with a single slash.
 *
 * @param endpoint Raw endpoint string, if one was configured.
 * @returns The cleaned endpoint, or `undefined` when the input is missing or
 *   contains only whitespace.
 */
function normalizeEndpoint(endpoint?: string): string | undefined {
  if (endpoint == null) return undefined;

  const candidate = endpoint.trim();
  if (candidate === "") return undefined;

  return candidate.replace(/\/+$/, "");
}
22
+
23
/**
 * Builds the OTLP/HTTP URL for one signal from a configured endpoint.
 *
 * - No endpoint: returns `undefined`, so the caller passes no `url` option to
 *   the exporter.
 * - Endpoint that already ends in a signal path (`/v1/traces`, `/v1/metrics`
 *   or `/v1/logs`, optionally followed by a query string or fragment): used
 *   verbatim, whatever `path` was requested.
 * - Anything else is treated as a base URL and `path` is appended.
 *
 * Only a trailing signal path counts as "already resolved". A base URL that
 * merely contains a `/v1/` segment (for example `https://gateway/v1/otlp`)
 * still gets the signal path appended instead of being used unchanged.
 *
 * @param endpoint Endpoint without trailing slashes, or `undefined`.
 * @param path Signal path without a leading slash, e.g. `"v1/traces"`.
 * @returns The URL to hand to the exporter, or `undefined`.
 */
function resolveOtelUrl(endpoint: string | undefined, path: string): string | undefined {
  if (!endpoint) return undefined;

  // Ignore any query string / fragment when looking for the signal suffix.
  const pathPart = endpoint.split(/[?#]/, 1)[0] ?? endpoint;
  if (/\/v1\/(?:traces|metrics|logs)$/i.test(pathPart)) return endpoint;

  return `${endpoint}/${path}`;
}
28
+
29
/**
 * Validates a configured trace sample rate.
 *
 * @param value Candidate ratio from configuration.
 * @returns `value` when it is a finite number within `[0, 1]`; otherwise
 *   `undefined`, which the caller treats as "no sampler configured".
 */
function resolveSampleRate(value: number | undefined): number | undefined {
  const isValidRatio =
    typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1;
  return isValidRatio ? value : undefined;
}
34
+
35
/**
 * Creates the plugin service that exports OpenClaw diagnostics over OTLP/HTTP.
 *
 * `start(ctx)`:
 * - does nothing unless both `diagnostics.enabled` and `diagnostics.otel.enabled`
 *   are truthy in `ctx.config`;
 * - only supports the `http/protobuf` protocol (anything else logs a warning
 *   and returns);
 * - traces and metrics are on unless explicitly set to `false`; logs are off
 *   unless explicitly set to `true`;
 * - starts a `NodeSDK` for traces/metrics, optionally a `LoggerProvider` fed by
 *   a log transport registered through `registerLogTransport`, and subscribes
 *   to diagnostic events via `onDiagnosticEvent`.
 *
 * `stop()` releases everything `start` acquired; shutdown errors from the
 * OpenTelemetry providers are intentionally ignored (best effort).
 *
 * @returns A service object with id `"diagnostics-otel"`.
 */
export function createDiagnosticsOtelService(): OpenClawPluginService {
  // Handles acquired by `start` and released by `stop`.
  let sdk: NodeSDK | null = null;
  let logProvider: LoggerProvider | null = null;
  let stopLogTransport: (() => void) | null = null;
  let unsubscribe: (() => void) | null = null;

  return {
    id: "diagnostics-otel",
    async start(ctx) {
      const cfg = ctx.config.diagnostics;
      const otel = cfg?.otel;
      if (!cfg?.enabled || !otel?.enabled) return;

      const protocol = otel.protocol ?? process.env.OTEL_EXPORTER_OTLP_PROTOCOL ?? "http/protobuf";
      if (protocol !== "http/protobuf") {
        ctx.logger.warn(`diagnostics-otel: unsupported protocol ${protocol}`);
        return;
      }

      // Explicit config wins over the standard OTEL_* environment variables.
      const endpoint = normalizeEndpoint(otel.endpoint ?? process.env.OTEL_EXPORTER_OTLP_ENDPOINT);
      const headers = otel.headers ?? undefined;
      const serviceName =
        otel.serviceName?.trim() || process.env.OTEL_SERVICE_NAME || DEFAULT_SERVICE_NAME;
      const sampleRate = resolveSampleRate(otel.sampleRate);

      const tracesEnabled = otel.traces !== false;
      const metricsEnabled = otel.metrics !== false;
      const logsEnabled = otel.logs === true;
      if (!tracesEnabled && !metricsEnabled && !logsEnabled) return;

      // `@opentelemetry/resources` 2.x (the version pinned in package.json)
      // creates resources through this factory rather than a `Resource` class.
      const resource = resourceFromAttributes({
        [SemanticResourceAttributes.SERVICE_NAME]: serviceName,
      });

      const traceUrl = resolveOtelUrl(endpoint, "v1/traces");
      const metricUrl = resolveOtelUrl(endpoint, "v1/metrics");
      const logUrl = resolveOtelUrl(endpoint, "v1/logs");
      const traceExporter = tracesEnabled
        ? new OTLPTraceExporter({
            ...(traceUrl ? { url: traceUrl } : {}),
            ...(headers ? { headers } : {}),
          })
        : undefined;

      const metricExporter = metricsEnabled
        ? new OTLPMetricExporter({
            ...(metricUrl ? { url: metricUrl } : {}),
            ...(headers ? { headers } : {}),
          })
        : undefined;

      const metricReader = metricExporter
        ? new PeriodicExportingMetricReader({
            exporter: metricExporter,
            // Export interval is clamped to at least one second.
            ...(typeof otel.flushIntervalMs === "number"
              ? { exportIntervalMillis: Math.max(1000, otel.flushIntervalMs) }
              : {}),
          })
        : undefined;

      if (tracesEnabled || metricsEnabled) {
        sdk = new NodeSDK({
          resource,
          ...(traceExporter ? { traceExporter } : {}),
          ...(metricReader ? { metricReader } : {}),
          ...(sampleRate !== undefined
            ? {
                sampler: new ParentBasedSampler({
                  root: new TraceIdRatioBasedSampler(sampleRate),
                }),
              }
            : {}),
        });

        await sdk.start();
      }

      // Maps logger level names to OpenTelemetry severity numbers.
      const logSeverityMap: Record<string, SeverityNumber> = {
        TRACE: 1 as SeverityNumber,
        DEBUG: 5 as SeverityNumber,
        INFO: 9 as SeverityNumber,
        WARN: 13 as SeverityNumber,
        ERROR: 17 as SeverityNumber,
        FATAL: 21 as SeverityNumber,
      };

      const meter = metrics.getMeter("openclaw");
      const tracer = trace.getTracer("openclaw");

      const tokensCounter = meter.createCounter("openclaw.tokens", {
        unit: "1",
        description: "Token usage by type",
      });
      const costCounter = meter.createCounter("openclaw.cost.usd", {
        unit: "1",
        description: "Estimated model cost (USD)",
      });
      const durationHistogram = meter.createHistogram("openclaw.run.duration_ms", {
        unit: "ms",
        description: "Agent run duration",
      });
      const contextHistogram = meter.createHistogram("openclaw.context.tokens", {
        unit: "1",
        description: "Context window size and usage",
      });
      const webhookReceivedCounter = meter.createCounter("openclaw.webhook.received", {
        unit: "1",
        description: "Webhook requests received",
      });
      const webhookErrorCounter = meter.createCounter("openclaw.webhook.error", {
        unit: "1",
        description: "Webhook processing errors",
      });
      const webhookDurationHistogram = meter.createHistogram("openclaw.webhook.duration_ms", {
        unit: "ms",
        description: "Webhook processing duration",
      });
      const messageQueuedCounter = meter.createCounter("openclaw.message.queued", {
        unit: "1",
        description: "Messages queued for processing",
      });
      const messageProcessedCounter = meter.createCounter("openclaw.message.processed", {
        unit: "1",
        description: "Messages processed by outcome",
      });
      const messageDurationHistogram = meter.createHistogram("openclaw.message.duration_ms", {
        unit: "ms",
        description: "Message processing duration",
      });
      const queueDepthHistogram = meter.createHistogram("openclaw.queue.depth", {
        unit: "1",
        description: "Queue depth on enqueue/dequeue",
      });
      const queueWaitHistogram = meter.createHistogram("openclaw.queue.wait_ms", {
        unit: "ms",
        description: "Queue wait time before execution",
      });
      const laneEnqueueCounter = meter.createCounter("openclaw.queue.lane.enqueue", {
        unit: "1",
        description: "Command queue lane enqueue events",
      });
      const laneDequeueCounter = meter.createCounter("openclaw.queue.lane.dequeue", {
        unit: "1",
        description: "Command queue lane dequeue events",
      });
      const sessionStateCounter = meter.createCounter("openclaw.session.state", {
        unit: "1",
        description: "Session state transitions",
      });
      const sessionStuckCounter = meter.createCounter("openclaw.session.stuck", {
        unit: "1",
        description: "Sessions stuck in processing",
      });
      const sessionStuckAgeHistogram = meter.createHistogram("openclaw.session.stuck_age_ms", {
        unit: "ms",
        description: "Age of stuck sessions",
      });
      const runAttemptCounter = meter.createCounter("openclaw.run.attempt", {
        unit: "1",
        description: "Run attempts",
      });

      if (logsEnabled) {
        const logExporter = new OTLPLogExporter({
          ...(logUrl ? { url: logUrl } : {}),
          ...(headers ? { headers } : {}),
        });
        const logProcessor = new BatchLogRecordProcessor(logExporter, {
          // Batch delay is clamped to at least one second.
          ...(typeof otel.flushIntervalMs === "number"
            ? { scheduledDelayMillis: Math.max(1000, otel.flushIntervalMs) }
            : {}),
        });
        // Processors are supplied through the constructor, which is how the
        // pinned `@opentelemetry/sdk-logs` version attaches them.
        logProvider = new LoggerProvider({ resource, processors: [logProcessor] });
        const otelLogger = logProvider.getLogger("openclaw");

        // JSON-encodes a value, falling back to `String()` when encoding throws
        // (e.g. on circular structures).
        const safeStringify = (value: unknown) => {
          try {
            return JSON.stringify(value);
          } catch {
            return String(value);
          }
        };

        stopLogTransport = registerLogTransport((logObj) => {
          const meta = (logObj as Record<string, unknown>)._meta as
            | {
                logLevelName?: string;
                date?: Date;
                name?: string;
                parentNames?: string[];
                path?: {
                  filePath?: string;
                  fileLine?: string;
                  fileColumn?: string;
                  filePathWithLine?: string;
                  method?: string;
                };
              }
            | undefined;
          const logLevelName = meta?.logLevelName ?? "INFO";
          // Unknown level names are reported with the INFO severity number.
          const severityNumber = logSeverityMap[logLevelName] ?? (9 as SeverityNumber);

          // Positional log arguments are stored under the keys "0", "1", ...;
          // collect them in numeric order.
          const numericArgs = Object.entries(logObj)
            .filter(([key]) => /^\d+$/.test(key))
            .sort((a, b) => Number(a[0]) - Number(b[0]))
            .map(([, value]) => value);

          // A leading string that parses to a JSON object is treated as a set
          // of logger bindings and removed from the positional arguments.
          let bindings: Record<string, unknown> | undefined;
          if (typeof numericArgs[0] === "string" && numericArgs[0].trim().startsWith("{")) {
            try {
              const parsed = JSON.parse(numericArgs[0]);
              if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
                bindings = parsed as Record<string, unknown>;
                numericArgs.shift();
              }
            } catch {
              // ignore malformed json bindings
            }
          }

          // The last string argument becomes the record body; a lone
          // non-string argument is stringified instead.
          let message = "";
          if (numericArgs.length > 0 && typeof numericArgs[numericArgs.length - 1] === "string") {
            message = String(numericArgs.pop());
          } else if (numericArgs.length === 1) {
            message = safeStringify(numericArgs[0]);
            numericArgs.length = 0;
          }
          if (!message) {
            message = "log";
          }

          const attributes: Record<string, string | number | boolean> = {
            "openclaw.log.level": logLevelName,
          };
          if (meta?.name) attributes["openclaw.logger"] = meta.name;
          if (meta?.parentNames?.length) {
            attributes["openclaw.logger.parents"] = meta.parentNames.join(".");
          }
          if (bindings) {
            for (const [key, value] of Object.entries(bindings)) {
              if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
                attributes[`openclaw.${key}`] = value;
              } else if (value != null) {
                attributes[`openclaw.${key}`] = safeStringify(value);
              }
            }
          }
          // Whatever positional arguments remain are kept as one JSON string.
          if (numericArgs.length > 0) {
            attributes["openclaw.log.args"] = safeStringify(numericArgs);
          }
          if (meta?.path?.filePath) attributes["code.filepath"] = meta.path.filePath;
          if (meta?.path?.fileLine) {
            // `fileLine` is a string; skip it rather than export NaN when it
            // is not numeric.
            const lineNumber = Number(meta.path.fileLine);
            if (Number.isFinite(lineNumber)) attributes["code.lineno"] = lineNumber;
          }
          if (meta?.path?.method) attributes["code.function"] = meta.path.method;
          if (meta?.path?.filePathWithLine) {
            attributes["openclaw.code.location"] = meta.path.filePathWithLine;
          }

          otelLogger.emit({
            body: message,
            severityText: logLevelName,
            severityNumber,
            attributes,
            timestamp: meta?.date ?? new Date(),
          });
        });
      }

      // Starts a span; when a duration is known the start time is back-dated by
      // that duration (negative durations count as 0) so that ending the span
      // immediately yields a span of roughly that length.
      const spanWithDuration = (
        name: string,
        attributes: Record<string, string | number>,
        durationMs?: number,
      ) => {
        const startTime =
          typeof durationMs === "number" ? Date.now() - Math.max(0, durationMs) : undefined;
        const span = tracer.startSpan(name, {
          attributes,
          ...(startTime ? { startTime } : {}),
        });
        return span;
      };

      // model.usage: token/cost counters, duration and context histograms, and
      // (when traces are enabled) an `openclaw.model.usage` span.
      const recordModelUsage = (evt: Extract<DiagnosticEventPayload, { type: "model.usage" }>) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.provider": evt.provider ?? "unknown",
          "openclaw.model": evt.model ?? "unknown",
        };

        // Zero / missing values are skipped by the truthiness checks below.
        const usage = evt.usage;
        if (usage.input) tokensCounter.add(usage.input, { ...attrs, "openclaw.token": "input" });
        if (usage.output) tokensCounter.add(usage.output, { ...attrs, "openclaw.token": "output" });
        if (usage.cacheRead)
          tokensCounter.add(usage.cacheRead, { ...attrs, "openclaw.token": "cache_read" });
        if (usage.cacheWrite)
          tokensCounter.add(usage.cacheWrite, { ...attrs, "openclaw.token": "cache_write" });
        if (usage.promptTokens)
          tokensCounter.add(usage.promptTokens, { ...attrs, "openclaw.token": "prompt" });
        if (usage.total) tokensCounter.add(usage.total, { ...attrs, "openclaw.token": "total" });

        if (evt.costUsd) costCounter.add(evt.costUsd, attrs);
        if (evt.durationMs) durationHistogram.record(evt.durationMs, attrs);
        if (evt.context?.limit)
          contextHistogram.record(evt.context.limit, {
            ...attrs,
            "openclaw.context": "limit",
          });
        if (evt.context?.used)
          contextHistogram.record(evt.context.used, {
            ...attrs,
            "openclaw.context": "used",
          });

        if (!tracesEnabled) return;
        const spanAttrs: Record<string, string | number> = {
          ...attrs,
          "openclaw.sessionKey": evt.sessionKey ?? "",
          "openclaw.sessionId": evt.sessionId ?? "",
          "openclaw.tokens.input": usage.input ?? 0,
          "openclaw.tokens.output": usage.output ?? 0,
          "openclaw.tokens.cache_read": usage.cacheRead ?? 0,
          "openclaw.tokens.cache_write": usage.cacheWrite ?? 0,
          "openclaw.tokens.total": usage.total ?? 0,
        };

        const span = spanWithDuration("openclaw.model.usage", spanAttrs, evt.durationMs);
        span.end();
      };

      // webhook.received: counter only.
      const recordWebhookReceived = (
        evt: Extract<DiagnosticEventPayload, { type: "webhook.received" }>,
      ) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.webhook": evt.updateType ?? "unknown",
        };
        webhookReceivedCounter.add(1, attrs);
      };

      // webhook.processed: duration histogram plus an optional span.
      const recordWebhookProcessed = (
        evt: Extract<DiagnosticEventPayload, { type: "webhook.processed" }>,
      ) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.webhook": evt.updateType ?? "unknown",
        };
        if (typeof evt.durationMs === "number") {
          webhookDurationHistogram.record(evt.durationMs, attrs);
        }
        if (!tracesEnabled) return;
        const spanAttrs: Record<string, string | number> = { ...attrs };
        if (evt.chatId !== undefined) spanAttrs["openclaw.chatId"] = String(evt.chatId);
        const span = spanWithDuration("openclaw.webhook.processed", spanAttrs, evt.durationMs);
        span.end();
      };

      // webhook.error: error counter plus an optional span marked as ERROR.
      const recordWebhookError = (
        evt: Extract<DiagnosticEventPayload, { type: "webhook.error" }>,
      ) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.webhook": evt.updateType ?? "unknown",
        };
        webhookErrorCounter.add(1, attrs);
        if (!tracesEnabled) return;
        const spanAttrs: Record<string, string | number> = {
          ...attrs,
          "openclaw.error": evt.error,
        };
        if (evt.chatId !== undefined) spanAttrs["openclaw.chatId"] = String(evt.chatId);
        const span = tracer.startSpan("openclaw.webhook.error", {
          attributes: spanAttrs,
        });
        span.setStatus({ code: SpanStatusCode.ERROR, message: evt.error });
        span.end();
      };

      // message.queued: counter plus queue-depth sample when provided.
      const recordMessageQueued = (
        evt: Extract<DiagnosticEventPayload, { type: "message.queued" }>,
      ) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.source": evt.source ?? "unknown",
        };
        messageQueuedCounter.add(1, attrs);
        if (typeof evt.queueDepth === "number") {
          queueDepthHistogram.record(evt.queueDepth, attrs);
        }
      };

      // message.processed: counter, duration histogram and an optional span
      // that is marked as ERROR when the outcome is "error".
      const recordMessageProcessed = (
        evt: Extract<DiagnosticEventPayload, { type: "message.processed" }>,
      ) => {
        const attrs = {
          "openclaw.channel": evt.channel ?? "unknown",
          "openclaw.outcome": evt.outcome ?? "unknown",
        };
        messageProcessedCounter.add(1, attrs);
        if (typeof evt.durationMs === "number") {
          messageDurationHistogram.record(evt.durationMs, attrs);
        }
        if (!tracesEnabled) return;
        const spanAttrs: Record<string, string | number> = { ...attrs };
        if (evt.sessionKey) spanAttrs["openclaw.sessionKey"] = evt.sessionKey;
        if (evt.sessionId) spanAttrs["openclaw.sessionId"] = evt.sessionId;
        if (evt.chatId !== undefined) spanAttrs["openclaw.chatId"] = String(evt.chatId);
        if (evt.messageId !== undefined) spanAttrs["openclaw.messageId"] = String(evt.messageId);
        if (evt.reason) spanAttrs["openclaw.reason"] = evt.reason;
        const span = spanWithDuration("openclaw.message.processed", spanAttrs, evt.durationMs);
        if (evt.outcome === "error") {
          span.setStatus({ code: SpanStatusCode.ERROR, message: evt.error });
        }
        span.end();
      };

      // queue.lane.enqueue: counter plus queue-depth sample.
      const recordLaneEnqueue = (
        evt: Extract<DiagnosticEventPayload, { type: "queue.lane.enqueue" }>,
      ) => {
        const attrs = { "openclaw.lane": evt.lane };
        laneEnqueueCounter.add(1, attrs);
        queueDepthHistogram.record(evt.queueSize, attrs);
      };

      // queue.lane.dequeue: counter, queue-depth sample and wait time.
      const recordLaneDequeue = (
        evt: Extract<DiagnosticEventPayload, { type: "queue.lane.dequeue" }>,
      ) => {
        const attrs = { "openclaw.lane": evt.lane };
        laneDequeueCounter.add(1, attrs);
        queueDepthHistogram.record(evt.queueSize, attrs);
        if (typeof evt.waitMs === "number") {
          queueWaitHistogram.record(evt.waitMs, attrs);
        }
      };

      // session.state: transition counter.
      const recordSessionState = (
        evt: Extract<DiagnosticEventPayload, { type: "session.state" }>,
      ) => {
        const attrs: Record<string, string> = { "openclaw.state": evt.state };
        if (evt.reason) attrs["openclaw.reason"] = evt.reason;
        sessionStateCounter.add(1, attrs);
      };

      // session.stuck: counter, age histogram and an optional ERROR span.
      const recordSessionStuck = (
        evt: Extract<DiagnosticEventPayload, { type: "session.stuck" }>,
      ) => {
        const attrs: Record<string, string> = { "openclaw.state": evt.state };
        sessionStuckCounter.add(1, attrs);
        if (typeof evt.ageMs === "number") {
          sessionStuckAgeHistogram.record(evt.ageMs, attrs);
        }
        if (!tracesEnabled) return;
        const spanAttrs: Record<string, string | number> = { ...attrs };
        if (evt.sessionKey) spanAttrs["openclaw.sessionKey"] = evt.sessionKey;
        if (evt.sessionId) spanAttrs["openclaw.sessionId"] = evt.sessionId;
        spanAttrs["openclaw.queueDepth"] = evt.queueDepth ?? 0;
        spanAttrs["openclaw.ageMs"] = evt.ageMs;
        const span = tracer.startSpan("openclaw.session.stuck", { attributes: spanAttrs });
        span.setStatus({ code: SpanStatusCode.ERROR, message: "session stuck" });
        span.end();
      };

      // run.attempt: counter tagged with the attempt number.
      const recordRunAttempt = (evt: Extract<DiagnosticEventPayload, { type: "run.attempt" }>) => {
        runAttemptCounter.add(1, { "openclaw.attempt": evt.attempt });
      };

      // diagnostic.heartbeat: queued count recorded on the queue-depth
      // histogram under the synthetic "heartbeat" channel.
      const recordHeartbeat = (
        evt: Extract<DiagnosticEventPayload, { type: "diagnostic.heartbeat" }>,
      ) => {
        queueDepthHistogram.record(evt.queued, { "openclaw.channel": "heartbeat" });
      };

      // Route every diagnostic event to its recorder; event types without a
      // case are ignored.
      unsubscribe = onDiagnosticEvent((evt: DiagnosticEventPayload) => {
        switch (evt.type) {
          case "model.usage":
            recordModelUsage(evt);
            return;
          case "webhook.received":
            recordWebhookReceived(evt);
            return;
          case "webhook.processed":
            recordWebhookProcessed(evt);
            return;
          case "webhook.error":
            recordWebhookError(evt);
            return;
          case "message.queued":
            recordMessageQueued(evt);
            return;
          case "message.processed":
            recordMessageProcessed(evt);
            return;
          case "queue.lane.enqueue":
            recordLaneEnqueue(evt);
            return;
          case "queue.lane.dequeue":
            recordLaneDequeue(evt);
            return;
          case "session.state":
            recordSessionState(evt);
            return;
          case "session.stuck":
            recordSessionStuck(evt);
            return;
          case "run.attempt":
            recordRunAttempt(evt);
            return;
          case "diagnostic.heartbeat":
            recordHeartbeat(evt);
            return;
        }
      });

      if (logsEnabled) {
        ctx.logger.info("diagnostics-otel: logs exporter enabled (OTLP/HTTP)");
      }
    },
    async stop() {
      // Detach event/log sources first so nothing is emitted into providers
      // that are about to shut down.
      unsubscribe?.();
      unsubscribe = null;
      stopLogTransport?.();
      stopLogTransport = null;
      // Shutdown is best effort: provider errors are deliberately ignored.
      if (logProvider) {
        await logProvider.shutdown().catch(() => undefined);
        logProvider = null;
      }
      if (sdk) {
        await sdk.shutdown().catch(() => undefined);
        sdk = null;
      }
    },
  } satisfies OpenClawPluginService;
}