@openclaw/diagnostics-otel 2026.3.12 → 2026.5.1-beta.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,15 +1,35 @@
1
- import { beforeEach, describe, expect, test, vi } from "vitest";
2
-
3
- const registerLogTransportMock = vi.hoisted(() => vi.fn());
1
+ import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
4
2
 
5
3
  const telemetryState = vi.hoisted(() => {
6
4
  const counters = new Map<string, { add: ReturnType<typeof vi.fn> }>();
7
5
  const histograms = new Map<string, { record: ReturnType<typeof vi.fn> }>();
6
+ const spans: Array<{
7
+ name: string;
8
+ addEvent: ReturnType<typeof vi.fn>;
9
+ end: ReturnType<typeof vi.fn>;
10
+ setAttributes: ReturnType<typeof vi.fn>;
11
+ setStatus: ReturnType<typeof vi.fn>;
12
+ spanContext: ReturnType<typeof vi.fn>;
13
+ }> = [];
8
14
  const tracer = {
9
- startSpan: vi.fn((_name: string, _opts?: unknown) => ({
10
- end: vi.fn(),
11
- setStatus: vi.fn(),
12
- })),
15
+ startSpan: vi.fn((name: string, _opts?: unknown, _ctx?: unknown) => {
16
+ const spanNumber = spans.length + 1;
17
+ const spanId = spanNumber.toString(16).padStart(16, "0");
18
+ const span = {
19
+ addEvent: vi.fn(),
20
+ end: vi.fn(),
21
+ setAttributes: vi.fn(),
22
+ setStatus: vi.fn(),
23
+ spanContext: vi.fn(() => ({
24
+ traceId: "4bf92f3577b34da6a3ce929d0e0e4736",
25
+ spanId,
26
+ traceFlags: 1,
27
+ })),
28
+ };
29
+ spans.push({ name, ...span });
30
+ return span;
31
+ }),
32
+ setSpanContext: vi.fn((_ctx: unknown, spanContext: unknown) => ({ spanContext })),
13
33
  };
14
34
  const meter = {
15
35
  createCounter: vi.fn((name: string) => {
@@ -23,7 +43,7 @@ const telemetryState = vi.hoisted(() => {
23
43
  return histogram;
24
44
  }),
25
45
  };
26
- return { counters, histograms, tracer, meter };
46
+ return { counters, histograms, spans, tracer, meter };
27
47
  });
28
48
 
29
49
  const sdkStart = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
@@ -31,13 +51,23 @@ const sdkShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
31
51
  const logEmit = vi.hoisted(() => vi.fn());
32
52
  const logShutdown = vi.hoisted(() => vi.fn().mockResolvedValue(undefined));
33
53
  const traceExporterCtor = vi.hoisted(() => vi.fn());
54
+ const metricExporterCtor = vi.hoisted(() => vi.fn());
55
+ const logExporterCtor = vi.hoisted(() => vi.fn());
34
56
 
35
57
  vi.mock("@opentelemetry/api", () => ({
58
+ context: {
59
+ active: () => ({}),
60
+ },
36
61
  metrics: {
37
62
  getMeter: () => telemetryState.meter,
38
63
  },
39
64
  trace: {
40
65
  getTracer: () => telemetryState.tracer,
66
+ setSpanContext: telemetryState.tracer.setSpanContext,
67
+ },
68
+ TraceFlags: {
69
+ NONE: 0,
70
+ SAMPLED: 1,
41
71
  },
42
72
  SpanStatusCode: {
43
73
  ERROR: 2,
@@ -52,23 +82,25 @@ vi.mock("@opentelemetry/sdk-node", () => ({
52
82
  }));
53
83
 
54
84
  vi.mock("@opentelemetry/exporter-metrics-otlp-proto", () => ({
55
- OTLPMetricExporter: class {},
85
+ OTLPMetricExporter: function OTLPMetricExporter(options?: unknown) {
86
+ metricExporterCtor(options);
87
+ },
56
88
  }));
57
89
 
58
90
  vi.mock("@opentelemetry/exporter-trace-otlp-proto", () => ({
59
- OTLPTraceExporter: class {
60
- constructor(options?: unknown) {
61
- traceExporterCtor(options);
62
- }
91
+ OTLPTraceExporter: function OTLPTraceExporter(options?: unknown) {
92
+ traceExporterCtor(options);
63
93
  },
64
94
  }));
65
95
 
66
96
  vi.mock("@opentelemetry/exporter-logs-otlp-proto", () => ({
67
- OTLPLogExporter: class {},
97
+ OTLPLogExporter: function OTLPLogExporter(options?: unknown) {
98
+ logExporterCtor(options);
99
+ },
68
100
  }));
69
101
 
70
102
  vi.mock("@opentelemetry/sdk-logs", () => ({
71
- BatchLogRecordProcessor: class {},
103
+ BatchLogRecordProcessor: function BatchLogRecordProcessor() {},
72
104
  LoggerProvider: class {
73
105
  getLogger = vi.fn(() => ({
74
106
  emit: logEmit,
@@ -78,19 +110,18 @@ vi.mock("@opentelemetry/sdk-logs", () => ({
78
110
  }));
79
111
 
80
112
  vi.mock("@opentelemetry/sdk-metrics", () => ({
81
- PeriodicExportingMetricReader: class {},
113
+ PeriodicExportingMetricReader: function PeriodicExportingMetricReader() {},
82
114
  }));
83
115
 
84
116
  vi.mock("@opentelemetry/sdk-trace-base", () => ({
85
- ParentBasedSampler: class {},
86
- TraceIdRatioBasedSampler: class {},
117
+ ParentBasedSampler: function ParentBasedSampler() {},
118
+ TraceIdRatioBasedSampler: function TraceIdRatioBasedSampler() {},
87
119
  }));
88
120
 
89
121
  vi.mock("@opentelemetry/resources", () => ({
90
122
  resourceFromAttributes: vi.fn((attrs: Record<string, unknown>) => attrs),
91
- Resource: class {
92
- // eslint-disable-next-line @typescript-eslint/no-useless-constructor
93
- constructor(_value?: unknown) {}
123
+ Resource: function Resource(_value?: unknown) {
124
+ // Constructor shape required by the mocked OpenTelemetry API.
94
125
  },
95
126
  }));
96
127
 
@@ -98,23 +129,32 @@ vi.mock("@opentelemetry/semantic-conventions", () => ({
98
129
  ATTR_SERVICE_NAME: "service.name",
99
130
  }));
100
131
 
101
- vi.mock("openclaw/plugin-sdk/diagnostics-otel", async () => {
102
- const actual = await vi.importActual<typeof import("openclaw/plugin-sdk/diagnostics-otel")>(
103
- "openclaw/plugin-sdk/diagnostics-otel",
104
- );
105
- return {
106
- ...actual,
107
- registerLogTransport: registerLogTransportMock,
108
- };
109
- });
110
-
111
- import type { OpenClawPluginServiceContext } from "openclaw/plugin-sdk/diagnostics-otel";
112
- import { emitDiagnosticEvent } from "openclaw/plugin-sdk/diagnostics-otel";
132
+ import {
133
+ emitTrustedDiagnosticEvent,
134
+ onInternalDiagnosticEvent,
135
+ resetDiagnosticEventsForTest,
136
+ } from "openclaw/plugin-sdk/diagnostic-runtime";
137
+ import type { OpenClawPluginServiceContext } from "../api.js";
138
+ import { emitDiagnosticEvent } from "../api.js";
113
139
  import { createDiagnosticsOtelService } from "./service.js";
114
140
 
115
141
  const OTEL_TEST_STATE_DIR = "/tmp/openclaw-diagnostics-otel-test";
116
142
  const OTEL_TEST_ENDPOINT = "http://otel-collector:4318";
117
143
  const OTEL_TEST_PROTOCOL = "http/protobuf";
144
+ const TRACE_ID = "4bf92f3577b34da6a3ce929d0e0e4736";
145
+ const SPAN_ID = "00f067aa0ba902b7";
146
+ const CHILD_SPAN_ID = "1111111111111111";
147
+ const GRANDCHILD_SPAN_ID = "2222222222222222";
148
+ const TOOL_SPAN_ID = "3333333333333333";
149
+ const PROTO_KEY = "__proto__";
150
+ const MAX_TEST_OTEL_CONTENT_ATTRIBUTE_CHARS = 4096;
151
+ const OTEL_TRUNCATED_SUFFIX_MAX_CHARS = 20;
152
+ const ORIGINAL_OPENCLAW_OTEL_PRELOADED = process.env.OPENCLAW_OTEL_PRELOADED;
153
+ const ORIGINAL_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
154
+ const ORIGINAL_OTEL_EXPORTER_OTLP_METRICS_ENDPOINT =
155
+ process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT;
156
+ const ORIGINAL_OTEL_EXPORTER_OTLP_LOGS_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT;
157
+ const ORIGINAL_OTEL_SEMCONV_STABILITY_OPT_IN = process.env.OTEL_SEMCONV_STABILITY_OPT_IN;
118
158
 
119
159
  function createLogger() {
120
160
  return {
@@ -129,10 +169,13 @@ type OtelContextFlags = {
129
169
  traces?: boolean;
130
170
  metrics?: boolean;
131
171
  logs?: boolean;
172
+ captureContent?: NonNullable<
173
+ NonNullable<OpenClawPluginServiceContext["config"]["diagnostics"]>["otel"]
174
+ >["captureContent"];
132
175
  };
133
176
  function createOtelContext(
134
177
  endpoint: string,
135
- { traces = false, metrics = false, logs = false }: OtelContextFlags = {},
178
+ { traces = false, metrics = false, logs = false, captureContent }: OtelContextFlags = {},
136
179
  ): OpenClawPluginServiceContext {
137
180
  return {
138
181
  config: {
@@ -145,11 +188,16 @@ function createOtelContext(
145
188
  traces,
146
189
  metrics,
147
190
  logs,
191
+ ...(captureContent !== undefined ? { captureContent } : {}),
148
192
  },
149
193
  },
150
194
  },
151
195
  logger: createLogger(),
152
196
  stateDir: OTEL_TEST_STATE_DIR,
197
+ internalDiagnostics: {
198
+ emit: emitTrustedDiagnosticEvent,
199
+ onEvent: onInternalDiagnosticEvent,
200
+ },
153
201
  };
154
202
  }
155
203
 
@@ -157,48 +205,84 @@ function createTraceOnlyContext(endpoint: string): OpenClawPluginServiceContext
157
205
  return createOtelContext(endpoint, { traces: true });
158
206
  }
159
207
 
160
- type RegisteredLogTransport = (logObj: Record<string, unknown>) => void;
161
- function setupRegisteredTransports() {
162
- const registeredTransports: RegisteredLogTransport[] = [];
163
- const stopTransport = vi.fn();
164
- registerLogTransportMock.mockImplementation((transport) => {
165
- registeredTransports.push(transport);
166
- return stopTransport;
167
- });
168
- return { registeredTransports, stopTransport };
169
- }
170
-
171
- async function emitAndCaptureLog(logObj: Record<string, unknown>) {
172
- const { registeredTransports } = setupRegisteredTransports();
208
+ async function emitAndCaptureLog(
209
+ event: Omit<Extract<Parameters<typeof emitDiagnosticEvent>[0], { type: "log.record" }>, "type">,
210
+ options: { trusted?: boolean } = {},
211
+ ) {
173
212
  const service = createDiagnosticsOtelService();
174
213
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true });
175
214
  await service.start(ctx);
176
- expect(registeredTransports).toHaveLength(1);
177
- registeredTransports[0]?.(logObj);
215
+ const emit = options.trusted ? emitTrustedDiagnosticEvent : emitDiagnosticEvent;
216
+ emit({
217
+ type: "log.record",
218
+ ...event,
219
+ });
220
+ await flushDiagnosticEvents();
178
221
  expect(logEmit).toHaveBeenCalled();
179
222
  const emitCall = logEmit.mock.calls[0]?.[0];
180
223
  await service.stop?.(ctx);
181
224
  return emitCall;
182
225
  }
183
226
 
227
+ function flushDiagnosticEvents() {
228
+ return new Promise<void>((resolve) => setImmediate(resolve));
229
+ }
230
+
184
231
  describe("diagnostics-otel service", () => {
185
232
  beforeEach(() => {
233
+ resetDiagnosticEventsForTest();
234
+ delete process.env.OPENCLAW_OTEL_PRELOADED;
235
+ delete process.env.OTEL_SEMCONV_STABILITY_OPT_IN;
186
236
  telemetryState.counters.clear();
187
237
  telemetryState.histograms.clear();
238
+ telemetryState.spans.length = 0;
188
239
  telemetryState.tracer.startSpan.mockClear();
240
+ telemetryState.tracer.setSpanContext.mockClear();
189
241
  telemetryState.meter.createCounter.mockClear();
190
242
  telemetryState.meter.createHistogram.mockClear();
191
243
  sdkStart.mockClear();
192
244
  sdkShutdown.mockClear();
193
- logEmit.mockClear();
245
+ logEmit.mockReset();
194
246
  logShutdown.mockClear();
195
247
  traceExporterCtor.mockClear();
196
- registerLogTransportMock.mockReset();
248
+ metricExporterCtor.mockClear();
249
+ logExporterCtor.mockClear();
250
+ delete process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
251
+ delete process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT;
252
+ delete process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT;
197
253
  });
198
254
 
199
- test("records message-flow metrics and spans", async () => {
200
- const { registeredTransports } = setupRegisteredTransports();
255
+ afterEach(() => {
256
+ resetDiagnosticEventsForTest();
257
+ if (ORIGINAL_OPENCLAW_OTEL_PRELOADED === undefined) {
258
+ delete process.env.OPENCLAW_OTEL_PRELOADED;
259
+ } else {
260
+ process.env.OPENCLAW_OTEL_PRELOADED = ORIGINAL_OPENCLAW_OTEL_PRELOADED;
261
+ }
262
+ if (ORIGINAL_OTEL_SEMCONV_STABILITY_OPT_IN === undefined) {
263
+ delete process.env.OTEL_SEMCONV_STABILITY_OPT_IN;
264
+ } else {
265
+ process.env.OTEL_SEMCONV_STABILITY_OPT_IN = ORIGINAL_OTEL_SEMCONV_STABILITY_OPT_IN;
266
+ }
267
+ if (ORIGINAL_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT === undefined) {
268
+ delete process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
269
+ } else {
270
+ process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = ORIGINAL_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
271
+ }
272
+ if (ORIGINAL_OTEL_EXPORTER_OTLP_METRICS_ENDPOINT === undefined) {
273
+ delete process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT;
274
+ } else {
275
+ process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT =
276
+ ORIGINAL_OTEL_EXPORTER_OTLP_METRICS_ENDPOINT;
277
+ }
278
+ if (ORIGINAL_OTEL_EXPORTER_OTLP_LOGS_ENDPOINT === undefined) {
279
+ delete process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT;
280
+ } else {
281
+ process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT = ORIGINAL_OTEL_EXPORTER_OTLP_LOGS_ENDPOINT;
282
+ }
283
+ });
201
284
 
285
+ test("records message-flow metrics and spans", async () => {
202
286
  const service = createDiagnosticsOtelService();
203
287
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
204
288
  await service.start(ctx);
@@ -236,6 +320,7 @@ describe("diagnostics-otel service", () => {
236
320
  type: "session.stuck",
237
321
  state: "processing",
238
322
  ageMs: 125_000,
323
+ classification: "stale_session_state",
239
324
  });
240
325
  emitDiagnosticEvent({
241
326
  type: "run.attempt",
@@ -264,16 +349,325 @@ describe("diagnostics-otel service", () => {
264
349
  expect(spanNames).toContain("openclaw.message.processed");
265
350
  expect(spanNames).toContain("openclaw.session.stuck");
266
351
 
267
- expect(registerLogTransportMock).toHaveBeenCalledTimes(1);
268
- expect(registeredTransports).toHaveLength(1);
269
- registeredTransports[0]?.({
270
- 0: '{"subsystem":"diagnostic"}',
271
- 1: "hello",
272
- _meta: { logLevelName: "INFO", date: new Date() },
352
+ emitDiagnosticEvent({
353
+ type: "log.record",
354
+ level: "INFO",
355
+ message: "hello",
356
+ attributes: { subsystem: "diagnostic" },
357
+ });
358
+ await flushDiagnosticEvents();
359
+ expect(logEmit).toHaveBeenCalled();
360
+
361
+ await service.stop?.(ctx);
362
+ });
363
+
364
+ test("restarts without retaining prior listeners or log transports", async () => {
365
+ const service = createDiagnosticsOtelService();
366
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
367
+ await service.start(ctx);
368
+ await service.start(ctx);
369
+
370
+ expect(logShutdown).toHaveBeenCalledTimes(1);
371
+ expect(sdkShutdown).toHaveBeenCalledTimes(1);
372
+
373
+ telemetryState.tracer.startSpan.mockClear();
374
+ emitDiagnosticEvent({
375
+ type: "message.processed",
376
+ channel: "telegram",
377
+ outcome: "completed",
378
+ durationMs: 10,
379
+ });
380
+ expect(telemetryState.tracer.startSpan).toHaveBeenCalledTimes(1);
381
+
382
+ await service.stop?.(ctx);
383
+ expect(logShutdown).toHaveBeenCalledTimes(2);
384
+ expect(sdkShutdown).toHaveBeenCalledTimes(2);
385
+
386
+ telemetryState.tracer.startSpan.mockClear();
387
+ emitDiagnosticEvent({
388
+ type: "message.processed",
389
+ channel: "telegram",
390
+ outcome: "completed",
391
+ durationMs: 10,
392
+ });
393
+ expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
394
+ });
395
+
396
+ test("uses a preloaded OpenTelemetry SDK without dropping diagnostic listeners", async () => {
397
+ process.env.OPENCLAW_OTEL_PRELOADED = "1";
398
+ const service = createDiagnosticsOtelService();
399
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
400
+ await service.start(ctx);
401
+
402
+ expect(sdkStart).not.toHaveBeenCalled();
403
+ expect(traceExporterCtor).not.toHaveBeenCalled();
404
+ expect(ctx.logger.info).toHaveBeenCalledWith(
405
+ "diagnostics-otel: using preloaded OpenTelemetry SDK",
406
+ );
407
+
408
+ emitDiagnosticEvent({
409
+ type: "run.completed",
410
+ runId: "run-1",
411
+ provider: "openai",
412
+ model: "gpt-5.4",
413
+ outcome: "completed",
414
+ durationMs: 100,
415
+ });
416
+ emitDiagnosticEvent({
417
+ type: "log.record",
418
+ level: "INFO",
419
+ message: "preloaded log",
273
420
  });
421
+ await flushDiagnosticEvents();
422
+
423
+ expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
424
+ 100,
425
+ expect.objectContaining({
426
+ "openclaw.provider": "openai",
427
+ "openclaw.model": "gpt-5.4",
428
+ }),
429
+ );
430
+ expect(telemetryState.tracer.startSpan).toHaveBeenCalledWith(
431
+ "openclaw.run",
432
+ expect.objectContaining({
433
+ attributes: expect.objectContaining({
434
+ "openclaw.outcome": "completed",
435
+ }),
436
+ }),
437
+ undefined,
438
+ );
274
439
  expect(logEmit).toHaveBeenCalled();
275
440
 
276
441
  await service.stop?.(ctx);
442
+ expect(sdkShutdown).not.toHaveBeenCalled();
443
+ expect(logShutdown).toHaveBeenCalledTimes(1);
444
+ });
445
+
446
+ test("emits and records bounded telemetry exporter health events", async () => {
447
+ const events: Array<Parameters<Parameters<typeof onInternalDiagnosticEvent>[0]>[0]> = [];
448
+ const unsubscribe = onInternalDiagnosticEvent((event) => {
449
+ if (event.type === "telemetry.exporter") {
450
+ events.push(event);
451
+ }
452
+ });
453
+ const service = createDiagnosticsOtelService();
454
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true, logs: true });
455
+
456
+ await service.start(ctx);
457
+
458
+ expect(events).toEqual(
459
+ expect.arrayContaining([
460
+ expect.objectContaining({
461
+ type: "telemetry.exporter",
462
+ exporter: "diagnostics-otel",
463
+ signal: "traces",
464
+ status: "started",
465
+ reason: "configured",
466
+ }),
467
+ expect.objectContaining({
468
+ type: "telemetry.exporter",
469
+ exporter: "diagnostics-otel",
470
+ signal: "metrics",
471
+ status: "started",
472
+ reason: "configured",
473
+ }),
474
+ expect.objectContaining({
475
+ type: "telemetry.exporter",
476
+ exporter: "diagnostics-otel",
477
+ signal: "logs",
478
+ status: "started",
479
+ reason: "configured",
480
+ }),
481
+ ]),
482
+ );
483
+ expect(
484
+ telemetryState.counters.get("openclaw.telemetry.exporter.events")?.add,
485
+ ).toHaveBeenCalledWith(1, {
486
+ "openclaw.exporter": "diagnostics-otel",
487
+ "openclaw.signal": "logs",
488
+ "openclaw.status": "started",
489
+ "openclaw.reason": "configured",
490
+ });
491
+
492
+ unsubscribe();
493
+ await service.stop?.(ctx);
494
+ });
495
+
496
+ test("records liveness warning diagnostics", async () => {
497
+ const service = createDiagnosticsOtelService();
498
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
499
+
500
+ await service.start(ctx);
501
+ emitDiagnosticEvent({
502
+ type: "diagnostic.liveness.warning",
503
+ reasons: ["event_loop_delay", "cpu"],
504
+ intervalMs: 30_000,
505
+ eventLoopDelayP99Ms: 250,
506
+ eventLoopDelayMaxMs: 900,
507
+ eventLoopUtilization: 0.95,
508
+ cpuUserMs: 1200,
509
+ cpuSystemMs: 300,
510
+ cpuTotalMs: 1500,
511
+ cpuCoreRatio: 1.4,
512
+ active: 2,
513
+ waiting: 1,
514
+ queued: 4,
515
+ });
516
+ await flushDiagnosticEvents();
517
+
518
+ expect(telemetryState.counters.get("openclaw.liveness.warning")?.add).toHaveBeenCalledWith(1, {
519
+ "openclaw.liveness.reason": "event_loop_delay:cpu",
520
+ });
521
+ expect(
522
+ telemetryState.histograms.get("openclaw.liveness.event_loop_delay_p99_ms")?.record,
523
+ ).toHaveBeenCalledWith(250, {
524
+ "openclaw.liveness.reason": "event_loop_delay:cpu",
525
+ });
526
+ expect(
527
+ telemetryState.histograms.get("openclaw.liveness.cpu_core_ratio")?.record,
528
+ ).toHaveBeenCalledWith(1.4, {
529
+ "openclaw.liveness.reason": "event_loop_delay:cpu",
530
+ });
531
+ const livenessSpan = telemetryState.tracer.startSpan.mock.calls.find(
532
+ (call) => call[0] === "openclaw.liveness.warning",
533
+ );
534
+ expect(livenessSpan?.[1]).toMatchObject({
535
+ attributes: {
536
+ "openclaw.liveness.reason": "event_loop_delay:cpu",
537
+ "openclaw.liveness.active": 2,
538
+ "openclaw.liveness.queued": 4,
539
+ },
540
+ });
541
+ const span = telemetryState.spans.find((item) => item.name === "openclaw.liveness.warning");
542
+ expect(span?.setStatus).toHaveBeenCalledWith({
543
+ code: 2,
544
+ message: "event_loop_delay:cpu",
545
+ });
546
+
547
+ await service.stop?.(ctx);
548
+ });
549
+
550
+ test("reports log exporter emit failures without exporting raw error text", async () => {
551
+ const events: Array<Parameters<Parameters<typeof onInternalDiagnosticEvent>[0]>[0]> = [];
552
+ const unsubscribe = onInternalDiagnosticEvent((event) => {
553
+ if (event.type === "telemetry.exporter") {
554
+ events.push(event);
555
+ }
556
+ });
557
+ const service = createDiagnosticsOtelService();
558
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true });
559
+ logEmit.mockImplementationOnce(() => {
560
+ throw new TypeError("token sk-test-secret should not leave as telemetry");
561
+ });
562
+
563
+ await service.start(ctx);
564
+ emitDiagnosticEvent({
565
+ type: "log.record",
566
+ level: "INFO",
567
+ message: "export me",
568
+ });
569
+ await flushDiagnosticEvents();
570
+
571
+ expect(events).toEqual(
572
+ expect.arrayContaining([
573
+ expect.objectContaining({
574
+ type: "telemetry.exporter",
575
+ exporter: "diagnostics-otel",
576
+ signal: "logs",
577
+ status: "failure",
578
+ reason: "emit_failed",
579
+ errorCategory: "TypeError",
580
+ }),
581
+ ]),
582
+ );
583
+ expect(
584
+ telemetryState.counters.get("openclaw.telemetry.exporter.events")?.add,
585
+ ).toHaveBeenCalledWith(1, {
586
+ "openclaw.exporter": "diagnostics-otel",
587
+ "openclaw.signal": "logs",
588
+ "openclaw.status": "failure",
589
+ "openclaw.reason": "emit_failed",
590
+ "openclaw.errorCategory": "TypeError",
591
+ });
592
+
593
+ unsubscribe();
594
+ await service.stop?.(ctx);
595
+ });
596
+
597
+ test("ignores untrusted telemetry exporter events for OTEL metrics", async () => {
598
+ const service = createDiagnosticsOtelService();
599
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
600
+
601
+ await service.start(ctx);
602
+ telemetryState.counters.get("openclaw.telemetry.exporter.events")?.add.mockClear();
603
+ emitDiagnosticEvent({
604
+ type: "telemetry.exporter",
605
+ exporter: "spoofed-plugin-exporter",
606
+ signal: "metrics",
607
+ status: "failure",
608
+ reason: "emit_failed",
609
+ });
610
+
611
+ expect(
612
+ telemetryState.counters.get("openclaw.telemetry.exporter.events")?.add,
613
+ ).not.toHaveBeenCalled();
614
+
615
+ await service.stop?.(ctx);
616
+ });
617
+
618
+ test("honors disabled traces when an OpenTelemetry SDK is preloaded", async () => {
619
+ process.env.OPENCLAW_OTEL_PRELOADED = "1";
620
+ const service = createDiagnosticsOtelService();
621
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: false, metrics: true });
622
+ await service.start(ctx);
623
+
624
+ emitDiagnosticEvent({
625
+ type: "run.completed",
626
+ runId: "run-1",
627
+ provider: "openai",
628
+ model: "gpt-5.4",
629
+ outcome: "completed",
630
+ durationMs: 100,
631
+ });
632
+ await flushDiagnosticEvents();
633
+
634
+ expect(sdkStart).not.toHaveBeenCalled();
635
+ expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
636
+ 100,
637
+ expect.objectContaining({
638
+ "openclaw.provider": "openai",
639
+ }),
640
+ );
641
+ expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
642
+
643
+ await service.stop?.(ctx);
644
+ expect(sdkShutdown).not.toHaveBeenCalled();
645
+ });
646
+
647
+ test("tears down active handles when restarted with diagnostics disabled", async () => {
648
+ const service = createDiagnosticsOtelService();
649
+ const enabledCtx = createOtelContext(OTEL_TEST_ENDPOINT, {
650
+ traces: true,
651
+ metrics: true,
652
+ logs: true,
653
+ });
654
+ await service.start(enabledCtx);
655
+ await service.start({
656
+ ...enabledCtx,
657
+ config: { diagnostics: { enabled: false } },
658
+ });
659
+
660
+ expect(logShutdown).toHaveBeenCalledTimes(1);
661
+ expect(sdkShutdown).toHaveBeenCalledTimes(1);
662
+
663
+ telemetryState.tracer.startSpan.mockClear();
664
+ emitDiagnosticEvent({
665
+ type: "message.processed",
666
+ channel: "telegram",
667
+ outcome: "completed",
668
+ durationMs: 10,
669
+ });
670
+ expect(telemetryState.tracer.startSpan).not.toHaveBeenCalled();
277
671
  });
278
672
 
279
673
  test("appends signal path when endpoint contains non-signal /v1 segment", async () => {
@@ -316,10 +710,54 @@ describe("diagnostics-otel service", () => {
316
710
  await service.stop?.(ctx);
317
711
  });
318
712
 
713
+ test("uses signal-specific OTLP endpoints ahead of the shared endpoint", async () => {
714
+ const service = createDiagnosticsOtelService();
715
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, {
716
+ traces: true,
717
+ metrics: true,
718
+ logs: true,
719
+ });
720
+ ctx.config.diagnostics!.otel!.tracesEndpoint = "https://trace.example.com/otlp";
721
+ ctx.config.diagnostics!.otel!.metricsEndpoint = "https://metric.example.com/v1/metrics";
722
+ ctx.config.diagnostics!.otel!.logsEndpoint = "https://log.example.com/otlp";
723
+
724
+ await service.start(ctx);
725
+
726
+ const traceOptions = traceExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
727
+ const metricOptions = metricExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
728
+ const logOptions = logExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
729
+ expect(traceOptions?.url).toBe("https://trace.example.com/otlp/v1/traces");
730
+ expect(metricOptions?.url).toBe("https://metric.example.com/v1/metrics");
731
+ expect(logOptions?.url).toBe("https://log.example.com/otlp/v1/logs");
732
+ await service.stop?.(ctx);
733
+ });
734
+
735
+ test("uses signal-specific OTLP env endpoints when config is unset", async () => {
736
+ process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = "https://trace-env.example.com/v1/traces";
737
+ process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = "https://metric-env.example.com/otlp";
738
+ process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT = "https://log-env.example.com/otlp";
739
+
740
+ const service = createDiagnosticsOtelService();
741
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, {
742
+ traces: true,
743
+ metrics: true,
744
+ logs: true,
745
+ });
746
+ await service.start(ctx);
747
+
748
+ const traceOptions = traceExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
749
+ const metricOptions = metricExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
750
+ const logOptions = logExporterCtor.mock.calls[0]?.[0] as { url?: string } | undefined;
751
+ expect(traceOptions?.url).toBe("https://trace-env.example.com/v1/traces");
752
+ expect(metricOptions?.url).toBe("https://metric-env.example.com/otlp/v1/metrics");
753
+ expect(logOptions?.url).toBe("https://log-env.example.com/otlp/v1/logs");
754
+ await service.stop?.(ctx);
755
+ });
756
+
319
757
  test("redacts sensitive data from log messages before export", async () => {
320
758
  const emitCall = await emitAndCaptureLog({
321
- 0: "Using API key sk-1234567890abcdef1234567890abcdef",
322
- _meta: { logLevelName: "INFO", date: new Date() },
759
+ level: "INFO",
760
+ message: "Using API key sk-1234567890abcdef1234567890abcdef",
323
761
  });
324
762
 
325
763
  expect(emitCall?.body).not.toContain("sk-1234567890abcdef1234567890abcdef");
@@ -329,9 +767,11 @@ describe("diagnostics-otel service", () => {
329
767
 
330
768
  test("redacts sensitive data from log attributes before export", async () => {
331
769
  const emitCall = await emitAndCaptureLog({
332
- 0: '{"token":"ghp_abcdefghijklmnopqrstuvwxyz123456"}', // pragma: allowlist secret
333
- 1: "auth configured",
334
- _meta: { logLevelName: "DEBUG", date: new Date() },
770
+ level: "DEBUG",
771
+ message: "auth configured",
772
+ attributes: {
773
+ token: "ghp_abcdefghijklmnopqrstuvwxyz123456", // pragma: allowlist secret
774
+ },
335
775
  });
336
776
 
337
777
  const tokenAttr = emitCall?.attributes?.["openclaw.token"];
@@ -341,6 +781,1774 @@ describe("diagnostics-otel service", () => {
341
781
  }
342
782
  });
343
783
 
784
+ test("does not attach untrusted diagnostic trace context to exported logs", async () => {
785
+ const emitCall = await emitAndCaptureLog({
786
+ level: "INFO",
787
+ message: "traceable log",
788
+ attributes: {
789
+ subsystem: "diagnostic",
790
+ },
791
+ trace: {
792
+ traceId: TRACE_ID,
793
+ spanId: SPAN_ID,
794
+ traceFlags: "01",
795
+ },
796
+ });
797
+
798
+ expect(emitCall?.attributes).toEqual(
799
+ expect.not.objectContaining({
800
+ "openclaw.traceId": expect.anything(),
801
+ "openclaw.spanId": expect.anything(),
802
+ "openclaw.traceFlags": expect.anything(),
803
+ }),
804
+ );
805
+ expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
806
+ expect(emitCall?.context).toBeUndefined();
807
+ });
808
+
809
+ test("attaches trusted diagnostic trace context to exported logs", async () => {
810
+ const emitCall = await emitAndCaptureLog(
811
+ {
812
+ level: "INFO",
813
+ message: "traceable log",
814
+ trace: {
815
+ traceId: TRACE_ID,
816
+ spanId: SPAN_ID,
817
+ traceFlags: "01",
818
+ },
819
+ },
820
+ { trusted: true },
821
+ );
822
+
823
+ expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
824
+ expect.anything(),
825
+ expect.objectContaining({
826
+ traceId: TRACE_ID,
827
+ spanId: SPAN_ID,
828
+ traceFlags: 1,
829
+ isRemote: true,
830
+ }),
831
+ );
832
+ expect(emitCall?.context).toEqual({
833
+ spanContext: expect.objectContaining({
834
+ traceId: TRACE_ID,
835
+ spanId: SPAN_ID,
836
+ }),
837
+ });
838
+ });
839
+
840
+ test("bounds plugin-emitted log attributes and omits source paths", async () => {
841
+ const service = createDiagnosticsOtelService();
842
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true });
843
+ await service.start(ctx);
844
+
845
+ const attributes = Object.create(null) as Record<string, string>;
846
+ attributes.good = "y".repeat(6000);
847
+ attributes["bad key"] = "drop-me";
848
+ attributes[PROTO_KEY] = "pollute";
849
+ attributes["constructor"] = "pollute";
850
+ attributes["prototype"] = "pollute";
851
+ attributes["sk-1234567890abcdef1234567890abcdef"] = "secret-key"; // pragma: allowlist secret
852
+
853
+ emitDiagnosticEvent({
854
+ type: "log.record",
855
+ level: "INFO",
856
+ message: "x".repeat(6000),
857
+ attributes,
858
+ code: {
859
+ filepath: "/Users/alice/openclaw/src/private.ts",
860
+ line: 42,
861
+ functionName: "handler",
862
+ location: "/Users/alice/openclaw/src/private.ts:42",
863
+ },
864
+ } as Parameters<typeof emitDiagnosticEvent>[0]);
865
+ await flushDiagnosticEvents();
866
+
867
+ const emitCall = logEmit.mock.calls[0]?.[0];
868
+ expect(emitCall?.body.length).toBeLessThanOrEqual(4200);
869
+ expect(emitCall?.attributes).toMatchObject({
870
+ "openclaw.good": expect.stringMatching(/^y+/),
871
+ "code.lineno": 42,
872
+ "code.function": "handler",
873
+ });
874
+ expect(String(emitCall?.attributes?.["openclaw.good"]).length).toBeLessThanOrEqual(4200);
875
+ expect(Object.hasOwn(emitCall?.attributes ?? {}, `openclaw.${PROTO_KEY}`)).toBe(false);
876
+ expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.constructor")).toBe(false);
877
+ expect(Object.hasOwn(emitCall?.attributes ?? {}, "openclaw.prototype")).toBe(false);
878
+ expect(
879
+ Object.hasOwn(
880
+ emitCall?.attributes ?? {},
881
+ "openclaw.sk-1234567890abcdef1234567890abcdef", // pragma: allowlist secret
882
+ ),
883
+ ).toBe(false);
884
+ expect(emitCall?.attributes).toEqual(
885
+ expect.not.objectContaining({
886
+ "openclaw.bad key": expect.anything(),
887
+ "code.filepath": expect.anything(),
888
+ "openclaw.code.location": expect.anything(),
889
+ }),
890
+ );
891
+ await service.stop?.(ctx);
892
+ });
893
+
894
+ test("rate-limits repeated log export failure reports", async () => {
895
+ const service = createDiagnosticsOtelService();
896
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { logs: true });
897
+ const nowSpy = vi.spyOn(Date, "now").mockReturnValue(1_000);
898
+ logEmit.mockImplementation(() => {
899
+ throw new Error("export failed");
900
+ });
901
+ try {
902
+ await service.start(ctx);
903
+
904
+ emitDiagnosticEvent({
905
+ type: "log.record",
906
+ level: "ERROR",
907
+ message: "first failing log",
908
+ });
909
+ emitDiagnosticEvent({
910
+ type: "log.record",
911
+ level: "ERROR",
912
+ message: "second failing log",
913
+ });
914
+ await flushDiagnosticEvents();
915
+
916
+ expect(ctx.logger.error).toHaveBeenCalledTimes(1);
917
+
918
+ nowSpy.mockReturnValue(62_000);
919
+ emitDiagnosticEvent({
920
+ type: "log.record",
921
+ level: "ERROR",
922
+ message: "third failing log",
923
+ });
924
+ await flushDiagnosticEvents();
925
+
926
+ expect(ctx.logger.error).toHaveBeenCalledTimes(2);
927
+ } finally {
928
+ nowSpy.mockRestore();
929
+ await service.stop?.(ctx);
930
+ }
931
+ });
932
+
933
+ test("does not parent diagnostic event spans from plugin-emittable trace context", async () => {
934
+ const service = createDiagnosticsOtelService();
935
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
936
+ await service.start(ctx);
937
+
938
+ emitDiagnosticEvent({
939
+ type: "model.usage",
940
+ trace: {
941
+ traceId: TRACE_ID,
942
+ spanId: SPAN_ID,
943
+ traceFlags: "01",
944
+ },
945
+ provider: "openai",
946
+ model: "gpt-5.4",
947
+ usage: { total: 4 },
948
+ durationMs: 12,
949
+ });
950
+
951
+ const modelUsageCall = telemetryState.tracer.startSpan.mock.calls.find(
952
+ (call) => call[0] === "openclaw.model.usage",
953
+ );
954
+ expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
955
+ expect(modelUsageCall?.[2]).toBeUndefined();
956
+ await service.stop?.(ctx);
957
+ });
958
+
959
+ test("exports GenAI client token usage histogram for input and output only", async () => {
960
+ const service = createDiagnosticsOtelService();
961
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
962
+ await service.start(ctx);
963
+
964
+ emitDiagnosticEvent({
965
+ type: "model.usage",
966
+ sessionKey: "session-key",
967
+ channel: "webchat",
968
+ agentId: "ops",
969
+ provider: "openai",
970
+ model: "gpt-5.4",
971
+ usage: {
972
+ input: 12,
973
+ output: 7,
974
+ cacheRead: 3,
975
+ cacheWrite: 2,
976
+ promptTokens: 17,
977
+ total: 24,
978
+ },
979
+ });
980
+ await flushDiagnosticEvents();
981
+
982
+ expect(telemetryState.meter.createHistogram).toHaveBeenCalledWith(
983
+ "gen_ai.client.token.usage",
984
+ expect.objectContaining({
985
+ unit: "{token}",
986
+ advice: {
987
+ explicitBucketBoundaries: expect.arrayContaining([1, 4, 16, 1024, 67108864]),
988
+ },
989
+ }),
990
+ );
991
+ const genAiTokenUsage = telemetryState.histograms.get("gen_ai.client.token.usage");
992
+ const tokens = telemetryState.counters.get("openclaw.tokens");
993
+ expect(tokens?.add).toHaveBeenCalledWith(12, {
994
+ "openclaw.channel": "webchat",
995
+ "openclaw.agent": "ops",
996
+ "openclaw.provider": "openai",
997
+ "openclaw.model": "gpt-5.4",
998
+ "openclaw.token": "input",
999
+ });
1000
+ expect(genAiTokenUsage?.record).toHaveBeenCalledTimes(2);
1001
+ expect(genAiTokenUsage?.record).toHaveBeenCalledWith(12, {
1002
+ "gen_ai.operation.name": "chat",
1003
+ "gen_ai.provider.name": "openai",
1004
+ "gen_ai.request.model": "gpt-5.4",
1005
+ "gen_ai.token.type": "input",
1006
+ });
1007
+ expect(genAiTokenUsage?.record).toHaveBeenCalledWith(7, {
1008
+ "gen_ai.operation.name": "chat",
1009
+ "gen_ai.provider.name": "openai",
1010
+ "gen_ai.request.model": "gpt-5.4",
1011
+ "gen_ai.token.type": "output",
1012
+ });
1013
+ expect(JSON.stringify(genAiTokenUsage?.record.mock.calls)).not.toContain("session-key");
1014
+ await service.stop?.(ctx);
1015
+ });
1016
+
1017
+ test("bounds agent identifiers on model usage metric attributes", async () => {
1018
+ const service = createDiagnosticsOtelService();
1019
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
1020
+ await service.start(ctx);
1021
+
1022
+ emitDiagnosticEvent({
1023
+ type: "model.usage",
1024
+ agentId: "Bearer sk-test-secret-value",
1025
+ provider: "openai",
1026
+ model: "gpt-5.4",
1027
+ usage: { input: 2 },
1028
+ });
1029
+ await flushDiagnosticEvents();
1030
+
1031
+ expect(telemetryState.counters.get("openclaw.tokens")?.add).toHaveBeenCalledWith(2, {
1032
+ "openclaw.channel": "unknown",
1033
+ "openclaw.agent": "unknown",
1034
+ "openclaw.provider": "openai",
1035
+ "openclaw.model": "gpt-5.4",
1036
+ "openclaw.token": "input",
1037
+ });
1038
+ expect(
1039
+ JSON.stringify(telemetryState.counters.get("openclaw.tokens")?.add.mock.calls),
1040
+ ).not.toContain("sk-test-secret-value");
1041
+ await service.stop?.(ctx);
1042
+ });
1043
+
1044
+ test("keeps GenAI token usage metric model attribute present when model is unavailable", async () => {
1045
+ const service = createDiagnosticsOtelService();
1046
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
1047
+ await service.start(ctx);
1048
+
1049
+ emitDiagnosticEvent({
1050
+ type: "model.usage",
1051
+ provider: "openai",
1052
+ usage: { input: 2 },
1053
+ });
1054
+ await flushDiagnosticEvents();
1055
+
1056
+ expect(telemetryState.histograms.get("gen_ai.client.token.usage")?.record).toHaveBeenCalledWith(
1057
+ 2,
1058
+ {
1059
+ "gen_ai.operation.name": "chat",
1060
+ "gen_ai.provider.name": "openai",
1061
+ "gen_ai.request.model": "unknown",
1062
+ "gen_ai.token.type": "input",
1063
+ },
1064
+ );
1065
+ await service.stop?.(ctx);
1066
+ });
1067
+
1068
+ test("exports GenAI usage attributes on model usage spans without diagnostic identifiers", async () => {
1069
+ const service = createDiagnosticsOtelService();
1070
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true });
1071
+ await service.start(ctx);
1072
+
1073
+ emitDiagnosticEvent({
1074
+ type: "model.usage",
1075
+ sessionKey: "session-key",
1076
+ sessionId: "session-id",
1077
+ provider: "anthropic",
1078
+ model: "claude-sonnet-4.6",
1079
+ usage: {
1080
+ input: 100,
1081
+ output: 40,
1082
+ cacheRead: 30,
1083
+ cacheWrite: 20,
1084
+ promptTokens: 150,
1085
+ total: 190,
1086
+ },
1087
+ durationMs: 25,
1088
+ });
1089
+ await flushDiagnosticEvents();
1090
+
1091
+ const modelUsageCall = telemetryState.tracer.startSpan.mock.calls.find(
1092
+ (call) => call[0] === "openclaw.model.usage",
1093
+ );
1094
+ expect(modelUsageCall?.[1]).toMatchObject({
1095
+ attributes: {
1096
+ "gen_ai.operation.name": "chat",
1097
+ "gen_ai.system": "anthropic",
1098
+ "gen_ai.request.model": "claude-sonnet-4.6",
1099
+ "gen_ai.usage.input_tokens": 150,
1100
+ "gen_ai.usage.output_tokens": 40,
1101
+ "gen_ai.usage.cache_read.input_tokens": 30,
1102
+ "gen_ai.usage.cache_creation.input_tokens": 20,
1103
+ },
1104
+ });
1105
+ expect(modelUsageCall?.[1]).toEqual({
1106
+ attributes: expect.not.objectContaining({
1107
+ "openclaw.sessionKey": expect.anything(),
1108
+ "openclaw.sessionId": expect.anything(),
1109
+ "gen_ai.provider.name": expect.anything(),
1110
+ "gen_ai.input.messages": expect.anything(),
1111
+ "gen_ai.output.messages": expect.anything(),
1112
+ }),
1113
+ startTime: expect.any(Number),
1114
+ });
1115
+ expect(JSON.stringify(modelUsageCall)).not.toContain("session-key");
1116
+ await service.stop?.(ctx);
1117
+ });
1118
+
1119
+ test("exports GenAI client operation duration histogram without diagnostic identifiers", async () => {
1120
+ const service = createDiagnosticsOtelService();
1121
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });
1122
+ await service.start(ctx);
1123
+
1124
+ emitDiagnosticEvent({
1125
+ type: "model.call.completed",
1126
+ runId: "run-1",
1127
+ callId: "call-1",
1128
+ sessionKey: "session-key",
1129
+ provider: "openai",
1130
+ model: "gpt-5.4",
1131
+ api: "openai-completions",
1132
+ durationMs: 250,
1133
+ });
1134
+ emitDiagnosticEvent({
1135
+ type: "model.call.error",
1136
+ runId: "run-1",
1137
+ callId: "call-2",
1138
+ sessionKey: "session-key",
1139
+ provider: "google",
1140
+ model: "gemini-2.5-flash",
1141
+ api: "google-generative-ai",
1142
+ durationMs: 1250,
1143
+ errorCategory: "TimeoutError",
1144
+ });
1145
+ await flushDiagnosticEvents();
1146
+
1147
+ expect(telemetryState.meter.createHistogram).toHaveBeenCalledWith(
1148
+ "gen_ai.client.operation.duration",
1149
+ expect.objectContaining({
1150
+ unit: "s",
1151
+ advice: {
1152
+ explicitBucketBoundaries: expect.arrayContaining([0.01, 0.32, 2.56, 81.92]),
1153
+ },
1154
+ }),
1155
+ );
1156
+ const genAiOperationDuration = telemetryState.histograms.get(
1157
+ "gen_ai.client.operation.duration",
1158
+ );
1159
+ expect(genAiOperationDuration?.record).toHaveBeenCalledTimes(2);
1160
+ expect(genAiOperationDuration?.record).toHaveBeenCalledWith(0.25, {
1161
+ "gen_ai.operation.name": "text_completion",
1162
+ "gen_ai.provider.name": "openai",
1163
+ "gen_ai.request.model": "gpt-5.4",
1164
+ });
1165
+ expect(genAiOperationDuration?.record).toHaveBeenCalledWith(1.25, {
1166
+ "gen_ai.operation.name": "generate_content",
1167
+ "gen_ai.provider.name": "google",
1168
+ "gen_ai.request.model": "gemini-2.5-flash",
1169
+ "error.type": "TimeoutError",
1170
+ });
1171
+ expect(JSON.stringify(genAiOperationDuration?.record.mock.calls)).not.toContain("session-key");
1172
+ expect(JSON.stringify(genAiOperationDuration?.record.mock.calls)).not.toContain("run-1");
1173
+ await service.stop?.(ctx);
1174
+ });
1175
+
1176
+ test("exports run, model call, and tool execution lifecycle spans", async () => {
1177
+ const service = createDiagnosticsOtelService();
1178
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
1179
+ await service.start(ctx);
1180
+
1181
+ emitDiagnosticEvent({
1182
+ type: "run.completed",
1183
+ runId: "run-1",
1184
+ sessionKey: "session-key",
1185
+ provider: "openai",
1186
+ model: "gpt-5.4",
1187
+ channel: "webchat",
1188
+ outcome: "completed",
1189
+ durationMs: 100,
1190
+ trace: {
1191
+ traceId: TRACE_ID,
1192
+ spanId: SPAN_ID,
1193
+ traceFlags: "01",
1194
+ },
1195
+ });
1196
+ emitDiagnosticEvent({
1197
+ type: "model.call.completed",
1198
+ runId: "run-1",
1199
+ callId: "call-1",
1200
+ provider: "openai",
1201
+ model: "gpt-5.4",
1202
+ api: "completions",
1203
+ transport: "http",
1204
+ durationMs: 80,
1205
+ requestPayloadBytes: 1234,
1206
+ responseStreamBytes: 567,
1207
+ timeToFirstByteMs: 45,
1208
+ trace: {
1209
+ traceId: TRACE_ID,
1210
+ spanId: CHILD_SPAN_ID,
1211
+ parentSpanId: SPAN_ID,
1212
+ traceFlags: "01",
1213
+ },
1214
+ });
1215
+ emitDiagnosticEvent({
1216
+ type: "harness.run.completed",
1217
+ runId: "run-1",
1218
+ sessionKey: "session-key",
1219
+ sessionId: "session-1",
1220
+ provider: "codex",
1221
+ model: "gpt-5.4",
1222
+ channel: "qa",
1223
+ harnessId: "codex",
1224
+ pluginId: "codex-plugin",
1225
+ outcome: "completed",
1226
+ durationMs: 90,
1227
+ resultClassification: "reasoning-only",
1228
+ yieldDetected: true,
1229
+ itemLifecycle: { startedCount: 3, completedCount: 2, activeCount: 1 },
1230
+ trace: {
1231
+ traceId: TRACE_ID,
1232
+ spanId: GRANDCHILD_SPAN_ID,
1233
+ parentSpanId: CHILD_SPAN_ID,
1234
+ traceFlags: "01",
1235
+ },
1236
+ });
1237
+ emitDiagnosticEvent({
1238
+ type: "tool.execution.error",
1239
+ runId: "run-1",
1240
+ toolName: "read",
1241
+ toolCallId: "tool-1",
1242
+ paramsSummary: { kind: "object" },
1243
+ durationMs: 20,
1244
+ errorCategory: "TypeError",
1245
+ errorCode: "429",
1246
+ trace: {
1247
+ traceId: TRACE_ID,
1248
+ spanId: GRANDCHILD_SPAN_ID,
1249
+ parentSpanId: CHILD_SPAN_ID,
1250
+ traceFlags: "01",
1251
+ },
1252
+ });
1253
+ await flushDiagnosticEvents();
1254
+
1255
+ const spanNames = telemetryState.tracer.startSpan.mock.calls.map((call) => call[0]);
1256
+ expect(spanNames).toEqual(
1257
+ expect.arrayContaining([
1258
+ "openclaw.run",
1259
+ "openclaw.model.call",
1260
+ "openclaw.harness.run",
1261
+ "openclaw.tool.execution",
1262
+ ]),
1263
+ );
1264
+
1265
+ const runCall = telemetryState.tracer.startSpan.mock.calls.find(
1266
+ (call) => call[0] === "openclaw.run",
1267
+ );
1268
+ expect(runCall?.[1]).toMatchObject({
1269
+ attributes: {
1270
+ "openclaw.outcome": "completed",
1271
+ "openclaw.provider": "openai",
1272
+ "openclaw.model": "gpt-5.4",
1273
+ "openclaw.channel": "webchat",
1274
+ },
1275
+ startTime: expect.any(Number),
1276
+ });
1277
+ expect(runCall?.[1]).toEqual({
1278
+ attributes: expect.not.objectContaining({
1279
+ "gen_ai.system": expect.anything(),
1280
+ "gen_ai.request.model": expect.anything(),
1281
+ "openclaw.runId": expect.anything(),
1282
+ "openclaw.sessionKey": expect.anything(),
1283
+ "openclaw.traceId": expect.anything(),
1284
+ }),
1285
+ startTime: expect.any(Number),
1286
+ });
1287
+
1288
+ const modelCall = telemetryState.tracer.startSpan.mock.calls.find(
1289
+ (call) => call[0] === "openclaw.model.call",
1290
+ );
1291
+ expect(modelCall?.[1]).toMatchObject({
1292
+ attributes: {
1293
+ "gen_ai.system": "openai",
1294
+ "gen_ai.request.model": "gpt-5.4",
1295
+ "gen_ai.operation.name": "text_completion",
1296
+ },
1297
+ });
1298
+ expect(modelCall?.[1]).toEqual({
1299
+ attributes: expect.not.objectContaining({
1300
+ "gen_ai.provider.name": expect.anything(),
1301
+ "openclaw.callId": expect.anything(),
1302
+ "openclaw.runId": expect.anything(),
1303
+ "openclaw.sessionKey": expect.anything(),
1304
+ }),
1305
+ startTime: expect.any(Number),
1306
+ });
1307
+ expect(modelCall?.[2]).toBeUndefined();
1308
+
1309
+ const harnessCall = telemetryState.tracer.startSpan.mock.calls.find(
1310
+ (call) => call[0] === "openclaw.harness.run",
1311
+ );
1312
+ expect(harnessCall?.[1]).toMatchObject({
1313
+ attributes: {
1314
+ "openclaw.harness.id": "codex",
1315
+ "openclaw.harness.plugin": "codex-plugin",
1316
+ "openclaw.outcome": "completed",
1317
+ "openclaw.provider": "codex",
1318
+ "openclaw.model": "gpt-5.4",
1319
+ "openclaw.channel": "qa",
1320
+ "openclaw.harness.result_classification": "reasoning-only",
1321
+ "openclaw.harness.yield_detected": true,
1322
+ "openclaw.harness.items.started": 3,
1323
+ "openclaw.harness.items.completed": 2,
1324
+ "openclaw.harness.items.active": 1,
1325
+ },
1326
+ startTime: expect.any(Number),
1327
+ });
1328
+ expect(harnessCall?.[1]).toEqual({
1329
+ attributes: expect.not.objectContaining({
1330
+ "openclaw.runId": expect.anything(),
1331
+ "openclaw.sessionId": expect.anything(),
1332
+ "openclaw.sessionKey": expect.anything(),
1333
+ "openclaw.traceId": expect.anything(),
1334
+ }),
1335
+ startTime: expect.any(Number),
1336
+ });
1337
+ expect(harnessCall?.[2]).toBeUndefined();
1338
+
1339
+ const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
1340
+ (call) => call[0] === "openclaw.tool.execution",
1341
+ );
1342
+ expect(toolCall?.[1]).toMatchObject({
1343
+ attributes: {
1344
+ "openclaw.toolName": "read",
1345
+ "openclaw.errorCategory": "TypeError",
1346
+ "openclaw.errorCode": "429",
1347
+ "openclaw.tool.params.kind": "object",
1348
+ "gen_ai.tool.name": "read",
1349
+ },
1350
+ });
1351
+ expect(toolCall?.[1]).toEqual({
1352
+ attributes: expect.not.objectContaining({
1353
+ "openclaw.toolCallId": expect.anything(),
1354
+ "openclaw.runId": expect.anything(),
1355
+ "openclaw.sessionKey": expect.anything(),
1356
+ }),
1357
+ startTime: expect.any(Number),
1358
+ });
1359
+ expect(toolCall?.[2]).toBeUndefined();
1360
+
1361
+ expect(
1362
+ telemetryState.histograms.get("openclaw.model_call.duration_ms")?.record,
1363
+ ).toHaveBeenCalledWith(
1364
+ 80,
1365
+ expect.objectContaining({
1366
+ "openclaw.provider": "openai",
1367
+ "openclaw.model": "gpt-5.4",
1368
+ }),
1369
+ );
1370
+ expect(
1371
+ telemetryState.histograms.get("openclaw.model_call.request_bytes")?.record,
1372
+ ).toHaveBeenCalledWith(
1373
+ 1234,
1374
+ expect.objectContaining({
1375
+ "openclaw.provider": "openai",
1376
+ "openclaw.model": "gpt-5.4",
1377
+ }),
1378
+ );
1379
+ expect(
1380
+ telemetryState.histograms.get("openclaw.model_call.response_bytes")?.record,
1381
+ ).toHaveBeenCalledWith(
1382
+ 567,
1383
+ expect.objectContaining({
1384
+ "openclaw.provider": "openai",
1385
+ "openclaw.model": "gpt-5.4",
1386
+ }),
1387
+ );
1388
+ expect(
1389
+ telemetryState.histograms.get("openclaw.model_call.time_to_first_byte_ms")?.record,
1390
+ ).toHaveBeenCalledWith(
1391
+ 45,
1392
+ expect.objectContaining({
1393
+ "openclaw.provider": "openai",
1394
+ "openclaw.model": "gpt-5.4",
1395
+ }),
1396
+ );
1397
+ const modelCallSpan = telemetryState.spans.find((span) => span.name === "openclaw.model.call");
1398
+ expect(modelCallSpan?.setAttributes).toHaveBeenCalledWith(
1399
+ expect.objectContaining({
1400
+ "openclaw.model_call.request_bytes": 1234,
1401
+ "openclaw.model_call.response_bytes": 567,
1402
+ "openclaw.model_call.time_to_first_byte_ms": 45,
1403
+ }),
1404
+ );
1405
+ expect(telemetryState.histograms.get("openclaw.run.duration_ms")?.record).toHaveBeenCalledWith(
1406
+ 100,
1407
+ expect.not.objectContaining({
1408
+ "openclaw.runId": expect.anything(),
1409
+ }),
1410
+ );
1411
+ expect(
1412
+ telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
1413
+ ).toHaveBeenCalledWith(
1414
+ 90,
1415
+ expect.objectContaining({
1416
+ "openclaw.harness.id": "codex",
1417
+ "openclaw.harness.plugin": "codex-plugin",
1418
+ "openclaw.outcome": "completed",
1419
+ }),
1420
+ );
1421
+ expect(
1422
+ telemetryState.histograms.get("openclaw.harness.duration_ms")?.record,
1423
+ ).toHaveBeenCalledWith(
1424
+ 90,
1425
+ expect.not.objectContaining({
1426
+ "openclaw.runId": expect.anything(),
1427
+ "openclaw.sessionKey": expect.anything(),
1428
+ }),
1429
+ );
1430
+ expect(
1431
+ telemetryState.histograms.get("openclaw.tool.execution.duration_ms")?.record,
1432
+ ).toHaveBeenCalledWith(
1433
+ 20,
1434
+ expect.not.objectContaining({
1435
+ "openclaw.errorCode": expect.anything(),
1436
+ "openclaw.runId": expect.anything(),
1437
+ }),
1438
+ );
1439
+
1440
+ const toolSpan = telemetryState.spans.find((span) => span.name === "openclaw.tool.execution");
1441
+ expect(toolSpan?.setStatus).toHaveBeenCalledWith({
1442
+ code: 2,
1443
+ message: "TypeError",
1444
+ });
1445
+ expect(toolSpan?.end).toHaveBeenCalledWith(expect.any(Number));
1446
+ expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
1447
+ await service.stop?.(ctx);
1448
+ });
1449
+
1450
+ test("maps model call APIs to GenAI operation names and error type", async () => {
1451
+ const service = createDiagnosticsOtelService();
1452
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
1453
+ await service.start(ctx);
1454
+
1455
+ emitDiagnosticEvent({
1456
+ type: "model.call.completed",
1457
+ runId: "run-1",
1458
+ callId: "call-1",
1459
+ provider: "openai",
1460
+ model: "gpt-5.4",
1461
+ api: "openai-completions",
1462
+ durationMs: 80,
1463
+ });
1464
+ emitDiagnosticEvent({
1465
+ type: "model.call.completed",
1466
+ runId: "run-1",
1467
+ callId: "call-2",
1468
+ provider: "google",
1469
+ model: "gemini-2.5-flash",
1470
+ api: "google-generative-ai",
1471
+ durationMs: 90,
1472
+ });
1473
+ emitDiagnosticEvent({
1474
+ type: "model.call.error",
1475
+ runId: "run-1",
1476
+ callId: "call-3",
1477
+ provider: "openai",
1478
+ model: "gpt-5.4",
1479
+ api: "openai-responses",
1480
+ durationMs: 40,
1481
+ errorCategory: "TimeoutError",
1482
+ });
1483
+ await flushDiagnosticEvents();
1484
+
1485
+ const modelCallAttrs = telemetryState.tracer.startSpan.mock.calls
1486
+ .filter((call) => call[0] === "openclaw.model.call")
1487
+ .map((call) => (call[1] as { attributes?: Record<string, unknown> }).attributes);
1488
+ expect(modelCallAttrs).toEqual([
1489
+ expect.objectContaining({
1490
+ "gen_ai.system": "openai",
1491
+ "gen_ai.request.model": "gpt-5.4",
1492
+ "gen_ai.operation.name": "text_completion",
1493
+ }),
1494
+ expect.objectContaining({
1495
+ "gen_ai.system": "google",
1496
+ "gen_ai.request.model": "gemini-2.5-flash",
1497
+ "gen_ai.operation.name": "generate_content",
1498
+ }),
1499
+ expect.objectContaining({
1500
+ "gen_ai.system": "openai",
1501
+ "gen_ai.request.model": "gpt-5.4",
1502
+ "gen_ai.operation.name": "chat",
1503
+ "error.type": "TimeoutError",
1504
+ }),
1505
+ ]);
1506
+ await service.stop?.(ctx);
1507
+ });
1508
+
1509
+ test("uses latest GenAI provider attribute only when semconv opt-in is set", async () => {
1510
+ process.env.OTEL_SEMCONV_STABILITY_OPT_IN = "http,gen_ai_latest_experimental";
1511
+
1512
+ const service = createDiagnosticsOtelService();
1513
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
1514
+ await service.start(ctx);
1515
+
1516
+ emitDiagnosticEvent({
1517
+ type: "model.call.completed",
1518
+ runId: "run-1",
1519
+ callId: "call-1",
1520
+ provider: "openai",
1521
+ model: "gpt-5.4",
1522
+ api: "openai-completions",
1523
+ durationMs: 80,
1524
+ });
1525
+ emitDiagnosticEvent({
1526
+ type: "model.usage",
1527
+ provider: "openai",
1528
+ model: "gpt-5.4",
1529
+ usage: { input: 3, output: 2 },
1530
+ durationMs: 10,
1531
+ });
1532
+ await flushDiagnosticEvents();
1533
+
1534
+ const modelCall = telemetryState.tracer.startSpan.mock.calls.find(
1535
+ (call) => call[0] === "openclaw.model.call",
1536
+ );
1537
+ expect(modelCall?.[1]).toMatchObject({
1538
+ attributes: {
1539
+ "gen_ai.provider.name": "openai",
1540
+ "gen_ai.request.model": "gpt-5.4",
1541
+ "gen_ai.operation.name": "text_completion",
1542
+ },
1543
+ });
1544
+ expect(modelCall?.[1]).toEqual({
1545
+ attributes: expect.not.objectContaining({
1546
+ "gen_ai.system": expect.anything(),
1547
+ }),
1548
+ startTime: expect.any(Number),
1549
+ });
1550
+ const modelUsage = telemetryState.tracer.startSpan.mock.calls.find(
1551
+ (call) => call[0] === "openclaw.model.usage",
1552
+ );
1553
+ expect(modelUsage?.[1]).toMatchObject({
1554
+ attributes: {
1555
+ "gen_ai.provider.name": "openai",
1556
+ "gen_ai.request.model": "gpt-5.4",
1557
+ "gen_ai.operation.name": "chat",
1558
+ },
1559
+ });
1560
+ expect(modelUsage?.[1]).toEqual({
1561
+ attributes: expect.not.objectContaining({
1562
+ "gen_ai.system": expect.anything(),
1563
+ }),
1564
+ startTime: expect.any(Number),
1565
+ });
1566
+ await service.stop?.(ctx);
1567
+ });
1568
+
1569
+ test("records upstream request id hashes as model call span events only", async () => {
1570
+ const service = createDiagnosticsOtelService();
1571
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
1572
+ await service.start(ctx);
1573
+
1574
+ emitDiagnosticEvent({
1575
+ type: "model.call.error",
1576
+ runId: "run-1",
1577
+ callId: "call-1",
1578
+ provider: "openai",
1579
+ model: "gpt-5.4",
1580
+ api: "openai-responses",
1581
+ durationMs: 40,
1582
+ errorCategory: "ProviderError",
1583
+ failureKind: "terminated",
1584
+ upstreamRequestIdHash: "sha256:123456abcdef",
1585
+ });
1586
+ await flushDiagnosticEvents();
1587
+
1588
+ const modelCall = telemetryState.tracer.startSpan.mock.calls.find(
1589
+ (call) => call[0] === "openclaw.model.call",
1590
+ );
1591
+ expect(modelCall?.[1]).toEqual({
1592
+ attributes: expect.objectContaining({
1593
+ "openclaw.failureKind": "terminated",
1594
+ }),
1595
+ startTime: expect.any(Number),
1596
+ });
1597
+ expect(modelCall?.[1]).toEqual({
1598
+ attributes: expect.not.objectContaining({
1599
+ "openclaw.upstreamRequestIdHash": expect.anything(),
1600
+ }),
1601
+ startTime: expect.any(Number),
1602
+ });
1603
+ const span = telemetryState.spans.find((candidate) => candidate.name === "openclaw.model.call");
1604
+ expect(span?.addEvent).toHaveBeenCalledWith("openclaw.provider.request", {
1605
+ "openclaw.upstreamRequestIdHash": "sha256:123456abcdef",
1606
+ });
1607
+ expect(
1608
+ telemetryState.histograms.get("openclaw.model_call.duration_ms")?.record,
1609
+ ).toHaveBeenCalledWith(
1610
+ 40,
1611
+ expect.objectContaining({
1612
+ "openclaw.failureKind": "terminated",
1613
+ }),
1614
+ );
1615
+ expect(
1616
+ telemetryState.histograms.get("openclaw.model_call.duration_ms")?.record,
1617
+ ).toHaveBeenCalledWith(
1618
+ 40,
1619
+ expect.not.objectContaining({
1620
+ "openclaw.upstreamRequestIdHash": expect.anything(),
1621
+ }),
1622
+ );
1623
+ await service.stop?.(ctx);
1624
+ });
1625
+
1626
+ test("exports trusted context assembly spans without prompt content", async () => {
1627
+ const service = createDiagnosticsOtelService();
1628
+ const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
1629
+ await service.start(ctx);
1630
+
1631
+ emitTrustedDiagnosticEvent({
1632
+ type: "run.started",
1633
+ runId: "run-1",
1634
+ provider: "openai",
1635
+ model: "gpt-5.4",
1636
+ trace: {
1637
+ traceId: TRACE_ID,
1638
+ spanId: SPAN_ID,
1639
+ traceFlags: "01",
1640
+ },
1641
+ });
1642
+ emitTrustedDiagnosticEvent({
1643
+ type: "context.assembled",
1644
+ runId: "run-1",
1645
+ sessionKey: "session-key",
1646
+ sessionId: "session-id",
1647
+ provider: "openai",
1648
+ model: "gpt-5.4",
1649
+ channel: "webchat",
1650
+ trigger: "message",
1651
+ messageCount: 12,
1652
+ historyTextChars: 1234,
1653
+ historyImageBlocks: 2,
1654
+ maxMessageTextChars: 456,
1655
+ systemPromptChars: 789,
1656
+ promptChars: 42,
1657
+ promptImages: 1,
1658
+ contextTokenBudget: 128_000,
1659
+ reserveTokens: 4096,
1660
+ trace: {
1661
+ traceId: TRACE_ID,
1662
+ spanId: GRANDCHILD_SPAN_ID,
1663
+ parentSpanId: SPAN_ID,
1664
+ traceFlags: "01",
1665
+ },
1666
+ });
1667
+ await flushDiagnosticEvents();
1668
+
1669
+ const contextCall = telemetryState.tracer.startSpan.mock.calls.find(
1670
+ (call) => call[0] === "openclaw.context.assembled",
1671
+ );
1672
+ const runSpan = telemetryState.spans.find((span) => span.name === "openclaw.run");
1673
+ const runSpanId = runSpan?.spanContext.mock.results[0]?.value?.spanId;
1674
+ expect(contextCall?.[1]).toMatchObject({
1675
+ attributes: {
1676
+ "openclaw.provider": "openai",
1677
+ "openclaw.model": "gpt-5.4",
1678
+ "openclaw.channel": "webchat",
1679
+ "openclaw.trigger": "message",
1680
+ "openclaw.context.message_count": 12,
1681
+ "openclaw.context.history_text_chars": 1234,
1682
+ "openclaw.context.history_image_blocks": 2,
1683
+ "openclaw.context.max_message_text_chars": 456,
1684
+ "openclaw.context.system_prompt_chars": 789,
1685
+ "openclaw.context.prompt_chars": 42,
1686
+ "openclaw.context.prompt_images": 1,
1687
+ "openclaw.context.token_budget": 128_000,
1688
+ "openclaw.context.reserve_tokens": 4096,
1689
+ },
1690
+ });
1691
+ expect(contextCall?.[1]).toEqual({
1692
+ attributes: expect.any(Object),
1693
+ startTime: expect.any(Number),
1694
+ });
1695
+ expect(JSON.stringify(contextCall)).not.toContain("session-key");
1696
+ expect(JSON.stringify(contextCall)).not.toContain("prompt text");
1697
+ expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
1698
+ expect.anything(),
1699
+ expect.objectContaining({ traceId: TRACE_ID, spanId: runSpanId }),
1700
+ );
1701
+ expect(
1702
+ (contextCall?.[2] as { spanContext?: { spanId?: string } } | undefined)?.spanContext?.spanId,
1703
+ ).toBe(runSpanId);
1704
+ await service.stop?.(ctx);
1705
+ });
1706
+
1707
// Checks that a `tool.loop` diagnostic becomes a counter increment plus an
// `openclaw.tool.loop` span that carry only loop metadata. The serialized
// startSpan call must contain neither the session key nor the loop message text.
test("exports tool loop diagnostics without loop messages or session identifiers", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "tool.loop",
    sessionKey: "session-key",
    sessionId: "session-id",
    toolName: "process",
    level: "critical",
    action: "block",
    detector: "known_poll_no_progress",
    count: 20,
    message: "CRITICAL: repeated secret-bearing tool output",
    pairedToolName: "read",
  });
  await flushDiagnosticEvents();

  // The counter labels and the span attributes are expected to be the same set.
  const expectedLoopAttributes = {
    "openclaw.toolName": "process",
    "openclaw.loop.level": "critical",
    "openclaw.loop.action": "block",
    "openclaw.loop.detector": "known_poll_no_progress",
    "openclaw.loop.count": 20,
    "openclaw.loop.paired_tool": "read",
  };

  const loopCounter = telemetryState.counters.get("openclaw.tool.loop");
  expect(loopCounter?.add).toHaveBeenCalledWith(1, expectedLoopAttributes);

  const loopSpanCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.tool.loop",
  );
  expect(loopSpanCall?.[1]).toMatchObject({ attributes: expectedLoopAttributes });

  // Status code 2 is presumably OpenTelemetry's SpanStatusCode.ERROR.
  const loopSpan = telemetryState.spans.find(({ name }) => name === "openclaw.tool.loop");
  expect(loopSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "known_poll_no_progress:block",
  });

  // Neither the session key nor the free-form loop message may leak into the span call.
  const serializedLoopCall = JSON.stringify(loopSpanCall);
  expect(serializedLoopCall).not.toContain("session-key");
  expect(serializedLoopCall).not.toContain("secret-bearing");
  await otelService.stop?.(otelCtx);
});
1756
+
1757
// Emits one memory sample and one memory-pressure diagnostic, then checks the
// RSS histogram, the pressure counter and the `openclaw.memory.pressure` span.
// The serialized span call must not mention "session" anywhere.
test("exports diagnostic memory samples and pressure without session identifiers", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "diagnostic.memory.sample",
    uptimeMs: 1234,
    memory: {
      rssBytes: 100,
      heapUsedBytes: 40,
      heapTotalBytes: 80,
      externalBytes: 10,
      arrayBuffersBytes: 5,
    },
  });
  emitDiagnosticEvent({
    type: "diagnostic.memory.pressure",
    level: "critical",
    reason: "rss_growth",
    thresholdBytes: 512,
    rssGrowthBytes: 256,
    windowMs: 60_000,
    memory: {
      rssBytes: 200,
      heapUsedBytes: 50,
      heapTotalBytes: 90,
      externalBytes: 20,
      arrayBuffersBytes: 6,
    },
  });
  await flushDiagnosticEvents();

  // Labels expected on every pressure-derived metric.
  const pressureLabels = {
    "openclaw.memory.level": "critical",
    "openclaw.memory.reason": "rss_growth",
  };

  // The plain sample is recorded without labels; the pressure event is labelled.
  const rssHistogram = telemetryState.histograms.get("openclaw.memory.rss_bytes");
  expect(rssHistogram?.record).toHaveBeenCalledWith(100, {});
  expect(rssHistogram?.record).toHaveBeenCalledWith(200, pressureLabels);

  const pressureCounter = telemetryState.counters.get("openclaw.memory.pressure");
  expect(pressureCounter?.add).toHaveBeenCalledWith(1, pressureLabels);

  const pressureCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.memory.pressure",
  );
  expect(pressureCall?.[1]).toMatchObject({
    attributes: {
      "openclaw.memory.level": "critical",
      "openclaw.memory.reason": "rss_growth",
      "openclaw.memory.rss_bytes": 200,
      "openclaw.memory.heap_used_bytes": 50,
      "openclaw.memory.heap_total_bytes": 90,
      "openclaw.memory.external_bytes": 20,
      "openclaw.memory.array_buffers_bytes": 6,
      "openclaw.memory.threshold_bytes": 512,
      "openclaw.memory.rss_growth_bytes": 256,
      "openclaw.memory.window_ms": 60_000,
    },
  });

  const pressureSpan = telemetryState.spans.find(
    ({ name }) => name === "openclaw.memory.pressure",
  );
  expect(pressureSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "rss_growth",
  });

  const serializedPressureCall = JSON.stringify(pressureCall);
  expect(serializedPressureCall).not.toContain("session");
  await otelService.stop?.(otelCtx);
});
1832
+
1833
// Drives a trusted run -> model call -> tool execution lifecycle and checks the
// parent context handed to each startSpan call: the model call is parented to
// the run span, the tool execution to the model-call span, and the run span
// itself receives no parent context.
test("parents trusted diagnostic lifecycle spans from active started spans", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  // Builds a fresh trace object per event so no two events share a reference.
  const traceFor = (spanId: string, parentSpanId: string) => ({
    traceId: TRACE_ID,
    spanId,
    parentSpanId,
    traceFlags: "01",
  });

  emitTrustedDiagnosticEvent({
    type: "run.started",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    trace: traceFor(CHILD_SPAN_ID, SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "model.call.started",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    trace: traceFor(GRANDCHILD_SPAN_ID, CHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "tool.execution.started",
    runId: "run-1",
    toolName: "read",
    trace: traceFor(TOOL_SPAN_ID, GRANDCHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "tool.execution.error",
    runId: "run-1",
    toolName: "read",
    durationMs: 20,
    errorCategory: "TypeError",
    trace: traceFor(TOOL_SPAN_ID, GRANDCHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    trace: traceFor(GRANDCHILD_SPAN_ID, CHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
    trace: traceFor(CHILD_SPAN_ID, SPAN_ID),
  });
  await flushDiagnosticEvents();

  const findSpan = (spanName: string) =>
    telemetryState.spans.find(({ name }) => name === spanName);
  // Span id returned by the first spanContext() call made on the mocked span.
  const firstReportedSpanId = (span: ReturnType<typeof findSpan>) =>
    span?.spanContext.mock.results[0]?.value?.spanId;

  const runSpanId = firstReportedSpanId(findSpan("openclaw.run"));
  const modelSpanId = firstReportedSpanId(findSpan("openclaw.model.call"));
  const toolSpan = findSpan("openclaw.tool.execution");

  // Exactly two parent contexts are built: one from the run span, one from the model span.
  expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledTimes(2);
  const parentSpanContexts = telemetryState.tracer.setSpanContext.mock.calls.map(
    ([, spanContext]) => spanContext,
  );
  expect(parentSpanContexts).toEqual([
    expect.objectContaining({ traceId: TRACE_ID, spanId: runSpanId }),
    expect.objectContaining({ traceId: TRACE_ID, spanId: modelSpanId }),
  ]);

  // Map each started span name to the span id of the parent context it received.
  const parentBySpanName: Record<string, string | undefined> = {};
  for (const [spanName, , parentCtx] of telemetryState.tracer.startSpan.mock.calls) {
    parentBySpanName[spanName] = (
      parentCtx as { spanContext?: { spanId?: string } } | undefined
    )?.spanContext?.spanId;
  }
  expect(parentBySpanName).toMatchObject({
    "openclaw.run": undefined,
    "openclaw.model.call": runSpanId,
    "openclaw.tool.execution": modelSpanId,
  });
  expect(toolSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "TypeError",
  });
  await otelService.stop?.(otelCtx);
});
1946
+
1947
// A trusted `model.usage` event emitted after `run.completed` must still be
// parented to the run span, and the run span must nevertheless have been ended.
test("keeps trusted run spans alive long enough for post-completion usage parenting", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  // All three events name SPAN_ID as their parent; only the span id varies.
  const traceUnderRoot = (spanId: string) => ({
    traceId: TRACE_ID,
    spanId,
    parentSpanId: SPAN_ID,
    traceFlags: "01",
  });

  emitTrustedDiagnosticEvent({
    type: "run.started",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    trace: traceUnderRoot(CHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
    trace: traceUnderRoot(CHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "model.usage",
    provider: "openai",
    model: "gpt-5.4",
    usage: { input: 3, output: 2, total: 5 },
    durationMs: 10,
    trace: traceUnderRoot(GRANDCHILD_SPAN_ID),
  });
  await flushDiagnosticEvents();

  const runSpan = telemetryState.spans.find(({ name }) => name === "openclaw.run");
  const runSpanId = runSpan?.spanContext.mock.results[0]?.value?.spanId;
  const modelUsageCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.model.usage",
  );

  expect(telemetryState.tracer.setSpanContext).toHaveBeenCalledWith(
    expect.anything(),
    expect.objectContaining({ traceId: TRACE_ID, spanId: runSpanId }),
  );

  // The usage span's parent context must point at the run span.
  const usageParentCtx = modelUsageCall?.[2] as
    | { spanContext?: { spanId?: string } }
    | undefined;
  expect(usageParentCtx?.spanContext?.spanId).toBe(runSpanId);
  expect(runSpan?.end).toHaveBeenCalledWith(expect.any(Number));
  await otelService.stop?.(otelCtx);
});
2010
+
2011
// Trusted `*.completed` events that arrive without a matching `*.started` event
// must not get a parent context, even though their trace data names a parent
// span id.
test("does not force remote parents for completed-only trusted lifecycle spans", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  const traceFor = (spanId: string, parentSpanId: string) => ({
    traceId: TRACE_ID,
    spanId,
    parentSpanId,
    traceFlags: "01",
  });

  emitTrustedDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
    trace: traceFor(CHILD_SPAN_ID, SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    trace: traceFor(GRANDCHILD_SPAN_ID, CHILD_SPAN_ID),
  });
  await flushDiagnosticEvents();

  expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();

  // Third startSpan argument (the parent context) keyed by span name.
  const parentBySpanName: Record<string, unknown> = {};
  for (const [spanName, , parentCtx] of telemetryState.tracer.startSpan.mock.calls) {
    parentBySpanName[spanName] = parentCtx;
  }
  expect(parentBySpanName).toMatchObject({
    "openclaw.run": undefined,
    "openclaw.model.call": undefined,
  });
  await otelService.stop?.(otelCtx);
});
2056
+
2057
// Trusted completed events whose trace data has a span id but no parent span id
// must not be parented at all; in particular they must not use their own span
// id as the parent.
test("does not self-parent trusted diagnostic lifecycle spans without parent ids", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  // Deliberately omits `parentSpanId`; the key is absent, not set to undefined.
  const parentlessTrace = (spanId: string) => ({
    traceId: TRACE_ID,
    spanId,
    traceFlags: "01",
  });

  emitTrustedDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
    trace: parentlessTrace(CHILD_SPAN_ID),
  });
  emitTrustedDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    trace: parentlessTrace(GRANDCHILD_SPAN_ID),
  });
  await flushDiagnosticEvents();

  expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();

  const startSpanCalls = telemetryState.tracer.startSpan.mock.calls;
  const parentBySpanName = startSpanCalls.reduce<Record<string, unknown>>(
    (parents, [spanName, , parentCtx]) => {
      parents[spanName] = parentCtx;
      return parents;
    },
    {},
  );
  expect(parentBySpanName).toMatchObject({
    "openclaw.run": undefined,
    "openclaw.model.call": undefined,
  });
  await otelService.stop?.(otelCtx);
});
2100
+
2101
// Events sent through the untrusted `emitDiagnosticEvent` path carry
// caller-supplied trace ids. None of the resulting spans may be started with a
// parent context derived from those ids.
test("does not parent untrusted diagnostic lifecycle spans from injected trace ids", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  const injectedTrace = (spanId: string, parentSpanId: string) => ({
    traceId: TRACE_ID,
    spanId,
    parentSpanId,
    traceFlags: "01",
  });

  emitDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
    trace: injectedTrace(CHILD_SPAN_ID, SPAN_ID),
  });
  emitDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    trace: injectedTrace(GRANDCHILD_SPAN_ID, CHILD_SPAN_ID),
  });
  emitDiagnosticEvent({
    type: "tool.execution.completed",
    runId: "run-1",
    toolName: "read",
    durationMs: 20,
    trace: injectedTrace(TOOL_SPAN_ID, GRANDCHILD_SPAN_ID),
  });
  await flushDiagnosticEvents();

  expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();

  // Parent context argument of every startSpan call, keyed by span name.
  const parentBySpanName: Record<string, unknown> = {};
  telemetryState.tracer.startSpan.mock.calls.forEach(([spanName, , parentCtx]) => {
    parentBySpanName[spanName] = parentCtx;
  });
  expect(parentBySpanName).toMatchObject({
    "openclaw.run": undefined,
    "openclaw.model.call": undefined,
    "openclaw.tool.execution": undefined,
  });
  await otelService.stop?.(otelCtx);
});
2159
+
2160
// Sends untrusted started/finished pairs for run, model call, tool execution
// and harness run. Each span name must be started exactly once, i.e. the
// `*.started` events must not open an extra span of their own.
test("does not create live started spans for untrusted lifecycle diagnostics", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "run.started",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
  });
  emitDiagnosticEvent({
    type: "run.completed",
    runId: "run-1",
    provider: "openai",
    model: "gpt-5.4",
    outcome: "completed",
    durationMs: 100,
  });
  emitDiagnosticEvent({
    type: "model.call.started",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
  });
  emitDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
  });
  emitDiagnosticEvent({
    type: "tool.execution.started",
    runId: "run-1",
    toolName: "read",
  });
  emitDiagnosticEvent({
    type: "tool.execution.error",
    runId: "run-1",
    toolName: "read",
    durationMs: 20,
    errorCategory: "TypeError",
  });
  emitDiagnosticEvent({
    type: "harness.run.started",
    runId: "run-1",
    provider: "codex",
    model: "gpt-5.4",
    harnessId: "codex",
    pluginId: "codex-plugin",
  });
  emitDiagnosticEvent({
    type: "harness.run.completed",
    runId: "run-1",
    provider: "codex",
    model: "gpt-5.4",
    harnessId: "codex",
    pluginId: "codex-plugin",
    outcome: "completed",
    durationMs: 90,
  });
  await flushDiagnosticEvents();

  // One span per lifecycle, checked in the same order as the events above.
  const lifecycleSpanNames = [
    "openclaw.run",
    "openclaw.model.call",
    "openclaw.tool.execution",
    "openclaw.harness.run",
  ];
  for (const lifecycleSpanName of lifecycleSpanNames) {
    const startsForName = telemetryState.tracer.startSpan.mock.calls.filter(
      ([spanName]) => spanName === lifecycleSpanName,
    );
    expect(startsForName).toHaveLength(1);
  }
  await otelService.stop?.(otelCtx);
});
2246
+
2247
// A failed `exec.process.completed` diagnostic must yield a duration histogram
// sample and an `openclaw.exec` span that describe the process. The span
// attributes must not include command, workdir or session-key keys.
test("exports exec process spans without command text", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "exec.process.completed",
    target: "host",
    mode: "child",
    outcome: "failed",
    durationMs: 30,
    commandLength: 42,
    exitCode: 1,
    timedOut: false,
    failureKind: "runtime-error",
  });
  await flushDiagnosticEvents();

  const execDurationHistogram = telemetryState.histograms.get("openclaw.exec.duration_ms");
  expect(execDurationHistogram?.record).toHaveBeenCalledWith(
    30,
    expect.objectContaining({
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.failureKind": "runtime-error",
    }),
  );

  const execCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.exec",
  );
  const execSpanOptions = execCall?.[1];

  // Positive check: the low-cardinality process facts are present.
  expect(execSpanOptions).toMatchObject({
    attributes: {
      "openclaw.exec.target": "host",
      "openclaw.exec.mode": "child",
      "openclaw.outcome": "failed",
      "openclaw.exec.command_length": 42,
      "openclaw.exec.exit_code": 1,
      "openclaw.exec.timed_out": false,
      "openclaw.failureKind": "runtime-error",
    },
    startTime: expect.any(Number),
  });
  // Negative check: no command, workdir or session-key attribute is attached.
  expect(execSpanOptions).toEqual({
    attributes: expect.not.objectContaining({
      "openclaw.exec.command": expect.anything(),
      "openclaw.exec.workdir": expect.anything(),
      "openclaw.sessionKey": expect.anything(),
    }),
    startTime: expect.any(Number),
  });

  const execSpan = telemetryState.spans.find(({ name }) => name === "openclaw.exec");
  expect(execSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "runtime-error",
  });
  expect(execSpan?.end).toHaveBeenCalledWith(expect.any(Number));
  await otelService.stop?.(otelCtx);
});
2307
+
2308
// Emits started/completed/error message-delivery diagnostics and checks the
// started counter, the duration histogram and the two delivery spans. The span
// attributes must not include session, message, conversation, content or
// recipient keys.
test("exports message delivery spans and metrics with low-cardinality attributes", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "message.delivery.started",
    channel: "matrix",
    deliveryKind: "text",
    sessionKey: "session-secret",
  });
  emitDiagnosticEvent({
    type: "message.delivery.completed",
    channel: "matrix",
    deliveryKind: "text",
    durationMs: 25,
    resultCount: 1,
    sessionKey: "session-secret",
  });
  emitDiagnosticEvent({
    type: "message.delivery.error",
    channel: "discord",
    deliveryKind: "media",
    durationMs: 40,
    errorCategory: "TypeError",
    sessionKey: "session-secret",
  });
  await flushDiagnosticEvents();

  const startedCounter = telemetryState.counters.get("openclaw.message.delivery.started");
  expect(startedCounter?.add).toHaveBeenCalledWith(1, {
    "openclaw.channel": "matrix",
    "openclaw.delivery.kind": "text",
  });

  const durationHistogram = telemetryState.histograms.get(
    "openclaw.message.delivery.duration_ms",
  );
  expect(durationHistogram?.record).toHaveBeenCalledWith(
    25,
    expect.objectContaining({
      "openclaw.channel": "matrix",
      "openclaw.delivery.kind": "text",
      "openclaw.outcome": "completed",
    }),
  );
  expect(durationHistogram?.record).toHaveBeenCalledWith(
    40,
    expect.objectContaining({
      "openclaw.channel": "discord",
      "openclaw.delivery.kind": "media",
      "openclaw.outcome": "error",
      "openclaw.errorCategory": "TypeError",
    }),
  );

  // Two spans are expected: one for the completed delivery, one for the error.
  const deliverySpanCalls = telemetryState.tracer.startSpan.mock.calls.filter(
    ([spanName]) => spanName === "openclaw.message.delivery",
  );
  expect(deliverySpanCalls).toHaveLength(2);

  const [completedCall, erroredCall] = deliverySpanCalls;
  expect(completedCall?.[1]).toMatchObject({
    attributes: {
      "openclaw.channel": "matrix",
      "openclaw.delivery.kind": "text",
      "openclaw.outcome": "completed",
      "openclaw.delivery.result_count": 1,
    },
    startTime: expect.any(Number),
  });
  expect(erroredCall?.[1]).toMatchObject({
    attributes: {
      "openclaw.channel": "discord",
      "openclaw.delivery.kind": "media",
      "openclaw.outcome": "error",
      "openclaw.errorCategory": "TypeError",
    },
    startTime: expect.any(Number),
  });

  // Neither span may carry identifying or content-bearing attribute keys.
  deliverySpanCalls.forEach((deliveryCall) => {
    expect(deliveryCall[1]).toEqual({
      attributes: expect.not.objectContaining({
        "openclaw.sessionKey": expect.anything(),
        "openclaw.messageId": expect.anything(),
        "openclaw.conversationId": expect.anything(),
        "openclaw.content": expect.anything(),
        "openclaw.to": expect.anything(),
      }),
      startTime: expect.any(Number),
    });
  });

  // The error span is identified as the delivery span that had a status set.
  const errorSpan = telemetryState.spans.find(
    ({ name, setStatus }) =>
      name === "openclaw.message.delivery" && setStatus.mock.calls.length > 0,
  );
  expect(errorSpan?.setStatus).toHaveBeenCalledWith({
    code: 2,
    message: "TypeError",
  });
  await otelService.stop?.(otelCtx);
});
2408
+
2409
// With no `captureContent` option configured, prompt/reply/tool payloads placed
// on the events must not show up as `openclaw.content.*` span attributes.
test("does not export model or tool content unless capture is explicitly enabled", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await otelService.start(otelCtx);

  // The casts let the test attach content fields to the event payloads.
  emitDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    inputMessages: ["private user prompt"],
    outputMessages: ["private model reply"],
    systemPrompt: "private system prompt",
  } as Parameters<typeof emitDiagnosticEvent>[0]);
  emitDiagnosticEvent({
    type: "tool.execution.completed",
    runId: "run-1",
    toolName: "read",
    toolCallId: "tool-1",
    durationMs: 20,
    toolInput: "private tool input",
    toolOutput: "private tool output",
  } as Parameters<typeof emitDiagnosticEvent>[0]);
  await flushDiagnosticEvents();

  const firstStartSpanCall = (wantedName: string) =>
    telemetryState.tracer.startSpan.mock.calls.find(([spanName]) => spanName === wantedName);
  const modelCall = firstStartSpanCall("openclaw.model.call");
  const toolCall = firstStartSpanCall("openclaw.tool.execution");

  expect(modelCall?.[1]).toEqual({
    attributes: expect.not.objectContaining({
      "openclaw.content.input_messages": expect.anything(),
      "openclaw.content.output_messages": expect.anything(),
      "openclaw.content.system_prompt": expect.anything(),
    }),
    startTime: expect.any(Number),
  });
  expect(toolCall?.[1]).toEqual({
    attributes: expect.not.objectContaining({
      "openclaw.content.tool_input": expect.anything(),
      "openclaw.content.tool_output": expect.anything(),
    }),
    startTime: expect.any(Number),
  });
  await otelService.stop?.(otelCtx);
});
2459
+
2460
// With every `captureContent` flag enabled, content is exported as span
// attributes, but the API-key-like token in the input must be absent from the
// exported value and the oversized tool output must be length-bounded with its
// bearer token removed.
test("exports bounded redacted content when capture fields are opted in", async () => {
  const otelService = createDiagnosticsOtelService();
  const otelCtx = createOtelContext(OTEL_TEST_ENDPOINT, {
    traces: true,
    metrics: true,
    captureContent: {
      enabled: true,
      inputMessages: true,
      outputMessages: true,
      toolInputs: true,
      toolOutputs: true,
      systemPrompt: true,
    },
  });
  await otelService.start(otelCtx);

  emitDiagnosticEvent({
    type: "model.call.completed",
    runId: "run-1",
    callId: "call-1",
    provider: "openai",
    model: "gpt-5.4",
    durationMs: 80,
    inputMessages: ["use key sk-1234567890abcdef1234567890abcdef"], // pragma: allowlist secret
    outputMessages: ["model reply"],
    systemPrompt: "system prompt",
  } as Parameters<typeof emitDiagnosticEvent>[0]);
  emitDiagnosticEvent({
    type: "tool.execution.completed",
    runId: "run-1",
    toolName: "read",
    toolCallId: "tool-1",
    durationMs: 20,
    toolInput: "tool input",
    toolOutput: `${"x".repeat(4077)} Bearer ${"a".repeat(80)}`, // pragma: allowlist secret
  } as Parameters<typeof emitDiagnosticEvent>[0]);
  await flushDiagnosticEvents();

  // Pulls the `attributes` bag out of the options argument of a startSpan call.
  const attributesOf = (call: readonly unknown[] | undefined) =>
    (call?.[1] as { attributes?: Record<string, unknown> } | undefined)?.attributes;

  const modelCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.model.call",
  );
  const toolCall = telemetryState.tracer.startSpan.mock.calls.find(
    ([spanName]) => spanName === "openclaw.tool.execution",
  );
  const modelAttrs = attributesOf(modelCall);
  const toolAttrs = attributesOf(toolCall);

  expect(modelAttrs).toMatchObject({
    "openclaw.content.output_messages": "model reply",
    "openclaw.content.system_prompt": "system prompt",
  });
  const exportedInput = String(modelAttrs?.["openclaw.content.input_messages"]);
  expect(exportedInput).not.toContain(
    "sk-1234567890abcdef1234567890abcdef", // pragma: allowlist secret
  );

  expect(toolAttrs).toMatchObject({
    "openclaw.content.tool_input": "tool input",
  });
  // Length is bounded by the content cap plus the truncation-suffix allowance;
  // no run of 11 "a" characters from the bearer token may survive.
  const exportedToolOutput = String(toolAttrs?.["openclaw.content.tool_output"]);
  expect(exportedToolOutput.length).toBeLessThanOrEqual(
    MAX_TEST_OTEL_CONTENT_ATTRIBUTE_CHARS + OTEL_TRUNCATED_SUFFIX_MAX_CHARS,
  );
  expect(exportedToolOutput).not.toContain("a".repeat(11));
  await otelService.stop?.(otelCtx);
});
2525
+
2526
// A `model.usage` diagnostic whose trace data is malformed (all-zero trace id,
// non-hex span id, invalid flags) must not be used to build a parent context:
// `setSpanContext` is never called and the usage span gets no parent argument.
test("ignores invalid diagnostic event trace parents", async () => {
  const service = createDiagnosticsOtelService();
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { traces: true, metrics: true });
  await service.start(ctx);

  emitDiagnosticEvent({
    type: "model.usage",
    trace: {
      traceId: "0".repeat(32),
      spanId: "not-a-span",
      traceFlags: "zz",
    },
    provider: "openai",
    model: "gpt-5.4",
    usage: { total: 4 },
    durationMs: 12,
  });
  // Wait for the event to be delivered before inspecting the mocks, as every
  // sibling test does. Without this, an undelivered event leaves
  // `modelUsageCall` undefined and both assertions below pass vacuously.
  await flushDiagnosticEvents();

  const modelUsageCall = telemetryState.tracer.startSpan.mock.calls.find(
    (call) => call[0] === "openclaw.model.usage",
  );
  expect(telemetryState.tracer.setSpanContext).not.toHaveBeenCalled();
  expect(modelUsageCall?.[2]).toBeUndefined();
  await service.stop?.(ctx);
});
2551
+
344
2552
  test("redacts sensitive reason in session.state metric attributes", async () => {
345
2553
  const service = createDiagnosticsOtelService();
346
2554
  const ctx = createOtelContext(OTEL_TEST_ENDPOINT, { metrics: true });