@aztec/telemetry-client 5.0.0-private.20260318 → 5.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dest/attributes.d.ts +5 -2
  2. package/dest/attributes.d.ts.map +1 -1
  3. package/dest/attributes.js +2 -1
  4. package/dest/config.d.ts +3 -1
  5. package/dest/config.d.ts.map +1 -1
  6. package/dest/config.js +17 -9
  7. package/dest/lmdb_metrics.d.ts +2 -2
  8. package/dest/lmdb_metrics.d.ts.map +1 -1
  9. package/dest/metrics.d.ts +21 -3
  10. package/dest/metrics.d.ts.map +1 -1
  11. package/dest/metrics.js +110 -10
  12. package/dest/monitored_batch_span_processor.d.ts +29 -0
  13. package/dest/monitored_batch_span_processor.d.ts.map +1 -0
  14. package/dest/monitored_batch_span_processor.js +70 -0
  15. package/dest/otel.d.ts +1 -1
  16. package/dest/otel.d.ts.map +1 -1
  17. package/dest/otel.js +62 -3
  18. package/dest/otel_propagation.d.ts +3 -1
  19. package/dest/otel_propagation.d.ts.map +1 -1
  20. package/dest/otel_propagation.js +49 -1
  21. package/dest/start.d.ts +1 -1
  22. package/dest/start.d.ts.map +1 -1
  23. package/dest/start.js +1 -1
  24. package/dest/telemetry.d.ts +2 -2
  25. package/dest/telemetry.d.ts.map +1 -1
  26. package/dest/wrappers/fetch.d.ts +3 -3
  27. package/dest/wrappers/fetch.d.ts.map +1 -1
  28. package/dest/wrappers/fetch.js +3 -2
  29. package/dest/wrappers/l2_block_stream.d.ts +2 -2
  30. package/dest/wrappers/l2_block_stream.d.ts.map +1 -1
  31. package/package.json +3 -3
  32. package/src/attributes.ts +4 -1
  33. package/src/config.ts +24 -9
  34. package/src/metrics.ts +114 -10
  35. package/src/monitored_batch_span_processor.ts +93 -0
  36. package/src/otel.ts +38 -2
  37. package/src/otel_propagation.ts +42 -1
  38. package/src/start.ts +6 -1
  39. package/src/telemetry.ts +0 -1
  40. package/src/wrappers/fetch.ts +9 -3
  41. package/src/wrappers/l2_block_stream.ts +1 -4
package/src/metrics.ts CHANGED
@@ -308,6 +308,12 @@ export const ARCHIVER_SYNC_PER_BLOCK: MetricDefinition = {
308
308
  unit: 'ms',
309
309
  valueType: ValueType.INT,
310
310
  };
311
+ export const ARCHIVER_SYNC_PER_CHECKPOINT: MetricDefinition = {
312
+ name: 'aztec.archiver.checkpoint.sync_per_item_duration',
313
+ description: 'Duration to sync a checkpoint',
314
+ unit: 'ms',
315
+ valueType: ValueType.INT,
316
+ };
311
317
  export const ARCHIVER_SYNC_BLOCK_COUNT: MetricDefinition = {
312
318
  name: 'aztec.archiver.block.sync_count',
313
319
  description: 'Number of blocks synced from L1',
@@ -357,6 +363,12 @@ export const ARCHIVER_CHECKPOINT_L1_INCLUSION_DELAY: MetricDefinition = {
357
363
  valueType: ValueType.INT,
358
364
  };
359
365
 
366
+ export const ARCHIVER_CHECKPOINT_PROMOTED_COUNT: MetricDefinition = {
367
+ name: 'aztec.archiver.checkpoint_promoted_count',
368
+ description: 'Number of checkpoints promoted from proposed (blob fetch skipped)',
369
+ valueType: ValueType.INT,
370
+ };
371
+
360
372
  export const NODE_RECEIVE_TX_DURATION: MetricDefinition = {
361
373
  name: 'aztec.node.receive_tx.duration',
362
374
  description: 'The duration of the receiveTx method',
@@ -388,6 +400,12 @@ export const SEQUENCER_STATE_TRANSITION_BUFFER_DURATION: MetricDefinition = {
388
400
  unit: 'ms',
389
401
  valueType: ValueType.INT,
390
402
  };
403
+ export const SEQUENCER_STATE_DURATION: MetricDefinition = {
404
+ name: 'aztec.sequencer.state_duration',
405
+ description: 'Wall-clock time spent in each sequencer state, labelled by the state being left',
406
+ unit: 'ms',
407
+ valueType: ValueType.INT,
408
+ };
391
409
  export const SEQUENCER_BLOCK_BUILD_DURATION: MetricDefinition = {
392
410
  name: 'aztec.sequencer.block.build_duration',
393
411
  description: 'Duration to build a block',
@@ -405,6 +423,12 @@ export const SEQUENCER_BLOCK_COUNT: MetricDefinition = {
405
423
  description: 'Number of blocks built by this sequencer',
406
424
  valueType: ValueType.INT,
407
425
  };
426
+ export const SEQUENCER_BLOCK_INTER_BLOCK_TIME: MetricDefinition = {
427
+ name: 'aztec.sequencer.block.inter_block_time',
428
+ description: 'Wall-clock time elapsed between consecutive blocks being built by this sequencer',
429
+ unit: 'ms',
430
+ valueType: ValueType.INT,
431
+ };
408
432
  export const SEQUENCER_CURRENT_SLOT_REWARDS: MetricDefinition = {
409
433
  name: 'aztec.sequencer.current_slot_rewards',
410
434
  description: 'The rewards earned per filled slot',
@@ -477,6 +501,25 @@ export const SEQUENCER_CHECKPOINT_BUILD_DURATION: MetricDefinition = {
477
501
  unit: 'ms',
478
502
  valueType: ValueType.INT,
479
503
  };
504
+ export const SEQUENCER_CHECKPOINT_START_TO_FIRST_BLOCK_DURATION: MetricDefinition = {
505
+ name: 'aztec.sequencer.checkpoint.start_to_first_block_duration',
506
+ description: 'Time from starting checkpoint work to the first block finishing build',
507
+ unit: 'ms',
508
+ valueType: ValueType.INT,
509
+ };
510
+ export const SEQUENCER_CHECKPOINT_LAST_BLOCK_TO_BROADCAST_DURATION: MetricDefinition = {
511
+ name: 'aztec.sequencer.checkpoint.last_block_to_broadcast_duration',
512
+ description: 'Time from the final block finishing build to the checkpoint proposal being broadcast',
513
+ unit: 'ms',
514
+ valueType: ValueType.INT,
515
+ };
516
+ export const SEQUENCER_PIPELINED_CHECKPOINT_BUILD_START_OFFSET_FROM_SLOT_BOUNDARY: MetricDefinition = {
517
+ name: 'aztec.sequencer.pipelined_checkpoint.build_start_offset_from_slot_boundary',
518
+ description:
519
+ 'Absolute offset from the wall-clock slot boundary when a pipelined checkpoint build starts. Use aztec.slot_boundary_side to distinguish before vs after the boundary.',
520
+ unit: 'ms',
521
+ valueType: ValueType.INT,
522
+ };
480
523
  export const SEQUENCER_CHECKPOINT_BLOCK_COUNT: MetricDefinition = {
481
524
  name: 'aztec.sequencer.checkpoint.block_count',
482
525
  description: 'Number of blocks built in a checkpoint',
@@ -504,6 +547,21 @@ export const SEQUENCER_CHECKPOINT_SUCCESS_COUNT: MetricDefinition = {
504
547
  description: 'The number of times checkpoint publishing succeeded',
505
548
  valueType: ValueType.INT,
506
549
  };
550
+ export const SEQUENCER_PIPELINE_DEPTH: MetricDefinition = {
551
+ name: 'aztec.sequencer.pipeline.depth',
552
+ description: 'Current pipeline depth when builder pipelining is enabled',
553
+ valueType: ValueType.INT,
554
+ };
555
+ export const SEQUENCER_PIPELINE_DISCARDS_COUNT: MetricDefinition = {
556
+ name: 'aztec.sequencer.pipeline.discards_count',
557
+ description: 'The number of times a pipeline was discarded',
558
+ valueType: ValueType.INT,
559
+ };
560
+ export const SEQUENCER_PIPELINE_PARENT_CHECKPOINT_MISMATCH_COUNT: MetricDefinition = {
561
+ name: 'aztec.sequencer.pipeline.parent_checkpoint_mismatch_count',
562
+ description: 'The number of times a pipelined checkpoint was discarded because the parent did not match expectations',
563
+ valueType: ValueType.INT,
564
+ };
507
565
 
508
566
  // Fisherman fee analysis metrics
509
567
  export const FISHERMAN_FEE_ANALYSIS_WOULD_BE_INCLUDED: MetricDefinition = {
@@ -671,6 +729,24 @@ export const L1_PUBLISHER_TX_TOTAL_FEE: MetricDefinition = {
671
729
  unit: 'eth',
672
730
  valueType: ValueType.DOUBLE,
673
731
  };
732
+ export const PROVER_NODE_ESTIMATED_SUBMISSION_GAS: MetricDefinition = {
733
+ name: 'aztec.prover_node.estimated_submission.gas',
734
+ description: 'Estimated gas for a proof submission tx (proof publishing disabled, not actually sent)',
735
+ unit: 'gas',
736
+ valueType: ValueType.INT,
737
+ };
738
+ export const PROVER_NODE_ESTIMATED_SUBMISSION_GAS_PRICE: MetricDefinition = {
739
+ name: 'aztec.prover_node.estimated_submission.gas_price',
740
+ description: 'Estimated effective gas price for a proof submission tx (proof publishing disabled, not actually sent)',
741
+ unit: 'gwei',
742
+ valueType: ValueType.DOUBLE,
743
+ };
744
+ export const PROVER_NODE_ESTIMATED_SUBMISSION_TOTAL_FEE: MetricDefinition = {
745
+ name: 'aztec.prover_node.estimated_submission.total_fee',
746
+ description: 'Estimated total L1 fee for a proof submission tx (proof publishing disabled, not actually sent)',
747
+ unit: 'eth',
748
+ valueType: ValueType.DOUBLE,
749
+ };
674
750
 
675
751
  export const L1_BLOCK_HEIGHT: MetricDefinition = {
676
752
  name: 'aztec.l1.block_height',
@@ -899,6 +975,12 @@ export const P2P_GOSSIP_AGG_MESSAGE_VALIDATION_DURATION_AVG: MetricDefinition =
899
975
  valueType: ValueType.INT,
900
976
  };
901
977
 
978
+ export const P2P_GOSSIP_SLOW_VALIDATION_COUNT: MetricDefinition = {
979
+ name: 'aztec.p2p.gossip.slow_validation_count',
980
+ description: 'Number of gossip validations that exceeded 75% of the mcache eviction window',
981
+ valueType: ValueType.INT,
982
+ };
983
+
902
984
  export const PUBLIC_PROCESSOR_TX_DURATION: MetricDefinition = {
903
985
  name: 'aztec.public_processor.tx_duration',
904
986
  description: 'Duration to process a public transaction',
@@ -962,6 +1044,17 @@ export const PUBLIC_PROCESSOR_TREE_INSERTION: MetricDefinition = {
962
1044
  unit: 'ms',
963
1045
  valueType: ValueType.INT,
964
1046
  };
1047
+ export const PUBLIC_PROCESSOR_SILENTLY_SKIPPED_COUNT: MetricDefinition = {
1048
+ name: 'aztec.public_processor.silently_skipped_count',
1049
+ description: 'Public txs fully processed then skipped (e.g. blob-field limit); not counted as processed or failed',
1050
+ valueType: ValueType.INT,
1051
+ };
1052
+ export const PUBLIC_PROCESSOR_SILENTLY_SKIPPED_DURATION: MetricDefinition = {
1053
+ name: 'aztec.public_processor.silently_skipped_duration',
1054
+ description: 'Wall-clock time spent processing txs that were then silently skipped',
1055
+ unit: 'ms',
1056
+ valueType: ValueType.INT,
1057
+ };
965
1058
 
966
1059
  export const PUBLIC_EXECUTOR_PREFIX = 'aztec.public_executor.';
967
1060
  export const PUBLIC_EXECUTOR_SIMULATION_COUNT: MetricDefinition = {
@@ -1139,12 +1232,6 @@ export const PROVER_NODE_BLOB_PROCESSING_LAST_DURATION: MetricDefinition = {
1139
1232
  unit: 'ms',
1140
1233
  valueType: ValueType.INT,
1141
1234
  };
1142
- export const PROVER_NODE_CHONK_VERIFIER_LAST_DURATION: MetricDefinition = {
1143
- name: 'aztec.prover_node.chonk_verifier.last_duration',
1144
- description: 'Duration of chonk verifier enqueuing in epoch proving job',
1145
- unit: 'ms',
1146
- valueType: ValueType.INT,
1147
- };
1148
1235
  export const PROVER_NODE_BLOCK_PROCESSING_DURATION: MetricDefinition = {
1149
1236
  name: 'aztec.prover_node.block_processing.duration',
1150
1237
  description: 'Duration of processing a single block in epoch proving job',
@@ -1157,10 +1244,14 @@ export const PROVER_NODE_CHECKPOINT_PROCESSING_DURATION: MetricDefinition = {
1157
1244
  unit: 'ms',
1158
1245
  valueType: ValueType.INT,
1159
1246
  };
1160
- export const PROVER_NODE_ALL_CHECKPOINTS_PROCESSING_LAST_DURATION: MetricDefinition = {
1161
- name: 'aztec.prover_node.all_checkpoints_processing.last_duration',
1162
- description: 'Duration of processing all checkpoints in epoch proving job',
1163
- unit: 'ms',
1247
+ export const PROVER_NODE_ACTIVE_CHECKPOINTS: MetricDefinition = {
1248
+ name: 'aztec.prover_node.active_checkpoints',
1249
+ description: 'Current number of canonical CheckpointProvers in the store (i.e. checkpoints currently being proven)',
1250
+ valueType: ValueType.INT,
1251
+ };
1252
+ export const PROVER_NODE_ACTIVE_EPOCH_SESSIONS: MetricDefinition = {
1253
+ name: 'aztec.prover_node.active_epoch_sessions',
1254
+ description: 'Current number of live EpochSessions, broken down by kind (full|partial)',
1164
1255
  valueType: ValueType.INT,
1165
1256
  };
1166
1257
  export const PROVER_NODE_REWARDS_TOTAL: MetricDefinition = {
@@ -1276,6 +1367,19 @@ export const VALIDATOR_RE_EXECUTION_TX_COUNT: MetricDefinition = {
1276
1367
  unit: 'tx',
1277
1368
  valueType: ValueType.INT,
1278
1369
  };
1370
+ export const VALIDATOR_CHECKPOINT_PROPOSAL_TO_PIPELINED_STATE_DURATION: MetricDefinition = {
1371
+ name: 'aztec.validator.checkpoint_proposal_to_pipelined_state_duration',
1372
+ description: 'Time from receiving a checkpoint proposal to setting proposed checkpoint state for pipelining',
1373
+ unit: 'ms',
1374
+ valueType: ValueType.INT,
1375
+ };
1376
+ export const VALIDATOR_CHECKPOINT_PROPOSAL_RECEIVE_OFFSET_FROM_NEXT_SLOT_BOUNDARY: MetricDefinition = {
1377
+ name: 'aztec.validator.checkpoint.proposal_receive_offset_from_next_slot_boundary',
1378
+ description:
1379
+ 'Absolute offset from the next slot boundary when a foreign checkpoint proposal is received. Use aztec.slot_boundary_side to distinguish before vs after the boundary.',
1380
+ unit: 'ms',
1381
+ valueType: ValueType.INT,
1382
+ };
1279
1383
 
1280
1384
  export const VALIDATOR_FAILED_REEXECUTION_COUNT: MetricDefinition = {
1281
1385
  name: 'aztec.validator.failed_reexecution_count',
@@ -0,0 +1,93 @@
1
+ import type { Logger } from '@aztec/foundation/log';
2
+
3
+ import { type Context, SpanStatusCode } from '@opentelemetry/api';
4
+ import { hrTimeToMilliseconds } from '@opentelemetry/core';
5
+ import type { SpanExporter } from '@opentelemetry/sdk-trace-base';
6
+ import { BatchSpanProcessor, type BufferConfig, type ReadableSpan, type Span } from '@opentelemetry/sdk-trace-node';
7
+
8
+ /** Minimum interval between drop warnings to avoid log spam. */
9
+ const DROP_WARNING_INTERVAL_MS = 30_000;
10
+
11
+ const DEFAULT_MIN_TRACE_DURATION_MS = 10;
12
+
13
+ export type MonitoredBatchSpanProcessorConfig = BufferConfig & {
14
+ minTraceDurationMs?: number;
15
+ };
16
+
17
/**
 * Wraps BatchSpanProcessor to emit warnings when spans are dropped due to a full queue.
 * The standard BatchSpanProcessor silently discards spans when its internal queue reaches
 * maxQueueSize, making telemetry data loss invisible to operators.
 *
 * The base class keeps its queue private, so this wrapper maintains its own estimate of the
 * queue length: it is incremented for every span handed to the base class and decremented
 * whenever the base class passes a batch to the exporter. Without the decrement the estimate
 * would only ever grow between explicit `forceFlush()` calls and every span after the first
 * `maxQueueSize` would be misreported as dropped, even though regular timer-driven exports
 * keep draining the real queue.
 *
 * Additionally, spans shorter than `minTraceDurationMs` that did not end in an error are
 * discarded before they reach the queue.
 */
export class MonitoredBatchSpanProcessor extends BatchSpanProcessor {
  private readonly maxQueueSize: number;
  private readonly minTraceDurationMs: number;
  private readonly log: Logger;
  /** Estimated number of spans sitting in the base class queue (shared with the exporter wrapper). */
  private readonly queue: { size: number };

  private droppedSinceLastWarning = 0;
  private totalDropped = 0;
  private lastWarningTime = 0;

  /**
   * @param exporter - Exporter that receives the batched spans.
   * @param log - Logger used for drop warnings.
   * @param config - Standard batch buffer settings plus `minTraceDurationMs`. `maxQueueSize`
   *   defaults to 2048; a negative `minTraceDurationMs` is clamped to 0 (filter disabled).
   */
  constructor(exporter: SpanExporter, log: Logger, config?: MonitoredBatchSpanProcessorConfig) {
    const maxQueueSize = config?.maxQueueSize ?? 2048;

    // Created before super() so the exporter wrapper can close over it without touching `this`.
    const queue = { size: 0 };
    const trackingExporter: SpanExporter = {
      export: (spans, resultCallback) => {
        // A batch handed to the exporter has left the processor's queue.
        queue.size = Math.max(0, queue.size - spans.length);
        exporter.export(spans, resultCallback);
      },
      shutdown: () => exporter.shutdown(),
      forceFlush: async () => {
        // forceFlush is optional on SpanExporter; treat a missing implementation as a no-op.
        await exporter.forceFlush?.();
      },
    };

    super(trackingExporter, { ...config, maxQueueSize });
    this.queue = queue;
    this.maxQueueSize = maxQueueSize;
    this.minTraceDurationMs = Math.max(0, config?.minTraceDurationMs ?? DEFAULT_MIN_TRACE_DURATION_MS);
    this.log = log;
  }

  override onStart(span: Span, parentContext: Context): void {
    super.onStart(span, parentContext);
  }

  /**
   * Filters out short successful spans, updates the queue estimate and drop counters, then
   * delegates to the base class (which performs the actual enqueue or drop).
   */
  override onEnd(span: ReadableSpan): void {
    if (this.shouldDropShortSpan(span)) {
      return;
    }

    // 0x01 is the W3C "sampled" trace flag (TraceFlags.SAMPLED in @opentelemetry/api). The base
    // class does not enqueue unsampled spans, so they must not affect the queue estimate.
    const isSampled = (span.spanContext().traceFlags & 0x01) !== 0;
    if (isSampled) {
      if (this.queue.size >= this.maxQueueSize) {
        this.droppedSinceLastWarning++;
        this.totalDropped++;
        this.maybeLogDropWarning();
      } else {
        // Incremented before delegating: the base class may export synchronously from onEnd,
        // in which case the exporter wrapper decrements right after this increment.
        this.queue.size++;
      }
    }
    super.onEnd(span);
  }

  /** Flushes all queued spans, then resynchronises the queue estimate with the now-drained queue. */
  override async forceFlush(): Promise<void> {
    await super.forceFlush();
    this.queue.size = 0;
  }

  /** Logs the lifetime drop total (if any) before shutting down the underlying processor. */
  override async shutdown(): Promise<void> {
    if (this.totalDropped > 0) {
      this.log.warn(`BatchSpanProcessor shutting down with ${this.totalDropped} total spans dropped`, {
        totalDropped: this.totalDropped,
      });
    }
    await super.shutdown();
  }

  /** Returns true for non-error spans shorter than the configured minimum duration. */
  private shouldDropShortSpan(span: ReadableSpan): boolean {
    return (
      this.minTraceDurationMs > 0 &&
      span.status.code !== SpanStatusCode.ERROR &&
      hrTimeToMilliseconds(span.duration) < this.minTraceDurationMs
    );
  }

  /** Emits a drop warning at most once per DROP_WARNING_INTERVAL_MS and resets the interval counter. */
  private maybeLogDropWarning(): void {
    const now = Date.now();
    if (now - this.lastWarningTime >= DROP_WARNING_INTERVAL_MS) {
      this.log.warn(
        `BatchSpanProcessor dropping spans: queue full (maxQueueSize=${this.maxQueueSize}). ` +
          `${this.droppedSinceLastWarning} dropped since last warning, ${this.totalDropped} total.`,
        { droppedSinceLastWarning: this.droppedSinceLastWarning, totalDropped: this.totalDropped },
      );
      this.droppedSinceLastWarning = 0;
      this.lastWarningTime = now;
    }
  }
}
package/src/otel.ts CHANGED
@@ -28,12 +28,13 @@ import {
28
28
  type PeriodicExportingMetricReaderOptions,
29
29
  View,
30
30
  } from '@opentelemetry/sdk-metrics';
31
- import { BatchSpanProcessor, NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
31
+ import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
32
32
  import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';
33
33
 
34
34
  import type { TelemetryClientConfig } from './config.js';
35
35
  import { toMetricOptions } from './metric-utils.js';
36
36
  import type { MetricDefinition } from './metrics.js';
37
+ import { MonitoredBatchSpanProcessor } from './monitored_batch_span_processor.js';
37
38
  import { NodejsMetricsMonitor } from './nodejs_metrics_monitor.js';
38
39
  import { OtelFilterMetricExporter, PublicOtelFilterMetricExporter } from './otel_filter_metric_exporter.js';
39
40
  import { registerOtelLoggerProvider } from './otel_logger_provider.js';
@@ -334,6 +335,36 @@ export class OpenTelemetryClient implements TelemetryClient {
334
335
  true,
335
336
  ),
336
337
  }),
338
+ // L1 gas prices in gwei: priority fees ~0.01-10, base fees ~1-500, spikes to 1000+
339
+ new View({
340
+ instrumentType: InstrumentType.HISTOGRAM,
341
+ instrumentUnit: 'gwei',
342
+ aggregation: new ExplicitBucketHistogramAggregation(
343
+ [0.1, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1_000],
344
+ true,
345
+ ),
346
+ }),
347
+ // L1 gas consumption: tx gas 100k-30M, calldata/blob gas varies
348
+ new View({
349
+ instrumentType: InstrumentType.HISTOGRAM,
350
+ instrumentUnit: 'gas',
351
+ aggregation: new ExplicitBucketHistogramAggregation(
352
+ [
353
+ 10_000, 50_000, 100_000, 250_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000, 15_000_000,
354
+ 30_000_000,
355
+ ],
356
+ true,
357
+ ),
358
+ }),
359
+ // L1 tx total fee in ETH: typically 0.001 - 1 ETH
360
+ new View({
361
+ instrumentType: InstrumentType.HISTOGRAM,
362
+ instrumentUnit: 'eth',
363
+ aggregation: new ExplicitBucketHistogramAggregation(
364
+ [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
365
+ true,
366
+ ),
367
+ }),
337
368
  ],
338
369
  });
339
370
  }
@@ -343,7 +374,12 @@ export class OpenTelemetryClient implements TelemetryClient {
343
374
  const tracerProvider = new NodeTracerProvider({
344
375
  resource,
345
376
  spanProcessors: config.tracesCollectorUrl
346
- ? [new BatchSpanProcessor(new OTLPTraceExporter({ url: config.tracesCollectorUrl.href }))]
377
+ ? [
378
+ new MonitoredBatchSpanProcessor(new OTLPTraceExporter({ url: config.tracesCollectorUrl.href }), log, {
379
+ maxQueueSize: config.otelBspMaxQueueSize,
380
+ minTraceDurationMs: config.otelMinTraceDurationMs,
381
+ }),
382
+ ]
347
383
  : [],
348
384
  });
349
385
 
@@ -1,3 +1,5 @@
1
+ import type { DiagnosticsMiddleware } from '@aztec/foundation/json-rpc/server';
2
+
1
3
  import { ROOT_CONTEXT, type Span, SpanKind, SpanStatusCode, propagation } from '@opentelemetry/api';
2
4
  import type Koa from 'koa';
3
5
 
@@ -17,7 +19,7 @@ export function getOtelJsonRpcPropagationMiddleware(
17
19
  const context = propagation.extract(ROOT_CONTEXT, ctx.request.headers);
18
20
  const method = (ctx.request.body as any)?.method;
19
21
  return tracer.startActiveSpan(
20
- `JsonRpcServer.${method ?? 'unknown'}`,
22
+ `JsonRpcServer.${method ?? 'batch'}`,
21
23
  { kind: SpanKind.SERVER },
22
24
  context,
23
25
  async (span: Span): Promise<void> => {
@@ -48,3 +50,42 @@ export function getOtelJsonRpcPropagationMiddleware(
48
50
  );
49
51
  };
50
52
  }
53
+
54
/**
 * Builds a JSON-RPC diagnostics middleware that runs each handled call inside its own
 * INTERNAL span. The span is named `<namespace>.<method>` (namespace falls back to
 * `UnknownHandler` when the method has no `_` separator), carries the full JSON-RPC method
 * and, when the request id is not null, the request id as attributes. The span status
 * reflects whether the downstream handler resolved or threw; errors are rethrown unchanged.
 */
export function getOtelJsonRpcDiagnosticsMiddleware(): DiagnosticsMiddleware {
  return function otelJsonRpcDiagnostics(ctx, next) {
    const [namespace, method] = splitNamespace(ctx.method);
    const scope = namespace ?? 'UnknownHandler';
    const tracer = getTelemetryClient().getTracer(scope);
    const spanName = `${scope}.${method}`;
    const spanOptions = { kind: SpanKind.INTERNAL, attributes: { [ATTR_JSONRPC_METHOD]: ctx.method } };

    return tracer.startActiveSpan(spanName, spanOptions, async span => {
      if (ctx.id !== null) {
        span.setAttribute(ATTR_JSONRPC_REQUEST_ID, ctx.id);
      }

      try {
        await next();
        span.setStatus({ code: SpanStatusCode.OK });
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        span.setStatus({ code: SpanStatusCode.ERROR, message });
        // Only strings and Error instances are recorded as exceptions on the span.
        if (typeof err === 'string' || err instanceof Error) {
          span.recordException(err);
        }
        throw err;
      } finally {
        // Always close the span, whether the handler succeeded or failed.
        span.end();
      }
    });
  };
}
83
+
84
/**
 * Splits a JSON-RPC method name at its first underscore.
 * @param fullMethod - Method name, e.g. `node_getBlock`.
 * @returns `[namespace, method]`; the namespace is `undefined` when there is no underscore.
 */
function splitNamespace(fullMethod: string): [namespace: string | undefined, method: string] {
  const separatorAt = fullMethod.indexOf('_');
  if (separatorAt === -1) {
    return [undefined, fullMethod];
  }
  return [fullMethod.slice(0, separatorAt), fullMethod.slice(separatorAt + 1)];
}
package/src/start.ts CHANGED
@@ -19,7 +19,12 @@ export async function initTelemetryClient(
19
19
  return telemetry;
20
20
  }
21
21
 
22
- if (config.metricsCollectorUrl || config.publicMetricsCollectorUrl) {
22
+ if (
23
+ config.metricsCollectorUrl ||
24
+ config.publicMetricsCollectorUrl ||
25
+ config.tracesCollectorUrl ||
26
+ config.logsCollectorUrl
27
+ ) {
23
28
  log.info(`Using OpenTelemetry client with custom collector`);
24
29
  // Lazy load OpenTelemetry to avoid loading heavy deps at startup
25
30
  const { OpenTelemetryClient } = await import('./otel.js');
package/src/telemetry.ts CHANGED
@@ -48,7 +48,6 @@ type BannedMetricAttributeNames = (typeof Attributes)[
48
48
  | 'TX_HASH'
49
49
  | 'PROVING_JOB_ID'
50
50
  | 'P2P_ID'
51
- | 'P2P_REQ_RESP_BATCH_REQUESTS_COUNT'
52
51
  | 'TARGET_ADDRESS'
53
52
  | 'MANA_USED'
54
53
  | 'TOTAL_INSTRUCTIONS'];
@@ -9,12 +9,17 @@ import { ATTR_JSONRPC_METHOD, ATTR_JSONRPC_REQUEST_ID } from '../vendor/attribut
9
9
 
10
10
  /**
11
11
  * Makes a fetch function that retries based on the given attempts and propagates trace information.
12
- * @param retries - Sequence of intervals (in seconds) to retry.
12
+ * @param retries - Sequence of intervals (in seconds) to retry, or a factory function returning an iterator for custom/indefinite backoff.
13
13
  * @param defaultNoRetry - Default for whether to stop retries on server errors; a per-call `noRetry` argument overrides it.
14
14
  * @param log - Optional logger for logging attempts.
15
15
  * @returns A fetch function.
16
16
  */
17
- export function makeTracedFetch(retries: number[], defaultNoRetry: boolean, fetch = defaultFetch, log?: Logger) {
17
+ export function makeTracedFetch(
18
+ retries: number[] | (() => Generator<number>),
19
+ defaultNoRetry: boolean,
20
+ fetch = defaultFetch,
21
+ log?: Logger,
22
+ ) {
18
23
  return (host: string, body: unknown, extraHeaders: Record<string, string> = {}, noRetry?: boolean) => {
19
24
  const telemetry = getTelemetryClient();
20
25
  return telemetry.getTracer('fetch').startActiveSpan(`JsonRpcClient`, { kind: SpanKind.CLIENT }, async span => {
@@ -27,10 +32,11 @@ export function makeTracedFetch(retries: number[], defaultNoRetry: boolean, fetc
27
32
  }
28
33
  const headers = { ...extraHeaders };
29
34
  propagation.inject(context.active(), headers);
35
+ const backoff = typeof retries === 'function' ? retries() : makeBackoff(retries);
30
36
  return await retry(
31
37
  () => fetch(host, body, headers, noRetry ?? defaultNoRetry),
32
38
  `JsonRpcClient request to ${host}`,
33
- makeBackoff(retries),
39
+ backoff,
34
40
  log,
35
41
  false,
36
42
  );
@@ -11,10 +11,7 @@ import { type Traceable, type Tracer, trackSpan } from '@aztec/telemetry-client'
11
11
  /** Extends an L2BlockStream with a tracer to create a new trace per iteration. */
12
12
  export class TraceableL2BlockStream extends L2BlockStream implements Traceable {
13
13
  constructor(
14
- l2BlockSource: Pick<
15
- L2BlockSource,
16
- 'getBlocks' | 'getBlockHeader' | 'getL2Tips' | 'getCheckpoints' | 'getCheckpointedBlocks'
17
- >,
14
+ l2BlockSource: Pick<L2BlockSource, 'getBlocks' | 'getBlockData' | 'getL2Tips' | 'getCheckpoints'>,
18
15
  localData: L2BlockStreamLocalDataProvider,
19
16
  handler: L2BlockStreamEventHandler,
20
17
  public readonly tracer: Tracer,