@unrdf/observability 26.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.cjs +10 -0
- package/IMPLEMENTATION-SUMMARY.md +478 -0
- package/LICENSE +21 -0
- package/README.md +482 -0
- package/capability-map.md +90 -0
- package/config/alert-rules.yml +269 -0
- package/config/prometheus.yml +136 -0
- package/dashboards/grafana-unrdf.json +798 -0
- package/dashboards/unrdf-workflow-dashboard.json +295 -0
- package/docs/OBSERVABILITY-PATTERNS.md +681 -0
- package/docs/OBSERVABILITY-RUNBOOK.md +554 -0
- package/examples/observability-demo.mjs +334 -0
- package/package.json +46 -0
- package/src/advanced-metrics.mjs +413 -0
- package/src/alerts/alert-manager.mjs +436 -0
- package/src/custom-events.mjs +558 -0
- package/src/distributed-tracing.mjs +352 -0
- package/src/exporters/grafana-exporter.mjs +415 -0
- package/src/index.mjs +61 -0
- package/src/metrics/workflow-metrics.mjs +346 -0
- package/src/receipts/anchor.mjs +155 -0
- package/src/receipts/index.mjs +62 -0
- package/src/receipts/merkle-tree.mjs +188 -0
- package/src/receipts/receipt-chain.mjs +209 -0
- package/src/receipts/receipt-schema.mjs +128 -0
- package/src/receipts/tamper-detection.mjs +219 -0
- package/test/advanced-metrics.test.mjs +302 -0
- package/test/custom-events.test.mjs +387 -0
- package/test/distributed-tracing.test.mjs +314 -0
- package/validation/observability-validation.mjs +366 -0
- package/vitest.config.mjs +25 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Distributed Tracing Utilities
|
|
3
|
+
* @module observability/distributed-tracing
|
|
4
|
+
*
|
|
5
|
+
* @description
|
|
6
|
+
* W3C Trace Context propagation, sampling strategies, and cross-service
|
|
7
|
+
* correlation for distributed RDF operations.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { trace, propagation, context, SpanKind, SpanStatusCode } from '@opentelemetry/api';
|
|
11
|
+
import { z } from 'zod';
|
|
12
|
+
|
|
13
|
+
/**
 * Trace context schema (W3C Trace Context format)
 *
 * `traceparent` has the shape `version-traceid-parentid-traceflags`. Only
 * version `00` is accepted here. All three remaining fields are lowercase
 * hex: 32 characters of trace id, 16 of parent id and 2 of trace flags.
 * The flags field is a hex-encoded byte, so values such as `0a` or `ff`
 * must validate, not just decimal digits.
 */
export const TraceContextSchema = z.object({
  traceparent: z.string().regex(/^00-[a-f0-9]{32}-[a-f0-9]{16}-[a-f0-9]{2}$/),
  tracestate: z.string().optional(),
});
|
|
20
|
+
|
|
21
|
+
// Builds a probability field: a number in [0, 1] that falls back to `fallback`.
const rateField = (fallback) => z.number().min(0).max(1).default(fallback);

/**
 * Sampling config schema
 *
 * The three `*Rate` fields are probabilities in the closed range [0, 1].
 * `slowThreshold` is an unconstrained number. Every field is optional and
 * takes the default shown below when omitted.
 */
export const SamplingConfigSchema = z.object({
  defaultRate: rateField(0.01),
  errorRate: rateField(1.0),
  slowThreshold: z.number().default(1000),
  slowRate: rateField(0.1),
});
|
|
30
|
+
|
|
31
|
+
/**
 * Distributed tracing manager
 *
 * Provides:
 * - W3C Trace Context propagation
 * - Adaptive sampling hints (errors, slow operations)
 * - Parent-child span relationships
 * - Cross-service correlation
 *
 * The sampling decision is recorded on the span (`sampling.decision`
 * attribute) and on the returned span context (`sampled`); spans are started
 * regardless of the decision.
 */
export class DistributedTracing {
  /**
   * Create distributed tracing manager
   *
   * @param {Object} [config] - Configuration
   * @param {string} [config.serviceName='unrdf'] - Service name
   * @param {Object} [config.sampling] - Sampling configuration, validated
   *   with SamplingConfigSchema (throws if invalid)
   */
  constructor(config = {}) {
    this.serviceName = config.serviceName || 'unrdf';
    this.samplingConfig = SamplingConfigSchema.parse(config.sampling || {});
    this.tracer = trace.getTracer(this.serviceName);

    // Active contexts for correlation, keyed by span id (see startSpan)
    this.activeContexts = new Map();

    // Suffix source used to disambiguate spans that report the same span id
    this._collisionCounter = 0;
  }

  /**
   * Start a distributed trace
   *
   * @param {string} spanName - Span name
   * @param {Object} [options] - Span options
   * @param {Object} [options.attributes] - Span attributes
   * @param {SpanKind} [options.kind] - Span kind
   * @param {Object} [options.parentContext] - Parent trace context; any shape
   *   accepted by `_extractContext`
   * @returns {Object} Span context with propagation headers:
   *   `{ span, spanName, startTime, traceHeaders, sampled, contextKey, ended }`
   */
  startSpan(spanName, options = {}) {
    const { attributes = {}, kind = SpanKind.INTERNAL, parentContext } = options;

    // Use the caller-supplied parent when present, otherwise the ambient context
    const ctx = parentContext ? this._extractContext(parentContext) : context.active();

    // Determine if should sample
    const shouldSample = this._shouldSample(attributes);

    // Caller attributes are spread last, so they may override the two defaults
    const span = this.tracer.startSpan(
      spanName,
      {
        kind,
        attributes: {
          'service.name': this.serviceName,
          'sampling.decision': shouldSample ? 'sampled' : 'not_sampled',
          ...attributes,
        },
      },
      ctx
    );

    // Generate W3C Trace Context headers for propagation
    const traceHeaders = this._generateTraceHeaders(span);

    // Span ids are not guaranteed unique (non-recording spans handed out by
    // the OpenTelemetry API share one invalid id), so fall back to a suffixed
    // key instead of overwriting an entry that is still active.
    const spanId = span.spanContext().spanId;
    let contextKey = spanId;
    if (this.activeContexts.has(contextKey)) {
      this._collisionCounter += 1;
      contextKey = `${spanId}#${this._collisionCounter}`;
    }

    const spanContext = {
      span,
      spanName,
      startTime: Date.now(),
      traceHeaders,
      sampled: shouldSample,
      contextKey,
      ended: false,
    };

    this.activeContexts.set(contextKey, spanContext);

    return spanContext;
  }

  /**
   * End a distributed trace span
   *
   * Safe to call more than once for the same span context: only the first
   * call touches the span, later calls are ignored.
   *
   * @param {Object} spanContext - Span context from startSpan
   * @param {Object} [options] - End options
   * @param {Error} [options.error] - Error if operation failed
   * @param {Object} [options.attributes] - Final attributes
   */
  endSpan(spanContext, options = {}) {
    if (spanContext.ended) {
      return;
    }

    const { error, attributes = {} } = options;
    const { span, startTime } = spanContext;

    const duration = Date.now() - startTime;

    // Set final attributes
    span.setAttributes({
      'span.duration_ms': duration,
      ...attributes,
    });

    // Record error if present
    if (error) {
      span.recordException(error);
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: error.message,
      });
    } else {
      span.setStatus({ code: SpanStatusCode.OK });
    }

    span.end();
    spanContext.ended = true;

    // Cleanup; contexts not created by startSpan fall back to the raw span id
    const key = spanContext.contextKey != null ? spanContext.contextKey : span.spanContext().spanId;
    this.activeContexts.delete(key);
  }

  /**
   * Create child span with parent relationship
   *
   * Any `options.parentContext` is replaced by the context derived from
   * `parentSpanContext`.
   *
   * @param {Object} parentSpanContext - Parent span context
   * @param {string} childSpanName - Child span name
   * @param {Object} [options] - Child span options
   * @returns {Object} Child span context
   */
  createChildSpan(parentSpanContext, childSpanName, options = {}) {
    const { span: parentSpan } = parentSpanContext;

    // Create context from parent span
    const parentContext = trace.setSpan(context.active(), parentSpan);

    return this.startSpan(childSpanName, {
      ...options,
      parentContext: { context: parentContext },
    });
  }

  /**
   * Extract trace context from HTTP headers
   *
   * @param {Object} headers - HTTP headers: a plain object, or an iterable
   *   with `entries()` such as `Headers` or `Map`. `null`/`undefined` is
   *   treated as no headers.
   * @returns {Object} `{ context, headers }` where `headers` is the
   *   lower-cased carrier that was handed to the propagator
   */
  extractFromHeaders(headers) {
    const carrier = this._normalizeHeaders(headers);

    // Extract context using the globally registered propagator
    const ctx = propagation.extract(context.active(), carrier);

    return { context: ctx, headers: carrier };
  }

  /**
   * Inject trace context into HTTP headers
   *
   * @param {Object} spanContext - Span context
   * @returns {Object} HTTP headers with trace context (the carrier captured
   *   when the span was started)
   */
  injectIntoHeaders(spanContext) {
    return spanContext.traceHeaders;
  }

  /**
   * Execute operation with distributed tracing
   *
   * The span is ended with OK status when `fn` resolves and with ERROR status
   * when it throws or rejects; the original thrown value is always re-thrown.
   *
   * @template T
   * @param {string} operationName - Operation name
   * @param {Function} fn - Async function to execute; receives the span context
   * @param {Object} [options] - Tracing options
   * @returns {Promise<T>} Operation result
   */
  async withSpan(operationName, fn, options = {}) {
    const spanContext = this.startSpan(operationName, options);

    let result;
    try {
      result = await fn(spanContext);
    } catch (error) {
      // Primitive throws (including falsy ones such as 0 or '') would be
      // treated as "no error" by endSpan; wrap them so the span is marked
      // failed. Object-like errors are recorded as-is.
      const recorded =
        typeof error === 'object' && error !== null ? error : new Error(String(error));
      this.endSpan(spanContext, { error: recorded });
      throw error;
    }

    this.endSpan(spanContext);
    return result;
  }

  /**
   * Correlate spans by business ID
   *
   * @param {string} businessId - Business correlation ID
   * @param {Object} spanContext - Span context to correlate
   */
  correlateByBusinessId(businessId, spanContext) {
    const { span } = spanContext;

    span.setAttributes({
      'correlation.business_id': businessId,
      'correlation.type': 'business',
    });
  }

  /**
   * Correlate spans by user ID
   *
   * @param {string} userId - User ID
   * @param {Object} spanContext - Span context to correlate
   */
  correlateByUserId(userId, spanContext) {
    const { span } = spanContext;

    span.setAttributes({
      'correlation.user_id': userId,
      'correlation.type': 'user',
    });
  }

  /**
   * Generate W3C Trace Context headers
   *
   * @param {Span} span - OpenTelemetry span
   * @returns {Object} Trace context headers
   * @private
   */
  _generateTraceHeaders(span) {
    const carrier = {};

    // Inject context into carrier using the globally registered propagator
    const ctx = trace.setSpan(context.active(), span);
    propagation.inject(ctx, carrier);

    return carrier;
  }

  /**
   * Copy headers into a plain carrier object with lower-cased names
   *
   * @param {Object} [headers] - Plain object, or an iterable exposing
   *   `entries()` (e.g. `Headers`, `Map`)
   * @returns {Object} Carrier keyed by lower-cased header name
   * @private
   */
  _normalizeHeaders(headers) {
    const carrier = {};

    if (headers == null) {
      return carrier;
    }

    const isEntryIterable =
      typeof headers.entries === 'function' && typeof headers[Symbol.iterator] === 'function';
    const pairs = isEntryIterable ? headers.entries() : Object.entries(headers);

    for (const [key, value] of pairs) {
      const name = String(key).toLowerCase();
      // Assigning to `__proto__` would alter the carrier's prototype rather
      // than store a header, so such a key is never copied.
      if (name === '__proto__') {
        continue;
      }
      carrier[name] = value;
    }

    return carrier;
  }

  /**
   * Extract context from trace context object
   *
   * Accepted shapes, checked in this order:
   * 1. `{ context }` - an OpenTelemetry context, returned as-is
   * 2. `{ headers }` - HTTP headers; names are lower-cased before extraction
   * 3. `{ traceparent, tracestate? }` - a bare W3C carrier, such as the value
   *    returned by `injectIntoHeaders`
   *
   * Anything else yields the currently active context.
   *
   * @param {Object} traceContext - Trace context
   * @returns {Context} OpenTelemetry context
   * @private
   */
  _extractContext(traceContext) {
    if (traceContext.context) {
      return traceContext.context;
    }

    // Extract from headers if provided
    if (traceContext.headers) {
      return propagation.extract(context.active(), this._normalizeHeaders(traceContext.headers));
    }

    // A bare carrier would otherwise be silently ignored and the new span
    // would lose its parent.
    const carrier = this._normalizeHeaders(traceContext);
    if (typeof carrier.traceparent === 'string') {
      return propagation.extract(context.active(), carrier);
    }

    return context.active();
  }

  /**
   * Determine if span should be sampled
   *
   * Makes one random draw against a rate chosen from the attributes:
   * - `errorRate` when `error` or `error.expected` is truthy
   * - `slowRate` when `operation.slow` or `slow` is truthy
   * - `defaultRate` otherwise
   *
   * Errors are therefore always sampled only while `errorRate` is 1.
   *
   * NOTE(review): `samplingConfig.slowThreshold` is not consulted anywhere in
   * this class; callers have to flag slow operations through attributes.
   *
   * @param {Object} attributes - Span attributes
   * @returns {boolean} True if should sample
   * @private
   */
  _shouldSample(attributes) {
    const { errorRate, slowRate, defaultRate } = this.samplingConfig;

    if (attributes.error || attributes['error.expected']) {
      return Math.random() < errorRate;
    }

    if (attributes['operation.slow'] || attributes.slow) {
      return Math.random() < slowRate;
    }

    return Math.random() < defaultRate;
  }

  /**
   * Get active span count
   *
   * @returns {number} Number of spans started and not yet ended
   */
  getActiveSpanCount() {
    return this.activeContexts.size;
  }

  /**
   * Shutdown and cleanup
   *
   * Ends every span that is still active, tagging each with `shutdown: true`.
   */
  shutdown() {
    // Snapshot first: endSpan removes entries from the map while we iterate
    const pending = [...this.activeContexts.values()];

    for (const spanContext of pending) {
      this.endSpan(spanContext, {
        attributes: { shutdown: true },
      });
    }

    this.activeContexts.clear();
  }
}
|
|
338
|
+
|
|
339
|
+
/**
 * Factory for DistributedTracing
 *
 * @param {Object} [config] - Configuration, forwarded unchanged to the
 *   DistributedTracing constructor
 * @returns {DistributedTracing} Newly constructed tracing instance
 */
export function createDistributedTracing(config = {}) {
  const tracing = new DistributedTracing(config);
  return tracing;
}
|
|
348
|
+
|
|
349
|
+
/**
|
|
350
|
+
* Default distributed tracing instance
|
|
351
|
+
*/
|
|
352
|
+
// Built with default configuration as a side effect of importing this module.
export const defaultDistributedTracing = createDistributedTracing();
|