@unrdf/observability 26.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.cjs +10 -0
- package/IMPLEMENTATION-SUMMARY.md +478 -0
- package/LICENSE +21 -0
- package/README.md +482 -0
- package/capability-map.md +90 -0
- package/config/alert-rules.yml +269 -0
- package/config/prometheus.yml +136 -0
- package/dashboards/grafana-unrdf.json +798 -0
- package/dashboards/unrdf-workflow-dashboard.json +295 -0
- package/docs/OBSERVABILITY-PATTERNS.md +681 -0
- package/docs/OBSERVABILITY-RUNBOOK.md +554 -0
- package/examples/observability-demo.mjs +334 -0
- package/package.json +46 -0
- package/src/advanced-metrics.mjs +413 -0
- package/src/alerts/alert-manager.mjs +436 -0
- package/src/custom-events.mjs +558 -0
- package/src/distributed-tracing.mjs +352 -0
- package/src/exporters/grafana-exporter.mjs +415 -0
- package/src/index.mjs +61 -0
- package/src/metrics/workflow-metrics.mjs +346 -0
- package/src/receipts/anchor.mjs +155 -0
- package/src/receipts/index.mjs +62 -0
- package/src/receipts/merkle-tree.mjs +188 -0
- package/src/receipts/receipt-chain.mjs +209 -0
- package/src/receipts/receipt-schema.mjs +128 -0
- package/src/receipts/tamper-detection.mjs +219 -0
- package/test/advanced-metrics.test.mjs +302 -0
- package/test/custom-events.test.mjs +387 -0
- package/test/distributed-tracing.test.mjs +314 -0
- package/validation/observability-validation.mjs +366 -0
- package/vitest.config.mjs +25 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* @unrdf/observability - Validation Script
|
|
4
|
+
*
|
|
5
|
+
* Validates all observability features following Adversarial PM principles.
|
|
6
|
+
* Every claim must be PROVEN with evidence.
|
|
7
|
+
*
|
|
8
|
+
* @module @unrdf/observability/validation
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { performance } from 'node:perf_hooks';
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Observability Validator
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Observability Validator
 *
 * Runs seven evidence-backed claim checks against the package's modules
 * (imported lazily so a missing module fails only its own claim), records
 * a PASS/FAIL result per claim, and prints a final report whose process
 * exit code reflects overall success.
 */
class ObservabilityValidator {
  constructor() {
    /** @type {Array<{claim: string, status: 'PASS'|'FAIL', evidence?: object}>} */
    this.results = [];
    // totalTestTime is filled in by runAll() (milliseconds, performance.now delta).
    this.metrics = { totalTestTime: 0 };
  }

  /**
   * Log a message prefixed with an ISO-8601 timestamp.
   * @param {string} message
   */
  log(message) {
    const ts = new Date().toISOString();
    console.log(`[${ts}] ${message}`);
  }

  /**
   * Record a validation result and echo it (with evidence, if any) to the log.
   * @param {{claim: string, status: 'PASS'|'FAIL', evidence?: object}} result
   */
  addResult(result) {
    this.results.push(result);
    const emoji = result.status === 'PASS' ? '✅' : '❌';
    this.log(`${emoji} ${result.claim} - ${result.status}`);
    if (result.evidence) {
      this.log(`  Evidence: ${JSON.stringify(result.evidence)}`);
    }
  }

  /**
   * Run a single claim check with uniform error handling.
   *
   * On success the check's { status, evidence } is recorded as-is; any
   * thrown error becomes a FAIL result carrying the error message. This
   * replaces the try/catch boilerplate previously duplicated in every
   * validateClaimN_* method.
   *
   * @param {string} claim - Human-readable claim being validated.
   * @param {() => Promise<{status: 'PASS'|'FAIL', evidence?: object}>} check
   */
  async #runClaim(claim, check) {
    try {
      const { status, evidence } = await check();
      this.addResult({ claim, status, evidence });
    } catch (error) {
      this.addResult({ claim, status: 'FAIL', evidence: { error: error.message } });
    }
  }

  /**
   * CLAIM 1: WorkflowMetrics can record and export metrics
   */
  async validateClaim1_MetricsRecording() {
    await this.#runClaim('WorkflowMetrics records and exports metrics', async () => {
      const { createWorkflowMetrics } = await import('../src/metrics/workflow-metrics.mjs');

      const metrics = createWorkflowMetrics({
        enableDefaultMetrics: false,
        prefix: 'test_',
      });

      // Record one sample of each metric family.
      metrics.recordWorkflowStart('test-wf-1', 'SEQUENCE');
      metrics.recordWorkflowComplete('test-wf-1', 'completed', 2.5, 'SEQUENCE');
      metrics.recordTaskExecution('test-wf-1', 'task-1', 'atomic', 'completed', 0.5);
      metrics.recordError('test-error', 'test-wf-1', 'medium');

      // Export in Prometheus text format and check each family is present.
      const prometheusMetrics = await metrics.getMetrics();
      const hasWorkflowMetrics = prometheusMetrics.includes('test_executions_total');
      const hasTaskMetrics = prometheusMetrics.includes('test_task_executions_total');
      const hasErrorMetrics = prometheusMetrics.includes('test_errors_total');

      return {
        status: hasWorkflowMetrics && hasTaskMetrics && hasErrorMetrics ? 'PASS' : 'FAIL',
        evidence: {
          workflowMetrics: hasWorkflowMetrics,
          taskMetrics: hasTaskMetrics,
          errorMetrics: hasErrorMetrics,
        },
      };
    });
  }

  /**
   * CLAIM 2: AlertManager can evaluate thresholds
   */
  async validateClaim2_AlertThresholds() {
    await this.#runClaim('AlertManager evaluates thresholds correctly', async () => {
      const { createAlertManager } = await import('../src/alerts/alert-manager.mjs');

      const alerts = createAlertManager({
        rules: [
          {
            id: 'test-rule',
            name: 'Test Rule',
            metric: 'test_metric',
            threshold: 100,
            operator: 'gt',
            severity: 'warning',
          },
        ],
      });

      let alertFired = false;
      alerts.on('alert', () => {
        alertFired = true;
      });

      // 150 > 100, so the 'alert' event should fire.
      await alerts.evaluateMetric('test_metric', 150);
      // Give asynchronous event delivery a moment to complete.
      await new Promise(resolve => setTimeout(resolve, 100));

      return {
        status: alertFired ? 'PASS' : 'FAIL',
        evidence: { alertFired, activeAlerts: alerts.getActiveAlerts().length },
      };
    });
  }

  /**
   * CLAIM 3: AlertManager detects anomalies
   */
  async validateClaim3_AnomalyDetection() {
    await this.#runClaim('AlertManager detects statistical anomalies', async () => {
      const { createAlertManager } = await import('../src/alerts/alert-manager.mjs');

      const alerts = createAlertManager({
        enableAnomalyDetection: true,
      });

      let anomalyDetected = false;
      alerts.on('alert', alert => {
        if (alert.type === 'anomaly') {
          anomalyDetected = true;
        }
      });

      // Build a baseline around 50±5 (detector needs 30+ samples).
      for (let i = 0; i < 35; i++) {
        await alerts.evaluateMetric('anomaly_test', 50 + Math.random() * 5);
      }

      // Inject a value far outside the baseline distribution.
      await alerts.evaluateMetric('anomaly_test', 500);
      await new Promise(resolve => setTimeout(resolve, 100));

      return {
        status: anomalyDetected ? 'PASS' : 'FAIL',
        evidence: { anomalyDetected },
      };
    });
  }

  /**
   * CLAIM 4: GrafanaExporter generates valid dashboard JSON
   */
  async validateClaim4_GrafanaDashboard() {
    await this.#runClaim('GrafanaExporter generates valid dashboard JSON', async () => {
      const { createGrafanaExporter } = await import('../src/exporters/grafana-exporter.mjs');

      const exporter = createGrafanaExporter({
        title: 'Test Dashboard',
      });

      const dashboard = exporter.generateDashboard();
      const hasDashboard = !!dashboard.dashboard;
      const hasTitle = dashboard.dashboard?.title === 'Test Dashboard';
      const hasPanels = dashboard.dashboard?.panels?.length > 0;

      // Round-trip through JSON.parse to prove the export is well-formed.
      const jsonExport = exporter.exportJSON(true);
      const validJSON = !!JSON.parse(jsonExport);

      return {
        status: hasDashboard && hasTitle && hasPanels && validJSON ? 'PASS' : 'FAIL',
        evidence: {
          hasDashboard,
          hasTitle,
          panelCount: dashboard.dashboard?.panels?.length || 0,
          validJSON,
        },
      };
    });
  }

  /**
   * CLAIM 5: Alert history is tracked correctly
   */
  async validateClaim5_AlertHistory() {
    await this.#runClaim('Alert history tracked correctly', async () => {
      const { createAlertManager } = await import('../src/alerts/alert-manager.mjs');

      const alerts = createAlertManager({
        rules: [
          {
            id: 'history-test',
            name: 'History Test',
            metric: 'history_metric',
            threshold: 50,
            operator: 'gt',
            severity: 'info',
          },
        ],
      });

      // Two breaches followed by a value back under the threshold.
      await alerts.evaluateMetric('history_metric', 100);
      await alerts.evaluateMetric('history_metric', 75);
      await alerts.evaluateMetric('history_metric', 25); // Should resolve

      const history = alerts.getAlertHistory({ limit: 10 });
      const stats = alerts.getStatistics();

      return {
        status: history.length > 0 && stats.total > 0 ? 'PASS' : 'FAIL',
        evidence: {
          historyCount: history.length,
          totalAlerts: stats.total,
          activeAlerts: stats.active,
        },
      };
    });
  }

  /**
   * CLAIM 6: All metric types are supported
   */
  async validateClaim6_MetricTypes() {
    await this.#runClaim('All Prometheus metric types supported', async () => {
      const { createWorkflowMetrics } = await import('../src/metrics/workflow-metrics.mjs');

      const metrics = createWorkflowMetrics({
        enableDefaultMetrics: false,
        prefix: 'types_test_',
      });

      // Exercise each Prometheus metric type once.
      metrics.recordWorkflowStart('wf-1', 'SEQ'); // Counter + Gauge
      metrics.recordResourceUtilization('cpu', 'node-1', 75.5); // Gauge
      metrics.recordLatency('task_exec', 150); // Summary
      metrics.recordWorkflowComplete('wf-1', 'ok', 1.5, 'SEQ'); // Histogram

      const prometheusMetrics = await metrics.getMetrics();

      // Look for the exposition-format TYPE declarations of each kind.
      const hasCounter = prometheusMetrics.includes('# TYPE types_test_executions_total counter');
      const hasGauge = prometheusMetrics.includes('# TYPE types_test_active_workflows gauge');
      const hasHistogram = prometheusMetrics.includes(
        '# TYPE types_test_execution_duration_seconds histogram'
      );
      const hasSummary = prometheusMetrics.includes(
        '# TYPE types_test_latency_percentiles summary'
      );

      return {
        status: hasCounter && hasGauge && hasHistogram && hasSummary ? 'PASS' : 'FAIL',
        evidence: { hasCounter, hasGauge, hasHistogram, hasSummary },
      };
    });
  }

  /**
   * CLAIM 7: Module exports are correct
   */
  async validateClaim7_ModuleExports() {
    await this.#runClaim('Module exports all required functions', async () => {
      const mainModule = await import('../src/index.mjs');

      const hasWorkflowMetrics = !!mainModule.createWorkflowMetrics;
      const hasGrafanaExporter = !!mainModule.createGrafanaExporter;
      const hasAlertManager = !!mainModule.createAlertManager;
      const hasObservabilityStack = !!mainModule.createObservabilityStack;

      return {
        status:
          hasWorkflowMetrics && hasGrafanaExporter && hasAlertManager && hasObservabilityStack
            ? 'PASS'
            : 'FAIL',
        evidence: {
          hasWorkflowMetrics,
          hasGrafanaExporter,
          hasAlertManager,
          hasObservabilityStack,
        },
      };
    });
  }

  /**
   * Run all claim validations sequentially, timing the whole run,
   * then print the report (which sets the process exit code).
   */
  async runAll() {
    this.log('Starting @unrdf/observability Validation');
    this.log('='.repeat(50));

    const startTime = performance.now();

    await this.validateClaim1_MetricsRecording();
    await this.validateClaim2_AlertThresholds();
    await this.validateClaim3_AnomalyDetection();
    await this.validateClaim4_GrafanaDashboard();
    await this.validateClaim5_AlertHistory();
    await this.validateClaim6_MetricTypes();
    await this.validateClaim7_ModuleExports();

    this.metrics.totalTestTime = performance.now() - startTime;
    this.printReport();
  }

  /**
   * Print the pass/fail summary and terminate the process:
   * exit 0 when every claim passed, exit 1 otherwise.
   */
  printReport() {
    this.log('='.repeat(50));
    const passed = this.results.filter(r => r.status === 'PASS').length;
    const failed = this.results.filter(r => r.status === 'FAIL').length;

    this.log(`RESULTS: ${passed}/${this.results.length} PASSED`);
    this.log(`Failed: ${failed}`);
    this.log(`Total Time: ${this.metrics.totalTestTime.toFixed(2)}ms`);

    // Exit code
    process.exit(passed === this.results.length ? 0 : 1);
  }
}
|
|
360
|
+
|
|
361
|
+
// Run validation: any unhandled failure is reported and aborts with exit code 1.
const validator = new ObservabilityValidator();
try {
  await validator.runAll();
} catch (err) {
  console.error('FATAL ERROR:', err.message);
  process.exit(1);
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Vitest configuration for @unrdf/observability
 *
 * Runs every *.test.mjs file under test/ with V8 coverage collection
 * and an 80% minimum coverage threshold on all four dimensions.
 */
import { defineConfig } from 'vitest/config';

export default defineConfig({
  test: {
    include: ['test/**/*.test.mjs'],
    coverage: {
      provider: 'v8',
      // text for the terminal, json/html for CI artifacts.
      reporter: ['text', 'json', 'html'],
      include: ['src/**/*.mjs'],
      // Barrel file only re-exports; excluded from coverage accounting.
      exclude: ['src/index.mjs'],
      // Count files never imported by any test toward coverage totals.
      all: true,
      thresholds: {
        lines: 80,
        functions: 80,
        branches: 80,
        statements: 80,
      },
    },
    // Individual tests and setup/teardown hooks must finish within 5s.
    testTimeout: 5000,
    hookTimeout: 5000,
  },
});
|