@unrdf/observability 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,415 @@
1
+ /**
2
+ * @unrdf/observability - Grafana Dashboard Exporter
3
+ *
4
+ * Generates and exports Grafana dashboard configurations for UNRDF workflows.
5
+ * Provides pre-built dashboards for workflow monitoring, resource utilization,
6
+ * and performance analysis.
7
+ *
8
+ * @module @unrdf/observability/exporters
9
+ */
10
+
11
+ import { z } from 'zod';
12
+
13
/**
 * Dashboard configuration schema.
 *
 * Validates the options accepted by GrafanaExporter and fills in defaults
 * for every field except `title`, which must be supplied by the caller of
 * `parse()`.
 *
 * @type {z.ZodObject}
 */
const DashboardConfigSchema = z.object({
  title: z.string(),
  datasource: z.string().default('Prometheus'),
  refreshInterval: z.string().default('5s'),
  timeRange: z
    .object({
      from: z.string().default('now-1h'),
      to: z.string().default('now'),
    })
    // Spell the default out in full instead of relying on `{}` being run
    // through the inner schema: Zod 3 re-parses a default value, but Zod 4
    // returns it as-is, which would leave `from`/`to` unset. The explicit
    // object yields the same result under both major versions.
    .default({ from: 'now-1h', to: 'now' }),
  tags: z.array(z.string()).default(['unrdf', 'workflow']),
});
29
+
30
/**
 * GrafanaExporter - Dashboard configuration generator
 *
 * Builds plain-object Grafana dashboard payloads (`{ dashboard, overwrite }`)
 * containing:
 * - Workflow execution overview
 * - Active workflow count
 * - Workflow duration heatmap
 * - Task execution rates
 * - Error rate monitoring (with an alert condition)
 * - Resource utilization
 * - Event store metrics
 * - Latency percentiles
 *
 * Every panel, template variable and annotation uses the datasource name
 * from the validated configuration.
 *
 * NOTE(review): several panels use the legacy `graph` panel type; newer
 * Grafana releases favor `timeseries` - confirm against the target version.
 *
 * @class
 */
export class GrafanaExporter {
  /**
   * @param {object} [config] - Dashboard configuration
   * @param {string} [config.title='UNRDF Workflow Dashboard'] - Dashboard title
   * @param {string} [config.datasource='Prometheus'] - Prometheus datasource name
   * @param {string} [config.refreshInterval='5s'] - Dashboard refresh interval
   * @param {{from?: string, to?: string}} [config.timeRange] - Dashboard time range
   * @param {string[]} [config.tags] - Dashboard tags
   * @throws If the merged configuration fails DashboardConfigSchema validation
   */
  constructor(config = {}) {
    // Apply the title default via destructuring so an explicit
    // `title: undefined` falls back to the default instead of overwriting it
    // and failing validation. `|| {}` keeps a null config working, as the
    // original object spread did.
    const { title = 'UNRDF Workflow Dashboard', ...rest } = config || {};
    this.config = DashboardConfigSchema.parse({ title, ...rest });
  }

  /**
   * Generate complete Grafana dashboard JSON
   *
   * The returned object shares no mutable state with `this.config`: tags and
   * time range are copied, so callers may freely modify the result.
   *
   * @returns {object} Grafana dashboard configuration (`{ dashboard, overwrite }`)
   */
  generateDashboard() {
    const { title, tags, refreshInterval, timeRange, datasource } = this.config;

    return {
      dashboard: {
        id: null,
        uid: 'unrdf-workflow-dashboard',
        title,
        // Copy so mutating the generated dashboard cannot corrupt the config
        // used by subsequent exports.
        tags: [...tags],
        timezone: 'browser',
        schemaVersion: 38,
        version: 1,
        refresh: refreshInterval,
        time: { ...timeRange },
        timepicker: {
          refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h'],
          time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
        },
        panels: this._generatePanels(),
        templating: this._generateTemplating(),
        annotations: {
          list: [
            {
              datasource,
              enable: true,
              hide: false,
              iconColor: 'rgba(255, 96, 96, 1)',
              name: 'Workflow Events',
              target: {
                expr: 'unrdf_workflow_executions_total',
              },
            },
          ],
        },
      },
      overwrite: true,
    };
  }

  /**
   * Generate dashboard panels
   *
   * Panels are returned in layout order (ids 1 through 8).
   *
   * @private
   * @returns {object[]} Panel configurations
   */
  _generatePanels() {
    return [
      this._createWorkflowOverviewPanel(),
      this._createActiveWorkflowsPanel(),
      this._createWorkflowDurationPanel(),
      this._createTaskExecutionPanel(),
      this._createErrorRatePanel(),
      this._createResourceUtilizationPanel(),
      this._createEventStorePanel(),
      this._createLatencyPercentilesPanel(),
    ];
  }

  /**
   * Create workflow overview panel (5m rate of workflow executions)
   * @private
   * @returns {object} Panel configuration
   */
  _createWorkflowOverviewPanel() {
    return {
      id: 1,
      gridPos: { h: 8, w: 12, x: 0, y: 0 },
      type: 'graph',
      title: 'Workflow Executions by Status',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'rate(unrdf_workflow_executions_total[5m])',
          legendFormat: '{{status}} - {{pattern}}',
          refId: 'A',
        },
      ],
      yaxes: [{ format: 'short', label: 'Executions/sec' }, { format: 'short' }],
      legend: { show: true, alignAsTable: true, values: true },
    };
  }

  /**
   * Create active workflows stat panel with green/yellow/red thresholds
   * at 0 / 50 / 100
   * @private
   * @returns {object} Panel configuration
   */
  _createActiveWorkflowsPanel() {
    return {
      id: 2,
      gridPos: { h: 8, w: 12, x: 12, y: 0 },
      type: 'stat',
      title: 'Active Workflows',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'sum(unrdf_workflow_active_workflows)',
          refId: 'A',
        },
      ],
      options: {
        orientation: 'auto',
        textMode: 'value_and_name',
        colorMode: 'value',
        graphMode: 'area',
      },
      fieldConfig: {
        defaults: {
          thresholds: {
            mode: 'absolute',
            steps: [
              { value: 0, color: 'green' },
              { value: 50, color: 'yellow' },
              { value: 100, color: 'red' },
            ],
          },
        },
      },
    };
  }

  /**
   * Create workflow duration heatmap panel from the duration histogram buckets
   * @private
   * @returns {object} Panel configuration
   */
  _createWorkflowDurationPanel() {
    return {
      id: 3,
      gridPos: { h: 8, w: 24, x: 0, y: 8 },
      type: 'heatmap',
      title: 'Workflow Execution Duration Distribution',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'rate(unrdf_workflow_execution_duration_seconds_bucket[5m])',
          legendFormat: '{{le}}',
          refId: 'A',
        },
      ],
      heatmap: {
        colorScheme: 'interpolateSpectral',
      },
    };
  }

  /**
   * Create task execution panel (5m rate of task executions)
   * @private
   * @returns {object} Panel configuration
   */
  _createTaskExecutionPanel() {
    return {
      id: 4,
      gridPos: { h: 8, w: 12, x: 0, y: 16 },
      type: 'graph',
      title: 'Task Executions by Type',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'rate(unrdf_workflow_task_executions_total[5m])',
          legendFormat: '{{task_type}} - {{status}}',
          refId: 'A',
        },
      ],
      yaxes: [{ format: 'short', label: 'Tasks/sec' }, { format: 'short' }],
    };
  }

  /**
   * Create error rate panel, including an alert condition that compares the
   * 5m average of query A against a threshold of 1
   * @private
   * @returns {object} Panel configuration
   */
  _createErrorRatePanel() {
    return {
      id: 5,
      gridPos: { h: 8, w: 12, x: 12, y: 16 },
      type: 'graph',
      title: 'Error Rate by Severity',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'rate(unrdf_workflow_errors_total[5m])',
          legendFormat: '{{severity}} - {{error_type}}',
          refId: 'A',
        },
      ],
      yaxes: [{ format: 'short', label: 'Errors/sec' }, { format: 'short' }],
      alert: {
        conditions: [
          {
            evaluator: { params: [1], type: 'gt' },
            operator: { type: 'and' },
            query: { params: ['A', '5m', 'now'] },
            reducer: { params: [], type: 'avg' },
            type: 'query',
          },
        ],
      },
    };
  }

  /**
   * Create resource utilization panel (y-axis fixed to 0-100 percent)
   * @private
   * @returns {object} Panel configuration
   */
  _createResourceUtilizationPanel() {
    return {
      id: 6,
      gridPos: { h: 8, w: 12, x: 0, y: 24 },
      type: 'graph',
      title: 'Resource Utilization',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'unrdf_workflow_resource_utilization',
          legendFormat: '{{resource_type}} - {{resource_id}}',
          refId: 'A',
        },
      ],
      yaxes: [{ format: 'percent', label: 'Utilization %', max: 100, min: 0 }, { format: 'short' }],
    };
  }

  /**
   * Create event store panel (event append rate and store size)
   * @private
   * @returns {object} Panel configuration
   */
  _createEventStorePanel() {
    return {
      id: 7,
      gridPos: { h: 8, w: 12, x: 12, y: 24 },
      type: 'graph',
      title: 'Event Store Metrics',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'rate(unrdf_workflow_events_appended_total[5m])',
          legendFormat: '{{event_type}} events/sec',
          refId: 'A',
        },
        {
          expr: 'unrdf_workflow_event_store_size_bytes',
          legendFormat: 'Store size ({{workflow_id}})',
          refId: 'B',
        },
      ],
    };
  }

  /**
   * Create latency percentiles panel (p50 / p90 / p99 series)
   * @private
   * @returns {object} Panel configuration
   */
  _createLatencyPercentilesPanel() {
    return {
      id: 8,
      gridPos: { h: 8, w: 24, x: 0, y: 32 },
      type: 'graph',
      title: 'Operation Latency Percentiles',
      datasource: this.config.datasource,
      targets: [
        {
          expr: 'unrdf_workflow_latency_percentiles{quantile="0.5"}',
          legendFormat: 'p50 - {{operation}}',
          refId: 'A',
        },
        {
          expr: 'unrdf_workflow_latency_percentiles{quantile="0.9"}',
          legendFormat: 'p90 - {{operation}}',
          refId: 'B',
        },
        {
          expr: 'unrdf_workflow_latency_percentiles{quantile="0.99"}',
          legendFormat: 'p99 - {{operation}}',
          refId: 'C',
        },
      ],
      yaxes: [{ format: 's', label: 'Latency' }, { format: 'short' }],
    };
  }

  /**
   * Generate dashboard templating/variables
   *
   * Defines two multi-select query variables, `workflow_id` and `pattern`,
   * both populated from labels of `unrdf_workflow_executions_total`.
   *
   * @private
   * @returns {{list: object[]}} Templating configuration
   */
  _generateTemplating() {
    return {
      list: [
        {
          name: 'workflow_id',
          type: 'query',
          datasource: this.config.datasource,
          query: 'label_values(unrdf_workflow_executions_total, workflow_id)',
          multi: true,
          includeAll: true,
          refresh: 1,
        },
        {
          name: 'pattern',
          type: 'query',
          datasource: this.config.datasource,
          query: 'label_values(unrdf_workflow_executions_total, pattern)',
          multi: true,
          includeAll: true,
          refresh: 1,
        },
      ],
    };
  }

  /**
   * Export dashboard to JSON string
   * @param {boolean} [pretty=true] - Pretty print JSON with 2-space indentation
   * @returns {string} Dashboard JSON
   */
  exportJSON(pretty = true) {
    const dashboard = this.generateDashboard();
    return JSON.stringify(dashboard, null, pretty ? 2 : 0);
  }

  /**
   * Generate alert dashboard
   *
   * Contains a single table panel listing firing alerts. The title is fixed
   * and the configured tags are extended with `alerts`.
   *
   * @returns {object} Alert-focused dashboard configuration (`{ dashboard, overwrite }`)
   */
  generateAlertDashboard() {
    return {
      dashboard: {
        id: null,
        uid: 'unrdf-alerts-dashboard',
        title: 'UNRDF Alerts & SLOs',
        tags: [...this.config.tags, 'alerts'],
        panels: [
          {
            id: 1,
            gridPos: { h: 8, w: 24, x: 0, y: 0 },
            type: 'table',
            title: 'Active Alerts',
            datasource: this.config.datasource,
            targets: [
              {
                expr: 'ALERTS{alertstate="firing"}',
                refId: 'A',
              },
            ],
          },
        ],
      },
      overwrite: true,
    };
  }
}
405
+
406
/**
 * Factory for GrafanaExporter.
 *
 * Convenience wrapper for callers that prefer a function over `new`; the
 * configuration is passed to the GrafanaExporter constructor unchanged.
 *
 * @param {object} [config] - Dashboard configuration
 * @returns {GrafanaExporter} Exporter instance
 */
export function createGrafanaExporter(config = {}) {
  const exporter = new GrafanaExporter(config);
  return exporter;
}
414
+
415
+ export default GrafanaExporter;
package/src/index.mjs ADDED
@@ -0,0 +1,61 @@
1
+ /**
2
+ * @unrdf/observability - Main Entry Point
3
+ *
4
+ * Innovative Prometheus/Grafana observability for UNRDF distributed workflows.
5
+ * Provides comprehensive metrics collection, dashboard generation, and alerting.
6
+ *
7
+ * @module @unrdf/observability
8
+ */
9
+
10
+ export {
11
+ WorkflowMetrics,
12
+ createWorkflowMetrics,
13
+ WorkflowStatus,
14
+ } from './metrics/workflow-metrics.mjs';
15
+ export { GrafanaExporter, createGrafanaExporter } from './exporters/grafana-exporter.mjs';
16
+ export { AlertManager, createAlertManager, AlertSeverity } from './alerts/alert-manager.mjs';
17
+
18
/**
 * Create a complete observability stack
 *
 * Instantiates workflow metrics, a Grafana exporter and an alert manager,
 * then wraps `metrics.recordWorkflowComplete` and `metrics.recordError` so
 * that every call is also reported to `alerts.evaluateMetric`
 * (as `workflow_duration` and `error_count` respectively). The wrappers call
 * the original method first and return whatever it returns.
 *
 * @param {object} [config] - Observability configuration
 * @param {object} [config.metrics] - Metrics configuration
 * @param {object} [config.grafana] - Grafana configuration
 * @param {object} [config.alerts] - Alert configuration
 * @returns {Promise<{metrics: object, grafana: object, alerts: object}>}
 *   Complete observability stack
 */
export async function createObservabilityStack(config = {}) {
  // The three modules are independent, so load them concurrently rather than
  // awaiting each import in turn.
  const [{ createWorkflowMetrics }, { createGrafanaExporter }, { createAlertManager }] =
    await Promise.all([
      import('./metrics/workflow-metrics.mjs'),
      import('./exporters/grafana-exporter.mjs'),
      import('./alerts/alert-manager.mjs'),
    ]);

  // Treat a null config like an empty one instead of throwing on property access.
  const settings = config || {};
  const metrics = createWorkflowMetrics(settings.metrics || {});
  const grafana = createGrafanaExporter(settings.grafana || {});
  const alerts = createAlertManager(settings.alerts || {});

  // Wire up metrics to alerts
  const originalRecordWorkflowComplete = metrics.recordWorkflowComplete.bind(metrics);
  metrics.recordWorkflowComplete = (workflowId, status, duration, pattern, ...extra) => {
    // Forward any additional arguments and keep the original return value so
    // the wrapper stays transparent to callers.
    const result = originalRecordWorkflowComplete(workflowId, status, duration, pattern, ...extra);
    alerts.evaluateMetric('workflow_duration', duration, {
      workflow_id: workflowId,
      status,
      pattern,
    });
    return result;
  };

  const originalRecordError = metrics.recordError.bind(metrics);
  metrics.recordError = (errorType, workflowId, severity, ...extra) => {
    const result = originalRecordError(errorType, workflowId, severity, ...extra);
    // Each recorded error is reported to the alert manager as a count of 1.
    alerts.evaluateMetric('error_count', 1, {
      error_type: errorType,
      workflow_id: workflowId,
      severity,
    });
    return result;
  };

  return {
    metrics,
    grafana,
    alerts,
  };
}