@zintrust/workers 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +861 -0
- package/dist/AnomalyDetection.d.ts +102 -0
- package/dist/AnomalyDetection.js +321 -0
- package/dist/AutoScaler.d.ts +127 -0
- package/dist/AutoScaler.js +425 -0
- package/dist/BroadcastWorker.d.ts +21 -0
- package/dist/BroadcastWorker.js +24 -0
- package/dist/CanaryController.d.ts +103 -0
- package/dist/CanaryController.js +380 -0
- package/dist/ChaosEngineering.d.ts +79 -0
- package/dist/ChaosEngineering.js +216 -0
- package/dist/CircuitBreaker.d.ts +106 -0
- package/dist/CircuitBreaker.js +374 -0
- package/dist/ClusterLock.d.ts +90 -0
- package/dist/ClusterLock.js +385 -0
- package/dist/ComplianceManager.d.ts +177 -0
- package/dist/ComplianceManager.js +556 -0
- package/dist/DatacenterOrchestrator.d.ts +133 -0
- package/dist/DatacenterOrchestrator.js +404 -0
- package/dist/DeadLetterQueue.d.ts +122 -0
- package/dist/DeadLetterQueue.js +539 -0
- package/dist/HealthMonitor.d.ts +42 -0
- package/dist/HealthMonitor.js +301 -0
- package/dist/MultiQueueWorker.d.ts +89 -0
- package/dist/MultiQueueWorker.js +277 -0
- package/dist/NotificationWorker.d.ts +21 -0
- package/dist/NotificationWorker.js +23 -0
- package/dist/Observability.d.ts +153 -0
- package/dist/Observability.js +530 -0
- package/dist/PluginManager.d.ts +123 -0
- package/dist/PluginManager.js +392 -0
- package/dist/PriorityQueue.d.ts +117 -0
- package/dist/PriorityQueue.js +244 -0
- package/dist/ResourceMonitor.d.ts +164 -0
- package/dist/ResourceMonitor.js +605 -0
- package/dist/SLAMonitor.d.ts +110 -0
- package/dist/SLAMonitor.js +274 -0
- package/dist/WorkerFactory.d.ts +193 -0
- package/dist/WorkerFactory.js +1507 -0
- package/dist/WorkerInit.d.ts +85 -0
- package/dist/WorkerInit.js +223 -0
- package/dist/WorkerMetrics.d.ts +114 -0
- package/dist/WorkerMetrics.js +509 -0
- package/dist/WorkerRegistry.d.ts +145 -0
- package/dist/WorkerRegistry.js +319 -0
- package/dist/WorkerShutdown.d.ts +61 -0
- package/dist/WorkerShutdown.js +159 -0
- package/dist/WorkerVersioning.d.ts +107 -0
- package/dist/WorkerVersioning.js +300 -0
- package/dist/build-manifest.json +462 -0
- package/dist/config/workerConfig.d.ts +3 -0
- package/dist/config/workerConfig.js +19 -0
- package/dist/createQueueWorker.d.ts +23 -0
- package/dist/createQueueWorker.js +113 -0
- package/dist/dashboard/index.d.ts +1 -0
- package/dist/dashboard/index.js +1 -0
- package/dist/dashboard/types.d.ts +117 -0
- package/dist/dashboard/types.js +1 -0
- package/dist/dashboard/workers-api.d.ts +4 -0
- package/dist/dashboard/workers-api.js +638 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
- package/dist/dashboard/workers-dashboard-ui.js +1026 -0
- package/dist/dashboard/workers-dashboard.d.ts +4 -0
- package/dist/dashboard/workers-dashboard.js +904 -0
- package/dist/helper/index.d.ts +5 -0
- package/dist/helper/index.js +10 -0
- package/dist/http/WorkerApiController.d.ts +38 -0
- package/dist/http/WorkerApiController.js +312 -0
- package/dist/http/WorkerController.d.ts +374 -0
- package/dist/http/WorkerController.js +1351 -0
- package/dist/http/middleware/CustomValidation.d.ts +92 -0
- package/dist/http/middleware/CustomValidation.js +270 -0
- package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
- package/dist/http/middleware/DatacenterValidator.js +94 -0
- package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
- package/dist/http/middleware/EditWorkerValidation.js +55 -0
- package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
- package/dist/http/middleware/FeaturesValidator.js +60 -0
- package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
- package/dist/http/middleware/InfrastructureValidator.js +226 -0
- package/dist/http/middleware/OptionsValidator.d.ts +3 -0
- package/dist/http/middleware/OptionsValidator.js +112 -0
- package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
- package/dist/http/middleware/PayloadSanitizer.js +42 -0
- package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
- package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
- package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/QueueNameSanitizer.js +45 -0
- package/dist/http/middleware/ValidateDriver.d.ts +7 -0
- package/dist/http/middleware/ValidateDriver.js +20 -0
- package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
- package/dist/http/middleware/VersionSanitizer.js +25 -0
- package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
- package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
- package/dist/http/middleware/WorkerValidationChain.js +185 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +48 -0
- package/dist/routes/workers.d.ts +12 -0
- package/dist/routes/workers.js +81 -0
- package/dist/storage/WorkerStore.d.ts +45 -0
- package/dist/storage/WorkerStore.js +195 -0
- package/dist/type.d.ts +76 -0
- package/dist/type.js +1 -0
- package/dist/ui/router/ui.d.ts +3 -0
- package/dist/ui/router/ui.js +83 -0
- package/dist/ui/types/worker-ui.d.ts +229 -0
- package/dist/ui/types/worker-ui.js +5 -0
- package/package.json +53 -0
- package/src/AnomalyDetection.ts +434 -0
- package/src/AutoScaler.ts +654 -0
- package/src/BroadcastWorker.ts +34 -0
- package/src/CanaryController.ts +531 -0
- package/src/ChaosEngineering.ts +301 -0
- package/src/CircuitBreaker.ts +495 -0
- package/src/ClusterLock.ts +499 -0
- package/src/ComplianceManager.ts +815 -0
- package/src/DatacenterOrchestrator.ts +561 -0
- package/src/DeadLetterQueue.ts +733 -0
- package/src/HealthMonitor.ts +390 -0
- package/src/MultiQueueWorker.ts +431 -0
- package/src/NotificationWorker.ts +33 -0
- package/src/Observability.ts +696 -0
- package/src/PluginManager.ts +551 -0
- package/src/PriorityQueue.ts +351 -0
- package/src/ResourceMonitor.ts +769 -0
- package/src/SLAMonitor.ts +408 -0
- package/src/WorkerFactory.ts +2108 -0
- package/src/WorkerInit.ts +313 -0
- package/src/WorkerMetrics.ts +709 -0
- package/src/WorkerRegistry.ts +443 -0
- package/src/WorkerShutdown.ts +210 -0
- package/src/WorkerVersioning.ts +422 -0
- package/src/config/workerConfig.ts +25 -0
- package/src/createQueueWorker.ts +174 -0
- package/src/dashboard/index.ts +6 -0
- package/src/dashboard/types.ts +141 -0
- package/src/dashboard/workers-api.ts +785 -0
- package/src/dashboard/zintrust.svg +30 -0
- package/src/helper/index.ts +11 -0
- package/src/http/WorkerApiController.ts +369 -0
- package/src/http/WorkerController.ts +1512 -0
- package/src/http/middleware/CustomValidation.ts +360 -0
- package/src/http/middleware/DatacenterValidator.ts +124 -0
- package/src/http/middleware/EditWorkerValidation.ts +74 -0
- package/src/http/middleware/FeaturesValidator.ts +82 -0
- package/src/http/middleware/InfrastructureValidator.ts +295 -0
- package/src/http/middleware/OptionsValidator.ts +144 -0
- package/src/http/middleware/PayloadSanitizer.ts +52 -0
- package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
- package/src/http/middleware/QueueNameSanitizer.ts +55 -0
- package/src/http/middleware/ValidateDriver.ts +29 -0
- package/src/http/middleware/VersionSanitizer.ts +30 -0
- package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
- package/src/http/middleware/WorkerValidationChain.ts +230 -0
- package/src/index.ts +98 -0
- package/src/routes/workers.ts +154 -0
- package/src/storage/WorkerStore.ts +240 -0
- package/src/type.ts +89 -0
- package/src/types/queue-monitor.d.ts +38 -0
- package/src/types/queue-redis.d.ts +38 -0
- package/src/ui/README.md +13 -0
- package/src/ui/components/JsonEditor.js +670 -0
- package/src/ui/components/JsonViewer.js +387 -0
- package/src/ui/components/WorkerCard.js +178 -0
- package/src/ui/components/WorkerExpandPanel.js +257 -0
- package/src/ui/components/fetcher.js +42 -0
- package/src/ui/components/sla-scorecard.js +32 -0
- package/src/ui/components/styles.css +30 -0
- package/src/ui/components/table-expander.js +34 -0
- package/src/ui/integration/worker-ui-integration.js +565 -0
- package/src/ui/router/ui.ts +99 -0
- package/src/ui/services/workerApi.js +240 -0
- package/src/ui/types/worker-ui.ts +283 -0
- package/src/ui/utils/jsonValidator.js +444 -0
- package/src/ui/workers/index.html +202 -0
- package/src/ui/workers/main.js +1781 -0
- package/src/ui/workers/styles.css +1350 -0
|
@@ -0,0 +1,696 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability Manager
|
|
3
|
+
* Integrations for Prometheus, OpenTelemetry, and Datadog
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Span, SpanOptions, Tracer } from '@opentelemetry/api';
|
|
8
|
+
import { ErrorFactory, Logger, generateUuid } from '@zintrust/core';
|
|
9
|
+
import type { Counter, Gauge, Histogram, Registry, Summary } from 'prom-client';
|
|
10
|
+
|
|
11
|
+
/** Prometheus section of the observability configuration. */
type PrometheusSettings = {
  /** Turns the Prometheus backend on or off. */
  enabled: boolean;
  /** Scrape port; this module only logs it. */
  port?: number;
  /** Scrape path; logged as '/metrics' when omitted. */
  path?: string;
  /** Labels applied to the registry via `setDefaultLabels`. */
  defaultLabels?: Record<string, string>;
};

/** OpenTelemetry section of the observability configuration. */
type OpenTelemetrySettings = {
  /** Turns tracing on or off. */
  enabled: boolean;
  /** Name passed to `trace.getTracer`. */
  serviceName: string;
  exporterUrl?: string;
  /** 0-1, percentage of traces to sample. */
  sampleRate?: number;
};

/** Datadog (StatsD) section of the observability configuration. */
type DatadogSettings = {
  /** Turns the StatsD client on or off. */
  enabled: boolean;
  /** StatsD host; 'localhost' when omitted. */
  host?: string;
  /** StatsD port; 8125 when omitted. */
  port?: number;
  /** Metric name prefix; 'worker.' when omitted. */
  prefix?: string;
  /** Global tags attached to every metric. */
  tags?: string[];
};

/** Complete configuration accepted by `Observability.initialize`. */
export type ObservabilityConfig = {
  prometheus: PrometheusSettings;
  openTelemetry: OpenTelemetrySettings;
  datadog: DatadogSettings;
};
|
|
32
|
+
|
|
33
|
+
/** Kinds of Prometheus metric that `registerMetric` can create. */
export type MetricType =
  | 'counter'
  | 'gauge'
  | 'histogram'
  | 'summary';

/** Description of a metric to register. */
export type MetricDefinition = {
  /** Metric name; also the key used to look the metric up later. */
  name: string;
  /** Which prom-client metric class to instantiate. */
  type: MetricType;
  /** Help text passed to prom-client. */
  help: string;
  /** Label names; an empty list is used when omitted. */
  labels?: string[];
};
|
|
41
|
+
|
|
42
|
+
/** Identifiers describing a position in a trace. */
export type TraceContext = {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
  baggage?: Record<string, string>;
};

/** Value types allowed for a span attribute. */
type SpanAttributeValue = string | number | boolean;

/** Attribute bag attached to spans and span events. */
export type SpanAttributes = Record<string, SpanAttributeValue>;
|
|
50
|
+
|
|
51
|
+
// ---------------------------------------------------------------------------
// Module-level state (reset by Observability.shutdown)
// ---------------------------------------------------------------------------

// Configuration supplied to initialize(); null while uninitialized.
let config: ObservabilityConfig | null = null;

// Prometheus: lazily imported module, the registry, and metrics keyed by name.
let promClient: typeof import('prom-client') | null = null;
let promRegistry: Registry | null = null;
const promMetrics = new Map<string, Counter | Gauge | Histogram | Summary>();

// OpenTelemetry: tracer, spans currently tracked by id, and the sweep timer.
let otelTracer: Tracer | null = null;
const activeSpans = new Map<string, { span: Span; startedAt: number }>();
let spanSweepInterval: NodeJS.Timeout | null = null;

// Upper bound on tracked spans; reaching it evicts the oldest one.
const MAX_ACTIVE_SPANS = 1000;
// Age (ms) after which a tracked span is force-ended; also the sweep period.
const SPAN_TTL_MS = 5 * 60 * 1000;
|
|
66
|
+
|
|
67
|
+
/**
 * Force-ends and forgets every tracked span older than SPAN_TTL_MS.
 */
const cleanupStaleSpans = (): void => {
  // Spans started before this instant have outlived their TTL.
  const cutoff = Date.now() - SPAN_TTL_MS;

  // Deleting the current entry while iterating a Map is safe.
  activeSpans.forEach((tracked, id) => {
    if (tracked.startedAt < cutoff) {
      tracked.span.end();
      activeSpans.delete(id);
    }
  });
};
|
|
76
|
+
|
|
77
|
+
/**
 * Makes room for a new span: once MAX_ACTIVE_SPANS entries are tracked, the
 * entry with the smallest `startedAt` is ended and removed. Does nothing
 * while the map is below the limit.
 */
const evictOldestSpan = (): void => {
  if (activeSpans.size >= MAX_ACTIVE_SPANS) {
    let victimId: string | null = null;
    let victimStartedAt = Number.POSITIVE_INFINITY;

    // Linear scan; the strict comparison keeps the first entry on ties.
    activeSpans.forEach((tracked, id) => {
      if (tracked.startedAt < victimStartedAt) {
        victimId = id;
        victimStartedAt = tracked.startedAt;
      }
    });

    if (victimId !== null) {
      activeSpans.get(victimId)?.span.end();
      activeSpans.delete(victimId);
    }
  }
};
|
|
98
|
+
|
|
99
|
+
/** Signature shared by the value-carrying StatsD calls. */
type DatadogMetricFn = (name: string, value: number, tags?: string[]) => void;

/** The subset of the StatsD client surface this module relies on. */
type DatadogClient = {
  increment: (name: string, value?: number, tags?: string[]) => void;
  gauge: DatadogMetricFn;
  histogram: DatadogMetricFn;
  timing: DatadogMetricFn;
  close: (callback?: () => void) => void;
};

/** Options this module passes when constructing the StatsD client. */
type DatadogClientOptions = {
  host?: string;
  port?: number;
  prefix?: string;
  globalTags?: string[];
};

/** Constructor shape expected from the dynamically imported StatsD class. */
type DatadogClientConstructor = new (options: DatadogClientOptions) => DatadogClient;

// Datadog state: the live client, or null when disabled / not initialized.
let datadogClient: DatadogClient | null = null;
|
|
116
|
+
|
|
117
|
+
/**
 * Helper: Lazy load Prometheus client
 *
 * Imports `prom-client` on first use and caches the module in `promClient`,
 * so later calls resolve to the cached module.
 */
const getPrometheusClient = async (): Promise<typeof import('prom-client')> => {
  if (promClient === null) {
    promClient = await import('prom-client');
  }
  return promClient;
};
|
|
124
|
+
|
|
125
|
+
/**
 * Helper: Lazy load OpenTelemetry API
 *
 * Resolves to the dynamically imported `@opentelemetry/api` module.
 */
const getOpenTelemetryApi = (): Promise<typeof import('@opentelemetry/api')> =>
  import('@opentelemetry/api');
|
|
131
|
+
|
|
132
|
+
/**
 * Helper: Initialize Prometheus
 *
 * Creates a dedicated registry, applies the configured default labels and
 * enables prom-client's default metric collection on it. Does nothing when
 * `promConfig.enabled` is false.
 *
 * The registry is built in a local variable and published to `promRegistry`
 * only after every setup step has succeeded, so a failure never leaves a
 * half-configured registry visible to the rest of the module.
 *
 * @param promConfig - Prometheus section of the observability configuration.
 * @throws Re-throws any error raised while loading or configuring prom-client.
 */
const initPrometheus = async (promConfig: ObservabilityConfig['prometheus']): Promise<void> => {
  if (!promConfig.enabled) return;

  try {
    const client = await getPrometheusClient();
    const registry = new client.Registry();

    // Set default labels if provided
    if (promConfig.defaultLabels) {
      registry.setDefaultLabels(promConfig.defaultLabels);
    }

    // Enable default metrics (process, Node.js metrics)
    client.collectDefaultMetrics({ register: registry });

    // Publish only once the registry is fully configured.
    promRegistry = registry;

    Logger.info('Prometheus metrics initialized', {
      port: promConfig.port,
      path: promConfig.path ?? '/metrics',
    });
  } catch (error) {
    // Make sure no partially initialized registry stays reachable.
    promRegistry = null;
    Logger.error('Failed to initialize Prometheus', error);
    throw error;
  }
};
|
|
159
|
+
|
|
160
|
+
/**
 * Helper: Initialize OpenTelemetry
 *
 * When tracing is enabled, obtains a tracer named after the configured
 * service and stores it in `otelTracer`. Failures are logged and swallowed
 * so the application keeps running without tracing.
 *
 * @param otelConfig - OpenTelemetry section of the observability configuration.
 */
const initOpenTelemetry = async (
  otelConfig: ObservabilityConfig['openTelemetry']
): Promise<void> => {
  const { enabled, serviceName, sampleRate } = otelConfig;

  if (enabled) {
    try {
      const { trace } = await getOpenTelemetryApi();
      otelTracer = trace.getTracer(serviceName);

      // The sample rate is only reported here; 1 is logged when unset.
      Logger.info('OpenTelemetry tracing initialized', {
        serviceName,
        sampleRate: sampleRate ?? 1,
      });
    } catch (error) {
      // Deliberately not re-thrown: tracing is optional.
      Logger.error('Failed to initialize OpenTelemetry', error);
    }
  }
};
|
|
181
|
+
|
|
182
|
+
/**
 * Helper: Initialize Datadog
 *
 * When Datadog is enabled, dynamically imports `hot-shots`, constructs a
 * StatsD client and stores it in `datadogClient`. Failures are logged and
 * swallowed so the application keeps running without Datadog.
 *
 * The StatsD class is looked up as a named export first and then under the
 * module's `default` export (either the class itself or an object carrying
 * `StatsD`), so CommonJS/ESM interop differences do not disable the
 * integration.
 *
 * @param ddConfig - Datadog section of the observability configuration.
 */
const initDatadog = async (ddConfig: ObservabilityConfig['datadog']): Promise<void> => {
  if (!ddConfig.enabled) return;

  try {
    const module = (await import('hot-shots')) as unknown as {
      StatsD?: DatadogClientConstructor;
      default?: DatadogClientConstructor | { StatsD?: DatadogClientConstructor };
    };

    // Depending on how the CommonJS module is surfaced, the class may only be
    // reachable through `default`.
    const viaDefault =
      typeof module.default === 'function' ? module.default : module.default?.StatsD;
    const StatsDClass = module.StatsD ?? viaDefault;

    if (!StatsDClass) {
      Logger.warn('Datadog StatsD client unavailable');
      return;
    }

    const host = ddConfig.host ?? 'localhost';
    const port = ddConfig.port ?? 8125;

    datadogClient = new StatsDClass({
      host,
      port,
      prefix: ddConfig.prefix ?? 'worker.',
      globalTags: ddConfig.tags ?? [],
    });

    Logger.info('Datadog StatsD initialized', { host, port });
  } catch (error) {
    Logger.error('Failed to initialize Datadog', error);
    // Don't throw - allow app to continue without Datadog
  }
};
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* Observability Manager - Sealed namespace
|
|
216
|
+
*/
|
|
217
|
+
export const Observability = Object.freeze({
|
|
218
|
+
/**
 * Initialize observability with configuration
 *
 * Stores the configuration, initializes every enabled backend in parallel
 * and, when tracing is enabled, starts the periodic stale-span sweep.
 * A call made while a configuration is already stored logs a warning and
 * returns without changing anything.
 *
 * If any backend initializer rejects, all partially initialized state is
 * rolled back (so a later `initialize` can retry) and the first rejection
 * reason is re-thrown.
 *
 * @param observabilityConfig - Settings for all three backends.
 * @throws The first error raised by a backend initializer.
 */
async initialize(observabilityConfig: ObservabilityConfig): Promise<void> {
  if (config) {
    Logger.warn('Observability already initialized');
    return;
  }

  config = observabilityConfig;

  // Wait for *all* initializers to settle so that none of them can still
  // publish state after a failure has been handled.
  const outcomes = await Promise.allSettled([
    initPrometheus(observabilityConfig.prometheus),
    initOpenTelemetry(observabilityConfig.openTelemetry),
    initDatadog(observabilityConfig.datadog),
  ]);

  const failure = outcomes.find(
    (outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected'
  );

  if (failure !== undefined) {
    // Roll back so the module does not report "already initialized" while
    // being only half set up.
    if (datadogClient !== null) {
      datadogClient.close();
      datadogClient = null;
    }
    otelTracer = null;
    promRegistry = null;
    config = null;
    throw failure.reason;
  }

  if (observabilityConfig.openTelemetry.enabled === true && spanSweepInterval === null) {
    spanSweepInterval = setInterval(cleanupStaleSpans, SPAN_TTL_MS);
    // The sweeper is housekeeping only; it must not keep the process alive.
    spanSweepInterval.unref();
  }

  Logger.info('Observability initialized', {
    prometheus: observabilityConfig.prometheus.enabled,
    openTelemetry: observabilityConfig.openTelemetry.enabled,
    datadog: observabilityConfig.datadog.enabled,
  });
},
|
|
248
|
+
|
|
249
|
+
/**
 * Register a metric
 *
 * Creates a prom-client metric of the requested type on the module's
 * registry and remembers it under `definition.name`. Does nothing when
 * Prometheus is disabled or not initialized, or when a metric with the same
 * name is already registered.
 *
 * @param definition - Name, type, help text and optional label names.
 * @throws If `definition.type` is not one of the supported metric types.
 */
async registerMetric(definition: MetricDefinition): Promise<void> {
  if (config?.prometheus.enabled !== true || promRegistry === null) {
    return;
  }

  if (promMetrics.has(definition.name)) {
    Logger.debug(`Metric already registered: ${definition.name}`);
    return;
  }

  const client = await getPrometheusClient();

  // Module state may have changed while awaiting: shutdown() can have cleared
  // the registry, or a concurrent call can have registered the same name.
  const registry: Registry | null = promRegistry;
  if (registry === null) {
    return;
  }
  if (promMetrics.has(definition.name)) {
    Logger.debug(`Metric already registered: ${definition.name}`);
    return;
  }

  // Options shared by every metric type.
  const shared = {
    name: definition.name,
    help: definition.help,
    labelNames: definition.labels ?? [],
    registers: [registry],
  };

  let metric: Counter | Gauge | Histogram | Summary;

  switch (definition.type) {
    case 'counter':
      metric = new client.Counter(shared);
      break;
    case 'gauge':
      metric = new client.Gauge(shared);
      break;
    case 'histogram':
      metric = new client.Histogram(shared);
      break;
    case 'summary':
      metric = new client.Summary(shared);
      break;
    default: {
      // Only reachable from untyped callers; fail loudly instead of storing
      // `undefined` in the metric map.
      const unsupported: never = definition.type;
      throw ErrorFactory.createGeneralError(`Unsupported metric type: ${String(unsupported)}`);
    }
  }

  promMetrics.set(definition.name, metric);

  Logger.debug(`Metric registered: ${definition.name} (${definition.type})`);
},
|
|
308
|
+
|
|
309
|
+
/**
 * Increment a counter
 *
 * Sends the increment to Prometheus (when enabled and `name` is registered)
 * and to Datadog (when enabled and a client exists). Errors raised by the
 * Prometheus call - for example when `name` refers to a metric that is not a
 * counter or `labels` contains an undeclared label - are logged instead of
 * propagating, so instrumentation cannot break the calling job and the
 * Datadog emission still happens.
 *
 * @param name - Metric name.
 * @param value - Amount to add; defaults to 1.
 * @param labels - Optional label values, sent to Datadog as `key:value` tags.
 */
incrementCounter(name: string, value = 1, labels?: Record<string, string>): void {
  // Prometheus
  const metric = config?.prometheus.enabled === true ? promMetrics.get(name) : undefined;
  if (metric !== undefined) {
    try {
      const counter = metric as Counter;
      if (labels) {
        counter.inc(labels, value);
      } else {
        counter.inc(value);
      }
    } catch (error) {
      Logger.error(`Failed to increment counter: ${name}`, error);
    }
  }

  // Datadog
  if (config?.datadog.enabled === true && datadogClient !== null) {
    const tags = labels ? Object.entries(labels).map(([k, v]) => `${k}:${v}`) : [];
    datadogClient.increment(name, value, tags);
  }
},
|
|
329
|
+
|
|
330
|
+
/**
 * Set a gauge value
 *
 * Sends the value to Prometheus (when enabled and `name` is registered) and
 * to Datadog (when enabled and a client exists). Errors raised by the
 * Prometheus call - for example when `name` refers to a metric without a
 * `set` method or `labels` contains an undeclared label - are logged instead
 * of propagating, so the Datadog emission still happens.
 *
 * @param name - Metric name.
 * @param value - New gauge value.
 * @param labels - Optional label values, sent to Datadog as `key:value` tags.
 */
setGauge(name: string, value: number, labels?: Record<string, string>): void {
  // Prometheus
  const metric = config?.prometheus.enabled === true ? promMetrics.get(name) : undefined;
  if (metric !== undefined) {
    try {
      const gauge = metric as Gauge;
      if (labels) {
        gauge.set(labels, value);
      } else {
        gauge.set(value);
      }
    } catch (error) {
      Logger.error(`Failed to set gauge: ${name}`, error);
    }
  }

  // Datadog
  if (config?.datadog.enabled === true && datadogClient !== null) {
    const tags = labels ? Object.entries(labels).map(([k, v]) => `${k}:${v}`) : [];
    datadogClient.gauge(name, value, tags);
  }
},
|
|
350
|
+
|
|
351
|
+
/**
 * Record a histogram observation
 *
 * Sends the observation to Prometheus (when enabled and `name` is
 * registered) and to Datadog (when enabled and a client exists). Errors
 * raised by the Prometheus call - for example when `name` refers to a metric
 * without an `observe` method or `labels` contains an undeclared label - are
 * logged instead of propagating, so the Datadog emission still happens.
 *
 * @param name - Metric name.
 * @param value - Observed value.
 * @param labels - Optional label values, sent to Datadog as `key:value` tags.
 */
recordHistogram(name: string, value: number, labels?: Record<string, string>): void {
  // Prometheus
  const metric = config?.prometheus.enabled === true ? promMetrics.get(name) : undefined;
  if (metric !== undefined) {
    try {
      const histogram = metric as Histogram;
      if (labels) {
        histogram.observe(labels, value);
      } else {
        histogram.observe(value);
      }
    } catch (error) {
      Logger.error(`Failed to record histogram: ${name}`, error);
    }
  }

  // Datadog
  if (config?.datadog.enabled === true && datadogClient !== null) {
    const tags = labels ? Object.entries(labels).map(([k, v]) => `${k}:${v}`) : [];
    datadogClient.histogram(name, value, tags);
  }
},
|
|
371
|
+
|
|
372
|
+
/**
 * Record timing (histogram for duration)
 *
 * Prometheus receives the duration converted to seconds; Datadog receives it
 * unchanged in milliseconds. Errors raised by the Prometheus call - for
 * example when `name` refers to a metric without an `observe` method or
 * `labels` contains an undeclared label - are logged instead of propagating,
 * so the Datadog emission still happens.
 *
 * @param name - Metric name.
 * @param durationMs - Duration in milliseconds.
 * @param labels - Optional label values, sent to Datadog as `key:value` tags.
 */
recordTiming(name: string, durationMs: number, labels?: Record<string, string>): void {
  // Prometheus (convert to seconds)
  const metric = config?.prometheus.enabled === true ? promMetrics.get(name) : undefined;
  if (metric !== undefined) {
    try {
      const histogram = metric as Histogram;
      const seconds = durationMs / 1000;
      if (labels) {
        histogram.observe(labels, seconds);
      } else {
        histogram.observe(seconds);
      }
    } catch (error) {
      Logger.error(`Failed to record timing: ${name}`, error);
    }
  }

  // Datadog (milliseconds)
  if (config?.datadog.enabled === true && datadogClient !== null) {
    const tags = labels ? Object.entries(labels).map(([k, v]) => `${k}:${v}`) : [];
    datadogClient.timing(name, durationMs, tags);
  }
},
|
|
392
|
+
|
|
393
|
+
/**
 * Start a span (OpenTelemetry)
 *
 * Starts a span on the module tracer, tracks it under a generated id and
 * returns that id for use with `endSpan`, `recordSpanError` and
 * `addSpanEvent`. When the tracked-span limit has been reached, the oldest
 * tracked span is ended first.
 *
 * NOTE(review): `options.parentSpanId` is accepted but this body never reads
 * it, so spans are not parented through it - confirm whether that is intended.
 *
 * @param name - Span name; also the prefix of the generated id.
 * @param options - Optional initial attributes (and the unused parent id).
 * @returns The generated span id, or null when tracing is disabled, no
 *          tracer exists, or starting the span failed.
 */
startSpan(
  name: string,
  options?: { attributes?: SpanAttributes; parentSpanId?: string }
): string | null {
  const tracer = otelTracer;
  const tracingOn = config?.openTelemetry.enabled === true;

  if (!tracingOn || tracer === null) {
    return null;
  }

  try {
    const initial: SpanOptions = { attributes: options?.attributes ?? {} };
    const span = tracer.startSpan(name, initial);

    // Name + timestamp + UUID make up the tracking id.
    const spanId = `${name}-${Date.now()}-${generateUuid()}`;

    // Keep the map bounded before tracking the new span.
    evictOldestSpan();
    activeSpans.set(spanId, { span, startedAt: Date.now() });

    Logger.debug(`Span started: ${name}`, { spanId });
    return spanId;
  } catch (error) {
    Logger.error('Failed to start span', error);
    return null;
  }
},
|
|
424
|
+
|
|
425
|
+
/**
 * End a span
 *
 * Applies the optional final attributes, ends the span and stops tracking
 * it. Unknown ids are ignored; failures are logged, never thrown.
 *
 * @param spanId - Id previously returned by `startSpan`.
 * @param attributes - Attributes to set on the span just before ending it.
 */
endSpan(spanId: string, attributes?: SpanAttributes): void {
  if (config?.openTelemetry.enabled === true) {
    try {
      const tracked = activeSpans.get(spanId);

      if (tracked !== undefined) {
        if (attributes) {
          tracked.span.setAttributes(attributes);
        }

        tracked.span.end();
        activeSpans.delete(spanId);

        Logger.debug(`Span ended: ${spanId}`);
      }
    } catch (error) {
      Logger.error('Failed to end span', error);
    }
  }
},
|
|
449
|
+
|
|
450
|
+
/**
 * Record an error on a span
 *
 * Records the exception on the tracked span and sets its status code to 2
 * with the error message. Unknown ids are ignored; failures are logged,
 * never thrown. The span is left open.
 *
 * @param spanId - Id previously returned by `startSpan`.
 * @param error - The error to attach to the span.
 */
recordSpanError(spanId: string, error: Error): void {
  if (config?.openTelemetry.enabled === true) {
    try {
      const tracked = activeSpans.get(spanId);

      if (tracked !== undefined) {
        const { span } = tracked;
        span.recordException(error);
        span.setStatus({ code: 2, message: error.message }); // ERROR status

        Logger.debug(`Span error recorded: ${spanId}`, { error: error.message });
      }
    } catch (err) {
      Logger.error('Failed to record span error', err);
    }
  }
},
|
|
469
|
+
|
|
470
|
+
/**
 * Add event to span
 *
 * Attaches a named event, with optional attributes, to a tracked span.
 * Unknown ids are ignored; failures are logged, never thrown.
 *
 * @param spanId - Id previously returned by `startSpan`.
 * @param name - Event name.
 * @param attributes - Optional event attributes.
 */
addSpanEvent(spanId: string, name: string, attributes?: SpanAttributes): void {
  if (config?.openTelemetry.enabled === true) {
    try {
      const tracked = activeSpans.get(spanId);

      if (tracked !== undefined) {
        tracked.span.addEvent(name, attributes);
        Logger.debug(`Span event added: ${spanId}/${name}`);
      }
    } catch (error) {
      Logger.error('Failed to add span event', error);
    }
  }
},
|
|
488
|
+
|
|
489
|
+
/**
 * Get Prometheus metrics (for HTTP endpoint)
 *
 * @returns Whatever the module registry's `metrics()` call resolves to.
 * @throws A general error when Prometheus is disabled or not initialized.
 */
async getPrometheusMetrics(): Promise<string> {
  const registry = promRegistry;

  if (registry !== null && config?.prometheus.enabled === true) {
    return registry.metrics();
  }

  throw ErrorFactory.createGeneralError('Prometheus metrics not enabled');
},
|
|
499
|
+
|
|
500
|
+
/**
 * Get Prometheus registry (for advanced usage)
 *
 * @returns The module's live registry instance, or null when none exists.
 */
getPrometheusRegistry(): Registry | null {
  return promRegistry;
},

/**
 * Get Datadog client (for advanced usage)
 *
 * @returns The module's live StatsD client, or null when none exists.
 */
getDatadogClient(): DatadogClient | null {
  return datadogClient;
},

/**
 * Get OpenTelemetry tracer (for advanced usage)
 *
 * @returns The module's tracer, or null when none exists.
 */
getTracer(): Tracer | null {
  return otelTracer;
},
|
|
520
|
+
|
|
521
|
+
/**
 * Record worker job metrics
 *
 * Forwards each field that is present to its metric, labelled with the
 * worker and job names:
 *   processed  -> counter `worker_jobs_processed_total`
 *   failed     -> counter `worker_jobs_failed_total`
 *   durationMs -> timing  `worker_job_duration_seconds`
 *   queueSize  -> gauge   `worker_queue_size`
 *
 * @param workerName - Value of the `worker` label.
 * @param jobName - Value of the `job` label.
 * @param metrics - Measurements to record; undefined fields are skipped.
 */
recordJobMetrics(
  workerName: string,
  jobName: string,
  metrics: {
    processed?: number;
    failed?: number;
    durationMs?: number;
    queueSize?: number;
  }
): void {
  const { processed, failed, durationMs, queueSize } = metrics;
  const labels = { worker: workerName, job: jobName };

  if (processed !== undefined) {
    Observability.incrementCounter('worker_jobs_processed_total', processed, labels);
  }
  if (failed !== undefined) {
    Observability.incrementCounter('worker_jobs_failed_total', failed, labels);
  }
  if (durationMs !== undefined) {
    Observability.recordTiming('worker_job_duration_seconds', durationMs, labels);
  }
  if (queueSize !== undefined) {
    Observability.setGauge('worker_queue_size', queueSize, labels);
  }
},
|
|
552
|
+
|
|
553
|
+
/**
 * Record worker resource metrics
 *
 * Sets one gauge per field that is present, labelled with the worker name:
 * `worker_cpu_usage_percent`, `worker_memory_usage_bytes` and
 * `worker_active_jobs`.
 *
 * @param workerName - Value of the `worker` label.
 * @param resources - Readings to record; undefined fields are skipped.
 */
recordResourceMetrics(
  workerName: string,
  resources: {
    cpuUsage?: number;
    memoryUsage?: number;
    activeJobs?: number;
  }
): void {
  const labels = { worker: workerName };

  // Gauge name paired with its reading, in emission order.
  const readings: Array<[string, number | undefined]> = [
    ['worker_cpu_usage_percent', resources.cpuUsage],
    ['worker_memory_usage_bytes', resources.memoryUsage],
    ['worker_active_jobs', resources.activeJobs],
  ];

  for (const [gaugeName, reading] of readings) {
    if (reading !== undefined) {
      Observability.setGauge(gaugeName, reading, labels);
    }
  }
},
|
|
578
|
+
|
|
579
|
+
/**
 * Create a traced function wrapper
 *
 * Returns a function with the same call shape as `fn` that runs `fn` inside
 * a span named `name`. The span is ended with `success: true` when `fn`
 * completes and with `success: false` (after recording the error) when it
 * throws or its returned promise rejects; the original error is always
 * re-thrown / re-rejected unchanged.
 *
 * The wrapper keeps the shape promised by the `T` return type: a synchronous
 * `fn` stays synchronous, and a promise-returning `fn` yields a promise that
 * settles after the span has been closed. The caller's `this` is forwarded.
 *
 * @param name - Span name.
 * @param fn - Function to wrap.
 * @param options - Optional initial span attributes.
 * @returns The wrapped function.
 */
traced<T extends (...args: unknown[]) => unknown>(
  name: string,
  fn: T,
  options?: { attributes?: SpanAttributes }
): T {
  // Close the span as successful (no-op when no span was started).
  const succeed = (spanId: string | null): void => {
    if (spanId !== null) {
      Observability.endSpan(spanId, { success: true });
    }
  };

  // Record the failure and close the span (no-op when no span was started).
  const fail = (spanId: string | null, thrown: unknown): void => {
    if (spanId === null) return;
    // Anything can be thrown in JavaScript; normalise for the span API.
    const asError = thrown instanceof Error ? thrown : new Error(String(thrown));
    Observability.recordSpanError(spanId, asError);
    Observability.endSpan(spanId, { success: false });
  };

  const wrapper = function (this: unknown, ...args: unknown[]): unknown {
    const spanId = Observability.startSpan(name, options);

    let outcome: unknown;
    try {
      outcome = fn.apply(this, args);
    } catch (error) {
      fail(spanId, error);
      throw error;
    }

    const isThenable =
      outcome !== null &&
      (typeof outcome === 'object' || typeof outcome === 'function') &&
      typeof (outcome as { then?: unknown }).then === 'function';

    if (!isThenable) {
      succeed(spanId);
      return outcome;
    }

    // Asynchronous result: close the span once the promise settles.
    return (outcome as PromiseLike<unknown>).then(
      (value) => {
        succeed(spanId);
        return value;
      },
      (error: unknown) => {
        fail(spanId, error);
        throw error;
      }
    );
  };

  return wrapper as T;
},
|
|
608
|
+
|
|
609
|
+
/**
 * Get configuration
 *
 * Returns a detached snapshot of the active configuration: every section
 * object, the Prometheus default labels and the Datadog tag list are copied,
 * so mutating the result cannot alter the module's live settings.
 *
 * @returns A copy of the configuration, or null when not initialized.
 */
getConfig(): ObservabilityConfig | null {
  if (config === null) {
    return null;
  }

  const { prometheus, openTelemetry, datadog } = config;

  const snapshot: ObservabilityConfig = {
    prometheus: { ...prometheus },
    openTelemetry: { ...openTelemetry },
    datadog: { ...datadog },
  };

  // Copy the nested containers too; the spreads above only copy references.
  if (prometheus.defaultLabels) {
    snapshot.prometheus.defaultLabels = { ...prometheus.defaultLabels };
  }
  if (datadog.tags) {
    snapshot.datadog.tags = [...datadog.tags];
  }

  return snapshot;
},
|
|
615
|
+
|
|
616
|
+
/**
 * Check if observability is enabled
 *
 * @returns True when a configuration is stored and at least one of the
 *          three backends is enabled in it; false otherwise.
 */
isEnabled(): boolean {
  if (config === null) {
    return false;
  }

  const { prometheus, openTelemetry, datadog } = config;
  return prometheus.enabled || openTelemetry.enabled || datadog.enabled;
},
|
|
625
|
+
|
|
626
|
+
/**
 * Clear metrics for a specific worker
 *
 * For every registered Prometheus metric, removes each label set whose
 * `worker` label equals `workerName`. Does nothing when Prometheus is
 * disabled. Failures on one metric are logged at debug level and do not
 * stop the others from being processed.
 *
 * @param workerName - Worker whose series should be removed.
 */
async clearWorkerMetrics(workerName: string): Promise<void> {
  if (config?.prometheus.enabled !== true) {
    return;
  }

  await Promise.all(
    Array.from(promMetrics.values(), async (metric) => {
      try {
        // This relies on prom-client get() method returning values with labels
        const item = await metric.get();
        const values = item.values ?? [];

        // Label sets already removed for this metric, to avoid repeat calls.
        const removed = new Set<string>();

        for (const val of values) {
          const labels = val.labels;
          if (labels?.['worker'] !== workerName) continue;

          // Histogram and summary samples carry a synthetic `le` / `quantile`
          // label that is not one of the metric's declared label names;
          // passing it to remove() is rejected by prom-client's label
          // validation, so strip it first.
          const ownLabels: Record<string, string | number> = {};
          for (const [key, value] of Object.entries(labels)) {
            if (key !== 'le' && key !== 'quantile' && value !== undefined) {
              ownLabels[key] = value;
            }
          }

          // Many samples (buckets, sum, count) share one label set.
          const signature = JSON.stringify(ownLabels);
          if (removed.has(signature)) continue;
          removed.add(signature);

          metric.remove(ownLabels);
        }
      } catch (err) {
        Logger.debug('Failed to clear worker metric labels', err as Error);
      }
    })
  );

  Logger.debug(`Cleared metrics for worker: ${workerName}`);
},
|
|
658
|
+
|
|
659
|
+
/**
 * Shutdown
 *
 * Closes the Datadog client, force-ends every tracked span, stops the
 * stale-span sweep and resets all module state (metrics map, registry,
 * cached prom-client module, tracer and configuration) to its initial
 * values.
 */
shutdown(): void {
  Logger.info('Observability shutting down...');

  // Close Datadog client
  const statsd = datadogClient;
  if (statsd !== null) {
    statsd.close(() => {
      Logger.debug('Datadog client closed');
    });
    datadogClient = null;
  }

  // End all active spans
  activeSpans.forEach((tracked, spanId) => {
    tracked.span.end();
    Logger.debug(`Span force-ended: ${spanId}`);
  });
  activeSpans.clear();

  // Stop the periodic sweep, if one is running.
  if (spanSweepInterval) {
    clearInterval(spanSweepInterval);
    spanSweepInterval = null;
  }

  // Clear metrics and drop every remaining reference.
  promMetrics.clear();
  promRegistry = null;
  promClient = null;
  otelTracer = null;
  config = null;

  Logger.info('Observability shutdown complete');
},
|
|
694
|
+
});
|
|
695
|
+
|
|
696
|
+
// Graceful shutdown handled by WorkerShutdown
|