@zintrust/workers 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. package/README.md +861 -0
  2. package/dist/AnomalyDetection.d.ts +102 -0
  3. package/dist/AnomalyDetection.js +321 -0
  4. package/dist/AutoScaler.d.ts +127 -0
  5. package/dist/AutoScaler.js +425 -0
  6. package/dist/BroadcastWorker.d.ts +21 -0
  7. package/dist/BroadcastWorker.js +24 -0
  8. package/dist/CanaryController.d.ts +103 -0
  9. package/dist/CanaryController.js +380 -0
  10. package/dist/ChaosEngineering.d.ts +79 -0
  11. package/dist/ChaosEngineering.js +216 -0
  12. package/dist/CircuitBreaker.d.ts +106 -0
  13. package/dist/CircuitBreaker.js +374 -0
  14. package/dist/ClusterLock.d.ts +90 -0
  15. package/dist/ClusterLock.js +385 -0
  16. package/dist/ComplianceManager.d.ts +177 -0
  17. package/dist/ComplianceManager.js +556 -0
  18. package/dist/DatacenterOrchestrator.d.ts +133 -0
  19. package/dist/DatacenterOrchestrator.js +404 -0
  20. package/dist/DeadLetterQueue.d.ts +122 -0
  21. package/dist/DeadLetterQueue.js +539 -0
  22. package/dist/HealthMonitor.d.ts +42 -0
  23. package/dist/HealthMonitor.js +301 -0
  24. package/dist/MultiQueueWorker.d.ts +89 -0
  25. package/dist/MultiQueueWorker.js +277 -0
  26. package/dist/NotificationWorker.d.ts +21 -0
  27. package/dist/NotificationWorker.js +23 -0
  28. package/dist/Observability.d.ts +153 -0
  29. package/dist/Observability.js +530 -0
  30. package/dist/PluginManager.d.ts +123 -0
  31. package/dist/PluginManager.js +392 -0
  32. package/dist/PriorityQueue.d.ts +117 -0
  33. package/dist/PriorityQueue.js +244 -0
  34. package/dist/ResourceMonitor.d.ts +164 -0
  35. package/dist/ResourceMonitor.js +605 -0
  36. package/dist/SLAMonitor.d.ts +110 -0
  37. package/dist/SLAMonitor.js +274 -0
  38. package/dist/WorkerFactory.d.ts +193 -0
  39. package/dist/WorkerFactory.js +1507 -0
  40. package/dist/WorkerInit.d.ts +85 -0
  41. package/dist/WorkerInit.js +223 -0
  42. package/dist/WorkerMetrics.d.ts +114 -0
  43. package/dist/WorkerMetrics.js +509 -0
  44. package/dist/WorkerRegistry.d.ts +145 -0
  45. package/dist/WorkerRegistry.js +319 -0
  46. package/dist/WorkerShutdown.d.ts +61 -0
  47. package/dist/WorkerShutdown.js +159 -0
  48. package/dist/WorkerVersioning.d.ts +107 -0
  49. package/dist/WorkerVersioning.js +300 -0
  50. package/dist/build-manifest.json +462 -0
  51. package/dist/config/workerConfig.d.ts +3 -0
  52. package/dist/config/workerConfig.js +19 -0
  53. package/dist/createQueueWorker.d.ts +23 -0
  54. package/dist/createQueueWorker.js +113 -0
  55. package/dist/dashboard/index.d.ts +1 -0
  56. package/dist/dashboard/index.js +1 -0
  57. package/dist/dashboard/types.d.ts +117 -0
  58. package/dist/dashboard/types.js +1 -0
  59. package/dist/dashboard/workers-api.d.ts +4 -0
  60. package/dist/dashboard/workers-api.js +638 -0
  61. package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
  62. package/dist/dashboard/workers-dashboard-ui.js +1026 -0
  63. package/dist/dashboard/workers-dashboard.d.ts +4 -0
  64. package/dist/dashboard/workers-dashboard.js +904 -0
  65. package/dist/helper/index.d.ts +5 -0
  66. package/dist/helper/index.js +10 -0
  67. package/dist/http/WorkerApiController.d.ts +38 -0
  68. package/dist/http/WorkerApiController.js +312 -0
  69. package/dist/http/WorkerController.d.ts +374 -0
  70. package/dist/http/WorkerController.js +1351 -0
  71. package/dist/http/middleware/CustomValidation.d.ts +92 -0
  72. package/dist/http/middleware/CustomValidation.js +270 -0
  73. package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
  74. package/dist/http/middleware/DatacenterValidator.js +94 -0
  75. package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
  76. package/dist/http/middleware/EditWorkerValidation.js +55 -0
  77. package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
  78. package/dist/http/middleware/FeaturesValidator.js +60 -0
  79. package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
  80. package/dist/http/middleware/InfrastructureValidator.js +226 -0
  81. package/dist/http/middleware/OptionsValidator.d.ts +3 -0
  82. package/dist/http/middleware/OptionsValidator.js +112 -0
  83. package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
  84. package/dist/http/middleware/PayloadSanitizer.js +42 -0
  85. package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
  86. package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
  87. package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
  88. package/dist/http/middleware/QueueNameSanitizer.js +45 -0
  89. package/dist/http/middleware/ValidateDriver.d.ts +7 -0
  90. package/dist/http/middleware/ValidateDriver.js +20 -0
  91. package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
  92. package/dist/http/middleware/VersionSanitizer.js +25 -0
  93. package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
  94. package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
  95. package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
  96. package/dist/http/middleware/WorkerValidationChain.js +185 -0
  97. package/dist/index.d.ts +46 -0
  98. package/dist/index.js +48 -0
  99. package/dist/routes/workers.d.ts +12 -0
  100. package/dist/routes/workers.js +81 -0
  101. package/dist/storage/WorkerStore.d.ts +45 -0
  102. package/dist/storage/WorkerStore.js +195 -0
  103. package/dist/type.d.ts +76 -0
  104. package/dist/type.js +1 -0
  105. package/dist/ui/router/ui.d.ts +3 -0
  106. package/dist/ui/router/ui.js +83 -0
  107. package/dist/ui/types/worker-ui.d.ts +229 -0
  108. package/dist/ui/types/worker-ui.js +5 -0
  109. package/package.json +53 -0
  110. package/src/AnomalyDetection.ts +434 -0
  111. package/src/AutoScaler.ts +654 -0
  112. package/src/BroadcastWorker.ts +34 -0
  113. package/src/CanaryController.ts +531 -0
  114. package/src/ChaosEngineering.ts +301 -0
  115. package/src/CircuitBreaker.ts +495 -0
  116. package/src/ClusterLock.ts +499 -0
  117. package/src/ComplianceManager.ts +815 -0
  118. package/src/DatacenterOrchestrator.ts +561 -0
  119. package/src/DeadLetterQueue.ts +733 -0
  120. package/src/HealthMonitor.ts +390 -0
  121. package/src/MultiQueueWorker.ts +431 -0
  122. package/src/NotificationWorker.ts +33 -0
  123. package/src/Observability.ts +696 -0
  124. package/src/PluginManager.ts +551 -0
  125. package/src/PriorityQueue.ts +351 -0
  126. package/src/ResourceMonitor.ts +769 -0
  127. package/src/SLAMonitor.ts +408 -0
  128. package/src/WorkerFactory.ts +2108 -0
  129. package/src/WorkerInit.ts +313 -0
  130. package/src/WorkerMetrics.ts +709 -0
  131. package/src/WorkerRegistry.ts +443 -0
  132. package/src/WorkerShutdown.ts +210 -0
  133. package/src/WorkerVersioning.ts +422 -0
  134. package/src/config/workerConfig.ts +25 -0
  135. package/src/createQueueWorker.ts +174 -0
  136. package/src/dashboard/index.ts +6 -0
  137. package/src/dashboard/types.ts +141 -0
  138. package/src/dashboard/workers-api.ts +785 -0
  139. package/src/dashboard/zintrust.svg +30 -0
  140. package/src/helper/index.ts +11 -0
  141. package/src/http/WorkerApiController.ts +369 -0
  142. package/src/http/WorkerController.ts +1512 -0
  143. package/src/http/middleware/CustomValidation.ts +360 -0
  144. package/src/http/middleware/DatacenterValidator.ts +124 -0
  145. package/src/http/middleware/EditWorkerValidation.ts +74 -0
  146. package/src/http/middleware/FeaturesValidator.ts +82 -0
  147. package/src/http/middleware/InfrastructureValidator.ts +295 -0
  148. package/src/http/middleware/OptionsValidator.ts +144 -0
  149. package/src/http/middleware/PayloadSanitizer.ts +52 -0
  150. package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
  151. package/src/http/middleware/QueueNameSanitizer.ts +55 -0
  152. package/src/http/middleware/ValidateDriver.ts +29 -0
  153. package/src/http/middleware/VersionSanitizer.ts +30 -0
  154. package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
  155. package/src/http/middleware/WorkerValidationChain.ts +230 -0
  156. package/src/index.ts +98 -0
  157. package/src/routes/workers.ts +154 -0
  158. package/src/storage/WorkerStore.ts +240 -0
  159. package/src/type.ts +89 -0
  160. package/src/types/queue-monitor.d.ts +38 -0
  161. package/src/types/queue-redis.d.ts +38 -0
  162. package/src/ui/README.md +13 -0
  163. package/src/ui/components/JsonEditor.js +670 -0
  164. package/src/ui/components/JsonViewer.js +387 -0
  165. package/src/ui/components/WorkerCard.js +178 -0
  166. package/src/ui/components/WorkerExpandPanel.js +257 -0
  167. package/src/ui/components/fetcher.js +42 -0
  168. package/src/ui/components/sla-scorecard.js +32 -0
  169. package/src/ui/components/styles.css +30 -0
  170. package/src/ui/components/table-expander.js +34 -0
  171. package/src/ui/integration/worker-ui-integration.js +565 -0
  172. package/src/ui/router/ui.ts +99 -0
  173. package/src/ui/services/workerApi.js +240 -0
  174. package/src/ui/types/worker-ui.ts +283 -0
  175. package/src/ui/utils/jsonValidator.js +444 -0
  176. package/src/ui/workers/index.html +202 -0
  177. package/src/ui/workers/main.js +1781 -0
  178. package/src/ui/workers/styles.css +1350 -0
@@ -0,0 +1,696 @@
1
+ /**
2
+ * Observability Manager
3
+ * Integrations for Prometheus, OpenTelemetry, and Datadog
4
+ * Sealed namespace for immutability
5
+ */
6
+
7
+ import type { Span, SpanOptions, Tracer } from '@opentelemetry/api';
8
+ import { ErrorFactory, Logger, generateUuid } from '@zintrust/core';
9
+ import type { Counter, Gauge, Histogram, Registry, Summary } from 'prom-client';
10
+
/**
 * Configuration accepted by `Observability.initialize`.
 * Each integration is switched on or off independently through its `enabled` flag.
 */
export type ObservabilityConfig = {
  /** Prometheus metrics settings. */
  prometheus: {
    /** When false, Prometheus initialisation is skipped. */
    enabled: boolean;
    /** NOTE(review): only written to the startup log by this module — confirm who binds the port. */
    port?: number;
    /** Metrics path; reported as '/metrics' in the startup log when omitted. */
    path?: string;
    /** Labels handed to the registry through `setDefaultLabels`. */
    defaultLabels?: Record<string, string>;
  };
  /** OpenTelemetry tracing settings. */
  openTelemetry: {
    /** When false, no tracer is obtained and the span helpers become no-ops. */
    enabled: boolean;
    /** Name passed to `trace.getTracer`. */
    serviceName: string;
    /** NOTE(review): not read anywhere in this module — confirm where the exporter is wired. */
    exporterUrl?: string;
    /** 0-1, percentage of traces to sample. NOTE(review): only logged here, not enforced. */
    sampleRate?: number;
  };
  /** Datadog (StatsD) settings. */
  datadog: {
    /** When false, no StatsD client is created. */
    enabled: boolean;
    /** StatsD host; 'localhost' when omitted. */
    host?: string;
    /** StatsD port; 8125 when omitted. */
    port?: number;
    /** Metric name prefix; 'worker.' when omitted. */
    prefix?: string;
    /** Global tags attached by the client; empty when omitted. */
    tags?: string[];
  };
};
32
+
/** Kinds of Prometheus metric that `Observability.registerMetric` knows how to create. */
export type MetricType = 'counter' | 'gauge' | 'histogram' | 'summary';
34
+
/** Description of a metric to create through `Observability.registerMetric`. */
export type MetricDefinition = {
  /** Metric name; also the key used to look the metric up when recording values. */
  name: string;
  /** Which kind of Prometheus metric to construct. */
  type: MetricType;
  /** Help text passed to the Prometheus metric. */
  help: string;
  /** Label names the metric is declared with; none when omitted. */
  labels?: string[];
};
41
+
/**
 * Identifiers locating a span within a trace.
 * NOTE(review): exported but not referenced elsewhere in this module — presumably consumed by callers.
 */
export type TraceContext = {
  traceId: string;
  spanId: string;
  /** Present when the span has a parent. */
  parentSpanId?: string;
  /** Free-form key/value pairs carried with the context. */
  baggage?: Record<string, string>;
};
48
+
/** Attribute bag accepted by the span helpers (`startSpan`, `endSpan`, `addSpanEvent`). */
export type SpanAttributes = Record<string, string | number | boolean>;
50
+
// Internal state
// Configuration passed to initialize(); null before initialisation and after shutdown().
let config: ObservabilityConfig | null = null;

// Prometheus state
// Lazily imported `prom-client` module, cached after the first load.
let promClient: typeof import('prom-client') | null = null;
// Registry that every metric created by this module is registered against.
let promRegistry: Registry | null = null;
// Metrics created through registerMetric(), keyed by metric name.
const promMetrics: Map<string, Counter | Gauge | Histogram | Summary> = new Map();

// OpenTelemetry state
let otelTracer: Tracer | null = null;
// Spans started through startSpan() that have not been ended yet, keyed by the generated span id.
const activeSpans: Map<string, { span: Span; startedAt: number }> = new Map();
// Handle of the periodic stale-span sweep; null while no sweep is scheduled.
let spanSweepInterval: NodeJS.Timeout | null = null;
63
+
// Upper bound on concurrently tracked spans; the oldest span is evicted once it is reached.
const MAX_ACTIVE_SPANS = 1000;
// Maximum age of a tracked span (5 minutes), also used as the sweep period.
const SPAN_TTL_MS = 5 * 60 * 1000;
66
+
/**
 * Helper: end and forget every tracked span that has been open longer than SPAN_TTL_MS.
 * Invoked from a timer callback, so a failing span must not throw out of this function.
 */
const cleanupStaleSpans = (): void => {
  const now = Date.now();

  for (const [spanId, entry] of activeSpans.entries()) {
    if (now - entry.startedAt <= SPAN_TTL_MS) continue;

    // Forget the span first so a throwing end() cannot keep it in the map forever.
    activeSpans.delete(spanId);
    try {
      entry.span.end();
    } catch (error) {
      Logger.error('Failed to end stale span', error);
    }
  }
};
76
+
/**
 * Helper: make room for one more span by ending and forgetting the oldest tracked
 * span once the map holds MAX_ACTIVE_SPANS entries. Does nothing below the limit.
 */
const evictOldestSpan = (): void => {
  if (activeSpans.size < MAX_ACTIVE_SPANS) return;

  // A Map iterates in insertion order and span ids are unique, so the first entry
  // is the longest-lived one; no scan over every entry is needed.
  const oldest = activeSpans.entries().next();
  if (oldest.done === true) return;

  const [oldestId, entry] = oldest.value;

  // Remove before ending so a throwing end() cannot wedge the map at capacity.
  activeSpans.delete(oldestId);
  try {
    entry.span.end();
  } catch (error) {
    Logger.error('Failed to end evicted span', error);
  }
};
98
+
/**
 * Minimal surface of the StatsD client that this module relies on.
 * The concrete implementation is loaded from `hot-shots` at initialisation time.
 */
type DatadogClient = {
  increment: (name: string, value?: number, tags?: string[]) => void;
  gauge: (name: string, value: number, tags?: string[]) => void;
  histogram: (name: string, value: number, tags?: string[]) => void;
  timing: (name: string, value: number, tags?: string[]) => void;
  /** Closes the client; the optional callback is handed to the client's `close`. */
  close: (callback?: () => void) => void;
};
106
+
/** Constructor signature expected from the dynamically imported StatsD class. */
type DatadogClientConstructor = new (options: {
  host?: string;
  port?: number;
  prefix?: string;
  globalTags?: string[];
}) => DatadogClient;
113
+
// Datadog state
// StatsD client created during initialisation; null while Datadog is disabled, unavailable, or shut down.
let datadogClient: DatadogClient | null = null;
116
+
/**
 * Helper: Lazy load Prometheus client.
 * The module is imported on first use and cached in `promClient` for later calls.
 *
 * @returns the `prom-client` module namespace
 */
const getPrometheusClient = async (): Promise<typeof import('prom-client')> => {
  if (promClient === null) {
    promClient = await import('prom-client');
  }
  return promClient;
};
124
+
/**
 * Helper: Lazy load OpenTelemetry API.
 * Unlike the Prometheus helper, no module-level cache is kept here; each call
 * performs the dynamic import again.
 *
 * @returns the `@opentelemetry/api` module namespace
 */
const getOpenTelemetryApi = async (): Promise<typeof import('@opentelemetry/api')> => {
  return import('@opentelemetry/api');
};
131
+
/**
 * Helper: Initialize Prometheus.
 * Creates a dedicated registry, applies the configured default labels and enables
 * prom-client's default metrics on it. Does nothing when Prometheus is disabled.
 *
 * @param promConfig - the `prometheus` section of the observability configuration
 * @throws rethrows any initialisation error after logging it
 */
const initPrometheus = async (promConfig: ObservabilityConfig['prometheus']): Promise<void> => {
  if (!promConfig.enabled) return;

  try {
    const client = await getPrometheusClient();

    // Configure the registry locally and publish it only once it is complete, so a
    // failure part-way through never leaves a half-initialised registry in module state.
    const registry = new client.Registry();

    // Set default labels if provided
    if (promConfig.defaultLabels) {
      registry.setDefaultLabels(promConfig.defaultLabels);
    }

    // Enable default metrics (process, Node.js metrics)
    client.collectDefaultMetrics({ register: registry });

    promRegistry = registry;

    Logger.info('Prometheus metrics initialized', {
      port: promConfig.port,
      path: promConfig.path ?? '/metrics',
    });
  } catch (error) {
    Logger.error('Failed to initialize Prometheus', error);
    throw error;
  }
};
159
+
/**
 * Helper: Initialize OpenTelemetry.
 * Obtains a tracer named after the configured service. Failures are logged and
 * swallowed so the application can keep running without tracing.
 *
 * @param otelConfig - the `openTelemetry` section of the observability configuration
 */
const initOpenTelemetry = async (
  otelConfig: ObservabilityConfig['openTelemetry']
): Promise<void> => {
  if (!otelConfig.enabled) return;

  try {
    const api = await getOpenTelemetryApi();
    otelTracer = api.trace.getTracer(otelConfig.serviceName);

    // sampleRate is reported here only; this function does not configure a sampler.
    Logger.info('OpenTelemetry tracing initialized', {
      serviceName: otelConfig.serviceName,
      sampleRate: otelConfig.sampleRate ?? 1,
    });
  } catch (error) {
    Logger.error('Failed to initialize OpenTelemetry', error);
    // Don't throw - allow app to continue without tracing
  }
};
181
+
/**
 * Helper: Initialize Datadog.
 * Dynamically imports `hot-shots` and creates a StatsD client from its `StatsD`
 * export. Failures (including a missing export) are logged and swallowed so the
 * application can keep running without Datadog.
 *
 * @param ddConfig - the `datadog` section of the observability configuration
 */
const initDatadog = async (ddConfig: ObservabilityConfig['datadog']): Promise<void> => {
  if (!ddConfig.enabled) return;

  try {
    const module = (await import('hot-shots')) as unknown as { StatsD?: DatadogClientConstructor };
    const StatsDClass = module.StatsD;

    if (!StatsDClass) {
      Logger.warn('Datadog StatsD client unavailable');
      return;
    }

    // Resolve the defaults once so the client and the log line always agree.
    const host = ddConfig.host ?? 'localhost';
    const port = ddConfig.port ?? 8125;

    datadogClient = new StatsDClass({
      host,
      port,
      prefix: ddConfig.prefix ?? 'worker.',
      globalTags: ddConfig.tags ?? [],
    });

    Logger.info('Datadog StatsD initialized', { host, port });
  } catch (error) {
    Logger.error('Failed to initialize Datadog', error);
    // Don't throw - allow app to continue without Datadog
  }
};
213
+
/**
 * Helper: convert a label map into Datadog `key:value` tags (empty array when no labels are given).
 */
const toDatadogTags = (labels?: Record<string, string>): string[] =>
  labels ? Object.entries(labels).map(([key, value]) => `${key}:${value}`) : [];

/**
 * Helper: coerce an arbitrary thrown value into an Error so span error recording
 * never reads `.message` off a non-Error value.
 */
const toError = (thrown: unknown): Error =>
  thrown instanceof Error
    ? thrown
    : new Error(typeof thrown === 'string' ? thrown : 'Non-Error value thrown');

/**
 * Observability Manager - Sealed namespace
 *
 * Fans metric and span operations out to whichever of Prometheus, OpenTelemetry
 * and Datadog were enabled in the configuration passed to `initialize`.
 */
export const Observability = Object.freeze({
  /**
   * Initialize observability with configuration.
   * A second call while already initialized only logs a warning.
   *
   * @throws rethrows the first integration failure after rolling the module back
   *         to its uninitialized state, so `initialize` can be retried
   */
  async initialize(observabilityConfig: ObservabilityConfig): Promise<void> {
    if (config) {
      Logger.warn('Observability already initialized');
      return;
    }

    config = observabilityConfig;

    // Wait for every integration to settle so none is still mid-initialisation
    // when we decide whether to roll back.
    const outcomes = await Promise.allSettled([
      initPrometheus(observabilityConfig.prometheus),
      initOpenTelemetry(observabilityConfig.openTelemetry),
      initDatadog(observabilityConfig.datadog),
    ]);

    const failed = outcomes.find(
      (outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected'
    );
    if (failed !== undefined) {
      // Without this reset `config` would stay set and every retry would be
      // rejected as "already initialized" while the module is only half set up.
      Observability.shutdown();
      throw failed.reason;
    }

    if (observabilityConfig.openTelemetry.enabled === true && spanSweepInterval === null) {
      spanSweepInterval = setInterval(() => {
        cleanupStaleSpans();
      }, SPAN_TTL_MS);
      // The sweep is housekeeping only; it must not keep the process alive on its own.
      spanSweepInterval.unref?.();
    }

    Logger.info('Observability initialized', {
      prometheus: observabilityConfig.prometheus.enabled,
      openTelemetry: observabilityConfig.openTelemetry.enabled,
      datadog: observabilityConfig.datadog.enabled,
    });
  },

  /**
   * Register a metric with the Prometheus registry.
   * No-op when Prometheus is disabled or the name is already registered.
   *
   * @throws when `definition.type` is not a supported metric type
   */
  async registerMetric(definition: MetricDefinition): Promise<void> {
    if (config?.prometheus.enabled !== true || !promRegistry) {
      return;
    }

    if (promMetrics.has(definition.name)) {
      Logger.debug(`Metric already registered: ${definition.name}`);
      return;
    }

    const client = await getPrometheusClient();

    // Re-validate after the await: a concurrent call may have registered the same
    // name in the meantime (prom-client throws on duplicates), or shutdown() may
    // have dropped the registry.
    const registry = promRegistry;
    if (registry === null) {
      return;
    }
    if (promMetrics.has(definition.name)) {
      Logger.debug(`Metric already registered: ${definition.name}`);
      return;
    }

    const options = {
      name: definition.name,
      help: definition.help,
      labelNames: definition.labels ?? [],
      registers: [registry],
    };

    let metric: Counter | Gauge | Histogram | Summary;

    switch (definition.type) {
      case 'counter':
        metric = new client.Counter(options);
        break;

      case 'gauge':
        metric = new client.Gauge(options);
        break;

      case 'histogram':
        metric = new client.Histogram(options);
        break;

      case 'summary':
        metric = new client.Summary(options);
        break;

      default: {
        // Unreachable for typed callers; guards untyped callers from storing `undefined`.
        const unsupported: never = definition.type;
        throw ErrorFactory.createGeneralError(`Unsupported metric type: ${String(unsupported)}`);
      }
    }

    promMetrics.set(definition.name, metric);

    Logger.debug(`Metric registered: ${definition.name} (${definition.type})`);
  },

  /**
   * Increment a counter.
   * Prometheus is updated only when a metric with this name was registered;
   * Datadog receives the increment whenever its client is available.
   */
  incrementCounter(name: string, value = 1, labels?: Record<string, string>): void {
    // Prometheus
    if (config?.prometheus.enabled === true) {
      const counter = promMetrics.get(name) as Counter | undefined;
      if (counter !== undefined) {
        if (labels) {
          counter.inc(labels, value);
        } else {
          counter.inc(value);
        }
      }
    }

    // Datadog
    if (config?.datadog.enabled === true && datadogClient !== null) {
      datadogClient.increment(name, value, toDatadogTags(labels));
    }
  },

  /**
   * Set a gauge value.
   */
  setGauge(name: string, value: number, labels?: Record<string, string>): void {
    // Prometheus
    if (config?.prometheus.enabled === true) {
      const gauge = promMetrics.get(name) as Gauge | undefined;
      if (gauge !== undefined) {
        if (labels) {
          gauge.set(labels, value);
        } else {
          gauge.set(value);
        }
      }
    }

    // Datadog
    if (config?.datadog.enabled === true && datadogClient !== null) {
      datadogClient.gauge(name, value, toDatadogTags(labels));
    }
  },

  /**
   * Record a histogram observation.
   */
  recordHistogram(name: string, value: number, labels?: Record<string, string>): void {
    // Prometheus
    if (config?.prometheus.enabled === true) {
      const histogram = promMetrics.get(name) as Histogram | undefined;
      if (histogram !== undefined) {
        if (labels) {
          histogram.observe(labels, value);
        } else {
          histogram.observe(value);
        }
      }
    }

    // Datadog
    if (config?.datadog.enabled === true && datadogClient !== null) {
      datadogClient.histogram(name, value, toDatadogTags(labels));
    }
  },

  /**
   * Record timing (histogram for duration).
   * Prometheus receives the duration in seconds, Datadog in milliseconds.
   */
  recordTiming(name: string, durationMs: number, labels?: Record<string, string>): void {
    // Prometheus (convert to seconds)
    if (config?.prometheus.enabled === true) {
      const histogram = promMetrics.get(name) as Histogram | undefined;
      if (histogram !== undefined) {
        const seconds = durationMs / 1000;
        if (labels) {
          histogram.observe(labels, seconds);
        } else {
          histogram.observe(seconds);
        }
      }
    }

    // Datadog (milliseconds)
    if (config?.datadog.enabled === true && datadogClient !== null) {
      datadogClient.timing(name, durationMs, toDatadogTags(labels));
    }
  },

  /**
   * Start a span (OpenTelemetry).
   *
   * NOTE(review): `options.parentSpanId` is accepted but is not currently used to
   * parent the new span.
   *
   * @returns an opaque id for the other span helpers, or null when tracing is
   *          disabled or the span could not be started
   */
  startSpan(
    name: string,
    options?: { attributes?: SpanAttributes; parentSpanId?: string }
  ): string | null {
    if (config?.openTelemetry.enabled !== true || otelTracer === null) {
      return null;
    }

    try {
      const spanOptions: SpanOptions = {
        attributes: options?.attributes ?? {},
      };

      const span = otelTracer.startSpan(name, spanOptions);

      const startedAt = Date.now();
      const spanId = `${name}-${startedAt}-${generateUuid()}`;

      // Keep the map bounded before tracking the new span.
      evictOldestSpan();
      activeSpans.set(spanId, { span, startedAt });

      Logger.debug(`Span started: ${name}`, { spanId });

      return spanId;
    } catch (error) {
      Logger.error('Failed to start span', error);
      return null;
    }
  },

  /**
   * End a span, optionally attaching final attributes first.
   * Unknown span ids are ignored.
   */
  endSpan(spanId: string, attributes?: SpanAttributes): void {
    if (config?.openTelemetry.enabled !== true) {
      return;
    }

    try {
      const entry = activeSpans.get(spanId);
      if (!entry) return;

      if (attributes) {
        entry.span.setAttributes(attributes);
      }

      entry.span.end();
      activeSpans.delete(spanId);

      Logger.debug(`Span ended: ${spanId}`);
    } catch (error) {
      Logger.error('Failed to end span', error);
    }
  },

  /**
   * Record an error on a span and mark the span as failed.
   * Unknown span ids are ignored.
   */
  recordSpanError(spanId: string, error: Error): void {
    if (config?.openTelemetry.enabled !== true) {
      return;
    }

    try {
      const entry = activeSpans.get(spanId);
      if (!entry) return;

      entry.span.recordException(error);
      entry.span.setStatus({ code: 2, message: error.message }); // ERROR status

      Logger.debug(`Span error recorded: ${spanId}`, { error: error.message });
    } catch (err) {
      Logger.error('Failed to record span error', err);
    }
  },

  /**
   * Add event to span.
   * Unknown span ids are ignored.
   */
  addSpanEvent(spanId: string, name: string, attributes?: SpanAttributes): void {
    if (config?.openTelemetry.enabled !== true) {
      return;
    }

    try {
      const entry = activeSpans.get(spanId);
      if (!entry) return;

      entry.span.addEvent(name, attributes);

      Logger.debug(`Span event added: ${spanId}/${name}`);
    } catch (error) {
      Logger.error('Failed to add span event', error);
    }
  },

  /**
   * Get Prometheus metrics (for HTTP endpoint).
   *
   * @throws when Prometheus is disabled or not initialized
   */
  async getPrometheusMetrics(): Promise<string> {
    if (config?.prometheus.enabled !== true || !promRegistry) {
      throw ErrorFactory.createGeneralError('Prometheus metrics not enabled');
    }

    return promRegistry.metrics();
  },

  /**
   * Get Prometheus registry (for advanced usage).
   */
  getPrometheusRegistry(): Registry | null {
    return promRegistry;
  },

  /**
   * Get Datadog client (for advanced usage).
   */
  getDatadogClient(): DatadogClient | null {
    return datadogClient;
  },

  /**
   * Get OpenTelemetry tracer (for advanced usage).
   */
  getTracer(): Tracer | null {
    return otelTracer;
  },

  /**
   * Record worker job metrics.
   * Only the fields present in `metrics` are recorded, each labelled with the
   * worker and job name.
   */
  recordJobMetrics(
    workerName: string,
    jobName: string,
    metrics: {
      processed?: number;
      failed?: number;
      durationMs?: number;
      queueSize?: number;
    }
  ): void {
    const labels = { worker: workerName, job: jobName };
    const { processed, failed, durationMs, queueSize } = metrics;

    if (processed !== undefined) {
      Observability.incrementCounter('worker_jobs_processed_total', processed, labels);
    }

    if (failed !== undefined) {
      Observability.incrementCounter('worker_jobs_failed_total', failed, labels);
    }

    if (durationMs !== undefined) {
      Observability.recordTiming('worker_job_duration_seconds', durationMs, labels);
    }

    if (queueSize !== undefined) {
      Observability.setGauge('worker_queue_size', queueSize, labels);
    }
  },

  /**
   * Record worker resource metrics.
   * Only the fields present in `resources` are recorded, each labelled with the worker name.
   */
  recordResourceMetrics(
    workerName: string,
    resources: {
      cpuUsage?: number;
      memoryUsage?: number;
      activeJobs?: number;
    }
  ): void {
    const labels = { worker: workerName };
    const { cpuUsage, memoryUsage, activeJobs } = resources;

    if (cpuUsage !== undefined) {
      Observability.setGauge('worker_cpu_usage_percent', cpuUsage, labels);
    }

    if (memoryUsage !== undefined) {
      Observability.setGauge('worker_memory_usage_bytes', memoryUsage, labels);
    }

    if (activeJobs !== undefined) {
      Observability.setGauge('worker_active_jobs', activeJobs, labels);
    }
  },

  /**
   * Create a traced function wrapper.
   * The wrapper opens a span around each call, tags it with `success`, records
   * any thrown error on the span and rethrows the original value. The wrapper
   * is always asynchronous, even when `fn` is synchronous.
   */
  traced<T extends (...args: unknown[]) => unknown>(
    name: string,
    fn: T,
    options?: { attributes?: SpanAttributes }
  ): T {
    return (async (...args: Parameters<T>): Promise<ReturnType<T>> => {
      const spanId = Observability.startSpan(name, options);

      try {
        const result = await fn(...args);

        if (spanId !== null) {
          Observability.endSpan(spanId, { success: true });
        }

        return result as ReturnType<T>;
      } catch (error) {
        if (spanId !== null) {
          // Anything can be thrown in JavaScript; normalise before recording.
          Observability.recordSpanError(spanId, toError(error));
          Observability.endSpan(spanId, { success: false });
        }

        throw error;
      }
    }) as T;
  },

  /**
   * Get configuration.
   * Returns a copy whose nested sections are copied as well, so callers cannot
   * mutate the live configuration through the returned object.
   */
  getConfig(): ObservabilityConfig | null {
    if (config === null) return null;

    const { prometheus, openTelemetry, datadog } = config;

    return {
      prometheus: {
        ...prometheus,
        ...(prometheus.defaultLabels ? { defaultLabels: { ...prometheus.defaultLabels } } : {}),
      },
      openTelemetry: { ...openTelemetry },
      datadog: {
        ...datadog,
        ...(datadog.tags ? { tags: [...datadog.tags] } : {}),
      },
    };
  },

  /**
   * Check if observability is enabled (initialized with at least one integration on).
   */
  isEnabled(): boolean {
    if (config === null) return false;

    return config.prometheus.enabled || config.openTelemetry.enabled || config.datadog.enabled;
  },

  /**
   * Clear metrics for a specific worker.
   * Removes every series whose `worker` label equals `workerName` from each
   * registered Prometheus metric. Failures are logged per metric and do not
   * stop the remaining metrics from being cleared.
   */
  async clearWorkerMetrics(workerName: string): Promise<void> {
    if (config?.prometheus.enabled !== true) {
      return;
    }

    await Promise.all(
      Array.from(promMetrics.values(), async (metric) => {
        try {
          // This relies on prom-client get() method returning values with labels
          const snapshot = await metric.get();
          const alreadyRemoved = new Set<string>();

          for (const sample of snapshot.values ?? []) {
            const labels = sample.labels;
            if (labels?.['worker'] !== workerName) continue;

            // Histogram and summary samples carry synthetic `le` / `quantile`
            // labels that are not part of the metric's declared label names;
            // remove() rejects them, so strip them to address the series itself.
            const seriesLabels = Object.fromEntries(
              Object.entries(labels).filter(([key]) => key !== 'le' && key !== 'quantile')
            );

            // Many samples (buckets, sum, count) map to the same series; remove it once.
            const seriesKey = JSON.stringify(seriesLabels);
            if (alreadyRemoved.has(seriesKey)) continue;
            alreadyRemoved.add(seriesKey);

            metric.remove(seriesLabels);
          }
        } catch (err) {
          Logger.debug('Failed to clear worker metric labels', err as Error);
        }
      })
    );

    Logger.debug(`Cleared metrics for worker: ${workerName}`);
  },

  /**
   * Shutdown.
   * Closes the Datadog client, force-ends every tracked span, stops the sweep
   * timer and resets all module state so `initialize` can be called again.
   */
  shutdown(): void {
    Logger.info('Observability shutting down...');

    // Close Datadog client
    if (datadogClient !== null) {
      datadogClient.close(() => {
        Logger.debug('Datadog client closed');
      });
      datadogClient = null;
    }

    // End all active spans; one failing span must not abort the rest of the teardown.
    for (const [spanId, entry] of activeSpans.entries()) {
      try {
        entry.span.end();
        Logger.debug(`Span force-ended: ${spanId}`);
      } catch (error) {
        Logger.error('Failed to end span', error);
      }
    }
    activeSpans.clear();

    if (spanSweepInterval) {
      clearInterval(spanSweepInterval);
      spanSweepInterval = null;
    }

    // Clear metrics
    promMetrics.clear();
    promRegistry = null;
    promClient = null;
    otelTracer = null;
    config = null;

    Logger.info('Observability shutdown complete');
  },
});
695
+
696
+ // Graceful shutdown handled by WorkerShutdown