@zintrust/workers 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +861 -0
- package/dist/AnomalyDetection.d.ts +102 -0
- package/dist/AnomalyDetection.js +321 -0
- package/dist/AutoScaler.d.ts +127 -0
- package/dist/AutoScaler.js +425 -0
- package/dist/BroadcastWorker.d.ts +21 -0
- package/dist/BroadcastWorker.js +24 -0
- package/dist/CanaryController.d.ts +103 -0
- package/dist/CanaryController.js +380 -0
- package/dist/ChaosEngineering.d.ts +79 -0
- package/dist/ChaosEngineering.js +216 -0
- package/dist/CircuitBreaker.d.ts +106 -0
- package/dist/CircuitBreaker.js +374 -0
- package/dist/ClusterLock.d.ts +90 -0
- package/dist/ClusterLock.js +385 -0
- package/dist/ComplianceManager.d.ts +177 -0
- package/dist/ComplianceManager.js +556 -0
- package/dist/DatacenterOrchestrator.d.ts +133 -0
- package/dist/DatacenterOrchestrator.js +404 -0
- package/dist/DeadLetterQueue.d.ts +122 -0
- package/dist/DeadLetterQueue.js +539 -0
- package/dist/HealthMonitor.d.ts +42 -0
- package/dist/HealthMonitor.js +301 -0
- package/dist/MultiQueueWorker.d.ts +89 -0
- package/dist/MultiQueueWorker.js +277 -0
- package/dist/NotificationWorker.d.ts +21 -0
- package/dist/NotificationWorker.js +23 -0
- package/dist/Observability.d.ts +153 -0
- package/dist/Observability.js +530 -0
- package/dist/PluginManager.d.ts +123 -0
- package/dist/PluginManager.js +392 -0
- package/dist/PriorityQueue.d.ts +117 -0
- package/dist/PriorityQueue.js +244 -0
- package/dist/ResourceMonitor.d.ts +164 -0
- package/dist/ResourceMonitor.js +605 -0
- package/dist/SLAMonitor.d.ts +110 -0
- package/dist/SLAMonitor.js +274 -0
- package/dist/WorkerFactory.d.ts +193 -0
- package/dist/WorkerFactory.js +1507 -0
- package/dist/WorkerInit.d.ts +85 -0
- package/dist/WorkerInit.js +223 -0
- package/dist/WorkerMetrics.d.ts +114 -0
- package/dist/WorkerMetrics.js +509 -0
- package/dist/WorkerRegistry.d.ts +145 -0
- package/dist/WorkerRegistry.js +319 -0
- package/dist/WorkerShutdown.d.ts +61 -0
- package/dist/WorkerShutdown.js +159 -0
- package/dist/WorkerVersioning.d.ts +107 -0
- package/dist/WorkerVersioning.js +300 -0
- package/dist/build-manifest.json +462 -0
- package/dist/config/workerConfig.d.ts +3 -0
- package/dist/config/workerConfig.js +19 -0
- package/dist/createQueueWorker.d.ts +23 -0
- package/dist/createQueueWorker.js +113 -0
- package/dist/dashboard/index.d.ts +1 -0
- package/dist/dashboard/index.js +1 -0
- package/dist/dashboard/types.d.ts +117 -0
- package/dist/dashboard/types.js +1 -0
- package/dist/dashboard/workers-api.d.ts +4 -0
- package/dist/dashboard/workers-api.js +638 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
- package/dist/dashboard/workers-dashboard-ui.js +1026 -0
- package/dist/dashboard/workers-dashboard.d.ts +4 -0
- package/dist/dashboard/workers-dashboard.js +904 -0
- package/dist/helper/index.d.ts +5 -0
- package/dist/helper/index.js +10 -0
- package/dist/http/WorkerApiController.d.ts +38 -0
- package/dist/http/WorkerApiController.js +312 -0
- package/dist/http/WorkerController.d.ts +374 -0
- package/dist/http/WorkerController.js +1351 -0
- package/dist/http/middleware/CustomValidation.d.ts +92 -0
- package/dist/http/middleware/CustomValidation.js +270 -0
- package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
- package/dist/http/middleware/DatacenterValidator.js +94 -0
- package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
- package/dist/http/middleware/EditWorkerValidation.js +55 -0
- package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
- package/dist/http/middleware/FeaturesValidator.js +60 -0
- package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
- package/dist/http/middleware/InfrastructureValidator.js +226 -0
- package/dist/http/middleware/OptionsValidator.d.ts +3 -0
- package/dist/http/middleware/OptionsValidator.js +112 -0
- package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
- package/dist/http/middleware/PayloadSanitizer.js +42 -0
- package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
- package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
- package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/QueueNameSanitizer.js +45 -0
- package/dist/http/middleware/ValidateDriver.d.ts +7 -0
- package/dist/http/middleware/ValidateDriver.js +20 -0
- package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
- package/dist/http/middleware/VersionSanitizer.js +25 -0
- package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
- package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
- package/dist/http/middleware/WorkerValidationChain.js +185 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +48 -0
- package/dist/routes/workers.d.ts +12 -0
- package/dist/routes/workers.js +81 -0
- package/dist/storage/WorkerStore.d.ts +45 -0
- package/dist/storage/WorkerStore.js +195 -0
- package/dist/type.d.ts +76 -0
- package/dist/type.js +1 -0
- package/dist/ui/router/ui.d.ts +3 -0
- package/dist/ui/router/ui.js +83 -0
- package/dist/ui/types/worker-ui.d.ts +229 -0
- package/dist/ui/types/worker-ui.js +5 -0
- package/package.json +53 -0
- package/src/AnomalyDetection.ts +434 -0
- package/src/AutoScaler.ts +654 -0
- package/src/BroadcastWorker.ts +34 -0
- package/src/CanaryController.ts +531 -0
- package/src/ChaosEngineering.ts +301 -0
- package/src/CircuitBreaker.ts +495 -0
- package/src/ClusterLock.ts +499 -0
- package/src/ComplianceManager.ts +815 -0
- package/src/DatacenterOrchestrator.ts +561 -0
- package/src/DeadLetterQueue.ts +733 -0
- package/src/HealthMonitor.ts +390 -0
- package/src/MultiQueueWorker.ts +431 -0
- package/src/NotificationWorker.ts +33 -0
- package/src/Observability.ts +696 -0
- package/src/PluginManager.ts +551 -0
- package/src/PriorityQueue.ts +351 -0
- package/src/ResourceMonitor.ts +769 -0
- package/src/SLAMonitor.ts +408 -0
- package/src/WorkerFactory.ts +2108 -0
- package/src/WorkerInit.ts +313 -0
- package/src/WorkerMetrics.ts +709 -0
- package/src/WorkerRegistry.ts +443 -0
- package/src/WorkerShutdown.ts +210 -0
- package/src/WorkerVersioning.ts +422 -0
- package/src/config/workerConfig.ts +25 -0
- package/src/createQueueWorker.ts +174 -0
- package/src/dashboard/index.ts +6 -0
- package/src/dashboard/types.ts +141 -0
- package/src/dashboard/workers-api.ts +785 -0
- package/src/dashboard/zintrust.svg +30 -0
- package/src/helper/index.ts +11 -0
- package/src/http/WorkerApiController.ts +369 -0
- package/src/http/WorkerController.ts +1512 -0
- package/src/http/middleware/CustomValidation.ts +360 -0
- package/src/http/middleware/DatacenterValidator.ts +124 -0
- package/src/http/middleware/EditWorkerValidation.ts +74 -0
- package/src/http/middleware/FeaturesValidator.ts +82 -0
- package/src/http/middleware/InfrastructureValidator.ts +295 -0
- package/src/http/middleware/OptionsValidator.ts +144 -0
- package/src/http/middleware/PayloadSanitizer.ts +52 -0
- package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
- package/src/http/middleware/QueueNameSanitizer.ts +55 -0
- package/src/http/middleware/ValidateDriver.ts +29 -0
- package/src/http/middleware/VersionSanitizer.ts +30 -0
- package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
- package/src/http/middleware/WorkerValidationChain.ts +230 -0
- package/src/index.ts +98 -0
- package/src/routes/workers.ts +154 -0
- package/src/storage/WorkerStore.ts +240 -0
- package/src/type.ts +89 -0
- package/src/types/queue-monitor.d.ts +38 -0
- package/src/types/queue-redis.d.ts +38 -0
- package/src/ui/README.md +13 -0
- package/src/ui/components/JsonEditor.js +670 -0
- package/src/ui/components/JsonViewer.js +387 -0
- package/src/ui/components/WorkerCard.js +178 -0
- package/src/ui/components/WorkerExpandPanel.js +257 -0
- package/src/ui/components/fetcher.js +42 -0
- package/src/ui/components/sla-scorecard.js +32 -0
- package/src/ui/components/styles.css +30 -0
- package/src/ui/components/table-expander.js +34 -0
- package/src/ui/integration/worker-ui-integration.js +565 -0
- package/src/ui/router/ui.ts +99 -0
- package/src/ui/services/workerApi.js +240 -0
- package/src/ui/types/worker-ui.ts +283 -0
- package/src/ui/utils/jsonValidator.js +444 -0
- package/src/ui/workers/index.html +202 -0
- package/src/ui/workers/main.js +1781 -0
- package/src/ui/workers/styles.css +1350 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NotificationWorker - Processes queued notifications
|
|
3
|
+
*
|
|
4
|
+
* This worker dequeues notification messages and sends them using the Notification service.
|
|
5
|
+
* Use with Queue.dequeue() in a background process or cron job.
|
|
6
|
+
*/
|
|
7
|
+
import { Notification } from '@zintrust/core';
|
|
8
|
+
import { createQueueWorker } from './createQueueWorker';
|
|
9
|
+
/**
 * Settings handed to createQueueWorker() for the notification queue.
 *
 * - getLogFields: picks the payload fields attached to log entries
 *   (the payload's `timestamp` is reported as `queuedAt`).
 * - handle: delivers one dequeued payload through Notification.send().
 */
const notificationWorkerOptions = {
    kindLabel: 'notification',
    defaultQueueName: 'notifications',
    maxAttempts: 3,
    getLogFields: ({ recipient, timestamp }) => ({
        recipient,
        queuedAt: timestamp,
    }),
    handle: async ({ recipient, message, options }) => {
        await Notification.send(recipient, message, options);
    },
};

/**
 * Frozen worker object built from createQueueWorker(); all of its members
 * come from the factory's return value.
 */
export const NotificationWorker = Object.freeze({
    ...createQueueWorker(notificationWorkerOptions),
});

export default NotificationWorker;
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability Manager
|
|
3
|
+
* Integrations for Prometheus, OpenTelemetry, and Datadog
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import type { Tracer } from '@opentelemetry/api';
|
|
7
|
+
import type { Registry } from 'prom-client';
|
|
8
|
+
/** Prometheus section of the observability configuration. */
type PrometheusSettings = {
    /** When false the Prometheus initializer returns without creating a registry. */
    enabled: boolean;
    /** Reported in the "Prometheus metrics initialized" log entry. */
    port?: number;
    /** Reported in the same log entry; shown as '/metrics' when omitted. */
    path?: string;
    /** Passed to the registry's setDefaultLabels() when provided. */
    defaultLabels?: Record<string, string>;
};

/** OpenTelemetry section of the observability configuration. */
type OpenTelemetrySettings = {
    /** When false no tracer is obtained and span helpers do nothing. */
    enabled: boolean;
    /** Name passed to trace.getTracer(). */
    serviceName: string;
    /** NOTE(review): not read by the bundled Observability.js - confirm where it is consumed. */
    exporterUrl?: string;
    /** Reported in the initialization log entry; shown as 1 when omitted. */
    sampleRate?: number;
};

/** Datadog (StatsD) section of the observability configuration. */
type DatadogSettings = {
    /** When false no StatsD client is created. */
    enabled: boolean;
    /** StatsD host; 'localhost' when omitted. */
    host?: string;
    /** StatsD port; 8125 when omitted. */
    port?: number;
    /** Metric name prefix; 'worker.' when omitted. */
    prefix?: string;
    /** Passed to the client as globalTags; an empty list when omitted. */
    tags?: string[];
};

/** Full configuration accepted by Observability.initialize(). */
export type ObservabilityConfig = {
    prometheus: PrometheusSettings;
    openTelemetry: OpenTelemetrySettings;
    datadog: DatadogSettings;
};
|
|
29
|
+
/** Kinds of Prometheus metric that registerMetric() can create. */
export type MetricType =
    | 'counter'
    | 'gauge'
    | 'histogram'
    | 'summary';
|
|
30
|
+
/** Description of a metric to create through Observability.registerMetric(). */
export type MetricDefinition = {
    /** Metric name; also the key used by the increment/set/record helpers. */
    name: string;
    /** Which metric class is instantiated. */
    type: MetricType;
    /** Help text forwarded to the metric constructor. */
    help: string;
    /** Forwarded as labelNames; an empty list when omitted. */
    labels?: string[];
};
|
|
36
|
+
/**
 * Identifiers describing a position in a trace.
 * NOTE(review): not referenced by the bundled Observability.js - presumably
 * used by callers to propagate context; confirm against the call sites.
 */
export type TraceContext = {
    traceId: string;
    spanId: string;
    /** Absent for a span without a parent (assumption - confirm). */
    parentSpanId?: string;
    baggage?: Record<string, string>;
};
|
|
42
|
+
/** Attribute bag attached to spans and span events: primitive values keyed by name. */
export type SpanAttributes = {
    [key: string]: string | number | boolean;
};
|
|
43
|
+
/** Shape shared by the StatsD calls that always carry a value. */
type StatsdValueCall = (name: string, value: number, tags?: string[]) => void;

/**
 * Subset of the StatsD client surface used by Observability
 * (the implementation instantiates it from the 'hot-shots' package).
 */
type DatadogClient = {
    /** Counter increment; the amount is optional. */
    increment: (name: string, value?: number, tags?: string[]) => void;
    gauge: StatsdValueCall;
    histogram: StatsdValueCall;
    timing: StatsdValueCall;
    /** Closes the client; the callback is optional. */
    close: (callback?: () => void) => void;
};
|
|
50
|
+
/**
 * Observability facade (frozen object) over Prometheus, OpenTelemetry and Datadog.
 *
 * Every recording helper is a no-op for a backend that is disabled or not initialized.
 */
export declare const Observability: Readonly<{
    /**
     * Store the configuration and initialize every enabled backend.
     * Logs a warning and returns when a configuration is already stored.
     * A Prometheus initialization error rejects the returned promise;
     * OpenTelemetry and Datadog initialization errors are only logged.
     */
    initialize(observabilityConfig: ObservabilityConfig): Promise<void>;
    /**
     * Create a Prometheus metric on the internal registry.
     * Does nothing when Prometheus is disabled/uninitialized or the name is already registered.
     */
    registerMetric(definition: MetricDefinition): Promise<void>;
    /**
     * Add `value` (1 when omitted) to a counter.
     * Sent to the Prometheus metric registered under `name`, if any, and to Datadog,
     * where labels are converted to `key:value` tags.
     */
    incrementCounter(name: string, value?: number, labels?: Record<string, string>): void;
    /**
     * Set a gauge to `value` on Prometheus (registered metrics only) and Datadog.
     */
    setGauge(name: string, value: number, labels?: Record<string, string>): void;
    /**
     * Record one observation on Prometheus (registered metrics only) and Datadog.
     */
    recordHistogram(name: string, value: number, labels?: Record<string, string>): void;
    /**
     * Record a duration given in milliseconds.
     * Prometheus observes `durationMs / 1000` (seconds); Datadog receives milliseconds.
     */
    recordTiming(name: string, durationMs: number, labels?: Record<string, string>): void;
    /**
     * Start an OpenTelemetry span.
     * @returns an id for endSpan/recordSpanError/addSpanEvent, or null when tracing is
     *          disabled, not initialized, or the span could not be started.
     */
    startSpan(name: string, options?: {
        attributes?: SpanAttributes;
        parentSpanId?: string;
    }): string | null;
    /**
     * Optionally set final attributes, end the span and forget its id.
     * Unknown ids are ignored.
     */
    endSpan(spanId: string, attributes?: SpanAttributes): void;
    /**
     * Record an exception on the span and mark its status as ERROR.
     * Unknown ids are ignored.
     */
    recordSpanError(spanId: string, error: Error): void;
    /**
     * Add a named event to the span. Unknown ids are ignored.
     */
    addSpanEvent(spanId: string, name: string, attributes?: SpanAttributes): void;
    /**
     * Serialize the registry for a metrics HTTP endpoint.
     * Rejects when Prometheus is disabled or not initialized.
     */
    getPrometheusMetrics(): Promise<string>;
    /**
     * Internal Prometheus registry, or null when none exists.
     */
    getPrometheusRegistry(): Registry | null;
    /**
     * Internal Datadog StatsD client, or null when none exists.
     */
    getDatadogClient(): DatadogClient | null;
    /**
     * Internal OpenTelemetry tracer, or null when none exists.
     */
    getTracer(): Tracer | null;
    /**
     * Record per-job metrics labelled with `worker` and `job`.
     * Only the fields that are present are recorded: `processed` and `failed` as
     * counters, `durationMs` as a timing, `queueSize` as a gauge.
     */
    recordJobMetrics(workerName: string, jobName: string, metrics: {
        processed?: number;
        failed?: number;
        durationMs?: number;
        queueSize?: number;
    }): void;
    /**
     * Record per-worker resource gauges labelled with `worker`.
     * Only the fields that are present are recorded.
     */
    recordResourceMetrics(workerName: string, resources: {
        cpuUsage?: number;
        memoryUsage?: number;
        activeJobs?: number;
    }): void;
    /**
     * Wrap `fn` so each call runs inside a span named `name`.
     * The span is ended with `success: true` on completion, or gets the error recorded
     * and is ended with `success: false` before the error is rethrown.
     * Note: the wrapper awaits `fn`, so at runtime it always returns a promise.
     */
    traced<T extends (...args: unknown[]) => unknown>(name: string, fn: T, options?: {
        attributes?: SpanAttributes;
    }): T;
    /**
     * Shallow copy of the stored configuration, or null before initialize().
     */
    getConfig(): ObservabilityConfig | null;
    /**
     * True when a configuration is stored and at least one backend is enabled in it.
     */
    isEnabled(): boolean;
    /**
     * Attempt to remove, from every registered Prometheus metric, the series whose
     * `worker` label equals `workerName`. Does nothing when Prometheus is disabled.
     */
    clearWorkerMetrics(workerName: string): Promise<void>;
    /**
     * Close the Datadog client, end the remaining spans, stop the span sweep timer
     * and reset all internal state, after which initialize() may be called again.
     */
    shutdown(): void;
}>;
|
|
153
|
+
export {};
|
|
@@ -0,0 +1,530 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability Manager
|
|
3
|
+
* Integrations for Prometheus, OpenTelemetry, and Datadog
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import { ErrorFactory, Logger, generateUuid } from '@zintrust/core';
|
|
7
|
+
// ---------------------------------------------------------------------------
// Module-level state. Everything declared with `let` is reset to null by
// Observability.shutdown().
// ---------------------------------------------------------------------------

/** Configuration stored by initialize(); null while uninitialized. */
let config = null;

// --- Prometheus ---
/** Lazily imported 'prom-client' module. */
let promClient = null;
/** Registry created by initPrometheus(). */
let promRegistry = null;
/** Registered metric objects keyed by metric name. */
const promMetrics = new Map();

// --- OpenTelemetry ---
/** Tracer obtained by initOpenTelemetry(). */
let otelTracer = null;
/** Timer handle of the periodic stale-span sweep. */
let spanSweepInterval = null;
/** Open spans keyed by generated span id; each value is { span, startedAt }. */
const activeSpans = new Map();
/** Size at which startSpan() evicts the oldest tracked span before adding another. */
const MAX_ACTIVE_SPANS = 1_000;
/** Age (5 minutes, in milliseconds) after which the sweep force-ends a span. */
const SPAN_TTL_MS = 300_000;
|
|
19
|
+
/**
 * Force-end and forget every tracked span that has been open for longer
 * than SPAN_TTL_MS.
 */
const cleanupStaleSpans = () => {
    // A span is stale when it started before this instant.
    const cutoff = Date.now() - SPAN_TTL_MS;
    activeSpans.forEach((tracked, id) => {
        if (tracked.startedAt < cutoff) {
            tracked.span.end();
            activeSpans.delete(id);
        }
    });
};
|
|
28
|
+
/**
 * Make room for a new span: once MAX_ACTIVE_SPANS spans are tracked, end and
 * drop the one with the smallest startedAt (the first such entry on ties).
 */
const evictOldestSpan = () => {
    if (activeSpans.size >= MAX_ACTIVE_SPANS) {
        let victimId = null;
        let victim;
        let earliest = Number.POSITIVE_INFINITY;
        activeSpans.forEach((tracked, id) => {
            if (tracked.startedAt < earliest) {
                earliest = tracked.startedAt;
                victimId = id;
                victim = tracked;
            }
        });
        if (victimId === null) {
            return;
        }
        victim?.span.end();
        activeSpans.delete(victimId);
    }
};
|
|
47
|
+
// Datadog state: StatsD client instance created by initDatadog(); null until then and again after shutdown().
|
|
48
|
+
let datadogClient = null;
|
|
49
|
+
/**
 * Helper: return the 'prom-client' module, importing it on first use and
 * reusing the cached module afterwards.
 */
const getPrometheusClient = async () => {
    if (promClient === null || promClient === undefined) {
        promClient = await import('prom-client');
    }
    return promClient;
};
|
|
56
|
+
/**
 * Helper: dynamically import the OpenTelemetry API package so it is only
 * loaded when tracing is actually used.
 */
const getOpenTelemetryApi = async () => import('@opentelemetry/api');
|
|
62
|
+
/**
 * Helper: set up the Prometheus registry.
 *
 * Creates a registry, applies the configured default labels, enables
 * prom-client's default metrics on it and logs the configured port/path.
 * Unlike the other initializers, a failure here is logged and rethrown.
 */
const initPrometheus = async (promConfig) => {
    if (promConfig.enabled) {
        try {
            const prom = await getPrometheusClient();
            promRegistry = new prom.Registry();
            const { defaultLabels } = promConfig;
            if (defaultLabels) {
                promRegistry.setDefaultLabels(defaultLabels);
            }
            // Default metrics (process, Node.js metrics) are collected into our own registry.
            prom.collectDefaultMetrics({ register: promRegistry });
            const logFields = {
                port: promConfig.port,
                path: promConfig.path ?? '/metrics',
            };
            Logger.info('Prometheus metrics initialized', logFields);
        }
        catch (error) {
            Logger.error('Failed to initialize Prometheus', error);
            throw error;
        }
    }
};
|
|
87
|
+
/**
 * Helper: obtain the OpenTelemetry tracer for the configured service name.
 *
 * Failures are logged and swallowed on purpose so the application keeps
 * running without tracing.
 */
const initOpenTelemetry = async (otelConfig) => {
    if (otelConfig.enabled) {
        try {
            const { trace } = await getOpenTelemetryApi();
            otelTracer = trace.getTracer(otelConfig.serviceName);
            const logFields = {
                serviceName: otelConfig.serviceName,
                sampleRate: otelConfig.sampleRate ?? 1,
            };
            Logger.info('OpenTelemetry tracing initialized', logFields);
        }
        catch (error) {
            // Deliberately not rethrown: tracing is optional.
            Logger.error('Failed to initialize OpenTelemetry', error);
        }
    }
};
|
|
106
|
+
/**
 * Helper: create the Datadog StatsD client from the 'hot-shots' package.
 *
 * Defaults: host 'localhost', port 8125, prefix 'worker.', no global tags.
 * Failures are logged and swallowed on purpose so the application keeps
 * running without Datadog.
 */
const initDatadog = async (ddConfig) => {
    if (!ddConfig.enabled) {
        return;
    }
    try {
        const { StatsD } = await import('hot-shots');
        if (!StatsD) {
            Logger.warn('Datadog StatsD client unavailable');
            return;
        }
        const host = ddConfig.host ?? 'localhost';
        const port = ddConfig.port ?? 8125;
        datadogClient = new StatsD({
            host,
            port,
            prefix: ddConfig.prefix ?? 'worker.',
            globalTags: ddConfig.tags ?? [],
        });
        Logger.info('Datadog StatsD initialized', { host, port });
    }
    catch (error) {
        // Deliberately not rethrown: Datadog is optional.
        Logger.error('Failed to initialize Datadog', error);
    }
};
|
|
135
|
+
/**
 * OpenTelemetry API module, cached by initialize() so that the synchronous
 * startSpan() can build a parent context. Null while tracing is unavailable.
 */
let otelApi = null;
/** Numeric value of OpenTelemetry's SpanStatusCode.ERROR. */
const SPAN_STATUS_ERROR = 2;
/** prom-client metric class name for each supported MetricDefinition type. */
const PROM_CLASS_BY_TYPE = new Map([
    ['counter', 'Counter'],
    ['gauge', 'Gauge'],
    ['histogram', 'Histogram'],
    ['summary', 'Summary'],
]);
/**
 * Labels that appear on exported histogram/summary samples but are not part
 * of the metric's own label set; used only when a metric does not expose
 * its labelNames.
 */
const PROM_SAMPLE_ONLY_LABELS = ['le', 'quantile'];
/** Helper: convert a labels record into Datadog `key:value` tags. */
const toDatadogTags = (labels) => labels ? Object.entries(labels).map(([k, v]) => `${k}:${v}`) : [];
/** Helper: call `method` on the Prometheus metric registered as `name`, if Prometheus is enabled. */
const applyToPromMetric = (name, method, value, labels) => {
    if (config?.prometheus.enabled !== true || !promMetrics.has(name)) {
        return;
    }
    const metric = promMetrics.get(name);
    if (labels) {
        metric[method](labels, value);
    }
    else {
        metric[method](value);
    }
};
/** Helper: call `method` on the Datadog client, if Datadog is enabled and connected. */
const sendToDatadog = (method, name, value, labels) => {
    if (config?.datadog.enabled !== true || datadogClient === null) {
        return;
    }
    datadogClient[method](name, value, toDatadogTags(labels));
};
/** Helper: reduce exported sample labels to the label set accepted by `metric.remove()`. */
const toSeriesLabels = (metric, sampleLabels) => {
    const declared = Array.isArray(metric.labelNames) ? new Set(metric.labelNames) : null;
    const keep = (key) => declared !== null ? declared.has(key) : !PROM_SAMPLE_ONLY_LABELS.includes(key);
    return Object.fromEntries(Object.entries(sampleLabels).filter(([key]) => keep(key)));
};
/**
 * Helper: release every backend resource and return the module to its
 * uninitialized state. One failing span or client never prevents the rest
 * of the cleanup from running.
 */
const resetObservabilityState = () => {
    if (spanSweepInterval) {
        clearInterval(spanSweepInterval);
        spanSweepInterval = null;
    }
    // Close Datadog client
    if (datadogClient !== null) {
        try {
            datadogClient.close(() => {
                Logger.debug('Datadog client closed');
            });
        }
        catch (error) {
            Logger.error('Failed to close Datadog client', error);
        }
        datadogClient = null;
    }
    // End all active spans
    for (const [spanId, entry] of activeSpans.entries()) {
        try {
            entry.span.end();
            Logger.debug(`Span force-ended: ${spanId}`);
        }
        catch (error) {
            Logger.error('Failed to end span', error);
        }
    }
    activeSpans.clear();
    // Clear metrics
    promMetrics.clear();
    promRegistry = null;
    promClient = null;
    otelTracer = null;
    otelApi = null;
    config = null;
};
/**
 * Observability Manager - Sealed namespace
 *
 * Facade over Prometheus (prom-client), OpenTelemetry and Datadog (StatsD).
 * Every recording helper is a no-op for a backend that is disabled or not
 * initialized.
 */
export const Observability = Object.freeze({
    /**
     * Initialize observability with configuration.
     *
     * Logs a warning and returns when already initialized. If an initializer
     * rejects (only Prometheus does), all state is rolled back so that
     * initialize() can be retried, and the original error is rethrown.
     *
     * @param observabilityConfig Settings for the three backends.
     * @throws The error raised by a failing backend initializer.
     */
    async initialize(observabilityConfig) {
        if (config) {
            Logger.warn('Observability already initialized');
            return;
        }
        config = observabilityConfig;
        // Wait for every initializer to settle so that a rollback cannot race
        // with one that is still assigning its client.
        const outcomes = await Promise.allSettled([
            initPrometheus(observabilityConfig.prometheus),
            initOpenTelemetry(observabilityConfig.openTelemetry),
            initDatadog(observabilityConfig.datadog),
        ]);
        const failure = outcomes.find((outcome) => outcome.status === 'rejected');
        if (failure) {
            resetObservabilityState();
            throw failure.reason;
        }
        if (observabilityConfig.openTelemetry.enabled === true) {
            if (otelTracer !== null) {
                try {
                    otelApi = await getOpenTelemetryApi();
                }
                catch (error) {
                    Logger.warn('OpenTelemetry API unavailable; parent span linking disabled', error);
                }
            }
            if (spanSweepInterval === null) {
                spanSweepInterval = setInterval(() => {
                    // An exception escaping a timer callback would be uncaught.
                    try {
                        cleanupStaleSpans();
                    }
                    catch (error) {
                        Logger.error('Failed to clean up stale spans', error);
                    }
                }, SPAN_TTL_MS);
            }
        }
        Logger.info('Observability initialized', {
            prometheus: observabilityConfig.prometheus.enabled,
            openTelemetry: observabilityConfig.openTelemetry.enabled,
            datadog: observabilityConfig.datadog.enabled,
        });
    },
    /**
     * Register a metric on the Prometheus registry.
     *
     * No-op when Prometheus is disabled/uninitialized, when the name is
     * already registered, or (with a warning) when the type is not one of
     * counter/gauge/histogram/summary.
     *
     * @param definition Name, type, help text and optional label names.
     */
    async registerMetric(definition) {
        if (config?.prometheus.enabled !== true || !promRegistry) {
            return;
        }
        if (promMetrics.has(definition.name)) {
            Logger.debug(`Metric already registered: ${definition.name}`);
            return;
        }
        const className = PROM_CLASS_BY_TYPE.get(definition.type);
        if (className === undefined) {
            Logger.warn(`Unsupported metric type "${definition.type}" for metric: ${definition.name}`);
            return;
        }
        const client = await getPrometheusClient();
        // Re-validate after the await: a shutdown or a concurrent registration
        // of the same name may have happened in the meantime.
        const registry = promRegistry;
        if (!registry) {
            return;
        }
        if (promMetrics.has(definition.name)) {
            Logger.debug(`Metric already registered: ${definition.name}`);
            return;
        }
        const MetricClass = client[className];
        const metric = new MetricClass({
            name: definition.name,
            help: definition.help,
            labelNames: definition.labels ?? [],
            registers: [registry],
        });
        promMetrics.set(definition.name, metric);
        Logger.debug(`Metric registered: ${definition.name} (${definition.type})`);
    },
    /**
     * Increment a counter.
     *
     * @param name Metric name.
     * @param value Amount to add (default 1).
     * @param labels Optional labels (Datadog receives them as `key:value` tags).
     */
    incrementCounter(name, value = 1, labels) {
        applyToPromMetric(name, 'inc', value, labels);
        sendToDatadog('increment', name, value, labels);
    },
    /**
     * Set a gauge value.
     *
     * @param name Metric name.
     * @param value New gauge value.
     * @param labels Optional labels.
     */
    setGauge(name, value, labels) {
        applyToPromMetric(name, 'set', value, labels);
        sendToDatadog('gauge', name, value, labels);
    },
    /**
     * Record a histogram observation.
     *
     * @param name Metric name.
     * @param value Observed value.
     * @param labels Optional labels.
     */
    recordHistogram(name, value, labels) {
        applyToPromMetric(name, 'observe', value, labels);
        sendToDatadog('histogram', name, value, labels);
    },
    /**
     * Record timing (histogram for duration).
     *
     * @param name Metric name.
     * @param durationMs Duration in milliseconds.
     * @param labels Optional labels.
     */
    recordTiming(name, durationMs, labels) {
        // Prometheus (convert to seconds)
        applyToPromMetric(name, 'observe', durationMs / 1000, labels);
        // Datadog (milliseconds)
        sendToDatadog('timing', name, durationMs, labels);
    },
    /**
     * Start a span (OpenTelemetry).
     *
     * When `options.parentSpanId` names a span that is still tracked, the new
     * span is created as its child; otherwise the tracer's default context is used.
     *
     * @param name Span name.
     * @param options Optional initial attributes and parent span id.
     * @returns The id to pass to endSpan/recordSpanError/addSpanEvent, or null
     *          when tracing is disabled, uninitialized, or the span could not be started.
     */
    startSpan(name, options) {
        if (config?.openTelemetry.enabled !== true || otelTracer === null) {
            return null;
        }
        try {
            const spanOptions = {
                attributes: options?.attributes ?? {},
            };
            const parentEntry = options?.parentSpanId === undefined
                ? undefined
                : activeSpans.get(options.parentSpanId);
            const parentContext = parentEntry !== undefined && otelApi !== null
                ? otelApi.trace.setSpan(otelApi.context.active(), parentEntry.span)
                : undefined;
            const span = otelTracer.startSpan(name, spanOptions, parentContext);
            const spanId = `${name}-${Date.now()}-${generateUuid()}`;
            evictOldestSpan();
            activeSpans.set(spanId, { span, startedAt: Date.now() });
            Logger.debug(`Span started: ${name}`, { spanId });
            return spanId;
        }
        catch (error) {
            Logger.error('Failed to start span', error);
            return null;
        }
    },
    /**
     * End a span.
     *
     * @param spanId Id returned by startSpan(); unknown ids are ignored.
     * @param attributes Optional attributes set just before the span ends.
     */
    endSpan(spanId, attributes) {
        if (config?.openTelemetry.enabled !== true) {
            return;
        }
        try {
            const entry = activeSpans.get(spanId);
            if (!entry)
                return;
            if (attributes) {
                entry.span.setAttributes(attributes);
            }
            entry.span.end();
            activeSpans.delete(spanId);
            Logger.debug(`Span ended: ${spanId}`);
        }
        catch (error) {
            Logger.error('Failed to end span', error);
        }
    },
    /**
     * Record an error on a span and mark the span status as ERROR.
     *
     * @param spanId Id returned by startSpan(); unknown ids are ignored.
     * @param error The failure; non-Error values (anything can be thrown) are
     *              reported through their string form.
     */
    recordSpanError(spanId, error) {
        if (config?.openTelemetry.enabled !== true) {
            return;
        }
        try {
            const entry = activeSpans.get(spanId);
            if (!entry)
                return;
            const message = typeof error?.message === 'string' ? error.message : String(error);
            entry.span.recordException(error ?? message);
            entry.span.setStatus({ code: SPAN_STATUS_ERROR, message });
            Logger.debug(`Span error recorded: ${spanId}`, { error: message });
        }
        catch (err) {
            Logger.error('Failed to record span error', err);
        }
    },
    /**
     * Add event to span.
     *
     * @param spanId Id returned by startSpan(); unknown ids are ignored.
     * @param name Event name.
     * @param attributes Optional event attributes.
     */
    addSpanEvent(spanId, name, attributes) {
        if (config?.openTelemetry.enabled !== true) {
            return;
        }
        try {
            const entry = activeSpans.get(spanId);
            if (!entry)
                return;
            entry.span.addEvent(name, attributes);
            Logger.debug(`Span event added: ${spanId}/${name}`);
        }
        catch (error) {
            Logger.error('Failed to add span event', error);
        }
    },
    /**
     * Get Prometheus metrics (for HTTP endpoint).
     *
     * @returns The registry serialized by prom-client.
     * @throws A general error when Prometheus is disabled or not initialized.
     */
    async getPrometheusMetrics() {
        if (config?.prometheus.enabled !== true || !promRegistry) {
            throw ErrorFactory.createGeneralError('Prometheus metrics not enabled');
        }
        return promRegistry.metrics();
    },
    /**
     * Get Prometheus registry (for advanced usage); null when none exists.
     */
    getPrometheusRegistry() {
        return promRegistry;
    },
    /**
     * Get Datadog client (for advanced usage); null when none exists.
     */
    getDatadogClient() {
        return datadogClient;
    },
    /**
     * Get OpenTelemetry tracer (for advanced usage); null when none exists.
     */
    getTracer() {
        return otelTracer;
    },
    /**
     * Record worker job metrics, labelled with `worker` and `job`.
     * Only the fields present in `metrics` are recorded.
     */
    recordJobMetrics(workerName, jobName, metrics) {
        const labels = { worker: workerName, job: jobName };
        if (metrics.processed !== undefined) {
            Observability.incrementCounter('worker_jobs_processed_total', metrics.processed, labels);
        }
        if (metrics.failed !== undefined) {
            Observability.incrementCounter('worker_jobs_failed_total', metrics.failed, labels);
        }
        if (metrics.durationMs !== undefined) {
            Observability.recordTiming('worker_job_duration_seconds', metrics.durationMs, labels);
        }
        if (metrics.queueSize !== undefined) {
            Observability.setGauge('worker_queue_size', metrics.queueSize, labels);
        }
    },
    /**
     * Record worker resource metrics, labelled with `worker`.
     * Only the fields present in `resources` are recorded.
     */
    recordResourceMetrics(workerName, resources) {
        const labels = { worker: workerName };
        if (resources.cpuUsage !== undefined) {
            Observability.setGauge('worker_cpu_usage_percent', resources.cpuUsage, labels);
        }
        if (resources.memoryUsage !== undefined) {
            Observability.setGauge('worker_memory_usage_bytes', resources.memoryUsage, labels);
        }
        if (resources.activeJobs !== undefined) {
            Observability.setGauge('worker_active_jobs', resources.activeJobs, labels);
        }
    },
    /**
     * Create a traced function wrapper.
     *
     * Each call runs inside a span named `name`. The wrapper awaits `fn`, so
     * it always returns a promise, and it forwards the caller's `this` so
     * that methods can be wrapped too.
     *
     * @param name Span name.
     * @param fn Function to wrap.
     * @param options Optional span attributes.
     */
    traced(name, fn, options) {
        return (async function tracedCall(...args) {
            const spanId = Observability.startSpan(name, options);
            try {
                const result = await fn.apply(this, args);
                if (spanId !== null) {
                    Observability.endSpan(spanId, { success: true });
                }
                return result;
            }
            catch (error) {
                if (spanId !== null) {
                    Observability.recordSpanError(spanId, error);
                    Observability.endSpan(spanId, { success: false });
                }
                throw error;
            }
        });
    },
    /**
     * Get configuration (a shallow copy), or null before initialize().
     */
    getConfig() {
        return config ? { ...config } : null;
    },
    /**
     * Check if observability is enabled, i.e. a configuration is stored and
     * at least one backend is enabled in it.
     */
    isEnabled() {
        return (config !== null &&
            (config.prometheus.enabled || config.openTelemetry.enabled || config.datadog.enabled));
    },
    /**
     * Clear metrics for a specific worker.
     *
     * Removes, from every registered Prometheus metric, the series whose
     * `worker` label equals `workerName`. Exported histogram and summary
     * samples carry extra `le` / `quantile` labels that `remove()` does not
     * accept, so labels are first reduced to the metric's own label set.
     */
    async clearWorkerMetrics(workerName) {
        if (config?.prometheus.enabled !== true) {
            return;
        }
        await Promise.all(Array.from(promMetrics.values(), async (metric) => {
            try {
                // prom-client get() returns the exported samples together with their labels
                const item = await metric.get();
                const removed = new Set();
                for (const { labels } of item.values ?? []) {
                    if (labels?.['worker'] !== workerName) {
                        continue;
                    }
                    const seriesLabels = toSeriesLabels(metric, labels);
                    // One series yields several samples (buckets, sum, count): remove it once.
                    const seriesKey = JSON.stringify(seriesLabels);
                    if (removed.has(seriesKey)) {
                        continue;
                    }
                    removed.add(seriesKey);
                    metric.remove(seriesLabels);
                }
            }
            catch (err) {
                Logger.debug('Failed to clear worker metric labels', err);
            }
        }));
        Logger.debug(`Cleared metrics for worker: ${workerName}`);
    },
    /**
     * Shutdown: close the Datadog client, end the remaining spans, stop the
     * sweep timer and reset all state so that initialize() can be called again.
     */
    shutdown() {
        Logger.info('Observability shutting down...');
        resetObservabilityState();
        Logger.info('Observability shutdown complete');
    },
});
|
|
530
|
+
// Graceful shutdown handled by WorkerShutdown
|