@zintrust/workers 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +861 -0
- package/dist/AnomalyDetection.d.ts +102 -0
- package/dist/AnomalyDetection.js +321 -0
- package/dist/AutoScaler.d.ts +127 -0
- package/dist/AutoScaler.js +425 -0
- package/dist/BroadcastWorker.d.ts +21 -0
- package/dist/BroadcastWorker.js +24 -0
- package/dist/CanaryController.d.ts +103 -0
- package/dist/CanaryController.js +380 -0
- package/dist/ChaosEngineering.d.ts +79 -0
- package/dist/ChaosEngineering.js +216 -0
- package/dist/CircuitBreaker.d.ts +106 -0
- package/dist/CircuitBreaker.js +374 -0
- package/dist/ClusterLock.d.ts +90 -0
- package/dist/ClusterLock.js +385 -0
- package/dist/ComplianceManager.d.ts +177 -0
- package/dist/ComplianceManager.js +556 -0
- package/dist/DatacenterOrchestrator.d.ts +133 -0
- package/dist/DatacenterOrchestrator.js +404 -0
- package/dist/DeadLetterQueue.d.ts +122 -0
- package/dist/DeadLetterQueue.js +539 -0
- package/dist/HealthMonitor.d.ts +42 -0
- package/dist/HealthMonitor.js +301 -0
- package/dist/MultiQueueWorker.d.ts +89 -0
- package/dist/MultiQueueWorker.js +277 -0
- package/dist/NotificationWorker.d.ts +21 -0
- package/dist/NotificationWorker.js +23 -0
- package/dist/Observability.d.ts +153 -0
- package/dist/Observability.js +530 -0
- package/dist/PluginManager.d.ts +123 -0
- package/dist/PluginManager.js +392 -0
- package/dist/PriorityQueue.d.ts +117 -0
- package/dist/PriorityQueue.js +244 -0
- package/dist/ResourceMonitor.d.ts +164 -0
- package/dist/ResourceMonitor.js +605 -0
- package/dist/SLAMonitor.d.ts +110 -0
- package/dist/SLAMonitor.js +274 -0
- package/dist/WorkerFactory.d.ts +193 -0
- package/dist/WorkerFactory.js +1507 -0
- package/dist/WorkerInit.d.ts +85 -0
- package/dist/WorkerInit.js +223 -0
- package/dist/WorkerMetrics.d.ts +114 -0
- package/dist/WorkerMetrics.js +509 -0
- package/dist/WorkerRegistry.d.ts +145 -0
- package/dist/WorkerRegistry.js +319 -0
- package/dist/WorkerShutdown.d.ts +61 -0
- package/dist/WorkerShutdown.js +159 -0
- package/dist/WorkerVersioning.d.ts +107 -0
- package/dist/WorkerVersioning.js +300 -0
- package/dist/build-manifest.json +462 -0
- package/dist/config/workerConfig.d.ts +3 -0
- package/dist/config/workerConfig.js +19 -0
- package/dist/createQueueWorker.d.ts +23 -0
- package/dist/createQueueWorker.js +113 -0
- package/dist/dashboard/index.d.ts +1 -0
- package/dist/dashboard/index.js +1 -0
- package/dist/dashboard/types.d.ts +117 -0
- package/dist/dashboard/types.js +1 -0
- package/dist/dashboard/workers-api.d.ts +4 -0
- package/dist/dashboard/workers-api.js +638 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
- package/dist/dashboard/workers-dashboard-ui.js +1026 -0
- package/dist/dashboard/workers-dashboard.d.ts +4 -0
- package/dist/dashboard/workers-dashboard.js +904 -0
- package/dist/helper/index.d.ts +5 -0
- package/dist/helper/index.js +10 -0
- package/dist/http/WorkerApiController.d.ts +38 -0
- package/dist/http/WorkerApiController.js +312 -0
- package/dist/http/WorkerController.d.ts +374 -0
- package/dist/http/WorkerController.js +1351 -0
- package/dist/http/middleware/CustomValidation.d.ts +92 -0
- package/dist/http/middleware/CustomValidation.js +270 -0
- package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
- package/dist/http/middleware/DatacenterValidator.js +94 -0
- package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
- package/dist/http/middleware/EditWorkerValidation.js +55 -0
- package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
- package/dist/http/middleware/FeaturesValidator.js +60 -0
- package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
- package/dist/http/middleware/InfrastructureValidator.js +226 -0
- package/dist/http/middleware/OptionsValidator.d.ts +3 -0
- package/dist/http/middleware/OptionsValidator.js +112 -0
- package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
- package/dist/http/middleware/PayloadSanitizer.js +42 -0
- package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
- package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
- package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/QueueNameSanitizer.js +45 -0
- package/dist/http/middleware/ValidateDriver.d.ts +7 -0
- package/dist/http/middleware/ValidateDriver.js +20 -0
- package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
- package/dist/http/middleware/VersionSanitizer.js +25 -0
- package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
- package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
- package/dist/http/middleware/WorkerValidationChain.js +185 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +48 -0
- package/dist/routes/workers.d.ts +12 -0
- package/dist/routes/workers.js +81 -0
- package/dist/storage/WorkerStore.d.ts +45 -0
- package/dist/storage/WorkerStore.js +195 -0
- package/dist/type.d.ts +76 -0
- package/dist/type.js +1 -0
- package/dist/ui/router/ui.d.ts +3 -0
- package/dist/ui/router/ui.js +83 -0
- package/dist/ui/types/worker-ui.d.ts +229 -0
- package/dist/ui/types/worker-ui.js +5 -0
- package/package.json +53 -0
- package/src/AnomalyDetection.ts +434 -0
- package/src/AutoScaler.ts +654 -0
- package/src/BroadcastWorker.ts +34 -0
- package/src/CanaryController.ts +531 -0
- package/src/ChaosEngineering.ts +301 -0
- package/src/CircuitBreaker.ts +495 -0
- package/src/ClusterLock.ts +499 -0
- package/src/ComplianceManager.ts +815 -0
- package/src/DatacenterOrchestrator.ts +561 -0
- package/src/DeadLetterQueue.ts +733 -0
- package/src/HealthMonitor.ts +390 -0
- package/src/MultiQueueWorker.ts +431 -0
- package/src/NotificationWorker.ts +33 -0
- package/src/Observability.ts +696 -0
- package/src/PluginManager.ts +551 -0
- package/src/PriorityQueue.ts +351 -0
- package/src/ResourceMonitor.ts +769 -0
- package/src/SLAMonitor.ts +408 -0
- package/src/WorkerFactory.ts +2108 -0
- package/src/WorkerInit.ts +313 -0
- package/src/WorkerMetrics.ts +709 -0
- package/src/WorkerRegistry.ts +443 -0
- package/src/WorkerShutdown.ts +210 -0
- package/src/WorkerVersioning.ts +422 -0
- package/src/config/workerConfig.ts +25 -0
- package/src/createQueueWorker.ts +174 -0
- package/src/dashboard/index.ts +6 -0
- package/src/dashboard/types.ts +141 -0
- package/src/dashboard/workers-api.ts +785 -0
- package/src/dashboard/zintrust.svg +30 -0
- package/src/helper/index.ts +11 -0
- package/src/http/WorkerApiController.ts +369 -0
- package/src/http/WorkerController.ts +1512 -0
- package/src/http/middleware/CustomValidation.ts +360 -0
- package/src/http/middleware/DatacenterValidator.ts +124 -0
- package/src/http/middleware/EditWorkerValidation.ts +74 -0
- package/src/http/middleware/FeaturesValidator.ts +82 -0
- package/src/http/middleware/InfrastructureValidator.ts +295 -0
- package/src/http/middleware/OptionsValidator.ts +144 -0
- package/src/http/middleware/PayloadSanitizer.ts +52 -0
- package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
- package/src/http/middleware/QueueNameSanitizer.ts +55 -0
- package/src/http/middleware/ValidateDriver.ts +29 -0
- package/src/http/middleware/VersionSanitizer.ts +30 -0
- package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
- package/src/http/middleware/WorkerValidationChain.ts +230 -0
- package/src/index.ts +98 -0
- package/src/routes/workers.ts +154 -0
- package/src/storage/WorkerStore.ts +240 -0
- package/src/type.ts +89 -0
- package/src/types/queue-monitor.d.ts +38 -0
- package/src/types/queue-redis.d.ts +38 -0
- package/src/ui/README.md +13 -0
- package/src/ui/components/JsonEditor.js +670 -0
- package/src/ui/components/JsonViewer.js +387 -0
- package/src/ui/components/WorkerCard.js +178 -0
- package/src/ui/components/WorkerExpandPanel.js +257 -0
- package/src/ui/components/fetcher.js +42 -0
- package/src/ui/components/sla-scorecard.js +32 -0
- package/src/ui/components/styles.css +30 -0
- package/src/ui/components/table-expander.js +34 -0
- package/src/ui/integration/worker-ui-integration.js +565 -0
- package/src/ui/router/ui.ts +99 -0
- package/src/ui/services/workerApi.js +240 -0
- package/src/ui/types/worker-ui.ts +283 -0
- package/src/ui/utils/jsonValidator.js +444 -0
- package/src/ui/workers/index.html +202 -0
- package/src/ui/workers/main.js +1781 -0
- package/src/ui/workers/styles.css +1350 -0
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker Metrics Manager
|
|
3
|
+
* Time-series metrics persistence with Redis Sorted Sets
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
ErrorFactory,
|
|
9
|
+
Logger,
|
|
10
|
+
appConfig,
|
|
11
|
+
createRedisConnection,
|
|
12
|
+
type RedisConfig,
|
|
13
|
+
} from '@zintrust/core';
|
|
14
|
+
import type IORedis from 'ioredis';
|
|
15
|
+
|
|
16
|
+
// Application-level key prefix; the Redis key namespaces in this module are derived from it.
const { prefix: PREFIX } = appConfig;
|
|
17
|
+
|
|
18
|
+
/** Names of the measurements that can be recorded for a worker. */
export type MetricType =
  | 'processed'
  | 'errors'
  | 'duration'
  | 'memory'
  | 'cpu'
  | 'queue-size'
  | 'active-jobs'
  | 'waiting-jobs'
  | 'delayed-jobs'
  | 'failed-jobs'
  | 'completed-jobs';

/** Bucket sizes at which every metric series is stored. */
export type MetricGranularity = 'hourly' | 'daily' | 'monthly';

/** A single recorded sample. */
export type MetricPoint = {
  /** Time the sample is filed under. */
  timestamp: Date;
  /** Recorded measurement. */
  value: number;
  /** Optional free-form context supplied by the caller. */
  metadata?: Record<string, unknown>;
};

/** Result of a range query: the identifying triple plus the matching samples. */
export type MetricEntry = {
  workerName: string;
  metricType: MetricType;
  granularity: MetricGranularity;
  points: readonly MetricPoint[];
};

/** Parameters accepted by the query and aggregate operations. */
export type MetricQueryOptions = {
  workerName: string;
  metricType: MetricType;
  granularity: MetricGranularity;
  /** Inclusive lower bound; omitted means unbounded. */
  startDate?: Date;
  /** Inclusive upper bound; omitted means unbounded. */
  endDate?: Date;
  /** Maximum number of samples to read (the query methods default to 1000). */
  limit?: number;
};

/** Summary statistics computed over the samples of one series. */
export type AggregatedMetrics = {
  workerName: string;
  metricType: MetricType;
  period: {
    start: Date;
    end: Date;
  };
  total: number;
  average: number;
  min: number;
  max: number;
  count: number;
};

/** Point-in-time health assessment of a worker. */
export type WorkerHealthScore = {
  workerName: string;
  timestamp: Date;
  /** Overall score, 0-100. */
  score: number;
  /** Individual components that were combined into `score`. */
  factors: {
    errorRate: number;
    throughput: number;
    latency: number;
    resourceUsage: number;
  };
  status: 'healthy' | 'degraded' | 'unhealthy';
};
|
|
78
|
+
|
|
79
|
+
// Redis key namespaces: metric series and health-score history are kept apart.
const METRICS_PREFIX = `${PREFIX}:worker:metrics:`;
const HEALTH_PREFIX = `${PREFIX}:worker:health:`;

// How long samples are kept per granularity, in seconds (86400 s = 1 day).
const RETENTION = {
  hourly: 7 * 86400, // 7 days
  daily: 30 * 86400, // 30 days
  monthly: 365 * 86400, // 365 days
};

// Module state: the Redis client, set by initialize() and cleared by shutdown().
let redisClient: IORedis | null = null;
|
|
92
|
+
|
|
93
|
+
/**
 * Helper: Build the Redis key of one metric series.
 *
 * Layout: `<METRICS_PREFIX><workerName>:<metricType>:<granularity>`
 */
const getMetricsKey = (
  workerName: string,
  metricType: MetricType,
  granularity: MetricGranularity
): string => METRICS_PREFIX.concat(workerName, ':', metricType, ':', granularity);
|
|
103
|
+
|
|
104
|
+
/**
 * Helper: Build the Redis key under which a worker's health scores are stored.
 *
 * Layout: `<HEALTH_PREFIX><workerName>`
 */
const getHealthKey = (workerName: string): string => HEALTH_PREFIX.concat(workerName);
|
|
110
|
+
|
|
111
|
+
/**
 * Helper: Truncate a date DOWN to the start of its bucket (it never rounds up).
 *
 * - `hourly`  -> start of the UTC hour
 * - `daily`   -> 00:00:00.000 UTC of the same day
 * - `monthly` -> 00:00:00.000 UTC on the first day of the same month
 *
 * The input date is left untouched; a new Date is returned.
 */
const roundTimestamp = (date: Date, granularity: MetricGranularity): Date => {
  // Work on a copy so the caller's Date is never mutated.
  const bucketStart = new Date(date.getTime());

  switch (granularity) {
    case 'hourly':
      // Zero minutes, seconds and milliseconds: floors to the hour boundary.
      bucketStart.setUTCMinutes(0, 0, 0);
      return bucketStart;
    case 'daily':
      bucketStart.setUTCHours(0, 0, 0, 0);
      return bucketStart;
    case 'monthly':
      bucketStart.setUTCDate(1);
      bucketStart.setUTCHours(0, 0, 0, 0);
      return bucketStart;
  }
};
|
|
136
|
+
|
|
137
|
+
/**
 * Helper: Apply the retention policy to one metrics sorted set.
 *
 * Removes every member whose score is at or below "now minus retention",
 * then sets the key's expiry to twice the retention period.
 * Best effort: any failure is logged and swallowed, never thrown.
 */
const cleanupOldMetrics = async (
  client: IORedis,
  key: string,
  granularity: MetricGranularity
): Promise<void> => {
  const keepSeconds = RETENTION[granularity];

  try {
    // Scores are millisecond timestamps, so the cutoff is expressed in ms as well.
    const oldestAllowed = Date.now() - keepSeconds * 1000;
    await client.zremrangebyscore(key, '-inf', oldestAllowed);

    // Expiry of 2x retention so an abandoned key eventually disappears on its own.
    await client.expire(key, keepSeconds * 2);
  } catch (error) {
    Logger.error(`Failed to cleanup old metrics for key "${key}"`, error);
  }
};
|
|
158
|
+
|
|
159
|
+
/**
 * Helper: Calculate health score based on metrics
 *
 * Each input is converted to a factor in the range 0-100 (higher is better) and
 * the factors are combined with fixed weights: errors 40%, throughput 20%,
 * latency 20%, resource usage 20%.
 *
 * Every factor is clamped to 0-100, so out-of-range inputs (negative rates,
 * negative durations, ...) cannot push the score outside the 0-100 range.
 * A NaN input yields a factor of 0 (worst case) instead of poisoning the score.
 *
 * @param metrics.errorRate   Fraction of failed jobs (0.1 = 10%).
 * @param metrics.throughput  Job count; used directly as the factor, capped at 100.
 *   NOTE(review): the original comment described this as jobs/min, but the only
 *   caller in this file (calculateHealth) passes the number of jobs processed in
 *   the last hour - confirm the intended unit.
 * @param metrics.avgDuration Average job duration - presumably milliseconds
 *   (10000 maps to factor 0); confirm against the code recording 'duration'.
 * @param metrics.memoryUsage Memory usage in percent.
 * @param metrics.cpuUsage    CPU usage in percent.
 * @returns Rounded score, status ('healthy' >= 80, 'degraded' >= 50, else
 *   'unhealthy', decided on the unrounded score) and the rounded factors.
 */
const calculateHealthScore = (metrics: {
  errorRate: number;
  throughput: number;
  avgDuration: number;
  memoryUsage: number;
  cpuUsage: number;
}): {
  score: number;
  status: WorkerHealthScore['status'];
  factors: {
    errorRate: number;
    throughput: number;
    latency: number;
    resourceUsage: number;
  };
} => {
  // Clamp into 0-100; NaN (unknown input) is treated as the worst case.
  const toFactor = (raw: number): number =>
    Number.isNaN(raw) ? 0 : Math.min(100, Math.max(0, raw));

  // Error rate factor: 0% errors = 100, falling linearly to 0 at 10% or more.
  const errorRateFactor = toFactor(100 - metrics.errorRate * 1000);

  // Throughput factor: the job count itself, capped at 100.
  const throughputFactor = toFactor(metrics.throughput);

  // Latency factor: linear, 0 = 100 down to 0 at 10000 or more (so 1000 gives 90).
  const latencyFactor = toFactor(100 - (metrics.avgDuration / 10000) * 100);

  // Resource usage factor: mean of memory and CPU; <= 50% = 100, falling to 0 at 90%.
  const avgResourceUsage = (metrics.memoryUsage + metrics.cpuUsage) / 2;
  const resourceFactor = toFactor(100 - Math.max(0, avgResourceUsage - 50) * 2.5);

  // Weighted average: errors are most important
  const score =
    errorRateFactor * 0.4 + throughputFactor * 0.2 + latencyFactor * 0.2 + resourceFactor * 0.2;

  let status: WorkerHealthScore['status'];
  if (score >= 80) {
    status = 'healthy';
  } else if (score >= 50) {
    status = 'degraded';
  } else {
    status = 'unhealthy';
  }

  return {
    score: Math.round(score),
    status,
    factors: {
      errorRate: Math.round(errorRateFactor),
      throughput: Math.round(throughputFactor),
      latency: Math.round(latencyFactor),
      resourceUsage: Math.round(resourceFactor),
    },
  };
};
|
|
219
|
+
|
|
220
|
+
/**
 * Return the live Redis client, or throw the module's "not initialized" error.
 *
 * @param message Error text; individual methods keep their historical wording.
 */
const requireClient = (message = 'WorkerMetrics not initialized'): IORedis => {
  if (!redisClient) {
    throw ErrorFactory.createWorkerError(message);
  }
  return redisClient;
};

/**
 * Parse a stored JSON member and turn its `timestamp` back into a Date.
 * JSON has no date type, so a Date written with JSON.stringify comes back as an
 * ISO string; without this step the value would not match its declared type.
 */
const parseWithTimestamp = <T extends { timestamp: Date }>(raw: string): T => {
  const parsed = JSON.parse(raw) as T;
  return { ...parsed, timestamp: new Date(parsed.timestamp) };
};

/** Translate the optional date bounds of a query into sorted-set score bounds. */
const toScoreRange = (options: MetricQueryOptions): [number | string, number | string] => [
  options.startDate ? options.startDate.getTime() : '-inf',
  options.endDate ? options.endDate.getTime() : '+inf',
];

/** Aggregate returned when a series has no points (or could not be read). */
const emptyAggregate = (options: MetricQueryOptions): AggregatedMetrics => ({
  workerName: options.workerName,
  metricType: options.metricType,
  period: {
    start: options.startDate ?? new Date(0),
    end: options.endDate ?? new Date(),
  },
  total: 0,
  average: 0,
  min: 0,
  max: 0,
  count: 0,
});

/**
 * Compute total/average/min/max/count over a list of points.
 * Uses a single loop rather than `Math.min(...values)` so a very large
 * caller-supplied `limit` cannot overflow the argument list.
 */
const summarizePoints = (
  options: MetricQueryOptions,
  points: ReadonlyArray<MetricPoint>
): AggregatedMetrics => {
  const first = points[0];
  const last = points[points.length - 1];
  if (first === undefined || last === undefined) {
    return emptyAggregate(options);
  }

  let total = 0;
  let min = Infinity;
  let max = -Infinity;
  for (const { value } of points) {
    total += value;
    min = Math.min(min, value);
    max = Math.max(max, value);
  }

  return {
    workerName: options.workerName,
    metricType: options.metricType,
    period: { start: first.timestamp, end: last.timestamp },
    total,
    average: total / points.length,
    min,
    max,
    count: points.length,
  };
};

/** Escape the characters Redis treats specially in glob patterns. */
const escapeGlob = (literal: string): string => literal.replace(/[\\*?[\]]/g, '\\$&');

/**
 * Collect all keys matching a pattern with cursor-based SCAN.
 * Unlike KEYS, SCAN walks the keyspace incrementally instead of in one blocking call.
 * SCAN may report a key more than once, hence the Set.
 */
const scanKeys = async (client: IORedis, pattern: string): Promise<string[]> => {
  const found = new Set<string>();
  let cursor = '0';
  do {
    const [nextCursor, batch] = await client.scan(cursor, 'MATCH', pattern, 'COUNT', 200);
    for (const key of batch) {
      found.add(key);
    }
    cursor = nextCursor;
  } while (cursor !== '0');
  return [...found];
};

/**
 * Worker Metrics Manager - Sealed namespace
 *
 * All methods except `initialize` and `shutdown` throw a worker error when
 * called before `initialize()`.
 */
export const WorkerMetrics = Object.freeze({
  /**
   * Initialize the metrics manager with Redis connection.
   * A second call while a client exists only logs a warning.
   */
  initialize(config: RedisConfig): void {
    if (redisClient) {
      Logger.warn('WorkerMetrics already initialized');
      return;
    }

    redisClient = createRedisConnection(config);
    Logger.info('WorkerMetrics initialized');
  },

  /**
   * Record a metric point at all three granularities.
   *
   * The point is stored in a sorted set with the bucket start (see
   * roundTimestamp) as both its score and its `timestamp`.
   *
   * NOTE(review): sorted-set members are unique and the member is the JSON of
   * { bucket timestamp, value, metadata }. Recording the same value/metadata
   * twice within one bucket therefore leaves a single member. Confirm this is
   * intended before relying on `total` as a sum of all recorded values.
   *
   * @throws Worker error when not initialized; rejects if a Redis write fails.
   */
  async record(
    workerName: string,
    metricType: MetricType,
    value: number,
    metadata?: Record<string, unknown>
  ): Promise<void> {
    const client = requireClient('WorkerMetrics not initialized. Call initialize() first.');
    const now = new Date();
    const granularities: MetricGranularity[] = ['hourly', 'daily', 'monthly'];

    await Promise.all(
      granularities.map(async (granularity) => {
        const bucketStart = roundTimestamp(now, granularity);
        const key = getMetricsKey(workerName, metricType, granularity);
        const point: MetricPoint = { timestamp: bucketStart, value, metadata };

        await client.zadd(key, bucketStart.getTime(), JSON.stringify(point));

        // Retention cleanup is sampled (only when the current millisecond is a
        // multiple of 100) and skipped if the client was shut down meanwhile.
        if (redisClient === client && Date.now() % 100 === 0) {
          void cleanupOldMetrics(client, key, granularity).catch((err) => {
            Logger.error('Failed to cleanup old metrics', err);
          });
        }
      })
    );

    Logger.debug(`Recorded metric: ${workerName}/${metricType} = ${value}`);
  },

  /**
   * Record multiple metrics at once (batch operation).
   * Each entry is recorded independently and in parallel via `record`.
   */
  async recordBatch(
    workerName: string,
    metrics: Array<{ metricType: MetricType; value: number; metadata?: Record<string, unknown> }>
  ): Promise<void> {
    await Promise.all(
      metrics.map(async (m) => WorkerMetrics.record(workerName, m.metricType, m.value, m.metadata))
    );
  },

  /**
   * Query metrics for a time range (bounds inclusive, at most `limit` points,
   * default 1000). Returned points carry real Date timestamps.
   *
   * @throws Worker error when not initialized; Redis/parse errors are logged and rethrown.
   */
  async query(options: MetricQueryOptions): Promise<MetricEntry> {
    const client = requireClient();
    const { workerName, metricType, granularity, limit = 1000 } = options;
    const key = getMetricsKey(workerName, metricType, granularity);
    const [minScore, maxScore] = toScoreRange(options);

    try {
      const members = await client.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);

      return {
        workerName,
        metricType,
        granularity,
        points: members.map((member) => parseWithTimestamp<MetricPoint>(member)),
      };
    } catch (error) {
      Logger.error(`Error querying metrics for ${workerName}/${metricType}`, error);
      throw error;
    }
  },

  /**
   * Get aggregated metrics for a time range.
   * An empty series yields zeros with the requested period (epoch/now when unbounded).
   */
  async aggregate(options: MetricQueryOptions): Promise<AggregatedMetrics> {
    const entry = await WorkerMetrics.query(options);
    return summarizePoints(options, entry.points);
  },

  /**
   * Aggregate several series in one pipelined round trip.
   * A series whose command failed is logged and reported as an empty aggregate,
   * so one bad key does not fail the whole batch. Results keep the input order.
   */
  async aggregateBatch(optionsList: MetricQueryOptions[]): Promise<AggregatedMetrics[]> {
    const client = requireClient();
    if (optionsList.length === 0) return [];

    const pipeline = client.pipeline();
    for (const options of optionsList) {
      const key = getMetricsKey(options.workerName, options.metricType, options.granularity);
      const [minScore, maxScore] = toScoreRange(options);
      pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, options.limit ?? 1000);
    }

    const results = await pipeline.exec();
    if (!results) {
      throw ErrorFactory.createWorkerError('Failed to execute metrics pipeline');
    }

    return optionsList.map((options, index) => {
      const [err, data] = results[index];
      if (err) {
        Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
        return emptyAggregate(options);
      }

      const points = (data as string[]).map((member) => parseWithTimestamp<MetricPoint>(member));
      return summarizePoints(options, points);
    });
  },

  /**
   * Calculate and store health score.
   *
   * Reads the hourly series of the last 60 minutes, derives error rate,
   * throughput, average duration and resource usage, scores them with
   * calculateHealthScore and appends the result to the worker's health history
   * (entries older than 24 hours are removed; key expiry is 48 hours).
   *
   * @throws Worker error when not initialized; other errors are logged and rethrown.
   */
  async calculateHealth(workerName: string): Promise<WorkerHealthScore> {
    const client = requireClient();
    const now = new Date();
    const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);
    const lastHour = async (metricType: MetricType): Promise<AggregatedMetrics> =>
      WorkerMetrics.aggregate({
        workerName,
        metricType,
        granularity: 'hourly',
        startDate: oneHourAgo,
        endDate: now,
      });

    try {
      const [processed, errors, duration, memory, cpu] = await Promise.all([
        lastHour('processed'),
        lastHour('errors'),
        lastHour('duration'),
        lastHour('memory'),
        lastHour('cpu'),
      ]);

      const totalJobs = processed.total + errors.total;
      const healthData = calculateHealthScore({
        errorRate: totalJobs > 0 ? errors.total / totalJobs : 0,
        throughput: processed.total, // Jobs in last hour
        // `||` on purpose: it also maps NaN to 0, which `??` would not.
        avgDuration: duration.average || 0,
        memoryUsage: memory.average || 0,
        cpuUsage: cpu.average || 0,
      });

      const healthScore: WorkerHealthScore = {
        workerName,
        timestamp: now,
        score: healthData.score,
        factors: healthData.factors,
        status: healthData.status,
      };

      const key = getHealthKey(workerName);
      await client.zadd(key, now.getTime(), JSON.stringify(healthScore));

      // Keep only last 24 hours
      await client.zremrangebyscore(key, '-inf', now.getTime() - 24 * 60 * 60 * 1000);

      // Set expiry (48 hours)
      await client.expire(key, 48 * 60 * 60);

      Logger.debug(`Health score for ${workerName}: ${healthScore.score} (${healthScore.status})`);

      return healthScore;
    } catch (error) {
      Logger.error(`Error calculating health score for ${workerName}`, error);
      throw error;
    }
  },

  /**
   * Get recent health scores (oldest first) from the last `hours` hours.
   * Read errors are logged and reported as an empty list.
   */
  async getHealthHistory(
    workerName: string,
    hours = 24
  ): Promise<ReadonlyArray<WorkerHealthScore>> {
    const client = requireClient();

    try {
      const now = Date.now();
      const members = await client.zrangebyscore(
        getHealthKey(workerName),
        now - hours * 60 * 60 * 1000,
        now
      );

      return members.map((member) => parseWithTimestamp<WorkerHealthScore>(member));
    } catch (error) {
      Logger.error(`Error retrieving health history for ${workerName}`, error);
      return [];
    }
  },

  /**
   * Get latest health score, or null when none is stored or the read fails
   * (failures are logged).
   */
  async getLatestHealth(workerName: string): Promise<WorkerHealthScore | null> {
    const client = requireClient();

    try {
      // Highest score = most recent entry.
      const [latest] = await client.zrevrange(getHealthKey(workerName), 0, 0);
      return latest === undefined ? null : parseWithTimestamp<WorkerHealthScore>(latest);
    } catch (error) {
      Logger.error(`Error retrieving latest health for ${workerName}`, error);
      return null;
    }
  },

  /**
   * Get metrics summary for all workers.
   *
   * Workers are discovered from the stored health keys (via SCAN), so a worker
   * without any stored health score is not listed. Errors are logged and
   * reported as an empty list.
   */
  async getAllWorkersSummary(): Promise<
    ReadonlyArray<{
      workerName: string;
      health: WorkerHealthScore | null;
      metrics: {
        processed: number;
        errors: number;
        errorRate: number;
      };
    }>
  > {
    const client = requireClient();

    try {
      const keys = await scanKeys(client, `${HEALTH_PREFIX}*`);
      const workerNames = keys
        .filter((key) => key.startsWith(HEALTH_PREFIX))
        .map((key) => key.slice(HEALTH_PREFIX.length));

      return await Promise.all(
        workerNames.map(async (workerName) => {
          const now = new Date();
          const window = {
            workerName,
            granularity: 'hourly' as const,
            startDate: new Date(now.getTime() - 60 * 60 * 1000),
            endDate: now,
          };

          const [health, processed, errors] = await Promise.all([
            WorkerMetrics.getLatestHealth(workerName),
            WorkerMetrics.aggregate({ ...window, metricType: 'processed' }),
            WorkerMetrics.aggregate({ ...window, metricType: 'errors' }),
          ]);

          const totalJobs = processed.total + errors.total;

          return {
            workerName,
            health,
            metrics: {
              processed: processed.total,
              errors: errors.total,
              errorRate: totalJobs > 0 ? errors.total / totalJobs : 0,
            },
          };
        })
      );
    } catch (error) {
      Logger.error('Error retrieving all workers summary', error);
      return [];
    }
  },

  /**
   * Delete all metrics for a worker, including its health history.
   *
   * The worker name is escaped before it is used in the key pattern, so names
   * containing glob characters (`*`, `?`, `[`, ...) cannot match other workers' keys.
   *
   * @throws Worker error when not initialized; Redis errors are logged and rethrown.
   */
  async deleteWorkerMetrics(workerName: string): Promise<void> {
    const client = requireClient();

    try {
      const keys = await scanKeys(client, `${METRICS_PREFIX}${escapeGlob(workerName)}:*`);
      if (keys.length > 0) {
        await client.del(...keys);
      }

      // Also delete health scores
      await client.del(getHealthKey(workerName));

      Logger.info(`Deleted all metrics for worker "${workerName}"`);
    } catch (error) {
      Logger.error(`Error deleting metrics for worker "${workerName}"`, error);
      throw error;
    }
  },

  /**
   * Shutdown and disconnect.
   * The client reference is cleared even when `quit()` rejects, so a failed
   * shutdown never blocks a later `initialize()`; the rejection still propagates.
   */
  async shutdown(): Promise<void> {
    const client = redisClient;
    if (!client) {
      return;
    }

    Logger.info('WorkerMetrics shutting down...');

    try {
      await client.quit();
    } finally {
      redisClient = null;
    }

    Logger.info('WorkerMetrics shutdown complete');
  },
});
|
|
708
|
+
|
|
709
|
+
// Graceful shutdown handled by WorkerShutdown
|