@zintrust/workers 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/AnomalyDetection.d.ts +4 -0
- package/dist/AnomalyDetection.js +8 -0
- package/dist/BroadcastWorker.d.ts +2 -0
- package/dist/CanaryController.js +49 -5
- package/dist/ChaosEngineering.js +13 -0
- package/dist/ClusterLock.js +21 -10
- package/dist/DeadLetterQueue.js +12 -8
- package/dist/MultiQueueWorker.d.ts +1 -1
- package/dist/MultiQueueWorker.js +12 -7
- package/dist/NotificationWorker.d.ts +2 -0
- package/dist/PriorityQueue.d.ts +2 -2
- package/dist/PriorityQueue.js +20 -21
- package/dist/ResourceMonitor.js +65 -38
- package/dist/WorkerFactory.d.ts +23 -3
- package/dist/WorkerFactory.js +420 -40
- package/dist/WorkerInit.js +8 -3
- package/dist/WorkerMetrics.d.ts +2 -1
- package/dist/WorkerMetrics.js +152 -93
- package/dist/WorkerRegistry.d.ts +6 -0
- package/dist/WorkerRegistry.js +70 -1
- package/dist/WorkerShutdown.d.ts +21 -0
- package/dist/WorkerShutdown.js +82 -9
- package/dist/WorkerShutdownDurableObject.d.ts +12 -0
- package/dist/WorkerShutdownDurableObject.js +41 -0
- package/dist/build-manifest.json +171 -99
- package/dist/createQueueWorker.d.ts +2 -0
- package/dist/createQueueWorker.js +42 -27
- package/dist/dashboard/types.d.ts +5 -0
- package/dist/dashboard/workers-api.js +136 -43
- package/dist/http/WorkerApiController.js +1 -0
- package/dist/http/WorkerController.js +133 -85
- package/dist/http/WorkerMonitoringService.d.ts +11 -0
- package/dist/http/WorkerMonitoringService.js +62 -0
- package/dist/http/middleware/CustomValidation.js +1 -1
- package/dist/http/middleware/EditWorkerValidation.d.ts +1 -1
- package/dist/http/middleware/EditWorkerValidation.js +7 -6
- package/dist/http/middleware/ProcessorPathSanitizer.js +101 -35
- package/dist/http/middleware/WorkerValidationChain.js +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1 -0
- package/dist/routes/workers.js +48 -6
- package/dist/storage/WorkerStore.d.ts +4 -1
- package/dist/storage/WorkerStore.js +55 -7
- package/dist/telemetry/api/TelemetryAPI.d.ts +46 -0
- package/dist/telemetry/api/TelemetryAPI.js +219 -0
- package/dist/telemetry/api/TelemetryMonitoringService.d.ts +17 -0
- package/dist/telemetry/api/TelemetryMonitoringService.js +113 -0
- package/dist/telemetry/components/AlertPanel.d.ts +1 -0
- package/dist/telemetry/components/AlertPanel.js +13 -0
- package/dist/telemetry/components/CostTracking.d.ts +1 -0
- package/dist/telemetry/components/CostTracking.js +14 -0
- package/dist/telemetry/components/ResourceUsageChart.d.ts +1 -0
- package/dist/telemetry/components/ResourceUsageChart.js +11 -0
- package/dist/telemetry/components/WorkerHealthChart.d.ts +1 -0
- package/dist/telemetry/components/WorkerHealthChart.js +11 -0
- package/dist/telemetry/index.d.ts +15 -0
- package/dist/telemetry/index.js +60 -0
- package/dist/telemetry/routes/dashboard.d.ts +6 -0
- package/dist/telemetry/routes/dashboard.js +608 -0
- package/dist/ui/router/EmbeddedAssets.d.ts +4 -0
- package/dist/ui/router/EmbeddedAssets.js +13 -0
- package/dist/ui/router/ui.js +100 -4
- package/package.json +10 -6
- package/src/AnomalyDetection.ts +9 -0
- package/src/CanaryController.ts +41 -5
- package/src/ChaosEngineering.ts +14 -0
- package/src/ClusterLock.ts +22 -9
- package/src/DeadLetterQueue.ts +13 -8
- package/src/MultiQueueWorker.ts +15 -8
- package/src/PriorityQueue.ts +21 -22
- package/src/ResourceMonitor.ts +72 -40
- package/src/WorkerFactory.ts +545 -49
- package/src/WorkerInit.ts +8 -3
- package/src/WorkerMetrics.ts +183 -105
- package/src/WorkerRegistry.ts +80 -1
- package/src/WorkerShutdown.ts +115 -9
- package/src/WorkerShutdownDurableObject.ts +64 -0
- package/src/createQueueWorker.ts +73 -30
- package/src/dashboard/types.ts +5 -0
- package/src/dashboard/workers-api.ts +165 -52
- package/src/http/WorkerApiController.ts +1 -0
- package/src/http/WorkerController.ts +167 -90
- package/src/http/WorkerMonitoringService.ts +77 -0
- package/src/http/middleware/CustomValidation.ts +1 -1
- package/src/http/middleware/EditWorkerValidation.ts +7 -6
- package/src/http/middleware/ProcessorPathSanitizer.ts +123 -36
- package/src/http/middleware/WorkerValidationChain.ts +1 -0
- package/src/index.ts +6 -1
- package/src/routes/workers.ts +66 -9
- package/src/storage/WorkerStore.ts +59 -9
- package/src/telemetry/api/TelemetryAPI.ts +292 -0
- package/src/telemetry/api/TelemetryMonitoringService.ts +149 -0
- package/src/telemetry/components/AlertPanel.ts +13 -0
- package/src/telemetry/components/CostTracking.ts +14 -0
- package/src/telemetry/components/ResourceUsageChart.ts +11 -0
- package/src/telemetry/components/WorkerHealthChart.ts +11 -0
- package/src/telemetry/index.ts +121 -0
- package/src/telemetry/public/assets/zintrust-logo.svg +15 -0
- package/src/telemetry/routes/dashboard.ts +638 -0
- package/src/telemetry/styles/tailwind.css +1 -0
- package/src/telemetry/styles/zintrust-theme.css +8 -0
- package/src/ui/router/EmbeddedAssets.ts +13 -0
- package/src/ui/router/ui.ts +112 -5
- package/src/ui/workers/index.html +2 -2
- package/src/ui/workers/main.js +232 -61
- package/src/ui/workers/zintrust.svg +30 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +0 -3
- package/dist/dashboard/workers-dashboard-ui.js +0 -1026
- package/dist/dashboard/workers-dashboard.d.ts +0 -4
- package/dist/dashboard/workers-dashboard.js +0 -904
package/dist/WorkerMetrics.d.ts
CHANGED
|
@@ -53,7 +53,7 @@ export type WorkerHealthScore = {
|
|
|
53
53
|
/**
|
|
54
54
|
* Worker Metrics Manager - Sealed namespace
|
|
55
55
|
*/
|
|
56
|
-
export declare const WorkerMetrics: Readonly<{
|
|
56
|
+
declare const WorkerMetrics: Readonly<{
|
|
57
57
|
/**
|
|
58
58
|
* Initialize the metrics manager with Redis connection
|
|
59
59
|
*/
|
|
@@ -112,3 +112,4 @@ export declare const WorkerMetrics: Readonly<{
|
|
|
112
112
|
*/
|
|
113
113
|
shutdown(): Promise<void>;
|
|
114
114
|
}>;
|
|
115
|
+
export { WorkerMetrics };
|
package/dist/WorkerMetrics.js
CHANGED
|
@@ -10,8 +10,31 @@ const RETENTION = {
|
|
|
10
10
|
daily: 30 * 24 * 60 * 60, // 30 days
|
|
11
11
|
monthly: 365 * 24 * 60 * 60, // 1 year
|
|
12
12
|
};
|
|
13
|
+
const runInBatches = async (items, handler, batchSize = 10) => {
|
|
14
|
+
for (let i = 0; i < items.length; i += batchSize) {
|
|
15
|
+
const batch = items.slice(i, i + batchSize);
|
|
16
|
+
// Batch processing is intentionally sequential to avoid overwhelming the system
|
|
17
|
+
// eslint-disable-next-line no-await-in-loop
|
|
18
|
+
await Promise.all(batch.map((item) => handler(item)));
|
|
19
|
+
}
|
|
20
|
+
};
|
|
13
21
|
// Internal state
|
|
14
22
|
let redisClient = null;
|
|
23
|
+
let cachedConfig = null;
|
|
24
|
+
let keepLoggin = 0;
|
|
25
|
+
/**
|
|
26
|
+
* Helper: Get valid Redis client
|
|
27
|
+
*/
|
|
28
|
+
const getValidClient = async () => {
|
|
29
|
+
if (!cachedConfig) {
|
|
30
|
+
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized. Call initialize() first.');
|
|
31
|
+
}
|
|
32
|
+
// If no client, create one
|
|
33
|
+
if (!redisClient) {
|
|
34
|
+
redisClient = createRedisConnection(cachedConfig);
|
|
35
|
+
}
|
|
36
|
+
return redisClient;
|
|
37
|
+
};
|
|
15
38
|
/**
|
|
16
39
|
* Helper: Get Redis key for metrics
|
|
17
40
|
* Uses singleton RedisKeys for consistent key management
|
|
@@ -106,10 +129,84 @@ const calculateHealthScore = (metrics) => {
|
|
|
106
129
|
},
|
|
107
130
|
};
|
|
108
131
|
};
|
|
132
|
+
/**
|
|
133
|
+
* Helper: Create empty metrics result for error cases
|
|
134
|
+
*/
|
|
135
|
+
const createEmptyMetrics = (options, defaultStartDate) => ({
|
|
136
|
+
workerName: options.workerName,
|
|
137
|
+
metricType: options.metricType,
|
|
138
|
+
period: {
|
|
139
|
+
start: options.startDate ?? defaultStartDate ?? new Date(),
|
|
140
|
+
end: options.endDate ?? new Date(),
|
|
141
|
+
},
|
|
142
|
+
total: 0,
|
|
143
|
+
average: 0,
|
|
144
|
+
min: 0,
|
|
145
|
+
max: 0,
|
|
146
|
+
count: 0,
|
|
147
|
+
});
|
|
148
|
+
/**
|
|
149
|
+
* Helper: Handle uninitialized Redis client
|
|
150
|
+
*/
|
|
151
|
+
const handleUninitializedMetrics = (optionsList) => {
|
|
152
|
+
if (keepLoggin === 0) {
|
|
153
|
+
keepLoggin = 1;
|
|
154
|
+
Logger.warn(`[METRICS] WorkerMetrics not initialized globally. Make sure all workers running`);
|
|
155
|
+
}
|
|
156
|
+
return optionsList.map((options) => createEmptyMetrics(options));
|
|
157
|
+
};
|
|
158
|
+
/**
|
|
159
|
+
* Helper: Build Redis pipeline for batch metrics query
|
|
160
|
+
*/
|
|
161
|
+
const buildMetricsPipeline = (client, optionsList) => {
|
|
162
|
+
const pipeline = client.pipeline();
|
|
163
|
+
for (const options of optionsList) {
|
|
164
|
+
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
165
|
+
const key = getMetricsKey(workerName, metricType, granularity);
|
|
166
|
+
const minScore = startDate ? startDate.getTime() : '-inf';
|
|
167
|
+
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
168
|
+
pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
169
|
+
}
|
|
170
|
+
return pipeline;
|
|
171
|
+
};
|
|
172
|
+
/**
|
|
173
|
+
* Helper: Process batch results and calculate aggregations
|
|
174
|
+
*/
|
|
175
|
+
const processBatchResults = (optionsList, results) => {
|
|
176
|
+
return optionsList.map((options, index) => {
|
|
177
|
+
const [err, data] = results[index];
|
|
178
|
+
if (err) {
|
|
179
|
+
Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
|
|
180
|
+
return createEmptyMetrics(options);
|
|
181
|
+
}
|
|
182
|
+
const points = data.map((d) => JSON.parse(d));
|
|
183
|
+
if (points.length === 0) {
|
|
184
|
+
return createEmptyMetrics(options, new Date(0));
|
|
185
|
+
}
|
|
186
|
+
const values = points.map((p) => p.value);
|
|
187
|
+
const total = values.reduce((sum, val) => sum + val, 0);
|
|
188
|
+
const average = total / values.length;
|
|
189
|
+
const min = Math.min(...values);
|
|
190
|
+
const max = Math.max(...values);
|
|
191
|
+
return {
|
|
192
|
+
workerName: options.workerName,
|
|
193
|
+
metricType: options.metricType,
|
|
194
|
+
period: {
|
|
195
|
+
start: points[0].timestamp,
|
|
196
|
+
end: points.at(-1)?.timestamp ?? new Date(),
|
|
197
|
+
},
|
|
198
|
+
total,
|
|
199
|
+
average,
|
|
200
|
+
min,
|
|
201
|
+
max,
|
|
202
|
+
count: values.length,
|
|
203
|
+
};
|
|
204
|
+
});
|
|
205
|
+
};
|
|
109
206
|
/**
|
|
110
207
|
* Worker Metrics Manager - Sealed namespace
|
|
111
208
|
*/
|
|
112
|
-
export const WorkerMetrics = Object.freeze({
|
|
209
|
+
const WorkerMetrics = Object.freeze({
|
|
113
210
|
/**
|
|
114
211
|
* Initialize the metrics manager with Redis connection
|
|
115
212
|
*/
|
|
@@ -118,6 +215,7 @@ export const WorkerMetrics = Object.freeze({
|
|
|
118
215
|
Logger.warn('WorkerMetrics already initialized');
|
|
119
216
|
return;
|
|
120
217
|
}
|
|
218
|
+
cachedConfig = config;
|
|
121
219
|
redisClient = createRedisConnection(config);
|
|
122
220
|
Logger.info('WorkerMetrics initialized');
|
|
123
221
|
},
|
|
@@ -125,9 +223,7 @@ export const WorkerMetrics = Object.freeze({
|
|
|
125
223
|
* Record a metric point
|
|
126
224
|
*/
|
|
127
225
|
async record(workerName, metricType, value, metadata) {
|
|
128
|
-
|
|
129
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized. Call initialize() first.');
|
|
130
|
-
}
|
|
226
|
+
const client = await getValidClient();
|
|
131
227
|
const now = new Date();
|
|
132
228
|
// Record at all granularities
|
|
133
229
|
const granularities = ['hourly', 'daily', 'monthly'];
|
|
@@ -142,10 +238,9 @@ export const WorkerMetrics = Object.freeze({
|
|
|
142
238
|
// Store in sorted set with timestamp as score
|
|
143
239
|
const score = roundedTimestamp.getTime();
|
|
144
240
|
const data = JSON.stringify(point);
|
|
145
|
-
await
|
|
241
|
+
await client.zadd(key, score, data);
|
|
146
242
|
// Cleanup old metrics (lightweight: ~1% based on time slice)
|
|
147
|
-
|
|
148
|
-
if (client && Date.now() % 100 === 0) {
|
|
243
|
+
if (Date.now() % 100 === 0) {
|
|
149
244
|
cleanupOldMetrics(client, key, granularity).catch((err) => {
|
|
150
245
|
Logger.error('Failed to cleanup old metrics', err);
|
|
151
246
|
});
|
|
@@ -157,22 +252,31 @@ export const WorkerMetrics = Object.freeze({
|
|
|
157
252
|
* Record multiple metrics at once (batch operation)
|
|
158
253
|
*/
|
|
159
254
|
async recordBatch(workerName, metrics) {
|
|
160
|
-
await
|
|
255
|
+
await runInBatches(metrics, async (m) => {
|
|
256
|
+
await WorkerMetrics.record(workerName, m.metricType, m.value, m.metadata);
|
|
257
|
+
});
|
|
161
258
|
},
|
|
162
259
|
/**
|
|
163
260
|
* Query metrics for a time range
|
|
164
261
|
*/
|
|
165
262
|
async query(options) {
|
|
166
|
-
if (!
|
|
167
|
-
|
|
263
|
+
if (!cachedConfig) {
|
|
264
|
+
Logger.warn(`[METRICS] WorkerMetrics not initialized for worker: ${options.workerName}. Please start the worker first to enable metrics collection.`);
|
|
265
|
+
return {
|
|
266
|
+
workerName: options.workerName,
|
|
267
|
+
metricType: options.metricType,
|
|
268
|
+
granularity: options.granularity,
|
|
269
|
+
points: [],
|
|
270
|
+
};
|
|
168
271
|
}
|
|
169
272
|
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
170
273
|
const key = getMetricsKey(workerName, metricType, granularity);
|
|
171
274
|
const minScore = startDate ? startDate.getTime() : '-inf';
|
|
172
275
|
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
173
276
|
try {
|
|
277
|
+
const client = await getValidClient();
|
|
174
278
|
// Get data from sorted set
|
|
175
|
-
const results = await
|
|
279
|
+
const results = await client.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
176
280
|
const points = results.map((data) => JSON.parse(data));
|
|
177
281
|
return {
|
|
178
282
|
workerName,
|
|
@@ -226,78 +330,24 @@ export const WorkerMetrics = Object.freeze({
|
|
|
226
330
|
};
|
|
227
331
|
},
|
|
228
332
|
async aggregateBatch(optionsList) {
|
|
229
|
-
if (!
|
|
230
|
-
|
|
333
|
+
if (!cachedConfig) {
|
|
334
|
+
return handleUninitializedMetrics(optionsList);
|
|
231
335
|
}
|
|
232
336
|
if (optionsList.length === 0)
|
|
233
337
|
return [];
|
|
234
|
-
const
|
|
235
|
-
|
|
236
|
-
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
237
|
-
const key = getMetricsKey(workerName, metricType, granularity);
|
|
238
|
-
const minScore = startDate ? startDate.getTime() : '-inf';
|
|
239
|
-
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
240
|
-
pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
241
|
-
}
|
|
338
|
+
const client = await getValidClient();
|
|
339
|
+
const pipeline = buildMetricsPipeline(client, optionsList);
|
|
242
340
|
const results = await pipeline.exec();
|
|
243
341
|
if (!results) {
|
|
244
342
|
throw ErrorFactory.createWorkerError('Failed to execute metrics pipeline');
|
|
245
343
|
}
|
|
246
|
-
return optionsList
|
|
247
|
-
const [err, data] = results[index];
|
|
248
|
-
if (err) {
|
|
249
|
-
Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
|
|
250
|
-
return {
|
|
251
|
-
workerName: options.workerName,
|
|
252
|
-
metricType: options.metricType,
|
|
253
|
-
period: { start: options.startDate ?? new Date(), end: options.endDate ?? new Date() },
|
|
254
|
-
total: 0,
|
|
255
|
-
average: 0,
|
|
256
|
-
min: 0,
|
|
257
|
-
max: 0,
|
|
258
|
-
count: 0,
|
|
259
|
-
};
|
|
260
|
-
}
|
|
261
|
-
const points = data.map((d) => JSON.parse(d));
|
|
262
|
-
if (points.length === 0) {
|
|
263
|
-
return {
|
|
264
|
-
workerName: options.workerName,
|
|
265
|
-
metricType: options.metricType,
|
|
266
|
-
period: { start: options.startDate ?? new Date(0), end: options.endDate ?? new Date() },
|
|
267
|
-
total: 0,
|
|
268
|
-
average: 0,
|
|
269
|
-
min: 0,
|
|
270
|
-
max: 0,
|
|
271
|
-
count: 0,
|
|
272
|
-
};
|
|
273
|
-
}
|
|
274
|
-
const values = points.map((p) => p.value);
|
|
275
|
-
const total = values.reduce((sum, val) => sum + val, 0);
|
|
276
|
-
const average = total / values.length;
|
|
277
|
-
const min = Math.min(...values);
|
|
278
|
-
const max = Math.max(...values);
|
|
279
|
-
return {
|
|
280
|
-
workerName: options.workerName,
|
|
281
|
-
metricType: options.metricType,
|
|
282
|
-
period: {
|
|
283
|
-
start: points[0].timestamp,
|
|
284
|
-
end: points.at(-1)?.timestamp ?? new Date(),
|
|
285
|
-
},
|
|
286
|
-
total,
|
|
287
|
-
average,
|
|
288
|
-
min,
|
|
289
|
-
max,
|
|
290
|
-
count: values.length,
|
|
291
|
-
};
|
|
292
|
-
});
|
|
344
|
+
return processBatchResults(optionsList, results);
|
|
293
345
|
},
|
|
294
346
|
/**
|
|
295
347
|
* Calculate and store health score
|
|
296
348
|
*/
|
|
297
349
|
async calculateHealth(workerName) {
|
|
298
|
-
if (!redisClient) {
|
|
299
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
300
|
-
}
|
|
350
|
+
const client = await getValidClient();
|
|
301
351
|
const now = new Date();
|
|
302
352
|
const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);
|
|
303
353
|
try {
|
|
@@ -363,12 +413,12 @@ export const WorkerMetrics = Object.freeze({
|
|
|
363
413
|
const key = getHealthKey(workerName);
|
|
364
414
|
const score = now.getTime();
|
|
365
415
|
const data = JSON.stringify(healthScore);
|
|
366
|
-
await
|
|
416
|
+
await client.zadd(key, score, data);
|
|
367
417
|
// Keep only last 24 hours
|
|
368
418
|
const cutoff = now.getTime() - 24 * 60 * 60 * 1000;
|
|
369
|
-
await
|
|
419
|
+
await client.zremrangebyscore(key, '-inf', cutoff);
|
|
370
420
|
// Set expiry (48 hours)
|
|
371
|
-
await
|
|
421
|
+
await client.expire(key, 48 * 60 * 60);
|
|
372
422
|
Logger.debug(`Health score for ${workerName}: ${healthScore.score} (${healthScore.status})`);
|
|
373
423
|
return healthScore;
|
|
374
424
|
}
|
|
@@ -381,14 +431,12 @@ export const WorkerMetrics = Object.freeze({
|
|
|
381
431
|
* Get recent health scores
|
|
382
432
|
*/
|
|
383
433
|
async getHealthHistory(workerName, hours = 24) {
|
|
384
|
-
if (!redisClient) {
|
|
385
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
386
|
-
}
|
|
387
434
|
try {
|
|
435
|
+
const client = await getValidClient();
|
|
388
436
|
const key = getHealthKey(workerName);
|
|
389
437
|
const now = Date.now();
|
|
390
438
|
const startTime = now - hours * 60 * 60 * 1000;
|
|
391
|
-
const results = await
|
|
439
|
+
const results = await client.zrangebyscore(key, startTime, now);
|
|
392
440
|
return results.map((data) => JSON.parse(data));
|
|
393
441
|
}
|
|
394
442
|
catch (error) {
|
|
@@ -400,13 +448,11 @@ export const WorkerMetrics = Object.freeze({
|
|
|
400
448
|
* Get latest health score
|
|
401
449
|
*/
|
|
402
450
|
async getLatestHealth(workerName) {
|
|
403
|
-
if (!redisClient) {
|
|
404
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
405
|
-
}
|
|
406
451
|
try {
|
|
452
|
+
const client = await getValidClient();
|
|
407
453
|
const key = getHealthKey(workerName);
|
|
408
454
|
// Get the most recent entry
|
|
409
|
-
const results = await
|
|
455
|
+
const results = await client.zrevrange(key, 0, 0);
|
|
410
456
|
if (results.length === 0) {
|
|
411
457
|
return null;
|
|
412
458
|
}
|
|
@@ -421,13 +467,11 @@ export const WorkerMetrics = Object.freeze({
|
|
|
421
467
|
* Get metrics summary for all workers
|
|
422
468
|
*/
|
|
423
469
|
async getAllWorkersSummary() {
|
|
424
|
-
if (!redisClient) {
|
|
425
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
426
|
-
}
|
|
427
470
|
try {
|
|
471
|
+
const client = await getValidClient();
|
|
428
472
|
// Find all unique worker names from health keys
|
|
429
473
|
const pattern = `${RedisKeys.healthPrefix}*`;
|
|
430
|
-
const keys = await
|
|
474
|
+
const keys = await client.keys(pattern);
|
|
431
475
|
const workerNames = keys.map((key) => key.replace(RedisKeys.healthPrefix, ''));
|
|
432
476
|
const summaries = await Promise.all(workerNames.map(async (workerName) => {
|
|
433
477
|
const now = new Date();
|
|
@@ -472,18 +516,16 @@ export const WorkerMetrics = Object.freeze({
|
|
|
472
516
|
* Delete all metrics for a worker
|
|
473
517
|
*/
|
|
474
518
|
async deleteWorkerMetrics(workerName) {
|
|
475
|
-
if (!redisClient) {
|
|
476
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
477
|
-
}
|
|
519
|
+
const client = await getValidClient();
|
|
478
520
|
try {
|
|
479
521
|
const pattern = `${RedisKeys.metricsPrefix}${workerName}:*`;
|
|
480
|
-
const keys = await
|
|
522
|
+
const keys = await client.keys(pattern);
|
|
481
523
|
if (keys.length > 0) {
|
|
482
|
-
await
|
|
524
|
+
await client.del(...keys);
|
|
483
525
|
}
|
|
484
526
|
// Also delete health scores
|
|
485
527
|
const healthKey = getHealthKey(workerName);
|
|
486
|
-
await
|
|
528
|
+
await client.del(healthKey);
|
|
487
529
|
Logger.info(`Deleted all metrics for worker "${workerName}"`);
|
|
488
530
|
}
|
|
489
531
|
catch (error) {
|
|
@@ -499,9 +541,26 @@ export const WorkerMetrics = Object.freeze({
|
|
|
499
541
|
return;
|
|
500
542
|
}
|
|
501
543
|
Logger.info('WorkerMetrics shutting down...');
|
|
502
|
-
|
|
544
|
+
// Detach client immediately to allow re-initialization
|
|
545
|
+
const client = redisClient;
|
|
503
546
|
redisClient = null;
|
|
547
|
+
try {
|
|
548
|
+
// Attempt graceful quit
|
|
549
|
+
await client.quit();
|
|
550
|
+
}
|
|
551
|
+
catch (error) {
|
|
552
|
+
// If graceful quit fails, force disconnect
|
|
553
|
+
Logger.warn('WorkerMetrics graceful shutdown failed, forcing disconnect', error);
|
|
554
|
+
try {
|
|
555
|
+
client.disconnect();
|
|
556
|
+
}
|
|
557
|
+
catch (disconnectError) {
|
|
558
|
+
Logger.error('WorkerMetrics forced disconnect failed', disconnectError);
|
|
559
|
+
// Ignore disconnect errors
|
|
560
|
+
}
|
|
561
|
+
}
|
|
504
562
|
Logger.info('WorkerMetrics shutdown complete');
|
|
505
563
|
},
|
|
506
564
|
});
|
|
565
|
+
export { WorkerMetrics };
|
|
507
566
|
// Graceful shutdown handled by WorkerShutdown
|
package/dist/WorkerRegistry.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export type WorkerMetadata = {
|
|
|
11
11
|
region: string;
|
|
12
12
|
queueName: string;
|
|
13
13
|
concurrency: number;
|
|
14
|
+
activeStatus?: boolean;
|
|
14
15
|
startedAt: Date | null;
|
|
15
16
|
stoppedAt: Date | null;
|
|
16
17
|
lastProcessedAt: Date | null;
|
|
@@ -42,6 +43,7 @@ export type WorkerInstance = {
|
|
|
42
43
|
export type RegisterWorkerOptions = {
|
|
43
44
|
name: string;
|
|
44
45
|
config: Partial<WorkerConfig>;
|
|
46
|
+
activeStatus?: boolean;
|
|
45
47
|
version?: string;
|
|
46
48
|
region?: string;
|
|
47
49
|
queues?: ReadonlyArray<string>;
|
|
@@ -99,6 +101,10 @@ export declare const WorkerRegistry: Readonly<{
|
|
|
99
101
|
* List all registered workers
|
|
100
102
|
*/
|
|
101
103
|
list(): ReadonlyArray<string>;
|
|
104
|
+
/**
|
|
105
|
+
* Update active status for a registered worker
|
|
106
|
+
*/
|
|
107
|
+
setActiveStatus(name: string, activeStatus: boolean): void;
|
|
102
108
|
/**
|
|
103
109
|
* List all running workers
|
|
104
110
|
*/
|
package/dist/WorkerRegistry.js
CHANGED
|
@@ -4,9 +4,52 @@
|
|
|
4
4
|
* Sealed namespace for immutability
|
|
5
5
|
*/
|
|
6
6
|
import { ErrorFactory, Logger } from '@zintrust/core';
|
|
7
|
+
import { AnomalyDetection } from './AnomalyDetection';
|
|
7
8
|
// Internal storage
|
|
8
9
|
const workers = new Map();
|
|
9
10
|
const registrations = new Map();
|
|
11
|
+
// Cleanup configuration
|
|
12
|
+
const STOPPED_WORKER_CLEANUP_DELAY = 5 * 60 * 1000; // 5 minutes
|
|
13
|
+
const cleanupTimers = new Map();
|
|
14
|
+
/**
|
|
15
|
+
* Helper: Schedule cleanup of stopped worker
|
|
16
|
+
*/
|
|
17
|
+
const scheduleStoppedWorkerCleanup = (name) => {
|
|
18
|
+
// Clear existing timer if any
|
|
19
|
+
const existingTimer = cleanupTimers.get(name);
|
|
20
|
+
if (existingTimer) {
|
|
21
|
+
clearTimeout(existingTimer);
|
|
22
|
+
}
|
|
23
|
+
// Schedule new cleanup with proper cleanup handling
|
|
24
|
+
// eslint-disable-next-line no-restricted-syntax
|
|
25
|
+
const timer = setTimeout(() => {
|
|
26
|
+
try {
|
|
27
|
+
const instance = workers.get(name);
|
|
28
|
+
if (instance && instance.metadata.status === 'stopped') {
|
|
29
|
+
Logger.info(`Auto-cleaning up stopped worker: ${name}`);
|
|
30
|
+
workers.delete(name);
|
|
31
|
+
registrations.delete(name);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
catch (error) {
|
|
35
|
+
Logger.error(`Error during auto-cleanup of worker ${name}`, error);
|
|
36
|
+
}
|
|
37
|
+
finally {
|
|
38
|
+
cleanupTimers.delete(name);
|
|
39
|
+
}
|
|
40
|
+
}, STOPPED_WORKER_CLEANUP_DELAY);
|
|
41
|
+
cleanupTimers.set(name, timer);
|
|
42
|
+
};
|
|
43
|
+
/**
|
|
44
|
+
* Helper: Cancel cleanup timer
|
|
45
|
+
*/
|
|
46
|
+
const cancelCleanupTimer = (name) => {
|
|
47
|
+
const timer = cleanupTimers.get(name);
|
|
48
|
+
if (timer) {
|
|
49
|
+
clearTimeout(timer);
|
|
50
|
+
cleanupTimers.delete(name);
|
|
51
|
+
}
|
|
52
|
+
};
|
|
10
53
|
/**
|
|
11
54
|
* Helper: Calculate uptime in seconds
|
|
12
55
|
*/
|
|
@@ -51,6 +94,9 @@ export const WorkerRegistry = Object.freeze({
|
|
|
51
94
|
if (!registration) {
|
|
52
95
|
throw ErrorFactory.createWorkerError(`Worker "${name}" is not registered`);
|
|
53
96
|
}
|
|
97
|
+
if (registration.activeStatus === false) {
|
|
98
|
+
throw ErrorFactory.createWorkerError(`Worker "${name}" is inactive`);
|
|
99
|
+
}
|
|
54
100
|
if (workers.has(name)) {
|
|
55
101
|
const existing = workers.get(name);
|
|
56
102
|
if (existing?.metadata.status === 'running') {
|
|
@@ -64,6 +110,8 @@ export const WorkerRegistry = Object.freeze({
|
|
|
64
110
|
const instance = await registration.factory();
|
|
65
111
|
instance.metadata.status = 'starting';
|
|
66
112
|
instance.metadata.version = version ?? '1.0.0';
|
|
113
|
+
// Cancel any pending cleanup timer when worker restarts
|
|
114
|
+
cancelCleanupTimer(name);
|
|
67
115
|
workers.set(name, instance);
|
|
68
116
|
instance.start();
|
|
69
117
|
instance.metadata.status = 'running';
|
|
@@ -96,6 +144,9 @@ export const WorkerRegistry = Object.freeze({
|
|
|
96
144
|
await instance.stop();
|
|
97
145
|
instance.metadata.status = 'stopped';
|
|
98
146
|
instance.metadata.stoppedAt = new Date();
|
|
147
|
+
AnomalyDetection.cleanup(name);
|
|
148
|
+
// Schedule automatic cleanup for stopped worker
|
|
149
|
+
scheduleStoppedWorkerCleanup(name);
|
|
99
150
|
Logger.info(`Worker "${name}" stopped successfully`);
|
|
100
151
|
}
|
|
101
152
|
catch (error) {
|
|
@@ -179,7 +230,22 @@ export const WorkerRegistry = Object.freeze({
|
|
|
179
230
|
* List all registered workers
|
|
180
231
|
*/
|
|
181
232
|
list() {
|
|
182
|
-
|
|
233
|
+
const names = [];
|
|
234
|
+
for (const [name, registration] of registrations.entries()) {
|
|
235
|
+
if (registration.activeStatus === false)
|
|
236
|
+
continue;
|
|
237
|
+
names.push(name);
|
|
238
|
+
}
|
|
239
|
+
return names;
|
|
240
|
+
},
|
|
241
|
+
/**
|
|
242
|
+
* Update active status for a registered worker
|
|
243
|
+
*/
|
|
244
|
+
setActiveStatus(name, activeStatus) {
|
|
245
|
+
const registration = registrations.get(name);
|
|
246
|
+
if (!registration)
|
|
247
|
+
return;
|
|
248
|
+
registrations.set(name, { ...registration, activeStatus });
|
|
183
249
|
},
|
|
184
250
|
/**
|
|
185
251
|
* List all running workers
|
|
@@ -293,8 +359,11 @@ export const WorkerRegistry = Object.freeze({
|
|
|
293
359
|
if (instance?.metadata.status === 'running') {
|
|
294
360
|
Logger.warn(`Worker "${name}" is still running during unregister`);
|
|
295
361
|
}
|
|
362
|
+
// Cancel any pending cleanup timer
|
|
363
|
+
cancelCleanupTimer(name);
|
|
296
364
|
workers.delete(name);
|
|
297
365
|
registrations.delete(name);
|
|
366
|
+
AnomalyDetection.cleanup(name);
|
|
298
367
|
Logger.info(`Worker "${name}" unregistered`);
|
|
299
368
|
},
|
|
300
369
|
/**
|
package/dist/WorkerShutdown.d.ts
CHANGED
|
@@ -24,6 +24,11 @@ interface IShutdownState {
|
|
|
24
24
|
startedAt: Date | null;
|
|
25
25
|
reason: string | null;
|
|
26
26
|
}
|
|
27
|
+
type DurableShutdownState = {
|
|
28
|
+
shuttingDown: boolean;
|
|
29
|
+
startedAt?: string;
|
|
30
|
+
reason?: string;
|
|
31
|
+
};
|
|
27
32
|
/**
|
|
28
33
|
* Perform graceful shutdown of all worker modules
|
|
29
34
|
*/
|
|
@@ -32,6 +37,10 @@ declare function shutdown(options?: IShutdownOptions): Promise<void>;
|
|
|
32
37
|
* Register process signal handlers for graceful shutdown
|
|
33
38
|
*/
|
|
34
39
|
declare function registerShutdownHandlers(): void;
|
|
40
|
+
/**
|
|
41
|
+
* Unregister process signal handlers (for hot reload/testing)
|
|
42
|
+
*/
|
|
43
|
+
declare function unregisterShutdownHandlers(): void;
|
|
35
44
|
/**
|
|
36
45
|
* Check if system is currently shutting down
|
|
37
46
|
*/
|
|
@@ -49,6 +58,10 @@ export declare const WorkerShutdown: Readonly<{
|
|
|
49
58
|
* Register process signal handlers for graceful shutdown
|
|
50
59
|
*/
|
|
51
60
|
registerShutdownHandlers: typeof registerShutdownHandlers;
|
|
61
|
+
/**
|
|
62
|
+
* Unregister process signal handlers (for hot reload/testing)
|
|
63
|
+
*/
|
|
64
|
+
unregisterShutdownHandlers: typeof unregisterShutdownHandlers;
|
|
52
65
|
/**
|
|
53
66
|
* Check if system is currently shutting down
|
|
54
67
|
*/
|
|
@@ -57,5 +70,13 @@ export declare const WorkerShutdown: Readonly<{
|
|
|
57
70
|
* Get current shutdown state
|
|
58
71
|
*/
|
|
59
72
|
getShutdownState: typeof getShutdownState;
|
|
73
|
+
/**
|
|
74
|
+
* Request shutdown via Durable Object (Workers)
|
|
75
|
+
*/
|
|
76
|
+
requestDurableShutdown: (reason?: string) => Promise<boolean>;
|
|
77
|
+
/**
|
|
78
|
+
* Read shutdown state from Durable Object (Workers)
|
|
79
|
+
*/
|
|
80
|
+
getDurableShutdownState: () => Promise<DurableShutdownState | null>;
|
|
60
81
|
}>;
|
|
61
82
|
export {};
|