@zintrust/workers 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/AnomalyDetection.d.ts +4 -0
- package/dist/AnomalyDetection.js +8 -0
- package/dist/BroadcastWorker.d.ts +2 -0
- package/dist/CanaryController.js +49 -5
- package/dist/ChaosEngineering.js +13 -0
- package/dist/ClusterLock.js +21 -10
- package/dist/DeadLetterQueue.js +12 -8
- package/dist/MultiQueueWorker.d.ts +1 -1
- package/dist/MultiQueueWorker.js +12 -7
- package/dist/NotificationWorker.d.ts +2 -0
- package/dist/PriorityQueue.d.ts +2 -2
- package/dist/PriorityQueue.js +20 -21
- package/dist/ResourceMonitor.js +65 -38
- package/dist/WorkerFactory.d.ts +23 -3
- package/dist/WorkerFactory.js +420 -40
- package/dist/WorkerInit.js +8 -3
- package/dist/WorkerMetrics.d.ts +2 -1
- package/dist/WorkerMetrics.js +152 -93
- package/dist/WorkerRegistry.d.ts +6 -0
- package/dist/WorkerRegistry.js +70 -1
- package/dist/WorkerShutdown.d.ts +21 -0
- package/dist/WorkerShutdown.js +82 -9
- package/dist/WorkerShutdownDurableObject.d.ts +12 -0
- package/dist/WorkerShutdownDurableObject.js +41 -0
- package/dist/build-manifest.json +171 -99
- package/dist/createQueueWorker.d.ts +2 -0
- package/dist/createQueueWorker.js +42 -27
- package/dist/dashboard/types.d.ts +5 -0
- package/dist/dashboard/workers-api.js +136 -43
- package/dist/http/WorkerApiController.js +1 -0
- package/dist/http/WorkerController.js +133 -85
- package/dist/http/WorkerMonitoringService.d.ts +11 -0
- package/dist/http/WorkerMonitoringService.js +62 -0
- package/dist/http/middleware/CustomValidation.js +1 -1
- package/dist/http/middleware/EditWorkerValidation.d.ts +1 -1
- package/dist/http/middleware/EditWorkerValidation.js +7 -6
- package/dist/http/middleware/ProcessorPathSanitizer.js +101 -35
- package/dist/http/middleware/WorkerValidationChain.js +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1 -0
- package/dist/routes/workers.js +48 -6
- package/dist/storage/WorkerStore.d.ts +4 -1
- package/dist/storage/WorkerStore.js +55 -7
- package/dist/telemetry/api/TelemetryAPI.d.ts +46 -0
- package/dist/telemetry/api/TelemetryAPI.js +219 -0
- package/dist/telemetry/api/TelemetryMonitoringService.d.ts +17 -0
- package/dist/telemetry/api/TelemetryMonitoringService.js +113 -0
- package/dist/telemetry/components/AlertPanel.d.ts +1 -0
- package/dist/telemetry/components/AlertPanel.js +13 -0
- package/dist/telemetry/components/CostTracking.d.ts +1 -0
- package/dist/telemetry/components/CostTracking.js +14 -0
- package/dist/telemetry/components/ResourceUsageChart.d.ts +1 -0
- package/dist/telemetry/components/ResourceUsageChart.js +11 -0
- package/dist/telemetry/components/WorkerHealthChart.d.ts +1 -0
- package/dist/telemetry/components/WorkerHealthChart.js +11 -0
- package/dist/telemetry/index.d.ts +15 -0
- package/dist/telemetry/index.js +60 -0
- package/dist/telemetry/routes/dashboard.d.ts +6 -0
- package/dist/telemetry/routes/dashboard.js +608 -0
- package/dist/ui/router/EmbeddedAssets.d.ts +4 -0
- package/dist/ui/router/EmbeddedAssets.js +13 -0
- package/dist/ui/router/ui.js +100 -4
- package/package.json +9 -5
- package/src/AnomalyDetection.ts +9 -0
- package/src/CanaryController.ts +41 -5
- package/src/ChaosEngineering.ts +14 -0
- package/src/ClusterLock.ts +22 -9
- package/src/DeadLetterQueue.ts +13 -8
- package/src/MultiQueueWorker.ts +15 -8
- package/src/PriorityQueue.ts +21 -22
- package/src/ResourceMonitor.ts +72 -40
- package/src/WorkerFactory.ts +545 -49
- package/src/WorkerInit.ts +8 -3
- package/src/WorkerMetrics.ts +183 -105
- package/src/WorkerRegistry.ts +80 -1
- package/src/WorkerShutdown.ts +115 -9
- package/src/WorkerShutdownDurableObject.ts +64 -0
- package/src/createQueueWorker.ts +73 -30
- package/src/dashboard/types.ts +5 -0
- package/src/dashboard/workers-api.ts +165 -52
- package/src/http/WorkerApiController.ts +1 -0
- package/src/http/WorkerController.ts +167 -90
- package/src/http/WorkerMonitoringService.ts +77 -0
- package/src/http/middleware/CustomValidation.ts +1 -1
- package/src/http/middleware/EditWorkerValidation.ts +7 -6
- package/src/http/middleware/ProcessorPathSanitizer.ts +123 -36
- package/src/http/middleware/WorkerValidationChain.ts +1 -0
- package/src/index.ts +6 -1
- package/src/routes/workers.ts +66 -9
- package/src/storage/WorkerStore.ts +59 -9
- package/src/telemetry/api/TelemetryAPI.ts +292 -0
- package/src/telemetry/api/TelemetryMonitoringService.ts +149 -0
- package/src/telemetry/components/AlertPanel.ts +13 -0
- package/src/telemetry/components/CostTracking.ts +14 -0
- package/src/telemetry/components/ResourceUsageChart.ts +11 -0
- package/src/telemetry/components/WorkerHealthChart.ts +11 -0
- package/src/telemetry/index.ts +121 -0
- package/src/telemetry/public/assets/zintrust-logo.svg +15 -0
- package/src/telemetry/routes/dashboard.ts +638 -0
- package/src/telemetry/styles/tailwind.css +1 -0
- package/src/telemetry/styles/zintrust-theme.css +8 -0
- package/src/ui/router/EmbeddedAssets.ts +13 -0
- package/src/ui/router/ui.ts +112 -5
- package/src/ui/workers/index.html +2 -2
- package/src/ui/workers/main.js +232 -61
- package/src/ui/workers/zintrust.svg +30 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +0 -3
- package/dist/dashboard/workers-dashboard-ui.js +0 -1026
- package/dist/dashboard/workers-dashboard.d.ts +0 -4
- package/dist/dashboard/workers-dashboard.js +0 -904
package/src/WorkerInit.ts
CHANGED
|
@@ -187,7 +187,7 @@ async function initialize(options: IWorkerInitOptions = {}): Promise<void> {
|
|
|
187
187
|
async function autoStartPersistedWorkers(): Promise<void> {
|
|
188
188
|
// Check if auto-start is enabled globally via environment variable
|
|
189
189
|
Logger.debug('Auto-start check', {
|
|
190
|
-
envAutoStart:
|
|
190
|
+
envAutoStart: Env.getBool('WORKER_AUTO_START', false),
|
|
191
191
|
configAutoStart: workersConfig.defaultWorker?.autoStart,
|
|
192
192
|
});
|
|
193
193
|
|
|
@@ -204,6 +204,9 @@ async function autoStartPersistedWorkers(): Promise<void> {
|
|
|
204
204
|
});
|
|
205
205
|
|
|
206
206
|
const candidates = records.filter((record) => {
|
|
207
|
+
if (record.activeStatus === false) {
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
207
210
|
// If autoStart is explicitly true, always include
|
|
208
211
|
if (record.autoStart === true) {
|
|
209
212
|
return true;
|
|
@@ -232,7 +235,8 @@ async function autoStartPersistedWorkers(): Promise<void> {
|
|
|
232
235
|
await WorkerFactory.startFromPersisted(record.name);
|
|
233
236
|
return { name: record.name, started: true, skipped: false };
|
|
234
237
|
} catch (error) {
|
|
235
|
-
|
|
238
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
239
|
+
Logger.warn(`Auto-start failed for worker ${record.name}: ${message}`);
|
|
236
240
|
return { name: record.name, started: false, skipped: false };
|
|
237
241
|
}
|
|
238
242
|
})
|
|
@@ -246,7 +250,8 @@ async function autoStartPersistedWorkers(): Promise<void> {
|
|
|
246
250
|
skipped: skippedCount,
|
|
247
251
|
});
|
|
248
252
|
} catch (error) {
|
|
249
|
-
|
|
253
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
254
|
+
Logger.warn(`Auto-start persisted workers failed: ${message}`);
|
|
250
255
|
}
|
|
251
256
|
}
|
|
252
257
|
|
package/src/WorkerMetrics.ts
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
type RedisConfig,
|
|
13
13
|
} from '@zintrust/core';
|
|
14
14
|
import type IORedis from 'ioredis';
|
|
15
|
+
import type { ChainableCommander } from 'ioredis';
|
|
15
16
|
|
|
16
17
|
export type MetricType =
|
|
17
18
|
| 'processed'
|
|
@@ -81,8 +82,39 @@ const RETENTION = {
|
|
|
81
82
|
monthly: 365 * 24 * 60 * 60, // 1 year
|
|
82
83
|
};
|
|
83
84
|
|
|
85
|
+
const runInBatches = async <T>(
|
|
86
|
+
items: ReadonlyArray<T>,
|
|
87
|
+
handler: (item: T) => Promise<void>,
|
|
88
|
+
batchSize = 10
|
|
89
|
+
): Promise<void> => {
|
|
90
|
+
for (let i = 0; i < items.length; i += batchSize) {
|
|
91
|
+
const batch = items.slice(i, i + batchSize);
|
|
92
|
+
// Batch processing is intentionally sequential to avoid overwhelming the system
|
|
93
|
+
// eslint-disable-next-line no-await-in-loop
|
|
94
|
+
await Promise.all(batch.map((item) => handler(item)));
|
|
95
|
+
}
|
|
96
|
+
};
|
|
97
|
+
|
|
84
98
|
// Internal state
|
|
85
99
|
let redisClient: IORedis | null = null;
|
|
100
|
+
let cachedConfig: RedisConfig | null = null;
|
|
101
|
+
let keepLoggin = 0;
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Helper: Get valid Redis client
|
|
105
|
+
*/
|
|
106
|
+
const getValidClient = async (): Promise<IORedis> => {
|
|
107
|
+
if (!cachedConfig) {
|
|
108
|
+
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized. Call initialize() first.');
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// If no client, create one
|
|
112
|
+
if (!redisClient) {
|
|
113
|
+
redisClient = createRedisConnection(cachedConfig);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return redisClient;
|
|
117
|
+
};
|
|
86
118
|
|
|
87
119
|
/**
|
|
88
120
|
* Helper: Get Redis key for metrics
|
|
@@ -213,10 +245,103 @@ const calculateHealthScore = (metrics: {
|
|
|
213
245
|
};
|
|
214
246
|
};
|
|
215
247
|
|
|
248
|
+
/**
|
|
249
|
+
* Helper: Create empty metrics result for error cases
|
|
250
|
+
*/
|
|
251
|
+
const createEmptyMetrics = (
|
|
252
|
+
options: MetricQueryOptions,
|
|
253
|
+
defaultStartDate?: Date
|
|
254
|
+
): AggregatedMetrics => ({
|
|
255
|
+
workerName: options.workerName,
|
|
256
|
+
metricType: options.metricType,
|
|
257
|
+
period: {
|
|
258
|
+
start: options.startDate ?? defaultStartDate ?? new Date(),
|
|
259
|
+
end: options.endDate ?? new Date(),
|
|
260
|
+
},
|
|
261
|
+
total: 0,
|
|
262
|
+
average: 0,
|
|
263
|
+
min: 0,
|
|
264
|
+
max: 0,
|
|
265
|
+
count: 0,
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Helper: Handle uninitialized Redis client
|
|
270
|
+
*/
|
|
271
|
+
const handleUninitializedMetrics = (optionsList: MetricQueryOptions[]): AggregatedMetrics[] => {
|
|
272
|
+
if (keepLoggin === 0) {
|
|
273
|
+
keepLoggin = 1;
|
|
274
|
+
Logger.warn(`[METRICS] WorkerMetrics not initialized globally. Make sure all workers running`);
|
|
275
|
+
}
|
|
276
|
+
return optionsList.map((options) => createEmptyMetrics(options));
|
|
277
|
+
};
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Helper: Build Redis pipeline for batch metrics query
|
|
281
|
+
*/
|
|
282
|
+
const buildMetricsPipeline = (
|
|
283
|
+
client: IORedis,
|
|
284
|
+
optionsList: MetricQueryOptions[]
|
|
285
|
+
): ChainableCommander => {
|
|
286
|
+
const pipeline = client.pipeline();
|
|
287
|
+
|
|
288
|
+
for (const options of optionsList) {
|
|
289
|
+
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
290
|
+
const key = getMetricsKey(workerName, metricType, granularity);
|
|
291
|
+
const minScore = startDate ? startDate.getTime() : '-inf';
|
|
292
|
+
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
293
|
+
pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
return pipeline;
|
|
297
|
+
};
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* Helper: Process batch results and calculate aggregations
|
|
301
|
+
*/
|
|
302
|
+
const processBatchResults = (
|
|
303
|
+
optionsList: MetricQueryOptions[],
|
|
304
|
+
results: [Error | null, unknown][]
|
|
305
|
+
): AggregatedMetrics[] => {
|
|
306
|
+
return optionsList.map((options, index) => {
|
|
307
|
+
const [err, data] = results[index];
|
|
308
|
+
if (err) {
|
|
309
|
+
Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
|
|
310
|
+
return createEmptyMetrics(options);
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
const points: MetricPoint[] = (data as string[]).map((d) => JSON.parse(d) as MetricPoint);
|
|
314
|
+
|
|
315
|
+
if (points.length === 0) {
|
|
316
|
+
return createEmptyMetrics(options, new Date(0));
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
const values = points.map((p) => p.value);
|
|
320
|
+
const total = values.reduce((sum, val) => sum + val, 0);
|
|
321
|
+
const average = total / values.length;
|
|
322
|
+
const min = Math.min(...values);
|
|
323
|
+
const max = Math.max(...values);
|
|
324
|
+
|
|
325
|
+
return {
|
|
326
|
+
workerName: options.workerName,
|
|
327
|
+
metricType: options.metricType,
|
|
328
|
+
period: {
|
|
329
|
+
start: points[0].timestamp,
|
|
330
|
+
end: points.at(-1)?.timestamp ?? new Date(),
|
|
331
|
+
},
|
|
332
|
+
total,
|
|
333
|
+
average,
|
|
334
|
+
min,
|
|
335
|
+
max,
|
|
336
|
+
count: values.length,
|
|
337
|
+
};
|
|
338
|
+
});
|
|
339
|
+
};
|
|
340
|
+
|
|
216
341
|
/**
|
|
217
342
|
* Worker Metrics Manager - Sealed namespace
|
|
218
343
|
*/
|
|
219
|
-
|
|
344
|
+
const WorkerMetrics = Object.freeze({
|
|
220
345
|
/**
|
|
221
346
|
* Initialize the metrics manager with Redis connection
|
|
222
347
|
*/
|
|
@@ -226,6 +351,7 @@ export const WorkerMetrics = Object.freeze({
|
|
|
226
351
|
return;
|
|
227
352
|
}
|
|
228
353
|
|
|
354
|
+
cachedConfig = config;
|
|
229
355
|
redisClient = createRedisConnection(config);
|
|
230
356
|
Logger.info('WorkerMetrics initialized');
|
|
231
357
|
},
|
|
@@ -239,11 +365,7 @@ export const WorkerMetrics = Object.freeze({
|
|
|
239
365
|
value: number,
|
|
240
366
|
metadata?: Record<string, unknown>
|
|
241
367
|
): Promise<void> {
|
|
242
|
-
|
|
243
|
-
throw ErrorFactory.createWorkerError(
|
|
244
|
-
'WorkerMetrics not initialized. Call initialize() first.'
|
|
245
|
-
);
|
|
246
|
-
}
|
|
368
|
+
const client = await getValidClient();
|
|
247
369
|
|
|
248
370
|
const now = new Date();
|
|
249
371
|
|
|
@@ -265,11 +387,10 @@ export const WorkerMetrics = Object.freeze({
|
|
|
265
387
|
const score = roundedTimestamp.getTime();
|
|
266
388
|
const data = JSON.stringify(point);
|
|
267
389
|
|
|
268
|
-
await
|
|
390
|
+
await client.zadd(key, score, data);
|
|
269
391
|
|
|
270
392
|
// Cleanup old metrics (lightweight: ~1% based on time slice)
|
|
271
|
-
|
|
272
|
-
if (client && Date.now() % 100 === 0) {
|
|
393
|
+
if (Date.now() % 100 === 0) {
|
|
273
394
|
cleanupOldMetrics(client, key, granularity).catch((err) => {
|
|
274
395
|
Logger.error('Failed to cleanup old metrics', err);
|
|
275
396
|
});
|
|
@@ -287,17 +408,25 @@ export const WorkerMetrics = Object.freeze({
|
|
|
287
408
|
workerName: string,
|
|
288
409
|
metrics: Array<{ metricType: MetricType; value: number; metadata?: Record<string, unknown> }>
|
|
289
410
|
): Promise<void> {
|
|
290
|
-
await
|
|
291
|
-
|
|
292
|
-
);
|
|
411
|
+
await runInBatches(metrics, async (m) => {
|
|
412
|
+
await WorkerMetrics.record(workerName, m.metricType, m.value, m.metadata);
|
|
413
|
+
});
|
|
293
414
|
},
|
|
294
415
|
|
|
295
416
|
/**
|
|
296
417
|
* Query metrics for a time range
|
|
297
418
|
*/
|
|
298
419
|
async query(options: MetricQueryOptions): Promise<MetricEntry> {
|
|
299
|
-
if (!
|
|
300
|
-
|
|
420
|
+
if (!cachedConfig) {
|
|
421
|
+
Logger.warn(
|
|
422
|
+
`[METRICS] WorkerMetrics not initialized for worker: ${options.workerName}. Please start the worker first to enable metrics collection.`
|
|
423
|
+
);
|
|
424
|
+
return {
|
|
425
|
+
workerName: options.workerName,
|
|
426
|
+
metricType: options.metricType,
|
|
427
|
+
granularity: options.granularity,
|
|
428
|
+
points: [],
|
|
429
|
+
};
|
|
301
430
|
}
|
|
302
431
|
|
|
303
432
|
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
@@ -307,8 +436,9 @@ export const WorkerMetrics = Object.freeze({
|
|
|
307
436
|
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
308
437
|
|
|
309
438
|
try {
|
|
439
|
+
const client = await getValidClient();
|
|
310
440
|
// Get data from sorted set
|
|
311
|
-
const results = await
|
|
441
|
+
const results = await client.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
312
442
|
|
|
313
443
|
const points: MetricPoint[] = results.map((data) => JSON.parse(data) as MetricPoint);
|
|
314
444
|
|
|
@@ -368,20 +498,13 @@ export const WorkerMetrics = Object.freeze({
|
|
|
368
498
|
},
|
|
369
499
|
|
|
370
500
|
async aggregateBatch(optionsList: MetricQueryOptions[]): Promise<AggregatedMetrics[]> {
|
|
371
|
-
if (!
|
|
372
|
-
|
|
501
|
+
if (!cachedConfig) {
|
|
502
|
+
return handleUninitializedMetrics(optionsList);
|
|
373
503
|
}
|
|
374
504
|
if (optionsList.length === 0) return [];
|
|
375
505
|
|
|
376
|
-
const
|
|
377
|
-
|
|
378
|
-
for (const options of optionsList) {
|
|
379
|
-
const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
|
|
380
|
-
const key = getMetricsKey(workerName, metricType, granularity);
|
|
381
|
-
const minScore = startDate ? startDate.getTime() : '-inf';
|
|
382
|
-
const maxScore = endDate ? endDate.getTime() : '+inf';
|
|
383
|
-
pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
|
|
384
|
-
}
|
|
506
|
+
const client = await getValidClient();
|
|
507
|
+
const pipeline = buildMetricsPipeline(client, optionsList);
|
|
385
508
|
|
|
386
509
|
const results = await pipeline.exec();
|
|
387
510
|
|
|
@@ -389,66 +512,14 @@ export const WorkerMetrics = Object.freeze({
|
|
|
389
512
|
throw ErrorFactory.createWorkerError('Failed to execute metrics pipeline');
|
|
390
513
|
}
|
|
391
514
|
|
|
392
|
-
return optionsList
|
|
393
|
-
const [err, data] = results[index];
|
|
394
|
-
if (err) {
|
|
395
|
-
Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
|
|
396
|
-
return {
|
|
397
|
-
workerName: options.workerName,
|
|
398
|
-
metricType: options.metricType,
|
|
399
|
-
period: { start: options.startDate ?? new Date(), end: options.endDate ?? new Date() },
|
|
400
|
-
total: 0,
|
|
401
|
-
average: 0,
|
|
402
|
-
min: 0,
|
|
403
|
-
max: 0,
|
|
404
|
-
count: 0,
|
|
405
|
-
};
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
const points: MetricPoint[] = (data as string[]).map((d) => JSON.parse(d) as MetricPoint);
|
|
409
|
-
|
|
410
|
-
if (points.length === 0) {
|
|
411
|
-
return {
|
|
412
|
-
workerName: options.workerName,
|
|
413
|
-
metricType: options.metricType,
|
|
414
|
-
period: { start: options.startDate ?? new Date(0), end: options.endDate ?? new Date() },
|
|
415
|
-
total: 0,
|
|
416
|
-
average: 0,
|
|
417
|
-
min: 0,
|
|
418
|
-
max: 0,
|
|
419
|
-
count: 0,
|
|
420
|
-
};
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
const values = points.map((p) => p.value);
|
|
424
|
-
const total = values.reduce((sum, val) => sum + val, 0);
|
|
425
|
-
const average = total / values.length;
|
|
426
|
-
const min = Math.min(...values);
|
|
427
|
-
const max = Math.max(...values);
|
|
428
|
-
|
|
429
|
-
return {
|
|
430
|
-
workerName: options.workerName,
|
|
431
|
-
metricType: options.metricType,
|
|
432
|
-
period: {
|
|
433
|
-
start: points[0].timestamp,
|
|
434
|
-
end: points.at(-1)?.timestamp ?? new Date(),
|
|
435
|
-
},
|
|
436
|
-
total,
|
|
437
|
-
average,
|
|
438
|
-
min,
|
|
439
|
-
max,
|
|
440
|
-
count: values.length,
|
|
441
|
-
};
|
|
442
|
-
});
|
|
515
|
+
return processBatchResults(optionsList, results);
|
|
443
516
|
},
|
|
444
517
|
|
|
445
518
|
/**
|
|
446
519
|
* Calculate and store health score
|
|
447
520
|
*/
|
|
448
521
|
async calculateHealth(workerName: string): Promise<WorkerHealthScore> {
|
|
449
|
-
|
|
450
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
451
|
-
}
|
|
522
|
+
const client = await getValidClient();
|
|
452
523
|
|
|
453
524
|
const now = new Date();
|
|
454
525
|
const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);
|
|
@@ -521,14 +592,14 @@ export const WorkerMetrics = Object.freeze({
|
|
|
521
592
|
const score = now.getTime();
|
|
522
593
|
const data = JSON.stringify(healthScore);
|
|
523
594
|
|
|
524
|
-
await
|
|
595
|
+
await client.zadd(key, score, data);
|
|
525
596
|
|
|
526
597
|
// Keep only last 24 hours
|
|
527
598
|
const cutoff = now.getTime() - 24 * 60 * 60 * 1000;
|
|
528
|
-
await
|
|
599
|
+
await client.zremrangebyscore(key, '-inf', cutoff);
|
|
529
600
|
|
|
530
601
|
// Set expiry (48 hours)
|
|
531
|
-
await
|
|
602
|
+
await client.expire(key, 48 * 60 * 60);
|
|
532
603
|
|
|
533
604
|
Logger.debug(`Health score for ${workerName}: ${healthScore.score} (${healthScore.status})`);
|
|
534
605
|
|
|
@@ -546,16 +617,13 @@ export const WorkerMetrics = Object.freeze({
|
|
|
546
617
|
workerName: string,
|
|
547
618
|
hours = 24
|
|
548
619
|
): Promise<ReadonlyArray<WorkerHealthScore>> {
|
|
549
|
-
if (!redisClient) {
|
|
550
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
551
|
-
}
|
|
552
|
-
|
|
553
620
|
try {
|
|
621
|
+
const client = await getValidClient();
|
|
554
622
|
const key = getHealthKey(workerName);
|
|
555
623
|
const now = Date.now();
|
|
556
624
|
const startTime = now - hours * 60 * 60 * 1000;
|
|
557
625
|
|
|
558
|
-
const results = await
|
|
626
|
+
const results = await client.zrangebyscore(key, startTime, now);
|
|
559
627
|
|
|
560
628
|
return results.map((data) => JSON.parse(data) as WorkerHealthScore);
|
|
561
629
|
} catch (error) {
|
|
@@ -568,15 +636,12 @@ export const WorkerMetrics = Object.freeze({
|
|
|
568
636
|
* Get latest health score
|
|
569
637
|
*/
|
|
570
638
|
async getLatestHealth(workerName: string): Promise<WorkerHealthScore | null> {
|
|
571
|
-
if (!redisClient) {
|
|
572
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
573
|
-
}
|
|
574
|
-
|
|
575
639
|
try {
|
|
640
|
+
const client = await getValidClient();
|
|
576
641
|
const key = getHealthKey(workerName);
|
|
577
642
|
|
|
578
643
|
// Get the most recent entry
|
|
579
|
-
const results = await
|
|
644
|
+
const results = await client.zrevrange(key, 0, 0);
|
|
580
645
|
|
|
581
646
|
if (results.length === 0) {
|
|
582
647
|
return null;
|
|
@@ -603,14 +668,12 @@ export const WorkerMetrics = Object.freeze({
|
|
|
603
668
|
};
|
|
604
669
|
}>
|
|
605
670
|
> {
|
|
606
|
-
if (!redisClient) {
|
|
607
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
608
|
-
}
|
|
609
|
-
|
|
610
671
|
try {
|
|
672
|
+
const client = await getValidClient();
|
|
673
|
+
|
|
611
674
|
// Find all unique worker names from health keys
|
|
612
675
|
const pattern = `${RedisKeys.healthPrefix}*`;
|
|
613
|
-
const keys = await
|
|
676
|
+
const keys = await client.keys(pattern);
|
|
614
677
|
const workerNames = keys.map((key) => key.replace(RedisKeys.healthPrefix, ''));
|
|
615
678
|
|
|
616
679
|
const summaries = await Promise.all(
|
|
@@ -662,21 +725,19 @@ export const WorkerMetrics = Object.freeze({
|
|
|
662
725
|
* Delete all metrics for a worker
|
|
663
726
|
*/
|
|
664
727
|
async deleteWorkerMetrics(workerName: string): Promise<void> {
|
|
665
|
-
|
|
666
|
-
throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
|
|
667
|
-
}
|
|
728
|
+
const client = await getValidClient();
|
|
668
729
|
|
|
669
730
|
try {
|
|
670
731
|
const pattern = `${RedisKeys.metricsPrefix}${workerName}:*`;
|
|
671
|
-
const keys = await
|
|
732
|
+
const keys = await client.keys(pattern);
|
|
672
733
|
|
|
673
734
|
if (keys.length > 0) {
|
|
674
|
-
await
|
|
735
|
+
await client.del(...keys);
|
|
675
736
|
}
|
|
676
737
|
|
|
677
738
|
// Also delete health scores
|
|
678
739
|
const healthKey = getHealthKey(workerName);
|
|
679
|
-
await
|
|
740
|
+
await client.del(healthKey);
|
|
680
741
|
|
|
681
742
|
Logger.info(`Deleted all metrics for worker "${workerName}"`);
|
|
682
743
|
} catch (error) {
|
|
@@ -695,11 +756,28 @@ export const WorkerMetrics = Object.freeze({
|
|
|
695
756
|
|
|
696
757
|
Logger.info('WorkerMetrics shutting down...');
|
|
697
758
|
|
|
698
|
-
|
|
759
|
+
// Detach client immediately to allow re-initialization
|
|
760
|
+
const client = redisClient;
|
|
699
761
|
redisClient = null;
|
|
700
762
|
|
|
763
|
+
try {
|
|
764
|
+
// Attempt graceful quit
|
|
765
|
+
await client.quit();
|
|
766
|
+
} catch (error) {
|
|
767
|
+
// If graceful quit fails, force disconnect
|
|
768
|
+
Logger.warn('WorkerMetrics graceful shutdown failed, forcing disconnect', error);
|
|
769
|
+
try {
|
|
770
|
+
client.disconnect();
|
|
771
|
+
} catch (disconnectError) {
|
|
772
|
+
Logger.error('WorkerMetrics forced disconnect failed', disconnectError);
|
|
773
|
+
// Ignore disconnect errors
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
|
|
701
777
|
Logger.info('WorkerMetrics shutdown complete');
|
|
702
778
|
},
|
|
703
779
|
});
|
|
704
780
|
|
|
781
|
+
export { WorkerMetrics };
|
|
782
|
+
|
|
705
783
|
// Graceful shutdown handled by WorkerShutdown
|
package/src/WorkerRegistry.ts
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { ErrorFactory, Logger, type WorkerConfig, type WorkerStatus } from '@zintrust/core';
|
|
8
|
+
import { AnomalyDetection } from './AnomalyDetection';
|
|
8
9
|
|
|
9
10
|
export type WorkerMetadata = {
|
|
10
11
|
name: string;
|
|
@@ -13,6 +14,7 @@ export type WorkerMetadata = {
|
|
|
13
14
|
region: string;
|
|
14
15
|
queueName: string;
|
|
15
16
|
concurrency: number;
|
|
17
|
+
activeStatus?: boolean;
|
|
16
18
|
startedAt: Date | null;
|
|
17
19
|
stoppedAt: Date | null;
|
|
18
20
|
lastProcessedAt: Date | null;
|
|
@@ -46,6 +48,7 @@ export type WorkerInstance = {
|
|
|
46
48
|
export type RegisterWorkerOptions = {
|
|
47
49
|
name: string;
|
|
48
50
|
config: Partial<WorkerConfig>;
|
|
51
|
+
activeStatus?: boolean;
|
|
49
52
|
version?: string;
|
|
50
53
|
region?: string;
|
|
51
54
|
queues?: ReadonlyArray<string>;
|
|
@@ -75,6 +78,51 @@ type Rego = { workers: string[]; count: number };
|
|
|
75
78
|
const workers = new Map<string, WorkerInstance>();
|
|
76
79
|
const registrations = new Map<string, RegisterWorkerOptions>();
|
|
77
80
|
|
|
81
|
+
// Cleanup configuration
|
|
82
|
+
const STOPPED_WORKER_CLEANUP_DELAY = 5 * 60 * 1000; // 5 minutes
|
|
83
|
+
const cleanupTimers = new Map<string, NodeJS.Timeout>();
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Helper: Schedule cleanup of stopped worker
|
|
87
|
+
*/
|
|
88
|
+
const scheduleStoppedWorkerCleanup = (name: string): void => {
|
|
89
|
+
// Clear existing timer if any
|
|
90
|
+
const existingTimer = cleanupTimers.get(name);
|
|
91
|
+
if (existingTimer) {
|
|
92
|
+
clearTimeout(existingTimer);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Schedule new cleanup with proper cleanup handling
|
|
96
|
+
// eslint-disable-next-line no-restricted-syntax
|
|
97
|
+
const timer = setTimeout(() => {
|
|
98
|
+
try {
|
|
99
|
+
const instance = workers.get(name);
|
|
100
|
+
if (instance && instance.metadata.status === 'stopped') {
|
|
101
|
+
Logger.info(`Auto-cleaning up stopped worker: ${name}`);
|
|
102
|
+
workers.delete(name);
|
|
103
|
+
registrations.delete(name);
|
|
104
|
+
}
|
|
105
|
+
} catch (error) {
|
|
106
|
+
Logger.error(`Error during auto-cleanup of worker ${name}`, error);
|
|
107
|
+
} finally {
|
|
108
|
+
cleanupTimers.delete(name);
|
|
109
|
+
}
|
|
110
|
+
}, STOPPED_WORKER_CLEANUP_DELAY);
|
|
111
|
+
|
|
112
|
+
cleanupTimers.set(name, timer);
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Helper: Cancel cleanup timer
|
|
117
|
+
*/
|
|
118
|
+
const cancelCleanupTimer = (name: string): void => {
|
|
119
|
+
const timer = cleanupTimers.get(name);
|
|
120
|
+
if (timer) {
|
|
121
|
+
clearTimeout(timer);
|
|
122
|
+
cleanupTimers.delete(name);
|
|
123
|
+
}
|
|
124
|
+
};
|
|
125
|
+
|
|
78
126
|
/**
|
|
79
127
|
* Helper: Calculate uptime in seconds
|
|
80
128
|
*/
|
|
@@ -127,6 +175,10 @@ export const WorkerRegistry = Object.freeze({
|
|
|
127
175
|
throw ErrorFactory.createWorkerError(`Worker "${name}" is not registered`);
|
|
128
176
|
}
|
|
129
177
|
|
|
178
|
+
if (registration.activeStatus === false) {
|
|
179
|
+
throw ErrorFactory.createWorkerError(`Worker "${name}" is inactive`);
|
|
180
|
+
}
|
|
181
|
+
|
|
130
182
|
if (workers.has(name)) {
|
|
131
183
|
const existing = workers.get(name);
|
|
132
184
|
if (existing?.metadata.status === 'running') {
|
|
@@ -143,6 +195,9 @@ export const WorkerRegistry = Object.freeze({
|
|
|
143
195
|
instance.metadata.status = 'starting';
|
|
144
196
|
instance.metadata.version = version ?? '1.0.0';
|
|
145
197
|
|
|
198
|
+
// Cancel any pending cleanup timer when worker restarts
|
|
199
|
+
cancelCleanupTimer(name);
|
|
200
|
+
|
|
146
201
|
workers.set(name, instance);
|
|
147
202
|
|
|
148
203
|
instance.start();
|
|
@@ -183,6 +238,11 @@ export const WorkerRegistry = Object.freeze({
|
|
|
183
238
|
instance.metadata.status = 'stopped';
|
|
184
239
|
instance.metadata.stoppedAt = new Date();
|
|
185
240
|
|
|
241
|
+
AnomalyDetection.cleanup(name);
|
|
242
|
+
|
|
243
|
+
// Schedule automatic cleanup for stopped worker
|
|
244
|
+
scheduleStoppedWorkerCleanup(name);
|
|
245
|
+
|
|
186
246
|
Logger.info(`Worker "${name}" stopped successfully`);
|
|
187
247
|
} catch (error) {
|
|
188
248
|
Logger.error(`Failed to stop worker "${name}"`, error);
|
|
@@ -280,7 +340,21 @@ export const WorkerRegistry = Object.freeze({
|
|
|
280
340
|
* List all registered workers
|
|
281
341
|
*/
|
|
282
342
|
list(): ReadonlyArray<string> {
|
|
283
|
-
|
|
343
|
+
const names: string[] = [];
|
|
344
|
+
for (const [name, registration] of registrations.entries()) {
|
|
345
|
+
if (registration.activeStatus === false) continue;
|
|
346
|
+
names.push(name);
|
|
347
|
+
}
|
|
348
|
+
return names;
|
|
349
|
+
},
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* Update active status for a registered worker
|
|
353
|
+
*/
|
|
354
|
+
setActiveStatus(name: string, activeStatus: boolean): void {
|
|
355
|
+
const registration = registrations.get(name);
|
|
356
|
+
if (!registration) return;
|
|
357
|
+
registrations.set(name, { ...registration, activeStatus });
|
|
284
358
|
},
|
|
285
359
|
|
|
286
360
|
/**
|
|
@@ -413,9 +487,14 @@ export const WorkerRegistry = Object.freeze({
|
|
|
413
487
|
Logger.warn(`Worker "${name}" is still running during unregister`);
|
|
414
488
|
}
|
|
415
489
|
|
|
490
|
+
// Cancel any pending cleanup timer
|
|
491
|
+
cancelCleanupTimer(name);
|
|
492
|
+
|
|
416
493
|
workers.delete(name);
|
|
417
494
|
registrations.delete(name);
|
|
418
495
|
|
|
496
|
+
AnomalyDetection.cleanup(name);
|
|
497
|
+
|
|
419
498
|
Logger.info(`Worker "${name}" unregistered`);
|
|
420
499
|
},
|
|
421
500
|
|