@zintrust/workers 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/README.md +16 -1
  2. package/dist/AnomalyDetection.d.ts +4 -0
  3. package/dist/AnomalyDetection.js +8 -0
  4. package/dist/BroadcastWorker.d.ts +2 -0
  5. package/dist/CanaryController.js +49 -5
  6. package/dist/ChaosEngineering.js +13 -0
  7. package/dist/ClusterLock.js +21 -10
  8. package/dist/DeadLetterQueue.js +12 -8
  9. package/dist/MultiQueueWorker.d.ts +1 -1
  10. package/dist/MultiQueueWorker.js +12 -7
  11. package/dist/NotificationWorker.d.ts +2 -0
  12. package/dist/PriorityQueue.d.ts +2 -2
  13. package/dist/PriorityQueue.js +20 -21
  14. package/dist/ResourceMonitor.js +65 -38
  15. package/dist/WorkerFactory.d.ts +23 -3
  16. package/dist/WorkerFactory.js +420 -40
  17. package/dist/WorkerInit.js +8 -3
  18. package/dist/WorkerMetrics.d.ts +2 -1
  19. package/dist/WorkerMetrics.js +152 -93
  20. package/dist/WorkerRegistry.d.ts +6 -0
  21. package/dist/WorkerRegistry.js +70 -1
  22. package/dist/WorkerShutdown.d.ts +21 -0
  23. package/dist/WorkerShutdown.js +82 -9
  24. package/dist/WorkerShutdownDurableObject.d.ts +12 -0
  25. package/dist/WorkerShutdownDurableObject.js +41 -0
  26. package/dist/build-manifest.json +171 -99
  27. package/dist/createQueueWorker.d.ts +2 -0
  28. package/dist/createQueueWorker.js +42 -27
  29. package/dist/dashboard/types.d.ts +5 -0
  30. package/dist/dashboard/workers-api.js +136 -43
  31. package/dist/http/WorkerApiController.js +1 -0
  32. package/dist/http/WorkerController.js +133 -85
  33. package/dist/http/WorkerMonitoringService.d.ts +11 -0
  34. package/dist/http/WorkerMonitoringService.js +62 -0
  35. package/dist/http/middleware/CustomValidation.js +1 -1
  36. package/dist/http/middleware/EditWorkerValidation.d.ts +1 -1
  37. package/dist/http/middleware/EditWorkerValidation.js +7 -6
  38. package/dist/http/middleware/ProcessorPathSanitizer.js +101 -35
  39. package/dist/http/middleware/WorkerValidationChain.js +1 -0
  40. package/dist/index.d.ts +2 -1
  41. package/dist/index.js +1 -0
  42. package/dist/routes/workers.js +48 -6
  43. package/dist/storage/WorkerStore.d.ts +4 -1
  44. package/dist/storage/WorkerStore.js +55 -7
  45. package/dist/telemetry/api/TelemetryAPI.d.ts +46 -0
  46. package/dist/telemetry/api/TelemetryAPI.js +219 -0
  47. package/dist/telemetry/api/TelemetryMonitoringService.d.ts +17 -0
  48. package/dist/telemetry/api/TelemetryMonitoringService.js +113 -0
  49. package/dist/telemetry/components/AlertPanel.d.ts +1 -0
  50. package/dist/telemetry/components/AlertPanel.js +13 -0
  51. package/dist/telemetry/components/CostTracking.d.ts +1 -0
  52. package/dist/telemetry/components/CostTracking.js +14 -0
  53. package/dist/telemetry/components/ResourceUsageChart.d.ts +1 -0
  54. package/dist/telemetry/components/ResourceUsageChart.js +11 -0
  55. package/dist/telemetry/components/WorkerHealthChart.d.ts +1 -0
  56. package/dist/telemetry/components/WorkerHealthChart.js +11 -0
  57. package/dist/telemetry/index.d.ts +15 -0
  58. package/dist/telemetry/index.js +60 -0
  59. package/dist/telemetry/routes/dashboard.d.ts +6 -0
  60. package/dist/telemetry/routes/dashboard.js +608 -0
  61. package/dist/ui/router/EmbeddedAssets.d.ts +4 -0
  62. package/dist/ui/router/EmbeddedAssets.js +13 -0
  63. package/dist/ui/router/ui.js +100 -4
  64. package/package.json +9 -5
  65. package/src/AnomalyDetection.ts +9 -0
  66. package/src/CanaryController.ts +41 -5
  67. package/src/ChaosEngineering.ts +14 -0
  68. package/src/ClusterLock.ts +22 -9
  69. package/src/DeadLetterQueue.ts +13 -8
  70. package/src/MultiQueueWorker.ts +15 -8
  71. package/src/PriorityQueue.ts +21 -22
  72. package/src/ResourceMonitor.ts +72 -40
  73. package/src/WorkerFactory.ts +545 -49
  74. package/src/WorkerInit.ts +8 -3
  75. package/src/WorkerMetrics.ts +183 -105
  76. package/src/WorkerRegistry.ts +80 -1
  77. package/src/WorkerShutdown.ts +115 -9
  78. package/src/WorkerShutdownDurableObject.ts +64 -0
  79. package/src/createQueueWorker.ts +73 -30
  80. package/src/dashboard/types.ts +5 -0
  81. package/src/dashboard/workers-api.ts +165 -52
  82. package/src/http/WorkerApiController.ts +1 -0
  83. package/src/http/WorkerController.ts +167 -90
  84. package/src/http/WorkerMonitoringService.ts +77 -0
  85. package/src/http/middleware/CustomValidation.ts +1 -1
  86. package/src/http/middleware/EditWorkerValidation.ts +7 -6
  87. package/src/http/middleware/ProcessorPathSanitizer.ts +123 -36
  88. package/src/http/middleware/WorkerValidationChain.ts +1 -0
  89. package/src/index.ts +6 -1
  90. package/src/routes/workers.ts +66 -9
  91. package/src/storage/WorkerStore.ts +59 -9
  92. package/src/telemetry/api/TelemetryAPI.ts +292 -0
  93. package/src/telemetry/api/TelemetryMonitoringService.ts +149 -0
  94. package/src/telemetry/components/AlertPanel.ts +13 -0
  95. package/src/telemetry/components/CostTracking.ts +14 -0
  96. package/src/telemetry/components/ResourceUsageChart.ts +11 -0
  97. package/src/telemetry/components/WorkerHealthChart.ts +11 -0
  98. package/src/telemetry/index.ts +121 -0
  99. package/src/telemetry/public/assets/zintrust-logo.svg +15 -0
  100. package/src/telemetry/routes/dashboard.ts +638 -0
  101. package/src/telemetry/styles/tailwind.css +1 -0
  102. package/src/telemetry/styles/zintrust-theme.css +8 -0
  103. package/src/ui/router/EmbeddedAssets.ts +13 -0
  104. package/src/ui/router/ui.ts +112 -5
  105. package/src/ui/workers/index.html +2 -2
  106. package/src/ui/workers/main.js +232 -61
  107. package/src/ui/workers/zintrust.svg +30 -0
  108. package/dist/dashboard/workers-dashboard-ui.d.ts +0 -3
  109. package/dist/dashboard/workers-dashboard-ui.js +0 -1026
  110. package/dist/dashboard/workers-dashboard.d.ts +0 -4
  111. package/dist/dashboard/workers-dashboard.js +0 -904
package/src/WorkerInit.ts CHANGED
@@ -187,7 +187,7 @@ async function initialize(options: IWorkerInitOptions = {}): Promise<void> {
187
187
  async function autoStartPersistedWorkers(): Promise<void> {
188
188
  // Check if auto-start is enabled globally via environment variable
189
189
  Logger.debug('Auto-start check', {
190
- envAutoStart: process.env['WORKER_AUTO_START'],
190
+ envAutoStart: Env.getBool('WORKER_AUTO_START', false),
191
191
  configAutoStart: workersConfig.defaultWorker?.autoStart,
192
192
  });
193
193
 
@@ -204,6 +204,9 @@ async function autoStartPersistedWorkers(): Promise<void> {
204
204
  });
205
205
 
206
206
  const candidates = records.filter((record) => {
207
+ if (record.activeStatus === false) {
208
+ return false;
209
+ }
207
210
  // If autoStart is explicitly true, always include
208
211
  if (record.autoStart === true) {
209
212
  return true;
@@ -232,7 +235,8 @@ async function autoStartPersistedWorkers(): Promise<void> {
232
235
  await WorkerFactory.startFromPersisted(record.name);
233
236
  return { name: record.name, started: true, skipped: false };
234
237
  } catch (error) {
235
- Logger.warn(`Auto-start failed for worker ${record.name}`, error as Error);
238
+ const message = error instanceof Error ? error.message : String(error);
239
+ Logger.warn(`Auto-start failed for worker ${record.name}: ${message}`);
236
240
  return { name: record.name, started: false, skipped: false };
237
241
  }
238
242
  })
@@ -246,7 +250,8 @@ async function autoStartPersistedWorkers(): Promise<void> {
246
250
  skipped: skippedCount,
247
251
  });
248
252
  } catch (error) {
249
- Logger.warn('Auto-start persisted workers failed', error as Error);
253
+ const message = error instanceof Error ? error.message : String(error);
254
+ Logger.warn(`Auto-start persisted workers failed: ${message}`);
250
255
  }
251
256
  }
252
257
 
package/src/WorkerMetrics.ts CHANGED
@@ -12,6 +12,7 @@ import {
12
12
  type RedisConfig,
13
13
  } from '@zintrust/core';
14
14
  import type IORedis from 'ioredis';
15
+ import type { ChainableCommander } from 'ioredis';
15
16
 
16
17
  export type MetricType =
17
18
  | 'processed'
@@ -81,8 +82,39 @@ const RETENTION = {
81
82
  monthly: 365 * 24 * 60 * 60, // 1 year
82
83
  };
83
84
 
85
+ const runInBatches = async <T>(
86
+ items: ReadonlyArray<T>,
87
+ handler: (item: T) => Promise<void>,
88
+ batchSize = 10
89
+ ): Promise<void> => {
90
+ for (let i = 0; i < items.length; i += batchSize) {
91
+ const batch = items.slice(i, i + batchSize);
92
+ // Batch processing is intentionally sequential to avoid overwhelming the system
93
+ // eslint-disable-next-line no-await-in-loop
94
+ await Promise.all(batch.map((item) => handler(item)));
95
+ }
96
+ };
97
+
84
98
  // Internal state
85
99
  let redisClient: IORedis | null = null;
100
+ let cachedConfig: RedisConfig | null = null;
101
+ let keepLoggin = 0;
102
+
103
+ /**
104
+ * Helper: Get valid Redis client
105
+ */
106
+ const getValidClient = async (): Promise<IORedis> => {
107
+ if (!cachedConfig) {
108
+ throw ErrorFactory.createWorkerError('WorkerMetrics not initialized. Call initialize() first.');
109
+ }
110
+
111
+ // If no client, create one
112
+ if (!redisClient) {
113
+ redisClient = createRedisConnection(cachedConfig);
114
+ }
115
+
116
+ return redisClient;
117
+ };
86
118
 
87
119
  /**
88
120
  * Helper: Get Redis key for metrics
@@ -213,10 +245,103 @@ const calculateHealthScore = (metrics: {
213
245
  };
214
246
  };
215
247
 
248
+ /**
249
+ * Helper: Create empty metrics result for error cases
250
+ */
251
+ const createEmptyMetrics = (
252
+ options: MetricQueryOptions,
253
+ defaultStartDate?: Date
254
+ ): AggregatedMetrics => ({
255
+ workerName: options.workerName,
256
+ metricType: options.metricType,
257
+ period: {
258
+ start: options.startDate ?? defaultStartDate ?? new Date(),
259
+ end: options.endDate ?? new Date(),
260
+ },
261
+ total: 0,
262
+ average: 0,
263
+ min: 0,
264
+ max: 0,
265
+ count: 0,
266
+ });
267
+
268
+ /**
269
+ * Helper: Handle uninitialized Redis client
270
+ */
271
+ const handleUninitializedMetrics = (optionsList: MetricQueryOptions[]): AggregatedMetrics[] => {
272
+ if (keepLoggin === 0) {
273
+ keepLoggin = 1;
274
+ Logger.warn(`[METRICS] WorkerMetrics not initialized globally. Make sure all workers running`);
275
+ }
276
+ return optionsList.map((options) => createEmptyMetrics(options));
277
+ };
278
+
279
+ /**
280
+ * Helper: Build Redis pipeline for batch metrics query
281
+ */
282
+ const buildMetricsPipeline = (
283
+ client: IORedis,
284
+ optionsList: MetricQueryOptions[]
285
+ ): ChainableCommander => {
286
+ const pipeline = client.pipeline();
287
+
288
+ for (const options of optionsList) {
289
+ const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
290
+ const key = getMetricsKey(workerName, metricType, granularity);
291
+ const minScore = startDate ? startDate.getTime() : '-inf';
292
+ const maxScore = endDate ? endDate.getTime() : '+inf';
293
+ pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
294
+ }
295
+
296
+ return pipeline;
297
+ };
298
+
299
+ /**
300
+ * Helper: Process batch results and calculate aggregations
301
+ */
302
+ const processBatchResults = (
303
+ optionsList: MetricQueryOptions[],
304
+ results: [Error | null, unknown][]
305
+ ): AggregatedMetrics[] => {
306
+ return optionsList.map((options, index) => {
307
+ const [err, data] = results[index];
308
+ if (err) {
309
+ Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
310
+ return createEmptyMetrics(options);
311
+ }
312
+
313
+ const points: MetricPoint[] = (data as string[]).map((d) => JSON.parse(d) as MetricPoint);
314
+
315
+ if (points.length === 0) {
316
+ return createEmptyMetrics(options, new Date(0));
317
+ }
318
+
319
+ const values = points.map((p) => p.value);
320
+ const total = values.reduce((sum, val) => sum + val, 0);
321
+ const average = total / values.length;
322
+ const min = Math.min(...values);
323
+ const max = Math.max(...values);
324
+
325
+ return {
326
+ workerName: options.workerName,
327
+ metricType: options.metricType,
328
+ period: {
329
+ start: points[0].timestamp,
330
+ end: points.at(-1)?.timestamp ?? new Date(),
331
+ },
332
+ total,
333
+ average,
334
+ min,
335
+ max,
336
+ count: values.length,
337
+ };
338
+ });
339
+ };
340
+
216
341
  /**
217
342
  * Worker Metrics Manager - Sealed namespace
218
343
  */
219
- export const WorkerMetrics = Object.freeze({
344
+ const WorkerMetrics = Object.freeze({
220
345
  /**
221
346
  * Initialize the metrics manager with Redis connection
222
347
  */
@@ -226,6 +351,7 @@ export const WorkerMetrics = Object.freeze({
226
351
  return;
227
352
  }
228
353
 
354
+ cachedConfig = config;
229
355
  redisClient = createRedisConnection(config);
230
356
  Logger.info('WorkerMetrics initialized');
231
357
  },
@@ -239,11 +365,7 @@ export const WorkerMetrics = Object.freeze({
239
365
  value: number,
240
366
  metadata?: Record<string, unknown>
241
367
  ): Promise<void> {
242
- if (!redisClient) {
243
- throw ErrorFactory.createWorkerError(
244
- 'WorkerMetrics not initialized. Call initialize() first.'
245
- );
246
- }
368
+ const client = await getValidClient();
247
369
 
248
370
  const now = new Date();
249
371
 
@@ -265,11 +387,10 @@ export const WorkerMetrics = Object.freeze({
265
387
  const score = roundedTimestamp.getTime();
266
388
  const data = JSON.stringify(point);
267
389
 
268
- await redisClient?.zadd(key, score, data);
390
+ await client.zadd(key, score, data);
269
391
 
270
392
  // Cleanup old metrics (lightweight: ~1% based on time slice)
271
- const client = redisClient;
272
- if (client && Date.now() % 100 === 0) {
393
+ if (Date.now() % 100 === 0) {
273
394
  cleanupOldMetrics(client, key, granularity).catch((err) => {
274
395
  Logger.error('Failed to cleanup old metrics', err);
275
396
  });
@@ -287,17 +408,25 @@ export const WorkerMetrics = Object.freeze({
287
408
  workerName: string,
288
409
  metrics: Array<{ metricType: MetricType; value: number; metadata?: Record<string, unknown> }>
289
410
  ): Promise<void> {
290
- await Promise.all(
291
- metrics.map(async (m) => WorkerMetrics.record(workerName, m.metricType, m.value, m.metadata))
292
- );
411
+ await runInBatches(metrics, async (m) => {
412
+ await WorkerMetrics.record(workerName, m.metricType, m.value, m.metadata);
413
+ });
293
414
  },
294
415
 
295
416
  /**
296
417
  * Query metrics for a time range
297
418
  */
298
419
  async query(options: MetricQueryOptions): Promise<MetricEntry> {
299
- if (!redisClient) {
300
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
420
+ if (!cachedConfig) {
421
+ Logger.warn(
422
+ `[METRICS] WorkerMetrics not initialized for worker: ${options.workerName}. Please start the worker first to enable metrics collection.`
423
+ );
424
+ return {
425
+ workerName: options.workerName,
426
+ metricType: options.metricType,
427
+ granularity: options.granularity,
428
+ points: [],
429
+ };
301
430
  }
302
431
 
303
432
  const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
@@ -307,8 +436,9 @@ export const WorkerMetrics = Object.freeze({
307
436
  const maxScore = endDate ? endDate.getTime() : '+inf';
308
437
 
309
438
  try {
439
+ const client = await getValidClient();
310
440
  // Get data from sorted set
311
- const results = await redisClient.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
441
+ const results = await client.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
312
442
 
313
443
  const points: MetricPoint[] = results.map((data) => JSON.parse(data) as MetricPoint);
314
444
 
@@ -368,20 +498,13 @@ export const WorkerMetrics = Object.freeze({
368
498
  },
369
499
 
370
500
  async aggregateBatch(optionsList: MetricQueryOptions[]): Promise<AggregatedMetrics[]> {
371
- if (!redisClient) {
372
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
501
+ if (!cachedConfig) {
502
+ return handleUninitializedMetrics(optionsList);
373
503
  }
374
504
  if (optionsList.length === 0) return [];
375
505
 
376
- const pipeline = redisClient.pipeline();
377
-
378
- for (const options of optionsList) {
379
- const { workerName, metricType, granularity, startDate, endDate, limit = 1000 } = options;
380
- const key = getMetricsKey(workerName, metricType, granularity);
381
- const minScore = startDate ? startDate.getTime() : '-inf';
382
- const maxScore = endDate ? endDate.getTime() : '+inf';
383
- pipeline.zrangebyscore(key, minScore, maxScore, 'LIMIT', 0, limit);
384
- }
506
+ const client = await getValidClient();
507
+ const pipeline = buildMetricsPipeline(client, optionsList);
385
508
 
386
509
  const results = await pipeline.exec();
387
510
 
@@ -389,66 +512,14 @@ export const WorkerMetrics = Object.freeze({
389
512
  throw ErrorFactory.createWorkerError('Failed to execute metrics pipeline');
390
513
  }
391
514
 
392
- return optionsList.map((options, index) => {
393
- const [err, data] = results[index];
394
- if (err) {
395
- Logger.error(`Error querying metrics for ${options.workerName}/${options.metricType}`, err);
396
- return {
397
- workerName: options.workerName,
398
- metricType: options.metricType,
399
- period: { start: options.startDate ?? new Date(), end: options.endDate ?? new Date() },
400
- total: 0,
401
- average: 0,
402
- min: 0,
403
- max: 0,
404
- count: 0,
405
- };
406
- }
407
-
408
- const points: MetricPoint[] = (data as string[]).map((d) => JSON.parse(d) as MetricPoint);
409
-
410
- if (points.length === 0) {
411
- return {
412
- workerName: options.workerName,
413
- metricType: options.metricType,
414
- period: { start: options.startDate ?? new Date(0), end: options.endDate ?? new Date() },
415
- total: 0,
416
- average: 0,
417
- min: 0,
418
- max: 0,
419
- count: 0,
420
- };
421
- }
422
-
423
- const values = points.map((p) => p.value);
424
- const total = values.reduce((sum, val) => sum + val, 0);
425
- const average = total / values.length;
426
- const min = Math.min(...values);
427
- const max = Math.max(...values);
428
-
429
- return {
430
- workerName: options.workerName,
431
- metricType: options.metricType,
432
- period: {
433
- start: points[0].timestamp,
434
- end: points.at(-1)?.timestamp ?? new Date(),
435
- },
436
- total,
437
- average,
438
- min,
439
- max,
440
- count: values.length,
441
- };
442
- });
515
+ return processBatchResults(optionsList, results);
443
516
  },
444
517
 
445
518
  /**
446
519
  * Calculate and store health score
447
520
  */
448
521
  async calculateHealth(workerName: string): Promise<WorkerHealthScore> {
449
- if (!redisClient) {
450
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
451
- }
522
+ const client = await getValidClient();
452
523
 
453
524
  const now = new Date();
454
525
  const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);
@@ -521,14 +592,14 @@ export const WorkerMetrics = Object.freeze({
521
592
  const score = now.getTime();
522
593
  const data = JSON.stringify(healthScore);
523
594
 
524
- await redisClient.zadd(key, score, data);
595
+ await client.zadd(key, score, data);
525
596
 
526
597
  // Keep only last 24 hours
527
598
  const cutoff = now.getTime() - 24 * 60 * 60 * 1000;
528
- await redisClient.zremrangebyscore(key, '-inf', cutoff);
599
+ await client.zremrangebyscore(key, '-inf', cutoff);
529
600
 
530
601
  // Set expiry (48 hours)
531
- await redisClient.expire(key, 48 * 60 * 60);
602
+ await client.expire(key, 48 * 60 * 60);
532
603
 
533
604
  Logger.debug(`Health score for ${workerName}: ${healthScore.score} (${healthScore.status})`);
534
605
 
@@ -546,16 +617,13 @@ export const WorkerMetrics = Object.freeze({
546
617
  workerName: string,
547
618
  hours = 24
548
619
  ): Promise<ReadonlyArray<WorkerHealthScore>> {
549
- if (!redisClient) {
550
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
551
- }
552
-
553
620
  try {
621
+ const client = await getValidClient();
554
622
  const key = getHealthKey(workerName);
555
623
  const now = Date.now();
556
624
  const startTime = now - hours * 60 * 60 * 1000;
557
625
 
558
- const results = await redisClient.zrangebyscore(key, startTime, now);
626
+ const results = await client.zrangebyscore(key, startTime, now);
559
627
 
560
628
  return results.map((data) => JSON.parse(data) as WorkerHealthScore);
561
629
  } catch (error) {
@@ -568,15 +636,12 @@ export const WorkerMetrics = Object.freeze({
568
636
  * Get latest health score
569
637
  */
570
638
  async getLatestHealth(workerName: string): Promise<WorkerHealthScore | null> {
571
- if (!redisClient) {
572
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
573
- }
574
-
575
639
  try {
640
+ const client = await getValidClient();
576
641
  const key = getHealthKey(workerName);
577
642
 
578
643
  // Get the most recent entry
579
- const results = await redisClient.zrevrange(key, 0, 0);
644
+ const results = await client.zrevrange(key, 0, 0);
580
645
 
581
646
  if (results.length === 0) {
582
647
  return null;
@@ -603,14 +668,12 @@ export const WorkerMetrics = Object.freeze({
603
668
  };
604
669
  }>
605
670
  > {
606
- if (!redisClient) {
607
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
608
- }
609
-
610
671
  try {
672
+ const client = await getValidClient();
673
+
611
674
  // Find all unique worker names from health keys
612
675
  const pattern = `${RedisKeys.healthPrefix}*`;
613
- const keys = await redisClient.keys(pattern);
676
+ const keys = await client.keys(pattern);
614
677
  const workerNames = keys.map((key) => key.replace(RedisKeys.healthPrefix, ''));
615
678
 
616
679
  const summaries = await Promise.all(
@@ -662,21 +725,19 @@ export const WorkerMetrics = Object.freeze({
662
725
  * Delete all metrics for a worker
663
726
  */
664
727
  async deleteWorkerMetrics(workerName: string): Promise<void> {
665
- if (!redisClient) {
666
- throw ErrorFactory.createWorkerError('WorkerMetrics not initialized');
667
- }
728
+ const client = await getValidClient();
668
729
 
669
730
  try {
670
731
  const pattern = `${RedisKeys.metricsPrefix}${workerName}:*`;
671
- const keys = await redisClient.keys(pattern);
732
+ const keys = await client.keys(pattern);
672
733
 
673
734
  if (keys.length > 0) {
674
- await redisClient.del(...keys);
735
+ await client.del(...keys);
675
736
  }
676
737
 
677
738
  // Also delete health scores
678
739
  const healthKey = getHealthKey(workerName);
679
- await redisClient.del(healthKey);
740
+ await client.del(healthKey);
680
741
 
681
742
  Logger.info(`Deleted all metrics for worker "${workerName}"`);
682
743
  } catch (error) {
@@ -695,11 +756,28 @@ export const WorkerMetrics = Object.freeze({
695
756
 
696
757
  Logger.info('WorkerMetrics shutting down...');
697
758
 
698
- await redisClient.quit();
759
+ // Detach client immediately to allow re-initialization
760
+ const client = redisClient;
699
761
  redisClient = null;
700
762
 
763
+ try {
764
+ // Attempt graceful quit
765
+ await client.quit();
766
+ } catch (error) {
767
+ // If graceful quit fails, force disconnect
768
+ Logger.warn('WorkerMetrics graceful shutdown failed, forcing disconnect', error);
769
+ try {
770
+ client.disconnect();
771
+ } catch (disconnectError) {
772
+ Logger.error('WorkerMetrics forced disconnect failed', disconnectError);
773
+ // Ignore disconnect errors
774
+ }
775
+ }
776
+
701
777
  Logger.info('WorkerMetrics shutdown complete');
702
778
  },
703
779
  });
704
780
 
781
+ export { WorkerMetrics };
782
+
705
783
  // Graceful shutdown handled by WorkerShutdown
package/src/WorkerRegistry.ts CHANGED
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  import { ErrorFactory, Logger, type WorkerConfig, type WorkerStatus } from '@zintrust/core';
8
+ import { AnomalyDetection } from './AnomalyDetection';
8
9
 
9
10
  export type WorkerMetadata = {
10
11
  name: string;
@@ -13,6 +14,7 @@ export type WorkerMetadata = {
13
14
  region: string;
14
15
  queueName: string;
15
16
  concurrency: number;
17
+ activeStatus?: boolean;
16
18
  startedAt: Date | null;
17
19
  stoppedAt: Date | null;
18
20
  lastProcessedAt: Date | null;
@@ -46,6 +48,7 @@ export type WorkerInstance = {
46
48
  export type RegisterWorkerOptions = {
47
49
  name: string;
48
50
  config: Partial<WorkerConfig>;
51
+ activeStatus?: boolean;
49
52
  version?: string;
50
53
  region?: string;
51
54
  queues?: ReadonlyArray<string>;
@@ -75,6 +78,51 @@ type Rego = { workers: string[]; count: number };
75
78
  const workers = new Map<string, WorkerInstance>();
76
79
  const registrations = new Map<string, RegisterWorkerOptions>();
77
80
 
81
+ // Cleanup configuration
82
+ const STOPPED_WORKER_CLEANUP_DELAY = 5 * 60 * 1000; // 5 minutes
83
+ const cleanupTimers = new Map<string, NodeJS.Timeout>();
84
+
85
+ /**
86
+ * Helper: Schedule cleanup of stopped worker
87
+ */
88
+ const scheduleStoppedWorkerCleanup = (name: string): void => {
89
+ // Clear existing timer if any
90
+ const existingTimer = cleanupTimers.get(name);
91
+ if (existingTimer) {
92
+ clearTimeout(existingTimer);
93
+ }
94
+
95
+ // Schedule new cleanup with proper cleanup handling
96
+ // eslint-disable-next-line no-restricted-syntax
97
+ const timer = setTimeout(() => {
98
+ try {
99
+ const instance = workers.get(name);
100
+ if (instance && instance.metadata.status === 'stopped') {
101
+ Logger.info(`Auto-cleaning up stopped worker: ${name}`);
102
+ workers.delete(name);
103
+ registrations.delete(name);
104
+ }
105
+ } catch (error) {
106
+ Logger.error(`Error during auto-cleanup of worker ${name}`, error);
107
+ } finally {
108
+ cleanupTimers.delete(name);
109
+ }
110
+ }, STOPPED_WORKER_CLEANUP_DELAY);
111
+
112
+ cleanupTimers.set(name, timer);
113
+ };
114
+
115
+ /**
116
+ * Helper: Cancel cleanup timer
117
+ */
118
+ const cancelCleanupTimer = (name: string): void => {
119
+ const timer = cleanupTimers.get(name);
120
+ if (timer) {
121
+ clearTimeout(timer);
122
+ cleanupTimers.delete(name);
123
+ }
124
+ };
125
+
78
126
  /**
79
127
  * Helper: Calculate uptime in seconds
80
128
  */
@@ -127,6 +175,10 @@ export const WorkerRegistry = Object.freeze({
127
175
  throw ErrorFactory.createWorkerError(`Worker "${name}" is not registered`);
128
176
  }
129
177
 
178
+ if (registration.activeStatus === false) {
179
+ throw ErrorFactory.createWorkerError(`Worker "${name}" is inactive`);
180
+ }
181
+
130
182
  if (workers.has(name)) {
131
183
  const existing = workers.get(name);
132
184
  if (existing?.metadata.status === 'running') {
@@ -143,6 +195,9 @@ export const WorkerRegistry = Object.freeze({
143
195
  instance.metadata.status = 'starting';
144
196
  instance.metadata.version = version ?? '1.0.0';
145
197
 
198
+ // Cancel any pending cleanup timer when worker restarts
199
+ cancelCleanupTimer(name);
200
+
146
201
  workers.set(name, instance);
147
202
 
148
203
  instance.start();
@@ -183,6 +238,11 @@ export const WorkerRegistry = Object.freeze({
183
238
  instance.metadata.status = 'stopped';
184
239
  instance.metadata.stoppedAt = new Date();
185
240
 
241
+ AnomalyDetection.cleanup(name);
242
+
243
+ // Schedule automatic cleanup for stopped worker
244
+ scheduleStoppedWorkerCleanup(name);
245
+
186
246
  Logger.info(`Worker "${name}" stopped successfully`);
187
247
  } catch (error) {
188
248
  Logger.error(`Failed to stop worker "${name}"`, error);
@@ -280,7 +340,21 @@ export const WorkerRegistry = Object.freeze({
280
340
  * List all registered workers
281
341
  */
282
342
  list(): ReadonlyArray<string> {
283
- return Array.from(registrations.keys());
343
+ const names: string[] = [];
344
+ for (const [name, registration] of registrations.entries()) {
345
+ if (registration.activeStatus === false) continue;
346
+ names.push(name);
347
+ }
348
+ return names;
349
+ },
350
+
351
+ /**
352
+ * Update active status for a registered worker
353
+ */
354
+ setActiveStatus(name: string, activeStatus: boolean): void {
355
+ const registration = registrations.get(name);
356
+ if (!registration) return;
357
+ registrations.set(name, { ...registration, activeStatus });
284
358
  },
285
359
 
286
360
  /**
@@ -413,9 +487,14 @@ export const WorkerRegistry = Object.freeze({
413
487
  Logger.warn(`Worker "${name}" is still running during unregister`);
414
488
  }
415
489
 
490
+ // Cancel any pending cleanup timer
491
+ cancelCleanupTimer(name);
492
+
416
493
  workers.delete(name);
417
494
  registrations.delete(name);
418
495
 
496
+ AnomalyDetection.cleanup(name);
497
+
419
498
  Logger.info(`Worker "${name}" unregistered`);
420
499
  },
421
500