@zintrust/workers 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/README.md +861 -0
  2. package/dist/AnomalyDetection.d.ts +102 -0
  3. package/dist/AnomalyDetection.js +321 -0
  4. package/dist/AutoScaler.d.ts +127 -0
  5. package/dist/AutoScaler.js +425 -0
  6. package/dist/BroadcastWorker.d.ts +21 -0
  7. package/dist/BroadcastWorker.js +24 -0
  8. package/dist/CanaryController.d.ts +103 -0
  9. package/dist/CanaryController.js +380 -0
  10. package/dist/ChaosEngineering.d.ts +79 -0
  11. package/dist/ChaosEngineering.js +216 -0
  12. package/dist/CircuitBreaker.d.ts +106 -0
  13. package/dist/CircuitBreaker.js +374 -0
  14. package/dist/ClusterLock.d.ts +90 -0
  15. package/dist/ClusterLock.js +385 -0
  16. package/dist/ComplianceManager.d.ts +177 -0
  17. package/dist/ComplianceManager.js +556 -0
  18. package/dist/DatacenterOrchestrator.d.ts +133 -0
  19. package/dist/DatacenterOrchestrator.js +404 -0
  20. package/dist/DeadLetterQueue.d.ts +122 -0
  21. package/dist/DeadLetterQueue.js +539 -0
  22. package/dist/HealthMonitor.d.ts +42 -0
  23. package/dist/HealthMonitor.js +301 -0
  24. package/dist/MultiQueueWorker.d.ts +89 -0
  25. package/dist/MultiQueueWorker.js +277 -0
  26. package/dist/NotificationWorker.d.ts +21 -0
  27. package/dist/NotificationWorker.js +23 -0
  28. package/dist/Observability.d.ts +153 -0
  29. package/dist/Observability.js +530 -0
  30. package/dist/PluginManager.d.ts +123 -0
  31. package/dist/PluginManager.js +392 -0
  32. package/dist/PriorityQueue.d.ts +117 -0
  33. package/dist/PriorityQueue.js +244 -0
  34. package/dist/ResourceMonitor.d.ts +164 -0
  35. package/dist/ResourceMonitor.js +605 -0
  36. package/dist/SLAMonitor.d.ts +110 -0
  37. package/dist/SLAMonitor.js +274 -0
  38. package/dist/WorkerFactory.d.ts +193 -0
  39. package/dist/WorkerFactory.js +1507 -0
  40. package/dist/WorkerInit.d.ts +85 -0
  41. package/dist/WorkerInit.js +223 -0
  42. package/dist/WorkerMetrics.d.ts +114 -0
  43. package/dist/WorkerMetrics.js +509 -0
  44. package/dist/WorkerRegistry.d.ts +145 -0
  45. package/dist/WorkerRegistry.js +319 -0
  46. package/dist/WorkerShutdown.d.ts +61 -0
  47. package/dist/WorkerShutdown.js +159 -0
  48. package/dist/WorkerVersioning.d.ts +107 -0
  49. package/dist/WorkerVersioning.js +300 -0
  50. package/dist/build-manifest.json +462 -0
  51. package/dist/config/workerConfig.d.ts +3 -0
  52. package/dist/config/workerConfig.js +19 -0
  53. package/dist/createQueueWorker.d.ts +23 -0
  54. package/dist/createQueueWorker.js +113 -0
  55. package/dist/dashboard/index.d.ts +1 -0
  56. package/dist/dashboard/index.js +1 -0
  57. package/dist/dashboard/types.d.ts +117 -0
  58. package/dist/dashboard/types.js +1 -0
  59. package/dist/dashboard/workers-api.d.ts +4 -0
  60. package/dist/dashboard/workers-api.js +638 -0
  61. package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
  62. package/dist/dashboard/workers-dashboard-ui.js +1026 -0
  63. package/dist/dashboard/workers-dashboard.d.ts +4 -0
  64. package/dist/dashboard/workers-dashboard.js +904 -0
  65. package/dist/helper/index.d.ts +5 -0
  66. package/dist/helper/index.js +10 -0
  67. package/dist/http/WorkerApiController.d.ts +38 -0
  68. package/dist/http/WorkerApiController.js +312 -0
  69. package/dist/http/WorkerController.d.ts +374 -0
  70. package/dist/http/WorkerController.js +1351 -0
  71. package/dist/http/middleware/CustomValidation.d.ts +92 -0
  72. package/dist/http/middleware/CustomValidation.js +270 -0
  73. package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
  74. package/dist/http/middleware/DatacenterValidator.js +94 -0
  75. package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
  76. package/dist/http/middleware/EditWorkerValidation.js +55 -0
  77. package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
  78. package/dist/http/middleware/FeaturesValidator.js +60 -0
  79. package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
  80. package/dist/http/middleware/InfrastructureValidator.js +226 -0
  81. package/dist/http/middleware/OptionsValidator.d.ts +3 -0
  82. package/dist/http/middleware/OptionsValidator.js +112 -0
  83. package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
  84. package/dist/http/middleware/PayloadSanitizer.js +42 -0
  85. package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
  86. package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
  87. package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
  88. package/dist/http/middleware/QueueNameSanitizer.js +45 -0
  89. package/dist/http/middleware/ValidateDriver.d.ts +7 -0
  90. package/dist/http/middleware/ValidateDriver.js +20 -0
  91. package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
  92. package/dist/http/middleware/VersionSanitizer.js +25 -0
  93. package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
  94. package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
  95. package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
  96. package/dist/http/middleware/WorkerValidationChain.js +185 -0
  97. package/dist/index.d.ts +46 -0
  98. package/dist/index.js +48 -0
  99. package/dist/routes/workers.d.ts +12 -0
  100. package/dist/routes/workers.js +81 -0
  101. package/dist/storage/WorkerStore.d.ts +45 -0
  102. package/dist/storage/WorkerStore.js +195 -0
  103. package/dist/type.d.ts +76 -0
  104. package/dist/type.js +1 -0
  105. package/dist/ui/router/ui.d.ts +3 -0
  106. package/dist/ui/router/ui.js +83 -0
  107. package/dist/ui/types/worker-ui.d.ts +229 -0
  108. package/dist/ui/types/worker-ui.js +5 -0
  109. package/package.json +53 -0
  110. package/src/AnomalyDetection.ts +434 -0
  111. package/src/AutoScaler.ts +654 -0
  112. package/src/BroadcastWorker.ts +34 -0
  113. package/src/CanaryController.ts +531 -0
  114. package/src/ChaosEngineering.ts +301 -0
  115. package/src/CircuitBreaker.ts +495 -0
  116. package/src/ClusterLock.ts +499 -0
  117. package/src/ComplianceManager.ts +815 -0
  118. package/src/DatacenterOrchestrator.ts +561 -0
  119. package/src/DeadLetterQueue.ts +733 -0
  120. package/src/HealthMonitor.ts +390 -0
  121. package/src/MultiQueueWorker.ts +431 -0
  122. package/src/NotificationWorker.ts +33 -0
  123. package/src/Observability.ts +696 -0
  124. package/src/PluginManager.ts +551 -0
  125. package/src/PriorityQueue.ts +351 -0
  126. package/src/ResourceMonitor.ts +769 -0
  127. package/src/SLAMonitor.ts +408 -0
  128. package/src/WorkerFactory.ts +2108 -0
  129. package/src/WorkerInit.ts +313 -0
  130. package/src/WorkerMetrics.ts +709 -0
  131. package/src/WorkerRegistry.ts +443 -0
  132. package/src/WorkerShutdown.ts +210 -0
  133. package/src/WorkerVersioning.ts +422 -0
  134. package/src/config/workerConfig.ts +25 -0
  135. package/src/createQueueWorker.ts +174 -0
  136. package/src/dashboard/index.ts +6 -0
  137. package/src/dashboard/types.ts +141 -0
  138. package/src/dashboard/workers-api.ts +785 -0
  139. package/src/dashboard/zintrust.svg +30 -0
  140. package/src/helper/index.ts +11 -0
  141. package/src/http/WorkerApiController.ts +369 -0
  142. package/src/http/WorkerController.ts +1512 -0
  143. package/src/http/middleware/CustomValidation.ts +360 -0
  144. package/src/http/middleware/DatacenterValidator.ts +124 -0
  145. package/src/http/middleware/EditWorkerValidation.ts +74 -0
  146. package/src/http/middleware/FeaturesValidator.ts +82 -0
  147. package/src/http/middleware/InfrastructureValidator.ts +295 -0
  148. package/src/http/middleware/OptionsValidator.ts +144 -0
  149. package/src/http/middleware/PayloadSanitizer.ts +52 -0
  150. package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
  151. package/src/http/middleware/QueueNameSanitizer.ts +55 -0
  152. package/src/http/middleware/ValidateDriver.ts +29 -0
  153. package/src/http/middleware/VersionSanitizer.ts +30 -0
  154. package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
  155. package/src/http/middleware/WorkerValidationChain.ts +230 -0
  156. package/src/index.ts +98 -0
  157. package/src/routes/workers.ts +154 -0
  158. package/src/storage/WorkerStore.ts +240 -0
  159. package/src/type.ts +89 -0
  160. package/src/types/queue-monitor.d.ts +38 -0
  161. package/src/types/queue-redis.d.ts +38 -0
  162. package/src/ui/README.md +13 -0
  163. package/src/ui/components/JsonEditor.js +670 -0
  164. package/src/ui/components/JsonViewer.js +387 -0
  165. package/src/ui/components/WorkerCard.js +178 -0
  166. package/src/ui/components/WorkerExpandPanel.js +257 -0
  167. package/src/ui/components/fetcher.js +42 -0
  168. package/src/ui/components/sla-scorecard.js +32 -0
  169. package/src/ui/components/styles.css +30 -0
  170. package/src/ui/components/table-expander.js +34 -0
  171. package/src/ui/integration/worker-ui-integration.js +565 -0
  172. package/src/ui/router/ui.ts +99 -0
  173. package/src/ui/services/workerApi.js +240 -0
  174. package/src/ui/types/worker-ui.ts +283 -0
  175. package/src/ui/utils/jsonValidator.js +444 -0
  176. package/src/ui/workers/index.html +202 -0
  177. package/src/ui/workers/main.js +1781 -0
  178. package/src/ui/workers/styles.css +1350 -0
@@ -0,0 +1,2108 @@
1
+ /**
2
+ * Worker Factory
3
+ * Central factory for creating workers with all advanced features
4
+ * Sealed namespace for immutability
5
+ */
6
+
7
+ import {
8
+ appConfig,
9
+ createRedisConnection,
10
+ databaseConfig,
11
+ Env,
12
+ ErrorFactory,
13
+ getBullMQSafeQueueName,
14
+ Logger,
15
+ NodeSingletons,
16
+ queueConfig,
17
+ registerDatabasesFromRuntimeConfig,
18
+ useEnsureDbConnected,
19
+ workersConfig,
20
+ type IDatabase,
21
+ type RedisConfig,
22
+ type WorkerStatus,
23
+ } from '@zintrust/core';
24
+ import { Worker, type Job, type WorkerOptions } from 'bullmq';
25
+ import { AutoScaler, type AutoScalerConfig } from './AutoScaler';
26
+ import { CanaryController } from './CanaryController';
27
+ import { CircuitBreaker } from './CircuitBreaker';
28
+ import { ClusterLock } from './ClusterLock';
29
+ import { ComplianceManager, type ComplianceConfig } from './ComplianceManager';
30
+ import { DatacenterOrchestrator } from './DatacenterOrchestrator';
31
+ import { DeadLetterQueue, type RetentionPolicy } from './DeadLetterQueue';
32
+ import { HealthMonitor } from './HealthMonitor';
33
+ import { MultiQueueWorker } from './MultiQueueWorker';
34
+ import { Observability, type ObservabilityConfig } from './Observability';
35
+ import { PluginManager } from './PluginManager';
36
+ import { PriorityQueue } from './PriorityQueue';
37
+ import { ResourceMonitor } from './ResourceMonitor';
38
+ import { WorkerMetrics } from './WorkerMetrics';
39
+ import { WorkerRegistry, type WorkerInstance as RegistryWorkerInstance } from './WorkerRegistry';
40
+ import { WorkerVersioning } from './WorkerVersioning';
41
+ import {
42
+ DbWorkerStore,
43
+ InMemoryWorkerStore,
44
+ RedisWorkerStore,
45
+ type WorkerRecord,
46
+ type WorkerStore,
47
+ } from './storage/WorkerStore';
48
+
49
// Path helper taken from the core NodeSingletons accessor; used below to resolve processor module paths.
const path = NodeSingletons.path;
50
+
51
/**
 * Selects the WorkerStore that backs a worker.
 *
 * Resolution order:
 *  1. an explicit `persistenceOverride`;
 *  2. the persistence settings resolved from the worker's own `config`;
 *  3. the module-level default store, configured on demand.
 *
 * @param config Worker configuration, if one is available.
 * @param persistenceOverride Persistence settings that take priority over `config`.
 * @returns The store to use for this worker.
 */
const getStoreForWorker = async (
  config: WorkerFactoryConfig | undefined,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<WorkerStore> => {
  // The worker config is only consulted when no override was supplied.
  const persistence =
    persistenceOverride ?? (config ? resolvePersistenceConfig(config) : undefined);

  if (persistence) {
    return resolveWorkerStoreForPersistence(persistence);
  }

  // Nothing worker-specific: fall back to the default/global store.
  await ensureWorkerStoreConfigured();
  return workerStore;
};
71
+
72
/**
 * Resolves the store for a worker and verifies that a record named `name` exists in it.
 *
 * @param name Worker name to look up.
 * @param config Worker configuration used to pick the store, if available.
 * @param persistenceOverride Persistence settings that take priority over `config`.
 * @returns The resolved store, once the record is confirmed to exist.
 * @throws A not-found error (from ErrorFactory) when the store has no record for `name`.
 */
const validateAndGetStore = async (
  name: string,
  config: WorkerFactoryConfig | undefined,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<WorkerStore> => {
  const store = await getStoreForWorker(config, persistenceOverride);
  const existing = await store.get(name);

  if (existing) {
    return store;
  }

  throw ErrorFactory.createNotFoundError(
    `Worker "${name}" not found in the specified driver. Ensure you are addressing the correct storage backend.`
  );
};
86
+
87
// Worker creation status enum for proper lifecycle management
/**
 * Lifecycle states of a worker, expressed as a frozen object of string literals
 * (`as const`) rather than a TypeScript `enum`.
 */
export const WorkerCreationStatus = {
  CREATING: 'creating', // Initial state - worker is being created
  CONNECTING: 'connecting', // Connecting to Redis/Queue
  STARTING: 'starting', // Starting BullMQ worker
  RUNNING: 'running', // Actually processing jobs
  FAILED: 'failed', // Connection/startup failed
  STOPPED: 'stopped', // Intentionally stopped
} as const;

/** Union of the string values of the `WorkerCreationStatus` object above. */
export type WorkerCreationStatus = (typeof WorkerCreationStatus)[keyof typeof WorkerCreationStatus];
98
+
99
// Internal initialization state to prevent memory leaks and redundant calls
// One flag per optional subsystem; all start as `false`.
// NOTE(review): the code that flips these flags is outside this section — presumably each is
// set once the matching subsystem has been initialized; confirm against the initializers.
let clusteringInitialized = false;
let metricsInitialized = false;
let autoScalingInitialized = false;
let deadLetterQueueInitialized = false;
let resourceMonitoringInitialized = false;
let complianceInitialized = false;
let observabilityInitialized = false;
107
+
108
/**
 * Configuration accepted by the worker factory for a single worker.
 */
export type WorkerFactoryConfig = {
  // Worker name; passed as the worker identifier to logging, metrics, circuit-breaker and plugin calls.
  name: string;
  // Worker version; the enhanced processor falls back to '1.0.0' when omitted.
  version?: string;
  // Queue the worker consumes; also attached to tracing spans and dead-letter entries.
  queueName: string;
  // Job handler invoked by the enhanced processor for every job.
  processor: (job: Job) => Promise<unknown>;
  // NOTE(review): presumably a module path the processor can be loaded from — confirm.
  processorPath?: string;
  // BullMQ worker options.
  options?: WorkerOptions;
  // NOTE(review): presumably controls whether the worker starts on creation — confirm.
  autoStart?: boolean;
  // Per-worker backing-service configuration.
  infrastructure?: {
    redis?: RedisConfigInput;
    persistence?: WorkerPersistenceConfig;
    deadLetterQueue?: {
      redis?: RedisConfigInput;
      policy: RetentionPolicy;
    };
    compliance?: {
      redis?: RedisConfigInput;
      config?: Partial<ComplianceConfig>;
    };
    observability?: ObservabilityConfigInput;
    autoScaler?: AutoScalerConfig;
  };
  // Optional feature switches. The job pipeline in this file treats a missing flag as disabled.
  features?: {
    clustering?: boolean;
    metrics?: boolean;
    autoScaling?: boolean;
    circuitBreaker?: boolean;
    deadLetterQueue?: boolean;
    resourceMonitoring?: boolean;
    compliance?: boolean;
    observability?: boolean;
    plugins?: boolean;
    versioning?: boolean;
    datacenterOrchestration?: boolean;
  };
  // Region placement settings.
  // NOTE(review): presumably consumed by the datacenter orchestration feature — confirm.
  datacenter?: {
    primaryRegion: string;
    secondaryRegions?: string[];
    affinityRules?: {
      preferLocal?: boolean;
      maxLatency?: number;
      avoidRegions?: string[];
    };
  };
};
153
+
154
/**
 * Runtime entry the factory keeps for a worker: the BullMQ worker, the configuration
 * it was created from, and its lifecycle/connection bookkeeping.
 */
export type WorkerInstance = {
  // Underlying BullMQ worker.
  worker: Worker;
  // Configuration this worker was created from.
  config: WorkerFactoryConfig;
  startedAt: Date;
  // Current lifecycle state (one of the WorkerCreationStatus values).
  status: WorkerCreationStatus;
  lastHealthCheck?: Date;
  connectionState?: 'disconnected' | 'connecting' | 'connected' | 'error';
};
162
+
163
/**
 * Redis settings marked with the literal `env: true`; all other fields are optional strings.
 * NOTE(review): presumably the string fields name environment variables (or carry raw env
 * values) to be resolved later — confirm against the resolver, which is not in this section.
 */
type RedisEnvConfig = {
  env: true;
  host?: string;
  port?: string;
  password?: string;
  db?: string;
};

/** Redis settings given either as a concrete `RedisConfig` or in the env-marked form above. */
type RedisConfigInput = RedisConfig | RedisEnvConfig;
172
+
173
/**
 * Where worker records are persisted, discriminated by `driver`:
 *  - 'memory':   no further options;
 *  - 'redis':    optional Redis settings and key prefix;
 *  - 'database': optional client (instance or string), connection name and table name.
 */
export type WorkerPersistenceConfig =
  | { driver: 'memory' }
  | { driver: 'redis'; redis?: RedisConfigInput; keyPrefix?: string }
  | { driver: 'database'; client?: IDatabase | string; connection?: string; table?: string };
177
+
178
/**
 * Observability settings: either a complete `ObservabilityConfig`, or a looser form in which
 * `enabled` is optional and each exporter section may be given partially.
 */
type ObservabilityConfigInput =
  | ObservabilityConfig
  | {
      enabled?: boolean;
      prometheus?: Partial<ObservabilityConfig['prometheus']>;
      openTelemetry?: Partial<ObservabilityConfig['openTelemetry']>;
      datadog?: Partial<ObservabilityConfig['datadog']>;
    };
186
+
187
// Internal state
// Worker instances tracked by this module (NOTE(review): presumably keyed by worker name — confirm).
const workers = new Map<string, WorkerInstance>();
// Default store returned when no worker-specific persistence applies; starts as an in-memory store.
let workerStore: WorkerStore = InMemoryWorkerStore.create();
// NOTE(review): presumably set once the default store has been configured — confirm.
let workerStoreConfigured = false;
let workerStoreConfig: WorkerPersistenceConfig | null = null;
/**
 * Callback that may supply a processor for a worker name.
 * It may answer synchronously or with a promise; `undefined` means "no processor from this resolver".
 */
type ProcessorResolver = (
  name: string
) =>
  | WorkerFactoryConfig['processor']
  | undefined
  | Promise<WorkerFactoryConfig['processor'] | undefined>;

// Processor lookup sources, consulted in this order by resolveProcessor:
// functions registered by name, module paths registered by name, then custom resolvers.
const processorRegistry = new Map<string, WorkerFactoryConfig['processor']>();
const processorPathRegistry = new Map<string, string>();
const processorResolvers: ProcessorResolver[] = [];
202
+
203
/**
 * Builds a placeholder worker configuration whose only meaningful content is the
 * persistence driver read from the `WORKER_PERSISTENCE_DRIVER` environment variable
 * (default: 'memory'). When the driver is 'redis', the queue's Redis driver settings
 * are attached as the infrastructure Redis config.
 *
 * @returns A bootstrap configuration with a no-op processor.
 */
const buildPersistenceBootstrapConfig = (): WorkerFactoryConfig => {
  const driver = Env.get('WORKER_PERSISTENCE_DRIVER', 'memory') as 'memory' | 'redis' | 'database';
  const usesRedis = driver === 'redis';

  return {
    name: '__zintrust_persistence_bootstrap__',
    queueName: '__zintrust_bootstrap__',
    processor: async () => undefined,
    infrastructure: {
      persistence: { driver },
      // Redis connection details are only needed for Redis-backed persistence.
      ...(usesRedis ? { redis: queueConfig.drivers.redis } : {}),
    },
  };
};
227
+
228
/**
 * Registers a processor function under a worker name, replacing any existing entry for that name.
 *
 * @param name Worker name used as the registry key.
 * @param processor Job handler to associate with `name`.
 */
const registerProcessor = (name: string, processor: WorkerFactoryConfig['processor']): void => {
  processorRegistry.set(name, processor);
};
231
+
232
/**
 * Bulk-registers processors from a name → function record.
 * Entries whose value is not a function are ignored.
 *
 * @param processors Map of worker name to job handler.
 */
const registerProcessors = (processors: Record<string, WorkerFactoryConfig['processor']>): void => {
  for (const [workerName, handler] of Object.entries(processors)) {
    if (typeof handler !== 'function') continue;
    processorRegistry.set(workerName, handler);
  }
};
239
+
240
/**
 * Bulk-registers processor module paths from a name → path record.
 * Entries that are not strings, or that are blank after trimming, are ignored.
 * The path is stored as given (untrimmed).
 *
 * @param paths Map of worker name to processor module path.
 */
const registerProcessorPaths = (paths: Record<string, string>): void => {
  for (const [workerName, modulePath] of Object.entries(paths)) {
    const isUsable = typeof modulePath === 'string' && modulePath.trim().length > 0;
    if (!isUsable) continue;
    processorPathRegistry.set(workerName, modulePath);
  }
};
247
+
248
/**
 * Appends a custom resolver to the list consulted when a processor cannot be found
 * in the name or path registries.
 *
 * @param resolver Callback that maps a worker name to a processor (or `undefined`).
 */
const registerProcessorResolver = (resolver: ProcessorResolver): void => {
  processorResolvers.push(resolver);
};
251
+
252
/**
 * Replaces HTML-entity encodings of the forward slash with a literal '/'.
 *
 * Recognized forms (matched case-insensitively):
 *  - hexadecimal reference `&#x2F;`, including zero-padded variants such as `&#x002F;`;
 *  - decimal reference `&#47;`, including zero-padded variants such as `&#047;`;
 *  - named reference `&sol;`.
 *
 * All other text is returned unchanged.
 *
 * @param value Possibly entity-encoded processor path.
 * @returns The path with slash entities decoded.
 */
const decodeProcessorPathEntities = (value: string): string =>
  // A single global pass; leading zeros are legal in numeric character references.
  value.replace(/&#x0*2f;|&#0*47;|&sol;/gi, '/');
257
+
258
/**
 * Polls a worker until it passes a health check or the timeout elapses.
 *
 * A check passes when `worker.isRunning()` is truthy and the worker's Redis client
 * answers `PING` with 'PONG'. The first check runs immediately; failed checks are
 * retried every 100 ms. The timeout is evaluated after each failed check, so at least
 * one check is always made.
 *
 * Only a Redis ping is used for connectivity: creating a Queue instance on every retry
 * caused memory pressure and potential connection leaks.
 *
 * @param worker BullMQ worker to verify.
 * @param name Worker name, used in log messages.
 * @param _queueName Unused; kept for signature compatibility.
 * @param timeoutMs Maximum time to keep retrying, in milliseconds.
 * @throws A worker error (from ErrorFactory) when no check passes within `timeoutMs`.
 */
const waitForWorkerConnection = async (
  worker: Worker,
  name: string,
  _queueName: string,
  timeoutMs: number
): Promise<void> => {
  const startedAt = Date.now();
  const checkIntervalMs = 100; // 100ms between checks

  // One health probe; throws when the worker or its Redis connection is not ready.
  const verifyOnce = async (): Promise<void> => {
    const isRunning = await worker.isRunning();
    if (!isRunning) {
      throw ErrorFactory.createWorkerError('Worker not running');
    }

    const client = await worker.client;
    const pingResult = await client.ping();
    if (pingResult !== 'PONG') {
      throw ErrorFactory.createWorkerError('Redis ping failed');
    }

    Logger.debug(`Worker health verification passed for ${name}`, {
      isRunning,
      pingResult,
    });
  };

  // A plain async loop (instead of an async callback inside a Promise executor) guarantees
  // that any unexpected throw rejects the returned promise rather than leaving it pending.
  for (;;) {
    try {
      await verifyOnce();
      return;
    } catch (error) {
      Logger.debug(`Worker health verification failed for ${name}, retrying...`, error);
    }

    if (Date.now() - startedAt >= timeoutMs) {
      throw ErrorFactory.createWorkerError(
        'Worker failed health verification within timeout period'
      );
    }

    await new Promise<void>((resolve) => {
      globalThis.setTimeout(resolve, checkIntervalMs);
    });
  }
};
318
+
319
/**
 * Registers a worker with the HealthMonitor.
 *
 * @param name Worker name.
 * @param worker BullMQ worker instance to monitor.
 * @param queueName Queue the worker consumes.
 */
const startHealthMonitoring = (name: string, worker: Worker, queueName: string): void => {
  HealthMonitor.register(name, worker, queueName);
};
322
+
323
/**
 * Normalizes a processor module path.
 *
 * Steps: decode slash entities, drop everything from the first '?', '#' or '&' onward,
 * trim whitespace, then either keep the path as-is when it looks absolute (leading '/'
 * or a Windows drive prefix such as `C:\` / `C:/`) or resolve it against the current
 * working directory.
 *
 * @param value Raw processor path.
 * @returns The normalized path, or '' when nothing usable remains.
 */
const sanitizeProcessorPath = (value: string): string => {
  const [head] = decodeProcessorPathEntities(value).split(/[?#&]/);
  const base = head?.trim() ?? '';
  if (base === '') return '';

  const looksAbsolute = base.startsWith('/') || /^[A-Za-z]:[\\/]/.test(base);
  if (looksAbsolute) {
    return base;
  }

  const relativePath = base.startsWith('.') ? base : `./${base}`;
  return path.resolve(process.cwd(), relativePath);
};
331
+
332
/**
 * Dynamically imports a module and extracts a processor function from it.
 *
 * The first defined export among `default`, `processor`, `handler` and `handle` is the
 * candidate; it is returned only if it is a function. Import failures are logged and
 * reported as `undefined` rather than thrown.
 *
 * NOTE(review): absolute Windows paths are passed to `import()` unchanged; Node's ESM loader
 * documents file:// URLs for such specifiers — confirm this works on Windows.
 *
 * @param modulePath Path of the module that exports the processor.
 * @returns The processor function, or `undefined` when the path is empty, the import
 *          fails, or no function export is found.
 */
const resolveProcessorFromPath = async (
  modulePath: string
): Promise<WorkerFactoryConfig['processor'] | undefined> => {
  const trimmed = modulePath.trim();
  if (trimmed.length === 0) return undefined;

  const resolved = sanitizeProcessorPath(trimmed);
  if (!resolved) return undefined;

  try {
    const mod = await import(resolved);
    const candidate = mod?.default ?? mod?.processor ?? mod?.handler ?? mod?.handle;

    if (typeof candidate === 'function') {
      return candidate as WorkerFactoryConfig['processor'];
    }

    Logger.warn(
      `Module imported from ${resolved} but no valid processor function found (exported: ${Object.keys(mod)})`
    );
    return undefined;
  } catch (err) {
    Logger.error(`Failed to import processor from path: ${resolved}`, err);
    return undefined;
  }
};
359
+
360
/**
 * Finds the processor for a worker name.
 *
 * Lookup order:
 *  1. a function registered directly under `name`;
 *  2. a module path registered under `name`, loaded dynamically;
 *  3. the registered custom resolvers — all are invoked concurrently and the first
 *     (in registration order) that yields a processor wins.
 *
 * Errors in steps 2 and 3 are logged and treated as "not found".
 *
 * @param name Worker name.
 * @returns The processor, or `undefined` when no source provides one.
 */
const resolveProcessor = async (
  name: string
): Promise<WorkerFactoryConfig['processor'] | undefined> => {
  const registered = processorRegistry.get(name);
  if (registered) return registered;

  const hintedPath = processorPathRegistry.get(name);
  if (hintedPath) {
    try {
      const fromPath = await resolveProcessorFromPath(hintedPath);
      if (fromPath) return fromPath;
    } catch (error) {
      Logger.error(`Failed to resolve processor module for "${name}"`, error);
    }
  }

  // A failing resolver must not prevent the others from being consulted.
  const askResolver = async (
    resolver: ProcessorResolver
  ): Promise<WorkerFactoryConfig['processor'] | undefined> => {
    try {
      return await resolver(name);
    } catch (error) {
      Logger.error(`Processor resolver failed for "${name}"`, error);
      return undefined;
    }
  };

  const candidates = await Promise.all(processorResolvers.map((resolver) => askResolver(resolver)));
  const firstHit = candidates.find((candidate) => candidate !== undefined);
  return firstHit ? firstHit : undefined;
};
392
+
393
/**
 * Loads a processor from a module path; a thin alias that delegates to resolveProcessorFromPath.
 *
 * @param modulePath Path of the module that exports the processor.
 * @returns The processor function, or `undefined` when none could be loaded.
 */
const resolveProcessorPath = async (
  modulePath: string
): Promise<WorkerFactoryConfig['processor'] | undefined> => resolveProcessorFromPath(modulePath);
396
+
397
/**
 * Records a worker metric without awaiting it; a rejected recording is logged
 * instead of propagating to the caller.
 *
 * @param workerName Worker the metric belongs to.
 * @param metricType Metric kind accepted by WorkerMetrics.record.
 * @param value Metric value.
 * @param metadata Optional extra data stored with the metric.
 */
const recordMetricSafely = (
  workerName: string,
  metricType: Parameters<typeof WorkerMetrics.record>[1],
  value: number,
  metadata?: Record<string, unknown>
): void => {
  const logFailure = (error: unknown): void => {
    Logger.error(`Failed to record worker metric: ${workerName}/${metricType}`, error);
  };

  WorkerMetrics.record(workerName, metricType, value, metadata).catch(logFailure);
};
407
+
408
/** Result of running the `beforeProcess` plugin hooks for a job. */
type BeforeProcessHookOutcome = {
  // True when a plugin stopped processing; the job is then reported as skipped.
  skip: boolean;
  // Why the job was skipped (set only when a plugin stopped processing).
  reason?: string;
  // Job payload to process: the plugin-modified data if any, otherwise the job's own data.
  jobData?: unknown;
};
413
+
414
/**
 * Rejects a job when the worker's circuit breaker does not allow execution.
 * Does nothing when the circuit-breaker feature is disabled.
 *
 * @param workerName Worker name.
 * @param version Worker version the circuit is tracked under.
 * @param jobId Job identifier, used for logging only.
 * @param features Feature switches of the worker.
 * @throws A general error (from ErrorFactory) when the circuit is open; the rejection
 *         is logged and recorded on the circuit breaker first.
 */
const ensureCircuitAllowsExecution = (
  workerName: string,
  version: string,
  jobId: string | number | undefined,
  features?: WorkerFactoryConfig['features']
): void => {
  const breakerEnabled = features?.circuitBreaker ?? false;
  if (!breakerEnabled) return;

  if (CircuitBreaker.canExecute(workerName, version)) return;

  const circuit = CircuitBreaker.getState(workerName, version);
  Logger.warn('Circuit breaker is open, rejecting job', {
    workerName,
    version,
    jobId,
    circuitState: circuit?.state,
  });

  CircuitBreaker.recordRejection(workerName, version);
  throw ErrorFactory.createGeneralError(`Circuit breaker is open for ${workerName}@${version}`);
};
436
+
437
/**
 * Runs the `beforeProcess` plugin hooks for a job.
 *
 * - Plugins disabled: the job proceeds with its own data.
 * - A plugin stopped the chain: the job is skipped, with the first hook error message
 *   (or 'Stopped by plugin') as the reason.
 * - A plugin modified the context: the job proceeds with the modified data.
 *
 * @param workerName Worker name.
 * @param job Job about to be processed.
 * @param features Feature switches of the worker.
 * @returns Whether to skip the job and which payload to process.
 */
const runBeforeProcessHooks = async (
  workerName: string,
  job: Job,
  features?: WorkerFactoryConfig['features']
): Promise<BeforeProcessHookOutcome> => {
  const pluginsEnabled = features?.plugins ?? false;
  if (!pluginsEnabled) {
    return { skip: false, jobData: job.data };
  }

  const outcome = await PluginManager.executeHook('beforeProcess', {
    workerName,
    jobId: job.id ?? '',
    jobData: job.data,
    timestamp: new Date(),
  });

  if (outcome.stopped) {
    const reason = outcome.errors[0]?.error?.message ?? 'Stopped by plugin';
    Logger.info('Job processing stopped by plugin', {
      workerName,
      jobId: job.id,
      reason,
    });
    return { skip: true, reason };
  }

  return {
    skip: false,
    jobData: outcome.modified ? outcome.context.jobData : job.data,
  };
};
469
+
470
/**
 * Opens an observability span named `worker.<workerName>.process` for a job.
 *
 * @param workerName Worker name.
 * @param version Worker version.
 * @param job Job being processed.
 * @param queueName Queue the job came from.
 * @param features Feature switches of the worker.
 * @returns The span id, or `null` when observability is disabled.
 */
const startProcessingSpan = (
  workerName: string,
  version: string,
  job: Job,
  queueName: string,
  features?: WorkerFactoryConfig['features']
): string | null => {
  const observabilityEnabled = features?.observability ?? false;
  if (!observabilityEnabled) {
    return null;
  }

  const attributes = {
    worker_name: workerName,
    worker_version: version,
    job_id: job.id ?? '',
    queue_name: queueName,
  };

  return Observability.startSpan(`worker.${workerName}.process`, { attributes });
};
488
+
489
/**
 * Runs the post-processing plugin hooks for a successful job:
 * `afterProcess` first, then `onComplete`, each with a freshly built context.
 *
 * @param workerName Worker name.
 * @param job Identifier and payload of the processed job.
 * @param result Value returned by the processor; exposed to plugins as `metadata.result`.
 */
const usePluginManager = async (
  workerName: string,
  job: { id: string; data: unknown },
  result: unknown
): Promise<void> => {
  // Built per hook so each call gets its own timestamp and metadata object.
  const buildContext = () => ({
    workerName,
    jobId: job.id ?? '',
    jobData: job.data,
    metadata: { result },
    timestamp: new Date(),
  });

  await PluginManager.executeHook('afterProcess', buildContext());
  await PluginManager.executeHook('onComplete', buildContext());
};
510
+
511
/**
 * Performs the bookkeeping for a successfully processed job. Each step runs only when
 * its feature is enabled, in this order: metrics, circuit breaker, observability
 * (job metrics and span completion), plugin hooks.
 *
 * @param params Worker name and version, the job, its result, the processing duration
 *               in milliseconds, the open span id (or `null`) and the feature switches.
 */
const handleSuccess = async (params: {
  workerName: string;
  jobVersion: string;
  job: Job;
  result: unknown;
  duration: number;
  spanId: string | null;
  features?: WorkerFactoryConfig['features'];
}): Promise<void> => {
  const { workerName, jobVersion, job, result, duration, spanId, features } = params;
  const isEnabled = (flag: boolean | undefined): boolean => flag ?? false;

  if (isEnabled(features?.metrics)) {
    recordMetricSafely(workerName, 'processed', 1);
    recordMetricSafely(workerName, 'duration', duration);
  }

  if (isEnabled(features?.circuitBreaker)) {
    CircuitBreaker.recordSuccess(workerName, jobVersion);
  }

  if (isEnabled(features?.observability)) {
    const jobMetrics = { processed: 1, failed: 0, durationMs: duration };
    Observability.recordJobMetrics(workerName, job.name, jobMetrics);

    if (spanId !== null) {
      Observability.endSpan(spanId, { success: true });
    }
  }

  if (isEnabled(features?.plugins)) {
    const processedJob = { id: job.id ?? '', data: job.data };
    await usePluginManager(workerName, processedJob, result);
  }
};
546
+
547
/**
 * Records the 'errors' and 'duration' metrics for a failed job when metrics are enabled.
 *
 * @param workerName Worker name.
 * @param _jobVersion Unused; kept for signature compatibility.
 * @param duration Processing time in milliseconds.
 * @param features Feature switches of the worker.
 */
const recordFailureMetrics = (
  workerName: string,
  _jobVersion: string,
  duration: number,
  features?: WorkerFactoryConfig['features']
): void => {
  if (features?.metrics !== true) return;

  recordMetricSafely(workerName, 'errors', 1);
  recordMetricSafely(workerName, 'duration', duration);
};
558
+
559
/**
 * Reports a failed job to the observability layer when that feature is enabled:
 * records the job metrics and, if a span is open, marks it failed and closes it.
 *
 * The span error is a generic 'Job processing failed' error; the job's actual error
 * is not passed to this function.
 *
 * @param workerName Worker name.
 * @param jobName Name of the failed job.
 * @param duration Processing time in milliseconds.
 * @param spanId Open span id, or `null` when no span was started.
 * @param features Feature switches of the worker.
 */
const recordFailureObservability = (
  workerName: string,
  jobName: string,
  duration: number,
  spanId: string | null,
  features?: WorkerFactoryConfig['features']
): void => {
  if (features?.observability !== true) return;

  const jobMetrics = { processed: 0, failed: 1, durationMs: duration };
  Observability.recordJobMetrics(workerName, jobName, jobMetrics);

  if (spanId === null) return;

  Observability.recordSpanError(spanId, ErrorFactory.createGeneralError('Job processing failed'));
  Observability.endSpan(spanId, { success: false });
};
581
+
582
/**
 * Stores a failed job in the dead-letter queue when that feature is enabled.
 *
 * The entry carries the job identity and payload, a serializable copy of the error,
 * attempt counters, timestamps, the processing time and the worker version.
 * Compliance flags are always written as non-PII / non-PHI / 'public'.
 *
 * @param workerName Worker name.
 * @param job Failed job.
 * @param error Error that failed the job.
 * @param duration Processing time in milliseconds.
 * @param jobVersion Worker version.
 * @param queueName Queue the job came from.
 * @param features Feature switches of the worker.
 */
const addFailedJobToDeadLetterQueue = async (
  workerName: string,
  job: Job,
  error: Error,
  duration: number,
  jobVersion: string,
  queueName: string,
  features?: WorkerFactoryConfig['features']
): Promise<void> => {
  if (features?.deadLetterQueue !== true) return;

  const { name, message, stack } = error;

  await DeadLetterQueue.addFailedJob({
    id: job.id ?? '',
    queueName,
    workerName,
    jobName: job.name,
    data: job.data,
    error: { name, message, stack },
    attemptsMade: job.attemptsMade ?? 0,
    maxAttempts: job.opts.attempts ?? 0,
    failedAt: new Date(),
    // The job's creation timestamp stands in for the first attempt time.
    firstAttemptAt: new Date(job.timestamp ?? Date.now()),
    lastAttemptAt: new Date(),
    processingTime: duration,
    metadata: { version: jobVersion },
    complianceFlags: {
      containsPII: false,
      containsPHI: false,
      dataClassification: 'public',
    },
  });
};
620
+
621
/**
 * Runs the `onError` plugin hooks for a failed job when plugins are enabled.
 *
 * @param workerName Worker name.
 * @param job Failed job.
 * @param error Error that failed the job.
 * @param features Feature switches of the worker.
 */
const executeFailurePlugins = async (
  workerName: string,
  job: Job,
  error: Error,
  features?: WorkerFactoryConfig['features']
): Promise<void> => {
  if (features?.plugins !== true) return;

  const context = {
    workerName,
    jobId: job.id ?? '',
    jobData: job.data,
    error,
    timestamp: new Date(),
  };

  await PluginManager.executeHook('onError', context);
};
637
+
638
/**
 * Reports a job failure to the circuit breaker when that feature is enabled.
 *
 * @param workerName Worker name.
 * @param jobVersion Worker version the circuit is tracked under.
 * @param error Error that failed the job.
 * @param features Feature switches of the worker.
 */
const recordCircuitBreakerFailure = (
  workerName: string,
  jobVersion: string,
  error: Error,
  features?: WorkerFactoryConfig['features']
): void => {
  if (features?.circuitBreaker !== true) return;

  CircuitBreaker.recordFailure(workerName, jobVersion, error);
};
648
+
649
/**
 * Logs a job failure under the 'workers' category, then reports it to the circuit breaker.
 *
 * @param workerName Worker name.
 * @param jobVersion Worker version.
 * @param job Failed job.
 * @param error Error that failed the job.
 * @param features Feature switches of the worker.
 */
const logAndRecordFailure = (
  workerName: string,
  jobVersion: string,
  job: Job,
  error: Error,
  features?: WorkerFactoryConfig['features']
): void => {
  const details = { error, jobId: job.id, version: jobVersion };
  Logger.error(`Worker job failed: ${workerName}`, details, 'workers');

  recordCircuitBreakerFailure(workerName, jobVersion, error, features);
};
663
+
664
/**
 * Records a failed job in both reporting channels: worker metrics first,
 * then observability (job metrics and span).
 *
 * @param params Worker name and version, job name, processing duration in milliseconds,
 *               the open span id (or `null`) and the feature switches.
 */
const recordFailureObservabilityAndMetrics = (params: {
  workerName: string;
  jobVersion: string;
  jobName: string;
  duration: number;
  spanId: string | null;
  features?: WorkerFactoryConfig['features'];
}): void => {
  recordFailureMetrics(params.workerName, params.jobVersion, params.duration, params.features);
  recordFailureObservability(
    params.workerName,
    params.jobName,
    params.duration,
    params.spanId,
    params.features
  );
};
677
+
678
/**
 * Runs the reporting side of failure handling: metrics and observability first,
 * then the dead-letter queue when that feature is enabled.
 *
 * @param params Worker name and version, the failed job and its error, the processing
 *               duration in milliseconds, the open span id (or `null`), the feature
 *               switches and the queue name.
 */
const executeAllFailureHandlers = async (params: {
  workerName: string;
  jobVersion: string;
  job: Job;
  error: Error;
  duration: number;
  spanId: string | null;
  features?: WorkerFactoryConfig['features'];
  queueName: string;
}): Promise<void> => {
  const { workerName, jobVersion, job, error, duration, spanId, features, queueName } = params;

  recordFailureObservabilityAndMetrics({
    workerName,
    jobVersion,
    jobName: job.name,
    duration,
    spanId,
    features,
  });

  const deadLetterEnabled = features?.deadLetterQueue === true;
  if (!deadLetterEnabled) return;

  await addFailedJobToDeadLetterQueue(
    workerName,
    job,
    error,
    duration,
    jobVersion,
    queueName,
    features
  );
};
711
+
712
/**
 * Entry point for failure handling of a job. Runs, in order:
 *  1. logging plus circuit-breaker reporting,
 *  2. metrics, observability and dead-letter-queue handling,
 *  3. the `onError` plugin hooks.
 *
 * @param params Worker name and version, the failed job and its error, the processing
 *               duration in milliseconds, the open span id (or `null`), the feature
 *               switches and the queue name.
 */
const handleFailure = async (params: {
  workerName: string;
  jobVersion: string;
  job: Job;
  error: Error;
  duration: number;
  spanId: string | null;
  features?: WorkerFactoryConfig['features'];
  queueName: string;
}): Promise<void> => {
  logAndRecordFailure(
    params.workerName,
    params.jobVersion,
    params.job,
    params.error,
    params.features
  );

  await executeAllFailureHandlers(params);
  await executeFailurePlugins(params.workerName, params.job, params.error, params.features);
};
728
+
729
/**
 * Helper: Create enhanced processor with all features
 *
 * Wraps `config.processor` so that each job:
 *  1. is checked against the circuit breaker,
 *  2. passes through the before-process hooks (which may skip the job or
 *     replace `job.data`),
 *  3. is processed inside a processing span, and
 *  4. is routed to `handleSuccess` or `handleFailure`.
 *
 * @param config - Worker configuration supplying name, version, processor and features.
 * @returns A processor function that resolves with the processor result, or with
 *          `{ skipped: true, reason }` when a before-hook asks to skip the job.
 * @throws The failure as an `Error`. Non-Error throwables are wrapped so that
 *         downstream handlers can safely read `.message`.
 */
const createEnhancedProcessor = (config: WorkerFactoryConfig): ((job: Job) => Promise<unknown>) => {
  return async (job: Job): Promise<unknown> => {
    const { name, version, processor, features } = config;
    const jobVersion = version ?? '1.0.0';

    ensureCircuitAllowsExecution(name, jobVersion, job.id, features);

    const beforeOutcome = await runBeforeProcessHooks(name, job, features);
    if (beforeOutcome.skip) {
      return { skipped: true, reason: beforeOutcome.reason };
    }

    if (beforeOutcome.jobData !== undefined) {
      job.data = beforeOutcome.jobData;
    }

    const startTime = Date.now();
    let spanId: string | null = null;

    try {
      spanId = startProcessingSpan(name, jobVersion, job, config.queueName, features);

      // Process the job
      const result = await processor(job);

      await handleSuccess({
        workerName: name,
        jobVersion,
        job,
        result,
        duration: Date.now() - startTime,
        spanId,
        features,
      });

      return result;
    } catch (err) {
      // A processor may throw anything; normalise so handlers always receive an Error.
      const error = err instanceof Error ? err : new Error(String(err));
      const duration = Date.now() - startTime;

      try {
        await handleFailure({
          workerName: name,
          jobVersion,
          job,
          error,
          duration,
          spanId,
          features,
          queueName: config.queueName,
        });
      } catch (handlerError) {
        // Failure bookkeeping must not replace the job's real failure reason.
        Logger.error(
          `Worker failure handling failed: ${name}`,
          { error: handlerError, jobId: job.id, version: jobVersion },
          'workers'
        );
      }

      throw error;
    }
  };
};
789
+
790
/**
 * Returns `value` when it is neither null nor undefined; otherwise throws a
 * config error built from `message`.
 */
const requireInfrastructure = <T>(value: T | null | undefined, message: string): T => {
  if (value !== null && value !== undefined) {
    return value;
  }
  throw ErrorFactory.createConfigError(message);
};
796
+
797
/** Reads a string from `Env` under `envKey`; an empty or missing key yields `fallback`. */
const resolveEnvString = (envKey: string | undefined, fallback: string): string =>
  envKey ? Env.get(envKey, fallback) : fallback;
801
+
802
/** Reads an integer from `Env` under `envKey`; an empty or missing key yields `fallback`. */
const resolveEnvInt = (envKey: string | undefined, fallback: number): number =>
  envKey ? Env.getInt(envKey, fallback) : fallback;
806
+
807
/** Type guard: a Redis config input is env-driven when its `env` field is exactly `true`. */
const isRedisEnvConfig = (config: RedisConfigInput): config is RedisEnvConfig => {
  const candidate = config as RedisEnvConfig;
  return candidate.env === true;
};
809
+
810
/**
 * Returns `host` when it is non-empty; otherwise throws a config error naming
 * the `context` path whose host is missing.
 */
const requireRedisHost = (host: string, context: string): string => {
  if (host) return host;
  throw ErrorFactory.createConfigError(`${context}.host is required`);
};
816
+
817
/**
 * Produces fallback Redis connection values: taken from the queue config's
 * redis driver when it is configured with driver 'redis', otherwise from the
 * REDIS_* environment variables.
 */
const resolveRedisFallbacks = (): {
  host: string;
  port: number;
  db: number;
  password: string;
} => {
  const queueRedis = queueConfig.drivers.redis;

  if (queueRedis?.driver === 'redis') {
    return {
      host: queueRedis.host,
      port: queueRedis.port,
      db: queueRedis.database,
      password: queueRedis.password ?? '',
    };
  }

  return {
    host: Env.get('REDIS_HOST', '127.0.0.1'),
    port: Env.getInt('REDIS_PORT', 6379),
    db: Env.getInt('REDIS_DB', 0),
    password: Env.get('REDIS_PASSWORD', ''),
  };
};
832
+
833
/**
 * Resolves a Redis config whose fields name environment variables. Each field
 * defaults to the conventional REDIS_* key, and each lookup falls back to
 * `resolveRedisFallbacks()`. An empty password becomes `undefined`.
 *
 * @throws Config error when the resolved host is empty.
 */
const resolveRedisConfigFromEnv = (config: RedisEnvConfig, context: string): RedisConfig => {
  const defaults = resolveRedisFallbacks();

  const resolvedHost = resolveEnvString(config.host ?? 'REDIS_HOST', defaults.host);
  const host = requireRedisHost(resolvedHost, context);

  const port = resolveEnvInt(config.port ?? 'REDIS_PORT', defaults.port);
  const db = resolveEnvInt(config.db ?? 'REDIS_DB', defaults.db);
  const secret = resolveEnvString(config.password ?? 'REDIS_PASSWORD', defaults.password);

  return { host, port, db, password: secret || undefined };
};
850
+
851
/**
 * Resolves a directly specified Redis config: validates the host and fills a
 * missing password from the REDIS_PASSWORD environment variable.
 *
 * @throws Config error when `config.host` is empty.
 */
const resolveRedisConfigFromDirect = (config: RedisConfig, context: string): RedisConfig => {
  const host = requireRedisHost(config.host, context);
  const { port, db } = config;
  const password = config.password ?? Env.get('REDIS_PASSWORD', undefined);

  return { host, port, db, password };
};
857
+
858
/** Dispatches to the env-based or the direct Redis config resolver depending on the input shape. */
const resolveRedisConfig = (config: RedisConfigInput, context: string): RedisConfig => {
  if (isRedisEnvConfig(config)) {
    return resolveRedisConfigFromEnv(config, context);
  }
  return resolveRedisConfigFromDirect(config, context);
};
862
+
863
/**
 * Resolves the `primary` Redis config input, or `fallback` when `primary` is
 * null/undefined.
 *
 * @throws Config error with `errorMessage` when neither input is provided.
 */
const resolveRedisConfigWithFallback = (
  primary: RedisConfigInput | undefined,
  fallback: RedisConfigInput | undefined,
  errorMessage: string,
  context: string
): RedisConfig => {
  const chosen = primary ?? fallback;

  if (chosen) {
    return resolveRedisConfig(chosen, context);
  }

  throw ErrorFactory.createConfigError(errorMessage);
};
876
+
877
/** Trims an env value; missing, empty and whitespace-only values all become `undefined`. */
const normalizeEnvValue = (value: string | undefined): string | undefined => {
  const trimmed = value?.trim();
  return trimmed ? trimmed : undefined;
};
882
+
883
/**
 * Turns an app name into a key-safe token: trims it and replaces each run of
 * whitespace with a single underscore. Falls back to 'zintrust' when the
 * result is empty.
 */
const normalizeAppName = (value: string | undefined): string => {
  const collapsed = (value ?? '').trim().replaceAll(/\s+/g, '_');
  if (collapsed.length === 0) {
    return 'zintrust';
  }
  return collapsed;
};
887
+
888
/** Default Redis key prefix: 'worker_' followed by the normalized application prefix. */
const resolveDefaultRedisKeyPrefix = (): string => {
  const appName = normalizeAppName(appConfig.prefix);
  return `worker_${appName}`;
};
889
/** Persistence table name from WORKER_PERSISTENCE_TABLE, defaulting to 'zintrust_workers'. */
const resolveDefaultPersistenceTable = (): string => {
  const configured = normalizeEnvValue(Env.get('WORKER_PERSISTENCE_TABLE', 'zintrust_workers'));
  return configured ?? 'zintrust_workers';
};
891
+
892
/** Persistence DB connection name from WORKER_PERSISTENCE_DB_CONNECTION, defaulting to 'default'. */
const resolveDefaultPersistenceConnection = (): string => {
  const configured = normalizeEnvValue(Env.get('WORKER_PERSISTENCE_DB_CONNECTION', 'default'));
  return configured ?? 'default';
};
894
+
895
/**
 * Decides whether a worker auto-starts: an explicit `config.autoStart`
 * (anything other than null/undefined) wins; otherwise the WORKER_AUTO_START
 * environment flag is used, defaulting to `false`.
 */
const resolveAutoStart = (config: WorkerFactoryConfig): boolean =>
  config.autoStart ?? Env.getBool('WORKER_AUTO_START', false);
903
+
904
/**
 * Normalizes an explicitly supplied persistence config by filling in defaults.
 *
 * - memory: reduced to `{ driver: 'memory' }`.
 * - redis: key prefix defaults to WORKER_PERSISTENCE_REDIS_KEY_PREFIX, then to
 *   the default Redis key prefix.
 * - anything else is treated as database: a string `client` is interpreted as
 *   a connection name; connection and table fall back to their env-backed defaults.
 */
const normalizeExplicitPersistence = (
  persistence: WorkerPersistenceConfig
): WorkerPersistenceConfig => {
  if (persistence.driver === 'memory') {
    return { driver: 'memory' };
  }

  if (persistence.driver === 'redis') {
    const keyPrefix =
      persistence.keyPrefix ??
      normalizeEnvValue(Env.get('WORKER_PERSISTENCE_REDIS_KEY_PREFIX', '')) ??
      resolveDefaultRedisKeyPrefix();

    return { driver: 'redis', redis: persistence.redis, keyPrefix };
  }

  // Database driver: a string client names a connection rather than being a client object.
  const namedConnection =
    typeof persistence.client === 'string' ? (persistence.client as string) : undefined;
  const clientObject = namedConnection === undefined ? persistence.client : undefined;

  // The default helpers perform the same env lookup the fallbacks need.
  const connection =
    persistence.connection ?? namedConnection ?? resolveDefaultPersistenceConnection();
  const table = persistence.table ?? resolveDefaultPersistenceTable();

  return {
    driver: 'database',
    client: clientObject,
    connection,
    table,
  };
};
938
+
939
/**
 * Resolves the persistence configuration for a worker.
 *
 * An explicit `config.infrastructure.persistence` wins and is normalized.
 * Otherwise the WORKER_PERSISTENCE_DRIVER environment variable selects the
 * driver (case-insensitive): 'memory', 'redis', or 'db' / 'database'.
 *
 * @returns The resolved config, or `undefined` when nothing is configured.
 * @throws Config error when WORKER_PERSISTENCE_DRIVER holds an unsupported value.
 */
const resolvePersistenceConfig = (
  config: WorkerFactoryConfig
): WorkerPersistenceConfig | undefined => {
  const explicit = config.infrastructure?.persistence;
  if (explicit) return normalizeExplicitPersistence(explicit);

  const driver = normalizeEnvValue(Env.get('WORKER_PERSISTENCE_DRIVER', ''))?.toLowerCase();
  if (!driver) return undefined;

  if (driver === 'memory') return { driver: 'memory' };

  if (driver === 'redis') {
    const keyPrefix = normalizeEnvValue(Env.get('WORKER_PERSISTENCE_REDIS_KEY_PREFIX', ''));
    return {
      driver: 'redis',
      redis: { env: true },
      // Without a configured prefix, interpolating it would produce the literal
      // "undefined_worker_..."; use the default prefix instead.
      keyPrefix:
        keyPrefix === undefined
          ? resolveDefaultRedisKeyPrefix()
          : `${keyPrefix}_worker_${appConfig.prefix}`,
    };
  }

  if (driver === 'db' || driver === 'database') {
    return {
      driver: 'database',
      connection: resolveDefaultPersistenceConnection(),
      table: resolveDefaultPersistenceTable(),
    };
  }

  throw ErrorFactory.createConfigError(
    'WORKER_PERSISTENCE_DRIVER must be one of memory, redis, or database'
  );
};
971
+
972
/**
 * Obtains a connected database client for `connectionName`.
 *
 * Tries to connect directly first. If that fails, the failure is logged,
 * databases are registered from the runtime database config, and the
 * connection is attempted once more.
 *
 * @throws Config error when the second attempt also fails.
 */
const resolveDbClientFromEnv = async (connectionName = 'default'): Promise<IDatabase> => {
  const attemptConnection = async (): Promise<IDatabase> =>
    await useEnsureDbConnected(undefined, connectionName);

  // First attempt: the connection may already be registered.
  try {
    return await attemptConnection();
  } catch (firstFailure) {
    Logger.error('Worker persistence failed to resolve database connection', firstFailure);
  }

  // Second attempt: register the runtime databases, then retry.
  try {
    registerDatabasesFromRuntimeConfig(databaseConfig);
    return await attemptConnection();
  } catch (secondFailure) {
    Logger.error('Worker persistence failed after registering runtime databases', secondFailure);
    throw ErrorFactory.createConfigError(
      `Worker persistence requires a database client. Register connection '${connectionName}' or pass infrastructure.persistence.client.`
    );
  }
};
992
+
993
/**
 * Builds and initializes the worker store described by the config's
 * persistence settings. Returns the current module-level `workerStore`
 * unchanged when no persistence is configured.
 */
const resolveWorkerStore = async (config: WorkerFactoryConfig): Promise<WorkerStore> => {
  const persistence = resolvePersistenceConfig(config);
  if (!persistence) return workerStore;

  let created: WorkerStore;

  switch (persistence.driver) {
    case 'memory': {
      created = InMemoryWorkerStore.create();
      break;
    }
    case 'redis': {
      const redisConfig = resolveRedisConfigWithFallback(
        persistence.redis,
        config.infrastructure?.redis,
        'Worker persistence requires redis config (persistence.redis or infrastructure.redis)',
        'infrastructure.persistence.redis'
      );
      const redisClient = createRedisConnection(redisConfig);
      created = RedisWorkerStore.create(
        redisClient,
        persistence.keyPrefix ?? resolveDefaultRedisKeyPrefix()
      );
      break;
    }
    case 'database': {
      // A string client is a connection name; otherwise use the configured connection.
      const clientIsName = typeof persistence.client === 'string';
      const connectionName = clientIsName ? persistence.client : persistence.connection;
      const dbClient = clientIsName
        ? await resolveDbClientFromEnv(connectionName)
        : (persistence.client ?? (await resolveDbClientFromEnv(connectionName)));
      created = DbWorkerStore.create(dbClient, persistence.table);
      break;
    }
    default: {
      // Any other driver value falls back to an in-memory store.
      created = InMemoryWorkerStore.create();
    }
  }

  await created.init();
  return created;
};
1025
+
1026
// Cache of store instances keyed by serialized persistence config, so that
// repeated lookups reuse the same store (and its connection).
const storeInstanceCache: Map<string, WorkerStore> = new Map();
1028
+
1029
/**
 * Generate cache key for persistence configuration
 *
 * The key is the JSON serialization of the fields that decide which store
 * gets created. A string `client` is included because it names the database
 * connection to use; leaving it out would let two configs that target
 * different connections share one cached store. `undefined` fields are
 * dropped by `JSON.stringify`, so keys for configs without a string client
 * are unchanged.
 *
 * @param persistence - Persistence configuration to fingerprint.
 * @returns A string key suitable for `storeInstanceCache`.
 */
const generateCacheKey = (persistence: WorkerPersistenceConfig): string => {
  const namedClient =
    'client' in persistence && typeof persistence.client === 'string'
      ? persistence.client
      : undefined;

  return JSON.stringify({
    driver: persistence.driver,
    redis: 'redis' in persistence ? persistence.redis : undefined,
    keyPrefix: 'keyPrefix' in persistence ? persistence.keyPrefix : undefined,
    connection: 'connection' in persistence ? persistence.connection : undefined,
    table: 'table' in persistence ? persistence.table : undefined,
    client: namedClient,
  });
};
1041
+
1042
/**
 * Create new store instance based on persistence configuration
 *
 * - memory: reuses the module-level store when it is already configured as
 *   memory, otherwise creates a fresh in-memory store.
 * - redis: connects using `persistence.redis`, defaulting to env-based config.
 * - otherwise: database store, resolving the client from a connection name
 *   when no client object is supplied.
 *
 * The returned store has not been initialized yet.
 */
const createWorkerStore = async (persistence: WorkerPersistenceConfig): Promise<WorkerStore> => {
  if (persistence.driver === 'memory') {
    const reuseGlobalStore = workerStoreConfigured && workerStoreConfig?.driver === 'memory';
    return reuseGlobalStore ? workerStore : InMemoryWorkerStore.create();
  }

  if (persistence.driver === 'redis') {
    const redisConfig = resolveRedisConfigWithFallback(
      persistence.redis ?? { env: true },
      undefined,
      'Worker persistence requires redis config (persistence.redis or REDIS_* env values)',
      'persistence.redis'
    );
    const redisClient = createRedisConnection(redisConfig);
    const prefix = persistence.keyPrefix ?? resolveDefaultRedisKeyPrefix();
    return RedisWorkerStore.create(redisClient, prefix);
  }

  // Database driver
  const clientIsName = typeof persistence.client === 'string';
  const connectionName = clientIsName ? persistence.client : persistence.connection;
  const dbClient = clientIsName
    ? await resolveDbClientFromEnv(connectionName)
    : (persistence.client ?? (await resolveDbClientFromEnv(connectionName)));

  return DbWorkerStore.create(dbClient, persistence.table);
};
1073
+
1074
/**
 * Returns the store for a persistence config, creating, initializing and
 * caching it on first use. Later calls with an equal cache key return the
 * cached instance.
 */
const resolveWorkerStoreForPersistence = async (
  persistence: WorkerPersistenceConfig
): Promise<WorkerStore> => {
  const key = generateCacheKey(persistence);

  const existing = storeInstanceCache.get(key);
  if (existing) return existing;

  // Cache miss: build the store, initialize it, then remember it.
  const created = await createWorkerStore(persistence);
  await created.init();
  storeInstanceCache.set(key, created);

  return created;
};
1094
+
1095
/**
 * Loads the persisted record for worker `name`, either from the store that
 * matches `persistenceOverride` or, without an override, from the
 * module-level store (configuring it first if needed).
 */
const getPersistedRecord = async (
  name: string,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<WorkerRecord | null> => {
  if (persistenceOverride) {
    const overrideStore = await resolveWorkerStoreForPersistence(persistenceOverride);
    return overrideStore.get(name);
  }

  await ensureWorkerStoreConfigured();
  return workerStore.get(name);
};
1107
+
1108
/**
 * Lazily configures the module-level worker store from the bootstrap
 * persistence config. Does nothing when the store is already configured or
 * when no persistence is configured.
 */
const ensureWorkerStoreConfigured = async (): Promise<void> => {
  if (workerStoreConfigured) {
    return;
  }

  const bootstrap = buildPersistenceBootstrapConfig();
  const resolvedPersistence = resolvePersistenceConfig(bootstrap);
  if (!resolvedPersistence) {
    return;
  }

  workerStore = await resolveWorkerStore(bootstrap);
  workerStoreConfigured = true;
  workerStoreConfig = resolvedPersistence;
};
1117
+
1118
/**
 * Builds the persisted record for a worker in the given `status`. Feature,
 * infrastructure and datacenter settings are shallow-copied, and
 * `createdAt` / `updatedAt` share a single timestamp.
 */
const buildWorkerRecord = (config: WorkerFactoryConfig, status: string): WorkerRecord => {
  const timestamp = new Date();

  let processorPath: string | null = null;
  if (config.processorPath) {
    processorPath = decodeProcessorPathEntities(config.processorPath);
  }

  const { features, infrastructure, datacenter } = config;

  return {
    name: config.name,
    queueName: config.queueName,
    version: config.version ?? '1.0.0',
    status,
    autoStart: resolveAutoStart(config),
    concurrency: config.options?.concurrency ?? 1,
    region: datacenter?.primaryRegion ?? null,
    processorPath,
    features: features ? { ...features } : null,
    infrastructure: infrastructure ? { ...infrastructure } : null,
    datacenter: datacenter ? { ...datacenter } : null,
    createdAt: timestamp,
    updatedAt: timestamp,
    lastHealthCheck: undefined,
    lastError: undefined,
    connectionState: undefined,
  };
};
1142
+
1143
/**
 * Builds the default auto-scaler configuration from `workersConfig`.
 *
 * The off-peak window is parsed from `autoScaling.offPeakSchedule`, read as
 * "<start>-<end>". The string is split once, each side is trimmed, and a
 * missing or blank side falls back to '22:00' / '06:00'. (`split` never
 * yields `undefined` for the first element, so a `??` fallback alone would
 * not cover an empty schedule string.)
 *
 * @returns Default auto-scaler config with an empty scaling-policy map, zeroed
 *          cost and budget limits, and the reduction expressed as a rounded
 *          percentage of `autoScaling.offPeakReduction`.
 */
const buildDefaultAutoScalerConfig = (): AutoScalerConfig => {
  const { autoScaling, costOptimization } = workersConfig;

  const [rawStart, rawEnd] = autoScaling.offPeakSchedule.split('-');
  const offPeakStart = rawStart?.trim() || '22:00';
  const offPeakEnd = rawEnd?.trim() || '06:00';

  return {
    enabled: autoScaling.enabled,
    checkInterval: autoScaling.interval,
    scalingPolicies: new Map(),
    costOptimization: {
      enabled: costOptimization.enabled,
      maxCostPerHour: 0,
      preferSpotInstances: costOptimization.spotInstances,
      offPeakSchedule: {
        start: offPeakStart,
        end: offPeakEnd,
        timezone: 'UTC',
        reductionPercentage: Math.round(autoScaling.offPeakReduction * 100),
      },
      budgetAlerts: {
        dailyLimit: 0,
        weeklyLimit: 0,
        monthlyLimit: 0,
      },
    },
  };
};
1164
+
1165
/**
 * Merges the off-peak schedule from `input` over the schedule in `defaults`.
 * When `defaults` has no schedule, a built-in 22:00–06:00 UTC window with a
 * 0% reduction is used as the base.
 */
const resolveOffPeakSchedule = (
  input: AutoScalerConfig | undefined,
  defaults: AutoScalerConfig
): NonNullable<AutoScalerConfig['costOptimization']['offPeakSchedule']> => {
  const base = defaults.costOptimization.offPeakSchedule ?? {
    start: '22:00',
    end: '06:00',
    timezone: 'UTC',
    reductionPercentage: 0,
  };

  // Spreading an absent override contributes nothing, leaving a copy of the base.
  return { ...base, ...input?.costOptimization?.offPeakSchedule };
};
1183
+
1184
/**
 * Merges cost-optimization settings from `input` over `defaults`. The nested
 * off-peak schedule and budget alerts are merged field by field rather than
 * replaced wholesale.
 */
const resolveCostOptimization = (
  input: AutoScalerConfig | undefined,
  defaults: AutoScalerConfig
): AutoScalerConfig['costOptimization'] => {
  const overrides = input?.costOptimization;
  const base = defaults.costOptimization;

  return {
    ...base,
    ...overrides,
    offPeakSchedule: resolveOffPeakSchedule(input, defaults),
    budgetAlerts: { ...base.budgetAlerts, ...overrides?.budgetAlerts },
  };
};
1196
+
1197
/**
 * Produces the effective auto-scaler config: the defaults alone when no input
 * is given, otherwise the input layered over the defaults with a deep-merged
 * cost-optimization section.
 */
const resolveAutoScalerConfig = (input: AutoScalerConfig | undefined): AutoScalerConfig => {
  const base = buildDefaultAutoScalerConfig();

  if (input) {
    const costOptimization = resolveCostOptimization(input, base);
    return { ...base, ...input, costOptimization };
  }

  return base;
};
1207
+
1208
/**
 * Builds the worker options from a copy of `config.options`:
 * - `prefix` defaults to the BullMQ-safe queue name,
 * - `autorun` defaults to `autoStart`,
 * - `connection` is derived from `infrastructure.redis` when not supplied.
 *
 * @throws Config error when neither `options.connection` nor
 *         `infrastructure.redis` is available.
 */
const resolveWorkerOptions = (config: WorkerFactoryConfig, autoStart: boolean): WorkerOptions => {
  const resolved = config.options ? { ...config.options } : ({} as WorkerOptions);

  if (resolved.prefix === undefined) resolved.prefix = getBullMQSafeQueueName();
  if (resolved.autorun === undefined) resolved.autorun = autoStart;

  // A caller-supplied connection is used as-is.
  if (resolved.connection) {
    return resolved;
  }

  const { host, port, db, password } = resolveRedisConfigWithFallback(
    config.infrastructure?.redis,
    undefined,
    'Worker requires a connection. Provide options.connection or infrastructure.redis config',
    'infrastructure.redis'
  );

  return { ...resolved, connection: { host, port, db, password } };
};
1237
+
1238
/**
 * Builds the default observability config (Prometheus, OpenTelemetry,
 * Datadog) from `workersConfig.observability`.
 */
const buildDefaultObservabilityConfig = (): ObservabilityConfig => {
  const { prometheus, opentelemetry, datadog } = workersConfig.observability;

  // NOTE(review): the Datadog API key is passed along as a tag value — confirm
  // this is intended and that the key cannot end up in emitted telemetry.
  const datadogTags = datadog.apiKey ? [`apiKey:${datadog.apiKey}`] : undefined;

  return {
    prometheus: {
      enabled: prometheus.enabled,
      port: prometheus.port,
    },
    openTelemetry: {
      enabled: opentelemetry.enabled,
      serviceName: 'zintrust-workers',
      exporterUrl: opentelemetry.endpoint,
    },
    datadog: {
      enabled: datadog.enabled,
      tags: datadogTags,
    },
  };
};
1255
+
1256
/**
 * Produces the effective observability config by merging `input` over the
 * defaults, one provider at a time. A boolean top-level `enabled` in the input
 * forces the `enabled` flag of all three providers. An empty OpenTelemetry
 * service name is replaced by the default one.
 */
const resolveObservabilityConfig = (
  input: ObservabilityConfigInput | undefined
): ObservabilityConfig => {
  const base = buildDefaultObservabilityConfig();
  if (!input) return base;

  const prometheus = { ...base.prometheus, ...input.prometheus };
  const openTelemetry = { ...base.openTelemetry, ...input.openTelemetry };
  const datadog = { ...base.datadog, ...input.datadog };

  // A global switch, when present as a boolean, overrides every provider flag.
  const globalSwitch = 'enabled' in input ? input.enabled : undefined;
  if (typeof globalSwitch === 'boolean') {
    prometheus.enabled = globalSwitch;
    openTelemetry.enabled = globalSwitch;
    datadog.enabled = globalSwitch;
  }

  if (!openTelemetry.serviceName) {
    openTelemetry.serviceName = base.openTelemetry.serviceName;
  }

  return { prometheus, openTelemetry, datadog };
};
1295
+
1296
/**
 * Initializes ClusterLock once, when the clustering feature is enabled.
 *
 * @throws Config error when `infrastructure.redis` is missing.
 */
const initializeClustering = (config: WorkerFactoryConfig): void => {
  if (clusteringInitialized) return;
  if (!config.features?.clustering) return;

  const lockRedis = resolveRedisConfigWithFallback(
    config.infrastructure?.redis,
    undefined,
    'ClusterLock requires infrastructure.redis config',
    'infrastructure.redis'
  );

  ClusterLock.initialize(lockRedis);
  clusteringInitialized = true;
};
1307
+
1308
/**
 * Initializes WorkerMetrics once, when the metrics feature is enabled.
 *
 * @throws Config error when `infrastructure.redis` is missing.
 */
const initializeMetrics = (config: WorkerFactoryConfig): void => {
  if (metricsInitialized) return;
  if (!config.features?.metrics) return;

  const metricsRedis = resolveRedisConfigWithFallback(
    config.infrastructure?.redis,
    undefined,
    'WorkerMetrics requires infrastructure.redis config',
    'infrastructure.redis'
  );

  WorkerMetrics.initialize(metricsRedis);
  metricsInitialized = true;
};
1319
+
1320
/** Initializes the AutoScaler once, when the autoScaling feature is enabled. */
const initializeAutoScaling = (config: WorkerFactoryConfig): void => {
  if (autoScalingInitialized) return;
  if (!config.features?.autoScaling) return;

  AutoScaler.initialize(resolveAutoScalerConfig(config.infrastructure?.autoScaler));
  autoScalingInitialized = true;
};
1328
+
1329
/** Initializes a circuit breaker for this worker name and version when the feature is enabled. */
const initializeCircuitBreaker = (config: WorkerFactoryConfig, version: string): void => {
  if (config.features?.circuitBreaker) {
    CircuitBreaker.initialize(config.name, version);
  }
};
1333
+
1334
/**
 * Initializes the DeadLetterQueue once, when the feature is enabled. Redis
 * settings come from `infrastructure.deadLetterQueue.redis`, falling back to
 * `infrastructure.redis`.
 *
 * @throws Config error when the DLQ config or every Redis config is missing.
 */
const initializeDeadLetterQueue = (config: WorkerFactoryConfig): void => {
  if (deadLetterQueueInitialized) return;
  if (!config.features?.deadLetterQueue) return;

  const dlqSettings = requireInfrastructure(
    config.infrastructure?.deadLetterQueue,
    'DeadLetterQueue requires infrastructure.deadLetterQueue config'
  );
  const dlqRedis = resolveRedisConfigWithFallback(
    dlqSettings.redis,
    config.infrastructure?.redis,
    'DeadLetterQueue requires infrastructure.deadLetterQueue.redis or infrastructure.redis config',
    'infrastructure.deadLetterQueue.redis'
  );

  DeadLetterQueue.initialize(dlqRedis, dlqSettings.policy);
  deadLetterQueueInitialized = true;
};
1349
+
1350
/** Initializes and starts the ResourceMonitor once, when the feature is enabled. */
const initializeResourceMonitoring = (config: WorkerFactoryConfig): void => {
  if (resourceMonitoringInitialized) return;
  if (!config.features?.resourceMonitoring) return;

  ResourceMonitor.initialize();
  ResourceMonitor.start();
  resourceMonitoringInitialized = true;
};
1356
+
1357
/**
 * Initializes the ComplianceManager once, when the feature is enabled. Redis
 * settings come from `infrastructure.compliance.redis`, falling back to
 * `infrastructure.redis`.
 *
 * @throws Config error when the compliance config or every Redis config is missing.
 */
const initializeCompliance = (config: WorkerFactoryConfig): void => {
  if (complianceInitialized) return;
  if (!config.features?.compliance) return;

  const complianceSettings = requireInfrastructure(
    config.infrastructure?.compliance,
    'ComplianceManager requires infrastructure.compliance config'
  );
  const complianceRedis = resolveRedisConfigWithFallback(
    complianceSettings.redis,
    config.infrastructure?.redis,
    'ComplianceManager requires infrastructure.compliance.redis or infrastructure.redis config',
    'infrastructure.compliance.redis'
  );

  ComplianceManager.initialize(complianceRedis, complianceSettings.config);
  complianceInitialized = true;
};
1372
+
1373
/**
 * Initializes Observability once, when the feature is enabled. The
 * initialized flag is set only after initialization has completed.
 */
const initializeObservability = async (config: WorkerFactoryConfig): Promise<void> => {
  if (observabilityInitialized) return;
  if (!config.features?.observability) return;

  const resolved = resolveObservabilityConfig(config.infrastructure?.observability);
  await Observability.initialize(resolved);
  observabilityInitialized = true;
};
1379
+
1380
/**
 * Registers the worker's version with WorkerVersioning when the versioning
 * feature is enabled, using a fixed 'Initial version' changelog entry.
 */
const initializeVersioning = (config: WorkerFactoryConfig, version: string): void => {
  if (!config.features?.versioning) return;

  const parsedVersion = WorkerVersioning.parse(version);
  WorkerVersioning.register({
    workerName: config.name,
    version: parsedVersion,
    changelog: 'Initial version',
  });
};
1388
+
1389
/**
 * Places the worker with the DatacenterOrchestrator when datacenter
 * orchestration is enabled and a datacenter config is present. Placement
 * always uses the 'active-passive' replication strategy, and `preferLocal`
 * defaults to `true`.
 */
const initializeDatacenter = (config: WorkerFactoryConfig): void => {
  const { datacenter } = config;
  if (!config.features?.datacenterOrchestration || !datacenter) return;

  const affinity = datacenter.affinityRules;

  DatacenterOrchestrator.placeWorker({
    workerName: config.name,
    primaryRegion: datacenter.primaryRegion,
    secondaryRegions: datacenter.secondaryRegions ?? [],
    replicationStrategy: 'active-passive',
    affinityRules: {
      preferLocal: affinity?.preferLocal ?? true,
      maxLatency: affinity?.maxLatency,
      avoidRegions: affinity?.avoidRegions,
    },
  });
};
1403
+
1404
/**
 * Attaches 'completed', 'failed' and 'error' listeners to a worker. Each
 * event is logged; completed and failed jobs additionally increment an
 * observability counter when the observability feature is exactly `true`.
 */
const setupWorkerEventListeners = (
  worker: Worker,
  workerName: string,
  workerVersion: string,
  features?: WorkerFactoryConfig['features']
): void => {
  // Increments a job counter tagged with this worker's name and version.
  const countJob = (metric: string): void => {
    if (features?.observability !== true) return;
    Observability.incrementCounter(metric, 1, {
      worker: workerName,
      version: workerVersion,
    });
  };

  worker.on('completed', (job: Job) => {
    Logger.debug(`Job completed: ${workerName}`, { jobId: job.id });
    countJob('worker.jobs.completed');
  });

  worker.on('failed', (job: Job | undefined, error: Error) => {
    Logger.error(`Job failed: ${workerName}`, { error, jobId: job?.id }, 'workers');
    countJob('worker.jobs.failed');
  });

  worker.on('error', (error: Error) => {
    Logger.error(`Worker error: ${workerName}`, error);
  });
};
1436
+
1437
/**
 * Registers the worker with the WorkerRegistry. The registered factory
 * returns a registry instance that wraps the already-created BullMQ worker
 * together with freshly built metadata.
 *
 * In the wrapped instance, `start` runs the worker only when it was not
 * auto-started, `stop` and `drain` both close it, and `getStatus` /
 * `getHealth` return the fixed values 'running' and 'green'.
 */
const registerWorkerInstance = (params: {
  worker: Worker;
  config: WorkerFactoryConfig;
  workerVersion: string;
  queueName: string;
  options?: WorkerOptions;
  autoStart: boolean;
}): void => {
  const { worker, config, workerVersion, queueName, options, autoStart } = params;

  const buildRegistryInstance = async (): Promise<RegistryWorkerInstance> => {
    await Promise.resolve();

    const regionLabel = config.datacenter?.primaryRegion ?? 'unknown';

    // Runs the worker manually; auto-started workers are already running.
    const start = (): void => {
      if (autoStart) return;
      worker.run().catch((error) => {
        Logger.error(`Failed to start worker "${config.name}"`, error);
      });
    };

    return {
      metadata: {
        name: config.name,
        status: autoStart ? 'running' : 'stopped',
        version: workerVersion,
        region: regionLabel,
        queueName,
        concurrency: options?.concurrency ?? 1,
        startedAt: new Date(),
        stoppedAt: null,
        lastProcessedAt: null,
        restartCount: 0,
        processedCount: 0,
        errorCount: 0,
        lockKey: null,
        priority: 0,
        memoryUsage: 0,
        cpuUsage: 0,
        circuitState: 'closed',
        queues: [queueName],
        plugins: [],
        datacenter: regionLabel,
        canaryPercentage: 0,
        config: {},
      },
      instance: worker,
      start,
      stop: async (): Promise<void> => worker.close(),
      drain: async (): Promise<void> => worker.close(),
      sleep: async (): Promise<void> => worker.pause(),
      wakeup: (): void => {
        worker.resume();
      },
      getStatus: (): WorkerStatus => 'running',
      getHealth: (): 'green' | 'yellow' | 'red' => 'green',
    };
  };

  WorkerRegistry.register({
    name: config.name,
    config: {},
    version: workerVersion,
    region: config.datacenter?.primaryRegion,
    queues: [queueName],
    factory: buildRegistryInstance,
  });
};
1500
+
1501
/**
 * Runs every feature initializer for a worker in a fixed order. Each
 * initializer decides for itself whether its feature is enabled.
 * Observability is the only asynchronous step and is awaited before
 * versioning and datacenter placement run.
 */
const initializeWorkerFeatures = async (
  config: WorkerFactoryConfig,
  workerVersion: string
): Promise<void> => {
  // Synchronous initializers that run before observability.
  initializeClustering(config);
  initializeMetrics(config);
  initializeAutoScaling(config);
  initializeCircuitBreaker(config, workerVersion);
  initializeDeadLetterQueue(config);
  initializeResourceMonitoring(config);
  initializeCompliance(config);

  await initializeObservability(config);

  // Per-worker registration steps.
  initializeVersioning(config, workerVersion);
  initializeDatacenter(config);
};
1516
+
1517
+ /**
1518
+ * Worker Factory - Sealed namespace
1519
+ */
1520
+ export const WorkerFactory = Object.freeze({
1521
+ registerProcessor,
1522
+ registerProcessors,
1523
+ registerProcessorPaths,
1524
+ registerProcessorResolver,
1525
+ resolveProcessorPath,
1526
+
1527
/**
 * Create new worker with full setup
 *
 * Persists the worker record through the statuses CREATING → CONNECTING →
 * STARTING → RUNNING while it initializes features, constructs the BullMQ
 * worker, waits for its connection, registers it, runs `afterStart` plugin
 * hooks and starts health monitoring.
 *
 * On failure the status is persisted as FAILED and the original error is
 * rethrown. A worker that was already constructed is force-closed and removed
 * from the in-memory map so that it does not keep running, or block a retry,
 * after the caller has been told that creation failed.
 *
 * @param config - Worker configuration.
 * @returns The created BullMQ worker.
 * @throws Worker error when a worker with the same name already exists;
 *         otherwise whatever error caused creation to fail.
 */
async create(config: WorkerFactoryConfig): Promise<Worker> {
  const { name, version, queueName, features } = config;
  const workerVersion = version ?? '1.0.0';
  const autoStart = resolveAutoStart(config);

  if (workers.has(name)) {
    throw ErrorFactory.createWorkerError(`Worker "${name}" already exists`);
  }

  // Resolve the correct store for this worker configuration
  const store = await getStoreForWorker(config);

  // Save initial status as "creating"
  await store.save(buildWorkerRecord(config, WorkerCreationStatus.CREATING));

  // Tracked outside the try block so the failure path can release it.
  let createdWorker: Worker | undefined;

  try {
    await initializeWorkerFeatures(config, workerVersion);

    // Update status to "connecting"
    await store.update(name, {
      status: WorkerCreationStatus.CONNECTING,
      updatedAt: new Date(),
    });

    // Create enhanced processor
    const enhancedProcessor = createEnhancedProcessor(config);

    // Create BullMQ worker
    const resolvedOptions = resolveWorkerOptions(config, autoStart);
    const worker = new Worker(queueName, enhancedProcessor, resolvedOptions);
    createdWorker = worker;

    setupWorkerEventListeners(worker, name, workerVersion, features);

    // Update status to "starting"
    await store.update(name, {
      status: WorkerCreationStatus.STARTING,
      updatedAt: new Date(),
    });

    const timeoutMs = Env.getInt('WORKER_CONNECTION_TIMEOUT', 5000);

    // Wait for actual connection and health verification
    await waitForWorkerConnection(worker, name, queueName, timeoutMs);

    // Update status to "running" only after successful connection
    await store.update(name, {
      status: WorkerCreationStatus.RUNNING,
      updatedAt: new Date(),
    });

    // Store worker instance
    const instance: WorkerInstance = {
      worker,
      config,
      startedAt: new Date(),
      status: WorkerCreationStatus.RUNNING,
      connectionState: 'connected',
    };

    workers.set(name, instance);

    registerWorkerInstance({
      worker,
      config,
      workerVersion,
      queueName,
      options: resolvedOptions,
      autoStart,
    });

    if (autoStart) {
      await WorkerRegistry.start(name, workerVersion);
    }

    // Execute afterStart hooks
    if (features?.plugins === true) {
      await PluginManager.executeHook('afterStart', {
        workerName: name,
        timestamp: new Date(),
      });
    }

    // Start health monitoring for the worker
    startHealthMonitoring(name, worker, queueName);

    Logger.info(`Worker created: ${name}@${workerVersion}`, {
      queueName,
      features: Object.keys(features ?? {}).filter(
        (k) => features?.[k as keyof typeof features] === true
      ),
    });

    return worker;
  } catch (error) {
    // Release the half-created worker. Closing is best-effort and is not
    // awaited, so a slow or failing close cannot delay or replace the error.
    // NOTE(review): a WorkerRegistry entry registered before the failure is
    // not removed here — confirm whether the registry needs explicit cleanup.
    if (createdWorker !== undefined) {
      const leakedWorker = createdWorker;
      if (workers.get(name)?.worker === leakedWorker) {
        workers.delete(name);
      }
      leakedWorker.close(true).catch((closeError: unknown) => {
        Logger.warn(
          `Failed to close worker "${name}" after creation failure`,
          closeError as Error
        );
      });
    }

    // Handle failure - update status to "failed". A store failure here must
    // not mask the error that actually caused creation to fail.
    try {
      // Re-resolve store in case of error to be safe
      const failStore = await getStoreForWorker(config);
      await failStore.update(name, {
        status: WorkerCreationStatus.FAILED,
        updatedAt: new Date(),
        lastError: error instanceof Error ? error.message : String(error),
      });
    } catch (storeError) {
      Logger.warn(`Failed to persist failed status for ${name}`, storeError as Error);
    }

    Logger.error(`Worker creation failed: ${name}`, error);
    throw error;
  }
},
1637
+
1638
/**
 * Look up a registered worker by name.
 *
 * @param name - Key of the worker in the in-memory registry.
 * @returns A shallow copy of the stored instance, or `null` when no entry exists.
 */
get(name: string): WorkerInstance | null {
  const found = workers.get(name);
  if (!found) {
    return null;
  }
  return { ...found };
},
1645
+
1646
/**
 * Set a worker's status in memory and in its persistence store (used by HealthMonitor).
 *
 * Failures while resolving or updating the store are logged as warnings and are
 * not rethrown.
 *
 * @param name - Worker name.
 * @param status - New status; cast to `WorkerCreationStatus` without validation.
 * @param error - Optional error object or message persisted as `lastError`.
 */
async updateStatus(name: string, status: string, error?: Error | string): Promise<void> {
  const nextStatus = status as WorkerCreationStatus;
  const tracked = workers.get(name);
  if (tracked) {
    tracked.status = nextStatus;
  }

  let lastError: string | undefined;
  if (typeof error === 'string') {
    lastError = error;
  } else {
    lastError = error?.message;
  }

  try {
    // A worker that is not tracked in memory gets a placeholder config so that a
    // store can still be resolved for it.
    const store = await getStoreForWorker(
      tracked?.config ?? {
        name,
        queueName: 'unknown',
        processor: async (): Promise<unknown> => {
          return Promise.resolve(); //NOSONAR
        },
      }
    );
    await store.update(name, {
      status: nextStatus,
      updatedAt: new Date(),
      lastError,
    });
  } catch (err) {
    Logger.warn(`Failed to update status for ${name} to ${status}`, err as Error);
  }
},
1675
+
1676
/**
 * Stop a worker.
 *
 * When the worker is not registered in memory, only its persisted record is
 * marked as stopped. Otherwise the worker is closed (waiting at most five
 * seconds), health monitoring is unregistered, the stopped status is persisted,
 * and the registry is told to stop it. The `beforeStop` / `afterStop` plugin
 * hooks run only when the worker's `features.plugins` flag is `true`.
 *
 * The in-memory entry is NOT removed from the registry map by this method.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 * @throws Errors from store validation, the plugin hooks, or `WorkerRegistry.stop`
 *   propagate to the caller; close failures and the final status write do not.
 */
async stop(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  const target = workers.get(name);
  const store = await validateAndGetStore(name, target?.config, persistenceOverride);

  if (!target) {
    await store.update(name, { status: 'stopped', updatedAt: new Date() });
    Logger.info(`Worker marked stopped (not running): ${name}`);
    return;
  }

  // Evaluated at each call site instead of being cached across the awaits below.
  const pluginsEnabled = (): boolean => target.config.features?.plugins === true;

  if (pluginsEnabled()) {
    await PluginManager.executeHook('beforeStop', {
      workerName: name,
      timestamp: new Date(),
    });
  }

  // Race the close against a timer so a hanging close cannot block the stop.
  const closing = target.worker.close();
  let timer: NodeJS.Timeout | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    // eslint-disable-next-line no-restricted-syntax
    timer = setTimeout(() => {
      reject(new Error('Worker close timeout'));
    }, 5000);
  });

  try {
    await Promise.race([closing, deadline]);
  } catch (error) {
    Logger.warn(`Worker "${name}" close failed or timed out, continuing...`, error as Error);
  } finally {
    // The timer is always cleared so it cannot outlive this call.
    if (timer !== undefined) {
      clearTimeout(timer);
      timer = undefined;
    }
  }

  target.status = WorkerCreationStatus.STOPPED;
  HealthMonitor.unregister(name);

  // Persisting the status is best-effort: a failure is logged, not thrown.
  try {
    await store.update(name, {
      status: WorkerCreationStatus.STOPPED,
      updatedAt: new Date(),
    });
    Logger.info(`Worker "${name}" status updated to stopped`);
  } catch (error) {
    Logger.error(`Failed to update worker "${name}" status`, error as Error);
  }

  await WorkerRegistry.stop(name);

  if (pluginsEnabled()) {
    await PluginManager.executeHook('afterStop', {
      workerName: name,
      timestamp: new Date(),
    });
  }

  Logger.info(`Worker stopped: ${name}`);
},
1746
+
1747
/**
 * Restart a worker.
 *
 * A worker that is not registered in memory is started from its persisted
 * record instead. A registered worker is stopped, removed from the registry
 * map, and re-created from the config it had after the stop.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to stop / startFromPersisted.
 * @throws A not-found error when the entry is missing from the registry after the stop.
 */
async restart(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  if (!workers.get(name)) {
    await WorkerFactory.startFromPersisted(name, persistenceOverride);
    Logger.info(`Worker started from persistence: ${name}`);
    return;
  }

  await WorkerFactory.stop(name, persistenceOverride);

  // Re-read the entry: it may have been changed while the stop was in flight.
  const stopped = workers.get(name);
  if (!stopped) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found`);
  }

  // The entry is dropped before create() is called with its config.
  workers.delete(name);
  const replacement = await WorkerFactory.create(stopped.config);

  // NOTE(review): these writes target the object that was just removed from the
  // map — confirm whether anything still reads it after create() returns.
  stopped.worker = replacement;
  stopped.status = WorkerCreationStatus.RUNNING;
  stopped.startedAt = new Date();

  Logger.info(`Worker restarted: ${name}`);
},
1775
+
1776
/**
 * Pause a worker.
 *
 * If the worker is registered in memory its underlying worker is paused first;
 * the persisted record is updated in either case.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 */
async pause(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  const target = workers.get(name);
  const store = await validateAndGetStore(name, target?.config, persistenceOverride);

  if (target) {
    await target.worker.pause();
    // Using STARTING as equivalent to sleeping/paused
    target.status = WorkerCreationStatus.STARTING;
  }

  const patch = {
    status: WorkerCreationStatus.STARTING,
    updatedAt: new Date(),
  };
  await store.update(name, patch);

  Logger.info(`Worker paused: ${name}`);
},
1795
+
1796
/**
 * Resume a worker.
 *
 * If the worker is registered in memory it is resumed and marked running. The
 * running status is then persisted; a persistence failure is logged and does
 * not fail the call.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 */
async resume(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  const target = workers.get(name);
  const store = await validateAndGetStore(name, target?.config, persistenceOverride);

  if (target) {
    target.worker.resume();
    target.status = WorkerCreationStatus.RUNNING;
  }

  try {
    await store.update(name, {
      status: WorkerCreationStatus.RUNNING,
      updatedAt: new Date(),
    });
  } catch (error) {
    Logger.error('Failed to persist worker resume', error as Error);
  }

  Logger.info(`Worker resumed: ${name}`);
},
1816
+
1817
/**
 * Change the auto-start flag of a worker, in memory (when registered) and in
 * its persisted record.
 *
 * Enabling auto-start also starts the worker: a registered worker whose status
 * is not `'running'` is started, and an unregistered one is started from its
 * persisted record. Disabling auto-start does not stop anything.
 *
 * @param name - Worker name.
 * @param autoStart - New value of the flag.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution and start calls.
 */
async setAutoStart(
  name: string,
  autoStart: boolean,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<void> {
  const tracked = workers.get(name);
  const store = await validateAndGetStore(name, tracked?.config, persistenceOverride);

  if (tracked) {
    tracked.config.autoStart = autoStart;
  }

  await store.update(name, { autoStart, updatedAt: new Date() });

  if (autoStart) {
    // Look the worker up again after the awaits above.
    const current = workers.get(name);
    if (!current) {
      await WorkerFactory.startFromPersisted(name, persistenceOverride);
    } else if (current.status !== 'running') {
      await WorkerFactory.start(name, persistenceOverride);
    }
  }
},
1846
+
1847
/**
 * Merge a patch into a worker's persisted record and, when the worker is
 * registered in memory, refresh the matching fields of its runtime config.
 *
 * @param name - Worker name.
 * @param patch - Full or partial record; its fields override the stored ones.
 *   `updatedAt` defaults to the current time when the patch does not supply it.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 * @throws A not-found error when the store has no record for `name`.
 */
async update(
  name: string,
  patch: Partial<WorkerRecord> | WorkerRecord,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<void> {
  const instance = workers.get(name);
  const store = await getStoreForWorker(instance?.config, persistenceOverride);

  const existing = await store.get(name);
  if (!existing) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found in persistence store`);
  }

  const changes = patch as Partial<WorkerRecord>;
  const merged: WorkerRecord = {
    ...existing,
    ...changes,
    updatedAt: changes.updatedAt ?? new Date(),
  };

  // Use save() which will insert or update appropriately for each store
  await store.save(merged);

  if (!instance) {
    return;
  }

  // Mirror the merged record into the runtime config so restarts use the new
  // values; a field missing from the record keeps its current runtime value.
  const previous = instance.config;
  const options = {
    ...previous.options,
    concurrency: merged.concurrency ?? previous.options?.concurrency,
  };
  instance.config = {
    ...previous,
    version: merged.version ?? previous.version,
    queueName: merged.queueName ?? previous.queueName,
    options,
    infrastructure: (merged.infrastructure as unknown) ?? previous.infrastructure,
    features: (merged.features as unknown) ?? previous.features,
    datacenter: (merged.datacenter as unknown) ?? previous.datacenter,
  } as WorkerFactoryConfig;
},
1889
+
1890
/**
 * Start a worker that is already registered in memory.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 * @throws A not-found error when the worker is not registered in memory.
 */
async start(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  const target = workers.get(name);

  // Store validation runs before the in-memory check, so it applies even when
  // the instance exists.
  const store = await validateAndGetStore(name, target?.config, persistenceOverride);

  if (!target) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found`);
  }

  // Workers without an explicit version are started as '1.0.0'.
  await WorkerRegistry.start(name, target.config.version ?? '1.0.0');

  target.status = WorkerCreationStatus.RUNNING;
  target.startedAt = new Date();

  await store.update(name, {
    status: WorkerCreationStatus.RUNNING,
    updatedAt: new Date(),
  });

  Logger.info(`Worker started: ${name}`);
},
1912
+
1913
/**
 * List the names of all workers registered in memory.
 *
 * @returns A new array containing the registry's keys.
 */
list(): string[] {
  return [...workers.keys()];
},
1919
+
1920
/**
 * List the names of persisted workers.
 *
 * @param persistenceOverride - Optional persistence config forwarded to `listPersistedRecords`.
 * @param options - Optional `offset` / `limit` / `search` values forwarded to `listPersistedRecords`.
 * @returns The `name` of every record returned by `listPersistedRecords`, in the same order.
 */
async listPersisted(
  persistenceOverride?: WorkerPersistenceConfig,
  options?: { offset?: number; limit?: number; search?: string }
): Promise<string[]> {
  const persisted = await WorkerFactory.listPersistedRecords(persistenceOverride, options);
  const names: string[] = [];
  for (const entry of persisted) {
    names.push(entry.name);
  }
  return names;
},
1930
+
1931
/**
 * List persisted worker records.
 *
 * With a persistence override the records come from the store resolved for that
 * override; otherwise the default worker store is used after ensuring it is
 * configured.
 *
 * @param persistenceOverride - Optional persistence config selecting the store.
 * @param options - Optional `offset` / `limit` / `search` values passed to the store's `list`.
 * @returns The records returned by the selected store.
 */
async listPersistedRecords(
  persistenceOverride?: WorkerPersistenceConfig,
  options?: { offset?: number; limit?: number; search?: string }
): Promise<WorkerRecord[]> {
  if (persistenceOverride) {
    const overrideStore = await resolveWorkerStoreForPersistence(persistenceOverride);
    return overrideStore.list(options);
  }

  await ensureWorkerStoreConfigured();
  return workerStore.list(options);
},
1943
+
1944
/**
 * Create and start a worker from its persisted record when it is not
 * registered in memory.
 *
 * The processor is resolved by worker name first; if that yields nothing and
 * the record has a `processorPath`, the processor is resolved from that path.
 * A failure while resolving from the path is logged and treated as "not found".
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config used to read the record.
 * @throws A not-found error when no persisted record exists.
 * @throws A config error when no processor could be resolved.
 */
async startFromPersisted(
  name: string,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<void> {
  const record = await getPersistedRecord(name, persistenceOverride);
  if (!record) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found in persistence store`);
  }

  let processor = await resolveProcessor(name);

  const needsPathLookup = !processor && record.processorPath;
  if (needsPathLookup) {
    try {
      processor = await resolveProcessorFromPath(record.processorPath);
    } catch (error) {
      Logger.error(`Failed to resolve processor module for "${name}"`, error);
    }
  }

  if (!processor) {
    throw ErrorFactory.createConfigError(
      `Worker "${name}" processor is not registered or resolvable. Register the processor at startup or persist a processorPath.`
    );
  }

  const { queueName, concurrency, infrastructure, features, datacenter } = record;

  await WorkerFactory.create({
    name: record.name,
    queueName,
    version: record.version ?? undefined,
    processor,
    processorPath: record.processorPath ?? undefined,
    // Override to true when manually starting
    autoStart: true,
    options: { concurrency } as WorkerOptions,
    infrastructure: infrastructure as WorkerFactoryConfig['infrastructure'],
    features: features as WorkerFactoryConfig['features'],
    datacenter: datacenter as WorkerFactoryConfig['datacenter'],
  });
},
1985
+
1986
/**
 * Read a worker's persisted record.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution.
 * @returns Whatever the resolved store's `get` yields for `name`.
 */
async getPersisted(
  name: string,
  persistenceOverride?: WorkerPersistenceConfig
): Promise<WorkerRecord | null> {
  // The in-memory config, when there is one, takes part in selecting the store.
  const runtimeConfig = workers.get(name)?.config;
  const store = await getStoreForWorker(runtimeConfig, persistenceOverride);
  return store.get(name);
},
1997
+
1998
/**
 * Remove a worker: stop it and clear its per-worker state when it is
 * registered in memory, then delete its persisted record.
 *
 * @param name - Worker name.
 * @param persistenceOverride - Optional persistence config forwarded to store resolution and `stop`.
 */
async remove(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void> {
  const tracked = workers.get(name);

  // Validate that worker exists in the store we are trying to remove from
  const store = await validateAndGetStore(name, tracked?.config, persistenceOverride);

  if (tracked) {
    await WorkerFactory.stop(name, persistenceOverride);

    // `unregister` is treated as optional on the registry and called only if present.
    (WorkerRegistry as { unregister?: (name: string) => void }).unregister?.(name);

    // Drop the state each subsystem keeps for this worker.
    AutoScaler.clearHistory(name);
    ResourceMonitor.clearHistory(name);
    CircuitBreaker.deleteWorker(name);
    CanaryController.purge(name);
    WorkerVersioning.clear(name);
    DatacenterOrchestrator.removeWorker(name);
    await Observability.clearWorkerMetrics(name);

    // Stop health monitoring for this worker
    HealthMonitor.unregister(name);

    workers.delete(name);
  }

  await store.remove(name);
  Logger.info(`Worker removed: ${name}`);
},
2027
+
2028
/**
 * Fetch the aggregated `processed` metrics of a worker for the last hour.
 *
 * @param name - Worker name.
 * @returns The aggregation result, or `null` when the worker's `features.metrics`
 *   flag is missing or falsy.
 * @throws A not-found error when the worker is not registered in memory.
 */
async getMetrics(name: string): Promise<unknown> {
  const target = workers.get(name);
  if (!target) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found`);
  }

  if (!target.config.features?.metrics) {
    return null;
  }

  // Aggregation window: the hour ending now.
  const windowEndMs = Date.now();
  const windowStartMs = windowEndMs - 3600 * 1000;

  return await WorkerMetrics.aggregate({
    workerName: name,
    metricType: 'processed',
    granularity: 'hourly',
    startDate: new Date(windowStartMs),
    endDate: new Date(windowEndMs),
  });
},
2055
+
2056
/**
 * Fetch the latest recorded health of a worker.
 *
 * @param name - Worker name.
 * @returns The latest health entry, or `{ status: 'unknown' }` when the worker's
 *   `features.metrics` flag is missing or falsy.
 * @throws A not-found error when the worker is not registered in memory.
 */
async getHealth(name: string): Promise<unknown> {
  const target = workers.get(name);
  if (!target) {
    throw ErrorFactory.createNotFoundError(`Worker "${name}" not found`);
  }

  const metricsEnabled = target.config.features?.metrics ?? false;
  if (!metricsEnabled) {
    return { status: 'unknown' };
  }

  return await WorkerMetrics.getLatestHealth(name);
},
2074
+
2075
/**
 * Shut down every registered worker and then all supporting modules.
 *
 * Workers are stopped concurrently. A worker that fails to stop is logged and
 * does not prevent the remaining workers or the modules from shutting down;
 * with a plain `Promise.all` the first rejection would skip every module
 * shutdown below and leave the registry populated.
 */
async shutdown(): Promise<void> {
  Logger.info('WorkerFactory shutting down...');

  const workerNames = Array.from(workers.keys());

  // allSettled waits for every stop attempt, whether it succeeds or fails.
  const stopResults = await Promise.allSettled(
    workerNames.map(async (name) => WorkerFactory.stop(name))
  );
  stopResults.forEach((result, index) => {
    if (result.status === 'rejected') {
      Logger.error(
        `Failed to stop worker "${workerNames[index]}" during shutdown`,
        result.reason
      );
    }
  });

  // Shutdown all modules
  ResourceMonitor.stop();
  await WorkerMetrics.shutdown();
  await MultiQueueWorker.shutdown();
  await ComplianceManager.shutdown();
  await PriorityQueue.shutdown();
  HealthMonitor.shutdown();
  AutoScaler.stop();
  ClusterLock.shutdown();
  WorkerVersioning.shutdown();
  CanaryController.shutdown();
  DatacenterOrchestrator.shutdown();
  PluginManager.shutdown();
  Observability.shutdown();
  await DeadLetterQueue.shutdown();
  CircuitBreaker.shutdown();

  workers.clear();

  Logger.info('WorkerFactory shutdown complete');
},
2106
+ });
2107
+
2108
+ // Graceful shutdown handled by WorkerShutdown