@zintrust/workers 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/AnomalyDetection.d.ts +4 -0
- package/dist/AnomalyDetection.js +8 -0
- package/dist/BroadcastWorker.d.ts +2 -0
- package/dist/CanaryController.js +49 -5
- package/dist/ChaosEngineering.js +13 -0
- package/dist/ClusterLock.js +21 -10
- package/dist/DeadLetterQueue.js +12 -8
- package/dist/MultiQueueWorker.d.ts +1 -1
- package/dist/MultiQueueWorker.js +12 -7
- package/dist/NotificationWorker.d.ts +2 -0
- package/dist/PriorityQueue.d.ts +2 -2
- package/dist/PriorityQueue.js +20 -21
- package/dist/ResourceMonitor.js +65 -38
- package/dist/WorkerFactory.d.ts +23 -3
- package/dist/WorkerFactory.js +420 -40
- package/dist/WorkerInit.js +8 -3
- package/dist/WorkerMetrics.d.ts +2 -1
- package/dist/WorkerMetrics.js +152 -93
- package/dist/WorkerRegistry.d.ts +6 -0
- package/dist/WorkerRegistry.js +70 -1
- package/dist/WorkerShutdown.d.ts +21 -0
- package/dist/WorkerShutdown.js +82 -9
- package/dist/WorkerShutdownDurableObject.d.ts +12 -0
- package/dist/WorkerShutdownDurableObject.js +41 -0
- package/dist/build-manifest.json +171 -99
- package/dist/createQueueWorker.d.ts +2 -0
- package/dist/createQueueWorker.js +42 -27
- package/dist/dashboard/types.d.ts +5 -0
- package/dist/dashboard/workers-api.js +136 -43
- package/dist/http/WorkerApiController.js +1 -0
- package/dist/http/WorkerController.js +133 -85
- package/dist/http/WorkerMonitoringService.d.ts +11 -0
- package/dist/http/WorkerMonitoringService.js +62 -0
- package/dist/http/middleware/CustomValidation.js +1 -1
- package/dist/http/middleware/EditWorkerValidation.d.ts +1 -1
- package/dist/http/middleware/EditWorkerValidation.js +7 -6
- package/dist/http/middleware/ProcessorPathSanitizer.js +101 -35
- package/dist/http/middleware/WorkerValidationChain.js +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1 -0
- package/dist/routes/workers.js +48 -6
- package/dist/storage/WorkerStore.d.ts +4 -1
- package/dist/storage/WorkerStore.js +55 -7
- package/dist/telemetry/api/TelemetryAPI.d.ts +46 -0
- package/dist/telemetry/api/TelemetryAPI.js +219 -0
- package/dist/telemetry/api/TelemetryMonitoringService.d.ts +17 -0
- package/dist/telemetry/api/TelemetryMonitoringService.js +113 -0
- package/dist/telemetry/components/AlertPanel.d.ts +1 -0
- package/dist/telemetry/components/AlertPanel.js +13 -0
- package/dist/telemetry/components/CostTracking.d.ts +1 -0
- package/dist/telemetry/components/CostTracking.js +14 -0
- package/dist/telemetry/components/ResourceUsageChart.d.ts +1 -0
- package/dist/telemetry/components/ResourceUsageChart.js +11 -0
- package/dist/telemetry/components/WorkerHealthChart.d.ts +1 -0
- package/dist/telemetry/components/WorkerHealthChart.js +11 -0
- package/dist/telemetry/index.d.ts +15 -0
- package/dist/telemetry/index.js +60 -0
- package/dist/telemetry/routes/dashboard.d.ts +6 -0
- package/dist/telemetry/routes/dashboard.js +608 -0
- package/dist/ui/router/EmbeddedAssets.d.ts +4 -0
- package/dist/ui/router/EmbeddedAssets.js +13 -0
- package/dist/ui/router/ui.js +100 -4
- package/package.json +10 -6
- package/src/AnomalyDetection.ts +9 -0
- package/src/CanaryController.ts +41 -5
- package/src/ChaosEngineering.ts +14 -0
- package/src/ClusterLock.ts +22 -9
- package/src/DeadLetterQueue.ts +13 -8
- package/src/MultiQueueWorker.ts +15 -8
- package/src/PriorityQueue.ts +21 -22
- package/src/ResourceMonitor.ts +72 -40
- package/src/WorkerFactory.ts +545 -49
- package/src/WorkerInit.ts +8 -3
- package/src/WorkerMetrics.ts +183 -105
- package/src/WorkerRegistry.ts +80 -1
- package/src/WorkerShutdown.ts +115 -9
- package/src/WorkerShutdownDurableObject.ts +64 -0
- package/src/createQueueWorker.ts +73 -30
- package/src/dashboard/types.ts +5 -0
- package/src/dashboard/workers-api.ts +165 -52
- package/src/http/WorkerApiController.ts +1 -0
- package/src/http/WorkerController.ts +167 -90
- package/src/http/WorkerMonitoringService.ts +77 -0
- package/src/http/middleware/CustomValidation.ts +1 -1
- package/src/http/middleware/EditWorkerValidation.ts +7 -6
- package/src/http/middleware/ProcessorPathSanitizer.ts +123 -36
- package/src/http/middleware/WorkerValidationChain.ts +1 -0
- package/src/index.ts +6 -1
- package/src/routes/workers.ts +66 -9
- package/src/storage/WorkerStore.ts +59 -9
- package/src/telemetry/api/TelemetryAPI.ts +292 -0
- package/src/telemetry/api/TelemetryMonitoringService.ts +149 -0
- package/src/telemetry/components/AlertPanel.ts +13 -0
- package/src/telemetry/components/CostTracking.ts +14 -0
- package/src/telemetry/components/ResourceUsageChart.ts +11 -0
- package/src/telemetry/components/WorkerHealthChart.ts +11 -0
- package/src/telemetry/index.ts +121 -0
- package/src/telemetry/public/assets/zintrust-logo.svg +15 -0
- package/src/telemetry/routes/dashboard.ts +638 -0
- package/src/telemetry/styles/tailwind.css +1 -0
- package/src/telemetry/styles/zintrust-theme.css +8 -0
- package/src/ui/router/EmbeddedAssets.ts +13 -0
- package/src/ui/router/ui.ts +112 -5
- package/src/ui/workers/index.html +2 -2
- package/src/ui/workers/main.js +232 -61
- package/src/ui/workers/zintrust.svg +30 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +0 -3
- package/dist/dashboard/workers-dashboard-ui.js +0 -1026
- package/dist/dashboard/workers-dashboard.d.ts +0 -4
- package/dist/dashboard/workers-dashboard.js +0 -904
|
@@ -1,49 +1,109 @@
|
|
|
1
|
-
import { ErrorFactory, Logger } from '@zintrust/core';
|
|
1
|
+
import { Env, ErrorFactory, Logger } from '@zintrust/core';
|
|
2
2
|
import { WorkerFactory } from '../WorkerFactory';
|
|
3
3
|
import { WorkerMetrics as WorkerMetricsManager } from '../WorkerMetrics';
|
|
4
4
|
const DEFAULT_PAGE_SIZE = 100;
|
|
5
5
|
const MAX_PAGE_SIZE = 200;
|
|
6
|
+
// Helper for timeout handling
|
|
7
|
+
/**
 * Wait for `promise`, but give up after `timeoutMs` milliseconds.
 *
 * The underlying operation is NOT cancelled on timeout; only the wait is abandoned.
 *
 * @param promise - Operation to wait for.
 * @param timeoutMs - Milliseconds to wait before rejecting.
 * @param errorMsg - Message of the Error used to reject when the timer wins.
 * @returns The value `promise` resolves with, when it settles first.
 * @throws The rejection reason of `promise`, or `Error(errorMsg)` on timeout.
 */
async function withTimeout(promise, timeoutMs, errorMsg) {
    let timer;
    const timeoutPromise = new Promise((_, reject) => {
        // eslint-disable-next-line no-restricted-syntax
        timer = setTimeout(() => reject(new Error(errorMsg)), timeoutMs);
    });
    try {
        return await Promise.race([promise, timeoutPromise]);
    }
    finally {
        // Single cleanup point for success, failure and timeout, so a pending
        // timer can never outlive the call and keep the event loop alive.
        clearTimeout(timer);
    }
}
|
|
25
|
+
async function fetchPersistenceWithTimeout(page, limit, query) {
|
|
26
|
+
const driver = Env.get('WORKER_PERSISTENCE_DRIVER', 'memory');
|
|
27
|
+
try {
|
|
28
|
+
const result = await withTimeout(getWorkersFromPersistence(page, limit, query.driver, query), 5000, 'Persistence timeout');
|
|
29
|
+
return result;
|
|
30
|
+
}
|
|
31
|
+
catch (err) {
|
|
32
|
+
Logger.error(`[getWorkers] Persistence hung or failed (driver=${driver}), resetting connection state`, err);
|
|
33
|
+
if (typeof WorkerFactory.resetPersistence === 'function') {
|
|
34
|
+
await WorkerFactory.resetPersistence();
|
|
35
|
+
}
|
|
36
|
+
return {
|
|
37
|
+
workers: [],
|
|
38
|
+
total: 0,
|
|
39
|
+
drivers: ['memory'],
|
|
40
|
+
effectiveLimit: limit,
|
|
41
|
+
prePaginated: true,
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
/**
 * Fetch queue statistics without ever rejecting.
 *
 * The lookup is capped at 3 seconds. If it fails or times out, a warning is
 * logged and a zeroed snapshot for the `memory` driver is returned instead.
 *
 * @returns Queue statistics, or the zeroed placeholder on failure/timeout.
 */
async function fetchQueueDataSafe() {
    try {
        const queueData = await withTimeout(getQueueData(), 3000, 'Queue data timeout');
        return queueData;
    }
    catch (queueError) {
        Logger.warn('[getWorkers] Queue data fetch failed or timed out', queueError);
        // Placeholder so the caller still gets a well-formed queueData object.
        return {
            driver: 'memory',
            totalQueues: 0,
            totalJobs: 0,
            processingJobs: 0,
            failedJobs: 0,
        };
    }
}
|
|
61
|
+
/**
 * Attach metrics to `workers`, falling back to the unenriched list.
 *
 * Enrichment is capped at 5 seconds. On failure or timeout a warning is
 * logged, a metrics shutdown is kicked off in the background, and the
 * original `workers` value is returned unchanged.
 *
 * @param workers - Worker entries to enrich.
 * @returns The enriched workers, or `workers` itself when enrichment fails.
 */
async function enrichWithMetricsSafe(workers) {
    let enriched;
    try {
        enriched = await withTimeout(enrichWithMetrics(workers), 5000, 'Metrics timeout');
    }
    catch (metricsError) {
        Logger.warn('[getWorkers] Metrics fetch failed or timed out', metricsError);
        // Deliberately not awaited: this request is already late, and the reset
        // only matters for the NEXT request, which should start from a clean
        // metrics connection.
        const resetting = WorkerMetricsManager.shutdown();
        resetting.catch((resetError) => Logger.error('Failed to reset metrics connection', resetError));
        return workers;
    }
    return enriched;
}
|
|
6
74
|
export async function getWorkers(query) {
|
|
75
|
+
const start = Date.now();
|
|
76
|
+
Logger.debug('[getWorkers] Start', query);
|
|
7
77
|
const page = Math.max(1, query.page || 1);
|
|
8
78
|
const limit = Math.min(MAX_PAGE_SIZE, Math.max(1, query.limit || DEFAULT_PAGE_SIZE));
|
|
9
79
|
const offset = (page - 1) * limit;
|
|
10
80
|
// Get workers from persistence based on configuration
|
|
11
|
-
const
|
|
12
|
-
|
|
81
|
+
const persistenceStart = Date.now();
|
|
82
|
+
const persistence = await fetchPersistenceWithTimeout(page, limit, query);
|
|
83
|
+
Logger.debug('[getWorkers] Persistence took ' + (Date.now() - persistenceStart) + 'ms', {
|
|
84
|
+
count: persistence.workers.length,
|
|
85
|
+
total: persistence.total,
|
|
86
|
+
});
|
|
87
|
+
// Apply filters/search/sorting
|
|
13
88
|
let filteredWorkers = applyFilters(persistence.workers, query);
|
|
14
|
-
// Apply search
|
|
15
89
|
if (query.search) {
|
|
16
90
|
filteredWorkers = applySearch(filteredWorkers, query.search);
|
|
17
91
|
}
|
|
18
|
-
// Apply sorting
|
|
19
92
|
filteredWorkers = applySorting(filteredWorkers, query.sortBy, query.sortOrder);
|
|
20
93
|
// Get queue data
|
|
21
|
-
const
|
|
94
|
+
const queueStart = Date.now();
|
|
95
|
+
const queueData = await fetchQueueDataSafe();
|
|
96
|
+
Logger.debug('[getWorkers] Queue data took ' + (Date.now() - queueStart) + 'ms');
|
|
22
97
|
// Apply pagination
|
|
23
98
|
const paginatedWorkers = persistence.prePaginated
|
|
24
99
|
? filteredWorkers
|
|
25
100
|
: filteredWorkers.slice(offset, offset + persistence.effectiveLimit);
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
queueData,
|
|
33
|
-
pagination: {
|
|
34
|
-
page,
|
|
35
|
-
limit: persistence.effectiveLimit,
|
|
36
|
-
total: persistence.prePaginated ? persistence.total : filteredWorkers.length,
|
|
37
|
-
totalPages: Math.ceil((persistence.prePaginated ? persistence.total : filteredWorkers.length) /
|
|
38
|
-
persistence.effectiveLimit),
|
|
39
|
-
hasNext: offset + persistence.effectiveLimit <
|
|
40
|
-
(persistence.prePaginated ? persistence.total : filteredWorkers.length),
|
|
41
|
-
hasPrev: page > 1,
|
|
42
|
-
},
|
|
43
|
-
drivers: persistence.drivers,
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
return {
|
|
101
|
+
// Enrich with metrics
|
|
102
|
+
const metricsStart = Date.now();
|
|
103
|
+
const workersWithMetrics = await enrichWithMetricsSafe(paginatedWorkers);
|
|
104
|
+
Logger.debug('[getWorkers] Metrics took ' + (Date.now() - metricsStart) + 'ms');
|
|
105
|
+
// Prepare result
|
|
106
|
+
const result = {
|
|
47
107
|
workers: workersWithMetrics,
|
|
48
108
|
queueData,
|
|
49
109
|
pagination: {
|
|
@@ -58,22 +118,35 @@ export async function getWorkers(query) {
|
|
|
58
118
|
},
|
|
59
119
|
drivers: persistence.drivers,
|
|
60
120
|
};
|
|
121
|
+
// Include details if requested
|
|
122
|
+
if (query.includeDetails) {
|
|
123
|
+
const detailsStart = Date.now();
|
|
124
|
+
try {
|
|
125
|
+
result.workers = await enrichWithDetails(result.workers);
|
|
126
|
+
}
|
|
127
|
+
catch (err) {
|
|
128
|
+
Logger.warn('[getWorkers] Details fetch failed', err);
|
|
129
|
+
}
|
|
130
|
+
Logger.debug('[getWorkers] Details took ' + (Date.now() - detailsStart) + 'ms');
|
|
131
|
+
}
|
|
132
|
+
Logger.debug('[getWorkers] Total took ' + (Date.now() - start) + 'ms');
|
|
133
|
+
return result;
|
|
61
134
|
}
|
|
62
|
-
async function getWorkersFromPersistence(page, limit, driverFilter) {
|
|
135
|
+
async function getWorkersFromPersistence(page, limit, driverFilter, query) {
|
|
63
136
|
const offset = (page - 1) * limit;
|
|
64
|
-
const persistenceDriver =
|
|
137
|
+
const persistenceDriver = Env.get('WORKER_PERSISTENCE_DRIVER', 'memory');
|
|
65
138
|
const isMixedPersistence = persistenceDriver === 'database' || persistenceDriver === 'db';
|
|
66
139
|
if (driverFilter) {
|
|
67
|
-
return getWorkersByDriverFilter(driverFilter, offset, limit);
|
|
140
|
+
return getWorkersByDriverFilter(driverFilter, offset, limit, query);
|
|
68
141
|
}
|
|
69
142
|
if (isMixedPersistence) {
|
|
70
|
-
return getWorkersFromMixedPersistence(offset, limit);
|
|
143
|
+
return getWorkersFromMixedPersistence(offset, limit, query);
|
|
71
144
|
}
|
|
72
|
-
return getWorkersFromSinglePersistence(persistenceDriver, offset, limit);
|
|
145
|
+
return getWorkersFromSinglePersistence(persistenceDriver, offset, limit, query);
|
|
73
146
|
}
|
|
74
|
-
async function getWorkersByDriverFilter(driverFilter, offset, limit) {
|
|
147
|
+
async function getWorkersByDriverFilter(driverFilter, offset, limit, query) {
|
|
75
148
|
try {
|
|
76
|
-
const driverRecords = await WorkerFactory.listPersistedRecords({ driver: driverFilter }, { offset, limit });
|
|
149
|
+
const driverRecords = await WorkerFactory.listPersistedRecords({ driver: driverFilter }, { offset, limit, includeInactive: query.includeInactive });
|
|
77
150
|
const workers = transformToWorkerData(driverRecords, driverFilter);
|
|
78
151
|
return {
|
|
79
152
|
workers,
|
|
@@ -94,10 +167,26 @@ async function getWorkersByDriverFilter(driverFilter, offset, limit) {
|
|
|
94
167
|
};
|
|
95
168
|
}
|
|
96
169
|
}
|
|
97
|
-
async function getWorkersFromMixedPersistence(offset, limit) {
|
|
170
|
+
async function getWorkersFromMixedPersistence(offset, limit, query) {
|
|
171
|
+
const includeInactive = query.includeInactive;
|
|
172
|
+
let dbRecords = [];
|
|
173
|
+
let redisRecords = [];
|
|
174
|
+
try {
|
|
175
|
+
dbRecords = await WorkerFactory.listPersistedRecords({ driver: 'database', connection: 'mysql' }, { offset, limit, includeInactive });
|
|
176
|
+
}
|
|
177
|
+
catch (error) {
|
|
178
|
+
// In some environments (like Cloudflare), database access might not be available.
|
|
179
|
+
// We log this as debug instead of error to avoid noise.
|
|
180
|
+
Logger.debug('Failed to fetch from database persistence:', error);
|
|
181
|
+
}
|
|
182
|
+
try {
|
|
183
|
+
redisRecords = await WorkerFactory.listPersistedRecords({ driver: 'redis' }, { offset, limit, includeInactive });
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
// Similarly for Redis if direct connection is not available.
|
|
187
|
+
Logger.debug('Failed to fetch from redis persistence:', error);
|
|
188
|
+
}
|
|
98
189
|
try {
|
|
99
|
-
const dbRecords = await WorkerFactory.listPersistedRecords({ driver: 'database' }, { offset, limit });
|
|
100
|
-
const redisRecords = await WorkerFactory.listPersistedRecords({ driver: 'redis' }, { offset, limit });
|
|
101
190
|
const workers = [
|
|
102
191
|
...transformToWorkerData(dbRecords, 'database'),
|
|
103
192
|
...transformToWorkerData(redisRecords, 'redis'),
|
|
@@ -113,7 +202,7 @@ async function getWorkersFromMixedPersistence(offset, limit) {
|
|
|
113
202
|
};
|
|
114
203
|
}
|
|
115
204
|
catch (error) {
|
|
116
|
-
Logger.error('Error
|
|
205
|
+
Logger.error('Error transforming workers from mixed persistence:', error);
|
|
117
206
|
return {
|
|
118
207
|
workers: [],
|
|
119
208
|
total: 0,
|
|
@@ -123,10 +212,10 @@ async function getWorkersFromMixedPersistence(offset, limit) {
|
|
|
123
212
|
};
|
|
124
213
|
}
|
|
125
214
|
}
|
|
126
|
-
async function getWorkersFromSinglePersistence(persistenceDriver, offset, limit) {
|
|
215
|
+
async function getWorkersFromSinglePersistence(persistenceDriver, offset, limit, query) {
|
|
127
216
|
try {
|
|
128
217
|
const normalizedDriver = normalizeDriver(persistenceDriver);
|
|
129
|
-
const driverRecords = await WorkerFactory.listPersistedRecords({ driver: normalizedDriver }, { offset, limit });
|
|
218
|
+
const driverRecords = await WorkerFactory.listPersistedRecords({ driver: normalizedDriver }, { offset, limit, includeInactive: query.includeInactive });
|
|
130
219
|
const workers = transformToWorkerData(driverRecords, normalizedDriver);
|
|
131
220
|
return {
|
|
132
221
|
workers,
|
|
@@ -181,6 +270,7 @@ const buildWorkerFromRecord = (record, driver) => {
|
|
|
181
270
|
version: record.version ?? '1.0.0',
|
|
182
271
|
autoStart: record.autoStart,
|
|
183
272
|
lastError: record.lastError,
|
|
273
|
+
activeStatus: record.activeStatus ?? true,
|
|
184
274
|
};
|
|
185
275
|
return buildWorkerFromRaw(rawData, driver);
|
|
186
276
|
};
|
|
@@ -197,6 +287,7 @@ const buildWorkerFromRaw = (workerData, driver) => {
|
|
|
197
287
|
avgTime: workerData.avgTime || 0,
|
|
198
288
|
memory: workerData.memory || 0,
|
|
199
289
|
autoStart: workerData.autoStart || false,
|
|
290
|
+
activeStatus: workerData.activeStatus ?? true,
|
|
200
291
|
details: workerData.details || {
|
|
201
292
|
configuration: {},
|
|
202
293
|
health: {},
|
|
@@ -306,7 +397,7 @@ function applySorting(workers, sortBy, sortOrder = 'asc') {
|
|
|
306
397
|
});
|
|
307
398
|
}
|
|
308
399
|
async function getQueueData() {
|
|
309
|
-
const queueDriver =
|
|
400
|
+
const queueDriver = Env.get('QUEUE_DRIVER', 'redis');
|
|
310
401
|
try {
|
|
311
402
|
// Get queue statistics based on QUEUE_DRIVER
|
|
312
403
|
switch (queueDriver) {
|
|
@@ -512,7 +603,8 @@ function buildWorkerConfiguration(worker, persisted) {
|
|
|
512
603
|
queueName: worker.queueName,
|
|
513
604
|
concurrency: null,
|
|
514
605
|
region: null,
|
|
515
|
-
|
|
606
|
+
processorSpec: null,
|
|
607
|
+
activeStatus: null,
|
|
516
608
|
version: worker.version,
|
|
517
609
|
features: null,
|
|
518
610
|
infrastructure: null,
|
|
@@ -523,7 +615,8 @@ function buildWorkerConfiguration(worker, persisted) {
|
|
|
523
615
|
queueName: persisted.queueName ?? worker.queueName,
|
|
524
616
|
concurrency: persisted.concurrency ?? null,
|
|
525
617
|
region: persisted.region ?? null,
|
|
526
|
-
|
|
618
|
+
processorSpec: persisted.processorSpec ?? null,
|
|
619
|
+
activeStatus: persisted.activeStatus ?? true,
|
|
527
620
|
version: persisted.version ?? worker.version,
|
|
528
621
|
features: persisted.features ?? null,
|
|
529
622
|
infrastructure: persisted.infrastructure ?? null,
|
|
@@ -73,6 +73,7 @@ export const listWorkers = async (req, res) => {
|
|
|
73
73
|
]),
|
|
74
74
|
search: getQueryParam(query, 'search'),
|
|
75
75
|
includeDetails: getBooleanParam(query, 'includeDetails', false),
|
|
76
|
+
includeInactive: getBooleanParam(query, 'includeInactive', false),
|
|
76
77
|
};
|
|
77
78
|
const result = await getWorkers(queryParams);
|
|
78
79
|
res.json(result);
|
|
@@ -3,9 +3,8 @@
|
|
|
3
3
|
* Worker Controller
|
|
4
4
|
* HTTP handlers for worker management API
|
|
5
5
|
*/
|
|
6
|
-
import { Logger, getValidatedBody } from '@zintrust/core';
|
|
6
|
+
import { Env, Logger, getValidatedBody } from '@zintrust/core';
|
|
7
7
|
import { CanaryController } from '../CanaryController';
|
|
8
|
-
import { getWorkers } from '../dashboard/workers-api';
|
|
9
8
|
import { HealthMonitor } from '../HealthMonitor';
|
|
10
9
|
import { getParam } from '../helper';
|
|
11
10
|
import { SLAMonitor } from '../index';
|
|
@@ -14,6 +13,7 @@ import { WorkerFactory } from '../WorkerFactory';
|
|
|
14
13
|
import { WorkerRegistry } from '../WorkerRegistry';
|
|
15
14
|
import { WorkerShutdown } from '../WorkerShutdown';
|
|
16
15
|
import { WorkerVersioning } from '../WorkerVersioning';
|
|
16
|
+
import { WorkerMonitoringService } from './WorkerMonitoringService';
|
|
17
17
|
/**
|
|
18
18
|
* Helper to get request body
|
|
19
19
|
*/
|
|
@@ -50,12 +50,12 @@ async function create(req, res) {
|
|
|
50
50
|
}
|
|
51
51
|
const rawProcessor = body.processor;
|
|
52
52
|
let processor;
|
|
53
|
-
let
|
|
53
|
+
let processorSpec;
|
|
54
54
|
if (typeof rawProcessor === 'string') {
|
|
55
|
-
|
|
56
|
-
const resolved = await WorkerFactory.
|
|
55
|
+
processorSpec = rawProcessor;
|
|
56
|
+
const resolved = await WorkerFactory.resolveProcessorSpec(rawProcessor);
|
|
57
57
|
if (!resolved) {
|
|
58
|
-
res.setStatus(400).json({ error: 'Processor
|
|
58
|
+
res.setStatus(400).json({ error: 'Processor spec could not be resolved' });
|
|
59
59
|
return;
|
|
60
60
|
}
|
|
61
61
|
processor = resolved;
|
|
@@ -70,7 +70,7 @@ async function create(req, res) {
|
|
|
70
70
|
const config = {
|
|
71
71
|
...body,
|
|
72
72
|
processor,
|
|
73
|
-
|
|
73
|
+
processorSpec,
|
|
74
74
|
};
|
|
75
75
|
await WorkerFactory.create(config);
|
|
76
76
|
res.json({
|
|
@@ -98,6 +98,9 @@ async function start(req, res) {
|
|
|
98
98
|
return;
|
|
99
99
|
}
|
|
100
100
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
101
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
102
|
+
if (!isActive)
|
|
103
|
+
return;
|
|
101
104
|
const registered = WorkerRegistry.list().includes(name);
|
|
102
105
|
if (!registered) {
|
|
103
106
|
await WorkerFactory.startFromPersisted(name, persistenceOverride);
|
|
@@ -121,6 +124,9 @@ async function stop(req, res) {
|
|
|
121
124
|
try {
|
|
122
125
|
const name = getParam(req, 'name');
|
|
123
126
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
127
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
128
|
+
if (!isActive)
|
|
129
|
+
return;
|
|
124
130
|
await WorkerFactory.stop(name, persistenceOverride);
|
|
125
131
|
res.json({ ok: true, message: `Worker ${name} stopped` });
|
|
126
132
|
}
|
|
@@ -138,6 +144,9 @@ async function restart(req, res) {
|
|
|
138
144
|
try {
|
|
139
145
|
const name = getParam(req, 'name');
|
|
140
146
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
147
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
148
|
+
if (!isActive)
|
|
149
|
+
return;
|
|
141
150
|
await WorkerFactory.restart(name, persistenceOverride);
|
|
142
151
|
res.json({ ok: true, message: `Worker ${name} restarted` });
|
|
143
152
|
}
|
|
@@ -170,6 +179,9 @@ async function setAutoStart(req, res) {
|
|
|
170
179
|
enabled = ['true', '1', 'yes', 'on'].includes(enabledStr.toLowerCase());
|
|
171
180
|
}
|
|
172
181
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
182
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
183
|
+
if (!isActive)
|
|
184
|
+
return;
|
|
173
185
|
await WorkerFactory.setAutoStart(name, enabled, persistenceOverride);
|
|
174
186
|
res.json({ ok: true, message: `Worker ${name} autoStart set to ${enabled}` });
|
|
175
187
|
}
|
|
@@ -187,6 +199,9 @@ async function pause(req, res) {
|
|
|
187
199
|
try {
|
|
188
200
|
const name = getParam(req, 'name');
|
|
189
201
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
202
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
203
|
+
if (!isActive)
|
|
204
|
+
return;
|
|
190
205
|
await WorkerFactory.pause(name, persistenceOverride);
|
|
191
206
|
res.json({ ok: true, message: `Worker ${name} paused` });
|
|
192
207
|
}
|
|
@@ -204,6 +219,9 @@ async function resume(req, res) {
|
|
|
204
219
|
try {
|
|
205
220
|
const name = getParam(req, 'name');
|
|
206
221
|
const persistenceOverride = resolvePersistenceOverride(req);
|
|
222
|
+
const isActive = await ensureActiveWorker(name, persistenceOverride, res);
|
|
223
|
+
if (!isActive)
|
|
224
|
+
return;
|
|
207
225
|
await WorkerFactory.resume(name, persistenceOverride);
|
|
208
226
|
res.json({ ok: true, message: `Worker ${name} resumed` });
|
|
209
227
|
}
|
|
@@ -270,6 +288,21 @@ const resolvePersistenceOverride = (req) => {
|
|
|
270
288
|
}
|
|
271
289
|
return undefined;
|
|
272
290
|
};
|
|
291
|
+
const ensureActiveWorker = async (name, persistenceOverride, res) => {
|
|
292
|
+
if (!name)
|
|
293
|
+
return false;
|
|
294
|
+
const instance = WorkerFactory.get(name);
|
|
295
|
+
if (instance?.config?.activeStatus === false) {
|
|
296
|
+
res.setStatus(410).json({ error: 'Worker is inactive', code: 'WORKER_INACTIVE' });
|
|
297
|
+
return false;
|
|
298
|
+
}
|
|
299
|
+
const persisted = await WorkerFactory.getPersisted(name, persistenceOverride);
|
|
300
|
+
if (persisted?.activeStatus === false) {
|
|
301
|
+
res.setStatus(410).json({ error: 'Worker is inactive', code: 'WORKER_INACTIVE' });
|
|
302
|
+
return false;
|
|
303
|
+
}
|
|
304
|
+
return true;
|
|
305
|
+
};
|
|
273
306
|
/**
|
|
274
307
|
* Get a specific worker instance
|
|
275
308
|
* @param req.params.name - Worker name
|
|
@@ -314,8 +347,13 @@ async function update(req, res) {
|
|
|
314
347
|
res.setStatus(404).json({ error: `Worker ${name} not found` });
|
|
315
348
|
return;
|
|
316
349
|
}
|
|
317
|
-
//
|
|
350
|
+
// Remove immutable fields and prepare updates
|
|
318
351
|
const { name: _name, driver: _driver, ...updateData } = reqData; // Remove immutable fields
|
|
352
|
+
const processorValid = await validateProcessorSpecIfNeeded(updateData);
|
|
353
|
+
if (!processorValid) {
|
|
354
|
+
res.setStatus(400).json({ error: 'Processor spec could not be resolved' });
|
|
355
|
+
return;
|
|
356
|
+
}
|
|
319
357
|
// Note: driver is determined by persistence configuration, not stored in worker record
|
|
320
358
|
const updatedRecord = {
|
|
321
359
|
...currentRecord,
|
|
@@ -324,34 +362,9 @@ async function update(req, res) {
|
|
|
324
362
|
updatedAt: new Date(),
|
|
325
363
|
};
|
|
326
364
|
updatedRecord.infrastructure.persistence.driver = driver;
|
|
327
|
-
|
|
328
|
-
try {
|
|
329
|
-
// Persist merged record via WorkerFactory API
|
|
330
|
-
await WorkerFactory.update(name, updatedRecord, persistenceOverride);
|
|
331
|
-
Logger.info(`Worker ${name} persistence updated with fields:`, Object.keys(updateData));
|
|
332
|
-
}
|
|
333
|
-
catch (persistError) {
|
|
334
|
-
Logger.warn(`Failed to persist some updates for ${name}`, persistError);
|
|
335
|
-
// Continue with restart even if persistence update partially fails
|
|
336
|
-
}
|
|
337
|
-
// If worker is currently running, restart it to apply new configuration changes
|
|
338
|
-
// This ensures new concurrency, queue settings, and other config take effect
|
|
365
|
+
await persistUpdatedRecord(name, updatedRecord, persistenceOverride, updateData);
|
|
339
366
|
const currentInstance = WorkerFactory.get(name);
|
|
340
|
-
|
|
341
|
-
if (currentInstance && currentInstance.status === 'running') {
|
|
342
|
-
try {
|
|
343
|
-
Logger.info(`Restarting worker ${name} to apply configuration changes`);
|
|
344
|
-
await WorkerFactory.restart(name, persistenceOverride);
|
|
345
|
-
}
|
|
346
|
-
catch (error) {
|
|
347
|
-
restartError = error.message;
|
|
348
|
-
Logger.warn(`Failed to restart worker ${name} after update`, error);
|
|
349
|
-
// Don't fail the update, but warn about restart failure
|
|
350
|
-
}
|
|
351
|
-
}
|
|
352
|
-
else {
|
|
353
|
-
Logger.info(`Worker ${name} is not running (status: ${currentInstance?.status || 'not found'}), skipping restart`);
|
|
354
|
-
}
|
|
367
|
+
const restartError = await restartIfNeeded(name, currentInstance, updatedRecord, currentRecord, persistenceOverride);
|
|
355
368
|
// Worker configuration updated in persistence and memory
|
|
356
369
|
Logger.info(`Worker configuration updated: ${name}`, {
|
|
357
370
|
updatedFields: Object.keys(updateData),
|
|
@@ -371,6 +384,43 @@ async function update(req, res) {
|
|
|
371
384
|
res.setStatus(500).json({ error: error.message });
|
|
372
385
|
}
|
|
373
386
|
}
|
|
387
|
+
// Helpers extracted from update() to reduce complexity
|
|
388
|
+
/**
 * Check that a `processorSpec` supplied in an update payload can be resolved.
 *
 * @param updateData - Update payload; only its `processorSpec` entry is inspected.
 * @returns `true` when no string `processorSpec` is present, otherwise whether
 *          `WorkerFactory.resolveProcessorSpec` produced a truthy result.
 */
async function validateProcessorSpecIfNeeded(updateData) {
    const spec = updateData['processorSpec'];
    if (typeof spec !== 'string') {
        // Nothing to validate: the update does not touch the processor spec.
        return true;
    }
    return Boolean(await WorkerFactory.resolveProcessorSpec(spec));
}
|
|
395
|
+
/**
 * Best-effort write of the merged worker record through `WorkerFactory.update`.
 *
 * Failures are logged as warnings and swallowed, so the surrounding update
 * flow keeps going even when the write did not (fully) succeed.
 *
 * @param name - Worker name.
 * @param updatedRecord - Merged record to persist.
 * @param persistenceOverride - Optional persistence selector forwarded to the factory.
 * @param updateData - Fields supplied by the request; used only for logging.
 */
async function persistUpdatedRecord(name, updatedRecord, persistenceOverride, updateData) {
    try {
        await WorkerFactory.update(name, updatedRecord, persistenceOverride);
        const changedFields = Object.keys(updateData);
        Logger.info(`Worker ${name} persistence updated with fields:`, changedFields);
    }
    catch (writeError) {
        // Intentionally not rethrown: the caller proceeds regardless.
        Logger.warn(`Failed to persist some updates for ${name}`, writeError);
    }
}
|
|
405
|
+
async function restartIfNeeded(name, currentInstance, updatedRecord, currentRecord, persistenceOverride) {
|
|
406
|
+
if (!currentInstance ||
|
|
407
|
+
currentInstance.status !== 'running' ||
|
|
408
|
+
updatedRecord.activeStatus === false ||
|
|
409
|
+
currentRecord.activeStatus === false) {
|
|
410
|
+
Logger.info(`Worker ${name} is not running (status: ${currentInstance?.status || 'not found'}), skipping restart`);
|
|
411
|
+
return undefined;
|
|
412
|
+
}
|
|
413
|
+
try {
|
|
414
|
+
Logger.info(`Restarting worker ${name} to apply configuration changes`);
|
|
415
|
+
await WorkerFactory.restart(name, persistenceOverride);
|
|
416
|
+
return undefined;
|
|
417
|
+
}
|
|
418
|
+
catch (err) {
|
|
419
|
+
const restartError = err.message;
|
|
420
|
+
Logger.warn(`Failed to restart worker ${name} after update`, err);
|
|
421
|
+
return restartError;
|
|
422
|
+
}
|
|
423
|
+
}
|
|
374
424
|
/**
|
|
375
425
|
* Get worker status
|
|
376
426
|
* @param req.params.name - Worker name
|
|
@@ -536,7 +586,7 @@ async function getSlaStatus(req, res) {
|
|
|
536
586
|
catch (error) {
|
|
537
587
|
Logger.error('WorkerController.getSlaStatus failed', error);
|
|
538
588
|
if (error.message.includes('SLA config not found')) {
|
|
539
|
-
res.setStatus(
|
|
589
|
+
res.setStatus(400).json({ error: 'SLA config not found for worker' });
|
|
540
590
|
}
|
|
541
591
|
else {
|
|
542
592
|
res.setStatus(500).json({ error: error.message });
|
|
@@ -1116,61 +1166,59 @@ async function monitoringSummary(_req, res) {
|
|
|
1116
1166
|
res.setStatus(500).json({ error: error.message });
|
|
1117
1167
|
}
|
|
1118
1168
|
}
|
|
1169
|
+
// Milliseconds between SSE keep-alive pings. Falls back to 15000 (the value this
// handler previously hard-coded) when the configured value is missing or not a
// positive number; setInterval would otherwise treat the delay as ~1ms and
// flood every connected client with pings.
const SSE_HEARTBEAT_INTERVAL = Number(Env.SSE_HEARTBEAT_INTERVAL) > 0 ? Number(Env.SSE_HEARTBEAT_INTERVAL) : 15000;
|
|
1119
1170
|
/**
|
|
1120
1171
|
* SSE endpoint: stream worker and monitoring events
|
|
1121
1172
|
* GET /api/workers/events
|
|
1122
1173
|
*/
|
|
1123
1174
|
const eventsStream = async (_req, res) => {
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1175
|
+
try {
|
|
1176
|
+
const raw = res.getRaw();
|
|
1177
|
+
raw.writeHead(200, {
|
|
1178
|
+
'Content-Type': 'text/event-stream',
|
|
1179
|
+
'Cache-Control': 'no-cache, no-transform',
|
|
1180
|
+
Connection: 'keep-alive',
|
|
1181
|
+
'X-Accel-Buffering': 'no',
|
|
1182
|
+
});
|
|
1183
|
+
let closed = false;
|
|
1184
|
+
const send = (payload) => {
|
|
1185
|
+
if (closed)
|
|
1186
|
+
return;
|
|
1187
|
+
try {
|
|
1188
|
+
const data = JSON.stringify(payload);
|
|
1189
|
+
raw.write(`data: ${data}\n\n`);
|
|
1190
|
+
}
|
|
1191
|
+
catch (err) {
|
|
1192
|
+
Logger.error('WorkerController.eventsStream serialization failed', err);
|
|
1193
|
+
}
|
|
1194
|
+
};
|
|
1195
|
+
// Send hello immediately
|
|
1196
|
+
send({ type: 'hello', ts: new Date().toISOString() });
|
|
1197
|
+
// Defined subscription callback
|
|
1198
|
+
const onSnapshot = (data) => {
|
|
1199
|
+
send(data);
|
|
1200
|
+
};
|
|
1201
|
+
// Subscribe to centralized service
|
|
1202
|
+
WorkerMonitoringService.subscribe(onSnapshot);
|
|
1203
|
+
// Heartbeat to keep connection alive
|
|
1204
|
+
const hb = setInterval(() => {
|
|
1205
|
+
if (!closed)
|
|
1206
|
+
raw.write(': ping\n\n');
|
|
1207
|
+
}, SSE_HEARTBEAT_INTERVAL);
|
|
1208
|
+
// Clean up when client disconnects
|
|
1209
|
+
raw.on('close', () => {
|
|
1210
|
+
closed = true;
|
|
1211
|
+
clearInterval(hb);
|
|
1212
|
+
WorkerMonitoringService.unsubscribe(onSnapshot);
|
|
1213
|
+
});
|
|
1214
|
+
}
|
|
1215
|
+
catch (error) {
|
|
1216
|
+
Logger.error('WorkerController.eventsStream failed', error);
|
|
1217
|
+
const raw = res.getRaw && typeof res.getRaw === 'function' ? res.getRaw() : null;
|
|
1218
|
+
if (!raw?.headersSent) {
|
|
1219
|
+
res.setStatus(500).json({ error: error.message });
|
|
1161
1220
|
}
|
|
1162
|
-
}
|
|
1163
|
-
// Heartbeat to keep connection alive
|
|
1164
|
-
const hb = setInterval(() => {
|
|
1165
|
-
if (!closed)
|
|
1166
|
-
raw.write(': ping\n\n');
|
|
1167
|
-
}, 15000);
|
|
1168
|
-
// Clean up when client disconnects
|
|
1169
|
-
raw.on('close', () => {
|
|
1170
|
-
closed = true;
|
|
1171
|
-
clearInterval(interval);
|
|
1172
|
-
clearInterval(hb);
|
|
1173
|
-
});
|
|
1221
|
+
}
|
|
1174
1222
|
};
|
|
1175
1223
|
/**
|
|
1176
1224
|
* Builders that group related handlers to keep the create() method small.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Payload handed to monitoring subscribers.
 * The `monitoring` and `workers` shapes are not declared in this file.
 */
type SnapshotData = {
    /** Event discriminator. */
    type: string;
    /** Timestamp string (presumably ISO-8601; confirm against the emitter). */
    ts: string;
    /** Monitoring payload; opaque at this level. */
    monitoring: unknown;
    /** Workers payload; opaque at this level. */
    workers: unknown;
};
|
|
7
|
+
/**
 * Subscription registry for monitoring snapshots.
 * Pass the same callback reference to `unsubscribe` that was given to `subscribe`.
 */
export declare const WorkerMonitoringService: Readonly<{
    /** Register `callback` to receive snapshot payloads. */
    subscribe(callback: (data: SnapshotData) => void): void;
    /** Remove a callback previously passed to `subscribe`. */
    unsubscribe(callback: (data: SnapshotData) => void): void;
}>;
|
|
11
|
+
export {};
|