@zintrust/workers 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +861 -0
- package/dist/AnomalyDetection.d.ts +102 -0
- package/dist/AnomalyDetection.js +321 -0
- package/dist/AutoScaler.d.ts +127 -0
- package/dist/AutoScaler.js +425 -0
- package/dist/BroadcastWorker.d.ts +21 -0
- package/dist/BroadcastWorker.js +24 -0
- package/dist/CanaryController.d.ts +103 -0
- package/dist/CanaryController.js +380 -0
- package/dist/ChaosEngineering.d.ts +79 -0
- package/dist/ChaosEngineering.js +216 -0
- package/dist/CircuitBreaker.d.ts +106 -0
- package/dist/CircuitBreaker.js +374 -0
- package/dist/ClusterLock.d.ts +90 -0
- package/dist/ClusterLock.js +385 -0
- package/dist/ComplianceManager.d.ts +177 -0
- package/dist/ComplianceManager.js +556 -0
- package/dist/DatacenterOrchestrator.d.ts +133 -0
- package/dist/DatacenterOrchestrator.js +404 -0
- package/dist/DeadLetterQueue.d.ts +122 -0
- package/dist/DeadLetterQueue.js +539 -0
- package/dist/HealthMonitor.d.ts +42 -0
- package/dist/HealthMonitor.js +301 -0
- package/dist/MultiQueueWorker.d.ts +89 -0
- package/dist/MultiQueueWorker.js +277 -0
- package/dist/NotificationWorker.d.ts +21 -0
- package/dist/NotificationWorker.js +23 -0
- package/dist/Observability.d.ts +153 -0
- package/dist/Observability.js +530 -0
- package/dist/PluginManager.d.ts +123 -0
- package/dist/PluginManager.js +392 -0
- package/dist/PriorityQueue.d.ts +117 -0
- package/dist/PriorityQueue.js +244 -0
- package/dist/ResourceMonitor.d.ts +164 -0
- package/dist/ResourceMonitor.js +605 -0
- package/dist/SLAMonitor.d.ts +110 -0
- package/dist/SLAMonitor.js +274 -0
- package/dist/WorkerFactory.d.ts +193 -0
- package/dist/WorkerFactory.js +1507 -0
- package/dist/WorkerInit.d.ts +85 -0
- package/dist/WorkerInit.js +223 -0
- package/dist/WorkerMetrics.d.ts +114 -0
- package/dist/WorkerMetrics.js +509 -0
- package/dist/WorkerRegistry.d.ts +145 -0
- package/dist/WorkerRegistry.js +319 -0
- package/dist/WorkerShutdown.d.ts +61 -0
- package/dist/WorkerShutdown.js +159 -0
- package/dist/WorkerVersioning.d.ts +107 -0
- package/dist/WorkerVersioning.js +300 -0
- package/dist/build-manifest.json +462 -0
- package/dist/config/workerConfig.d.ts +3 -0
- package/dist/config/workerConfig.js +19 -0
- package/dist/createQueueWorker.d.ts +23 -0
- package/dist/createQueueWorker.js +113 -0
- package/dist/dashboard/index.d.ts +1 -0
- package/dist/dashboard/index.js +1 -0
- package/dist/dashboard/types.d.ts +117 -0
- package/dist/dashboard/types.js +1 -0
- package/dist/dashboard/workers-api.d.ts +4 -0
- package/dist/dashboard/workers-api.js +638 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
- package/dist/dashboard/workers-dashboard-ui.js +1026 -0
- package/dist/dashboard/workers-dashboard.d.ts +4 -0
- package/dist/dashboard/workers-dashboard.js +904 -0
- package/dist/helper/index.d.ts +5 -0
- package/dist/helper/index.js +10 -0
- package/dist/http/WorkerApiController.d.ts +38 -0
- package/dist/http/WorkerApiController.js +312 -0
- package/dist/http/WorkerController.d.ts +374 -0
- package/dist/http/WorkerController.js +1351 -0
- package/dist/http/middleware/CustomValidation.d.ts +92 -0
- package/dist/http/middleware/CustomValidation.js +270 -0
- package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
- package/dist/http/middleware/DatacenterValidator.js +94 -0
- package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
- package/dist/http/middleware/EditWorkerValidation.js +55 -0
- package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
- package/dist/http/middleware/FeaturesValidator.js +60 -0
- package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
- package/dist/http/middleware/InfrastructureValidator.js +226 -0
- package/dist/http/middleware/OptionsValidator.d.ts +3 -0
- package/dist/http/middleware/OptionsValidator.js +112 -0
- package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
- package/dist/http/middleware/PayloadSanitizer.js +42 -0
- package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
- package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
- package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/QueueNameSanitizer.js +45 -0
- package/dist/http/middleware/ValidateDriver.d.ts +7 -0
- package/dist/http/middleware/ValidateDriver.js +20 -0
- package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
- package/dist/http/middleware/VersionSanitizer.js +25 -0
- package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
- package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
- package/dist/http/middleware/WorkerValidationChain.js +185 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +48 -0
- package/dist/routes/workers.d.ts +12 -0
- package/dist/routes/workers.js +81 -0
- package/dist/storage/WorkerStore.d.ts +45 -0
- package/dist/storage/WorkerStore.js +195 -0
- package/dist/type.d.ts +76 -0
- package/dist/type.js +1 -0
- package/dist/ui/router/ui.d.ts +3 -0
- package/dist/ui/router/ui.js +83 -0
- package/dist/ui/types/worker-ui.d.ts +229 -0
- package/dist/ui/types/worker-ui.js +5 -0
- package/package.json +53 -0
- package/src/AnomalyDetection.ts +434 -0
- package/src/AutoScaler.ts +654 -0
- package/src/BroadcastWorker.ts +34 -0
- package/src/CanaryController.ts +531 -0
- package/src/ChaosEngineering.ts +301 -0
- package/src/CircuitBreaker.ts +495 -0
- package/src/ClusterLock.ts +499 -0
- package/src/ComplianceManager.ts +815 -0
- package/src/DatacenterOrchestrator.ts +561 -0
- package/src/DeadLetterQueue.ts +733 -0
- package/src/HealthMonitor.ts +390 -0
- package/src/MultiQueueWorker.ts +431 -0
- package/src/NotificationWorker.ts +33 -0
- package/src/Observability.ts +696 -0
- package/src/PluginManager.ts +551 -0
- package/src/PriorityQueue.ts +351 -0
- package/src/ResourceMonitor.ts +769 -0
- package/src/SLAMonitor.ts +408 -0
- package/src/WorkerFactory.ts +2108 -0
- package/src/WorkerInit.ts +313 -0
- package/src/WorkerMetrics.ts +709 -0
- package/src/WorkerRegistry.ts +443 -0
- package/src/WorkerShutdown.ts +210 -0
- package/src/WorkerVersioning.ts +422 -0
- package/src/config/workerConfig.ts +25 -0
- package/src/createQueueWorker.ts +174 -0
- package/src/dashboard/index.ts +6 -0
- package/src/dashboard/types.ts +141 -0
- package/src/dashboard/workers-api.ts +785 -0
- package/src/dashboard/zintrust.svg +30 -0
- package/src/helper/index.ts +11 -0
- package/src/http/WorkerApiController.ts +369 -0
- package/src/http/WorkerController.ts +1512 -0
- package/src/http/middleware/CustomValidation.ts +360 -0
- package/src/http/middleware/DatacenterValidator.ts +124 -0
- package/src/http/middleware/EditWorkerValidation.ts +74 -0
- package/src/http/middleware/FeaturesValidator.ts +82 -0
- package/src/http/middleware/InfrastructureValidator.ts +295 -0
- package/src/http/middleware/OptionsValidator.ts +144 -0
- package/src/http/middleware/PayloadSanitizer.ts +52 -0
- package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
- package/src/http/middleware/QueueNameSanitizer.ts +55 -0
- package/src/http/middleware/ValidateDriver.ts +29 -0
- package/src/http/middleware/VersionSanitizer.ts +30 -0
- package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
- package/src/http/middleware/WorkerValidationChain.ts +230 -0
- package/src/index.ts +98 -0
- package/src/routes/workers.ts +154 -0
- package/src/storage/WorkerStore.ts +240 -0
- package/src/type.ts +89 -0
- package/src/types/queue-monitor.d.ts +38 -0
- package/src/types/queue-redis.d.ts +38 -0
- package/src/ui/README.md +13 -0
- package/src/ui/components/JsonEditor.js +670 -0
- package/src/ui/components/JsonViewer.js +387 -0
- package/src/ui/components/WorkerCard.js +178 -0
- package/src/ui/components/WorkerExpandPanel.js +257 -0
- package/src/ui/components/fetcher.js +42 -0
- package/src/ui/components/sla-scorecard.js +32 -0
- package/src/ui/components/styles.css +30 -0
- package/src/ui/components/table-expander.js +34 -0
- package/src/ui/integration/worker-ui-integration.js +565 -0
- package/src/ui/router/ui.ts +99 -0
- package/src/ui/services/workerApi.js +240 -0
- package/src/ui/types/worker-ui.ts +283 -0
- package/src/ui/utils/jsonValidator.js +444 -0
- package/src/ui/workers/index.html +202 -0
- package/src/ui/workers/main.js +1781 -0
- package/src/ui/workers/styles.css +1350 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SLA Monitor
|
|
3
|
+
* SLA compliance checks and violation tracking for workers
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import { ErrorFactory, Logger, generateUuid } from '@zintrust/core';
|
|
7
|
+
import { HealthMonitor } from './HealthMonitor';
|
|
8
|
+
import { WorkerMetrics } from './WorkerMetrics';
|
|
9
|
+
/** Length of the window `checkCompliance` evaluates: one hour, in milliseconds. */
const DEFAULT_LOOKBACK_MS = 1000 * 60 * 60;
/** SLA configuration per worker, keyed by worker name (written by `defineSLA`). */
const slaConfigs = new Map();
/** Recorded violations per worker, keyed by worker name; each value is an array. */
const violationHistory = new Map();
/** Time (a `Date`) of the most recent alert per worker, keyed by worker name. */
const lastAlertAt = new Map();
|
|
13
|
+
/**
 * Select the value found at a given quantile of a list of metric points.
 *
 * The points are ordered by ascending `value` (on a copy; the input is left
 * untouched) and the entry at rank `floor(percentile * count)` is returned,
 * with the rank clamped into `[0, count - 1]`.
 *
 * @param points - Objects exposing a numeric `value` property.
 * @param percentile - Fraction used to pick the rank, e.g. `0.99`.
 * @returns The selected `value`, or `0` when `points` is empty or the selected
 *          entry has no `value`.
 */
const quantile = (points, percentile) => {
    const count = points.length;
    if (count === 0) {
        return 0;
    }
    const ascending = Array.from(points);
    ascending.sort((left, right) => left.value - right.value);
    const rawRank = Math.floor(percentile * count);
    const rank = Math.min(count - 1, Math.max(0, rawRank));
    return ascending[rank]?.value ?? 0;
};
|
|
20
|
+
/**
 * Tell whether a "lower is better" metric sits in the warning band: at or
 * below its ceiling, but within the top 10% of it.
 *
 * A non-positive ceiling has no band that can be approached, so it never
 * warns. Without that guard a zero-tolerance ceiling (`threshold === 0`) with
 * a perfect reading (`value === 0`) was reported as a warning.
 *
 * @param value - Observed metric value.
 * @param threshold - Maximum allowed value.
 * @returns `true` when `threshold > 0` and `threshold * 0.9 <= value <= threshold`.
 */
const warnOnMaxThreshold = (value, threshold) => threshold > 0 && value >= threshold * 0.9 && value <= threshold;
|
|
21
|
+
/**
 * Tell whether a "higher is better" metric sits in the warning band: at or
 * above its floor, but no more than 10% above it.
 *
 * A non-positive floor has no band that can be approached, so it never warns.
 * Without that guard a floor of `0` with a reading of `0` was reported as a
 * warning even though no minimum is being demanded.
 *
 * @param value - Observed metric value.
 * @param threshold - Minimum required value.
 * @returns `true` when `threshold > 0` and `threshold <= value <= threshold * 1.1`.
 */
const warnOnMinThreshold = (value, threshold) => threshold > 0 && value <= threshold * 1.1 && value >= threshold;
|
|
22
|
+
/**
 * Grade a "lower is better" metric against its ceiling.
 *
 * @param value - Observed metric value.
 * @param threshold - Maximum allowed value.
 * @returns `'fail'` when `value` exceeds `threshold`; otherwise `'warn'` when
 *          `warnOnMaxThreshold` reports the value as close to the ceiling,
 *          else `'pass'`.
 */
const evaluateMaxThreshold = (value, threshold) => {
    const exceeded = value > threshold;
    if (exceeded) {
        return 'fail';
    }
    return warnOnMaxThreshold(value, threshold) ? 'warn' : 'pass';
};
|
|
29
|
+
/**
 * Grade a "higher is better" metric against its floor.
 *
 * @param value - Observed metric value.
 * @param threshold - Minimum required value.
 * @returns `'fail'` when `value` is below `threshold`; otherwise `'warn'` when
 *          `warnOnMinThreshold` reports the value as close to the floor,
 *          else `'pass'`.
 */
const evaluateMinThreshold = (value, threshold) => {
    const belowFloor = value < threshold;
    if (belowFloor) {
        return 'fail';
    }
    return warnOnMinThreshold(value, threshold) ? 'warn' : 'pass';
};
|
|
36
|
+
/**
 * Append a violation to its worker's history in `violationHistory`.
 *
 * The history is capped at 1000 entries per worker: once the cap is exceeded
 * the oldest entry is dropped.
 *
 * @param violation - Violation record; `violation.workerName` selects the history.
 */
const storeViolation = (violation) => {
    const entries = violationHistory.get(violation.workerName) ?? [];
    entries.push(violation);
    const overCapacity = entries.length > 1000;
    if (overCapacity) {
        // Drop the oldest record so the list never grows beyond the cap.
        entries.shift();
    }
    violationHistory.set(violation.workerName, entries);
};
|
|
44
|
+
/**
 * Compute availability as the percentage of health checks that count as "up".
 *
 * A check is "up" when its `status` is `'healthy'` or `'degraded'`.
 *
 * @param checks - Health check records exposing a `status` property.
 * @returns A percentage in `[0, 100]`; `0` when `checks` is empty.
 */
const calculateAvailability = (checks) => {
    const total = checks.length;
    if (total === 0) {
        return 0;
    }
    const up = checks.reduce((count, check) => {
        const isUp = check.status === 'healthy' || check.status === 'degraded';
        return isUp ? count + 1 : count;
    }, 0);
    return (up / total) * 100;
};
|
|
50
|
+
/**
 * Find the longest completed outage in a series of health checks.
 *
 * Checks are ordered by `timestamp` (on a copy). An outage starts at the first
 * `'critical'` check and ends at the next check with any other status. An
 * outage that is still open at the end of the series is not counted.
 *
 * @param checks - Health check records with a `status` and a `Date` `timestamp`.
 * @returns The longest outage duration in whole seconds (rounded), or `0` when
 *          fewer than two checks are supplied or no outage completed.
 */
const calculateRecoveryTime = (checks) => {
    if (checks.length < 2) {
        return 0;
    }
    const timeline = Array.from(checks);
    timeline.sort((earlier, later) => earlier.timestamp.getTime() - later.timestamp.getTime());
    let outageStartedAt = null;
    let longestSeconds = 0;
    for (let i = 0; i < timeline.length; i += 1) {
        const entry = timeline[i];
        if (entry.status === 'critical') {
            // Only the first critical check of a run marks the outage start.
            if (outageStartedAt === null) {
                outageStartedAt = entry.timestamp;
            }
            continue;
        }
        if (outageStartedAt === null) {
            continue;
        }
        const elapsedSeconds = (entry.timestamp.getTime() - outageStartedAt.getTime()) / 1000;
        longestSeconds = Math.max(longestSeconds, elapsedSeconds);
        outageStartedAt = null;
    }
    return Math.round(longestSeconds);
};
|
|
69
|
+
/**
 * Gather the health checks to evaluate for a worker over a time range.
 *
 * Uses the entries of `HealthMonitor.getHealthHistory` whose timestamp lies in
 * `[range.start, range.end]`. When none match, falls back to the single result
 * of `HealthMonitor.getCurrentHealth`; if that call throws, the error is
 * logged at debug level and an empty list is returned.
 *
 * @param workerName - Worker whose health is inspected.
 * @param range - Object with `start` and `end` dates (both inclusive).
 * @returns A promise of the list of health checks (possibly empty).
 */
const loadHealthChecks = async (workerName, range) => {
    const isInRange = (check) => {
        const takenAt = check.timestamp.getTime();
        return takenAt >= range.start.getTime() && takenAt <= range.end.getTime();
    };
    const inRange = HealthMonitor.getHealthHistory(workerName).filter(isInRange);
    if (inRange.length > 0) {
        return inRange;
    }
    let snapshot;
    try {
        snapshot = HealthMonitor.getCurrentHealth(workerName);
    }
    catch (error) {
        Logger.debug('Failed to get current health for SLA check', error);
        return [];
    }
    return snapshot ? [snapshot] : [];
};
|
|
84
|
+
/**
 * Fetch the hourly data points of one metric for a worker.
 *
 * @param workerName - Worker to query.
 * @param metricType - Metric identifier forwarded to `WorkerMetrics.query`.
 * @param range - Object with `start` and `end` dates bounding the query.
 * @returns A promise of the `points` property of the query result.
 */
const getMetricPoints = async (workerName, metricType, range) => {
    const request = {
        workerName,
        metricType,
        granularity: 'hourly',
        startDate: range.start,
        endDate: range.end,
    };
    const { points } = await WorkerMetrics.query(request);
    return points;
};
|
|
94
|
+
/**
 * Collect the five measurements an SLA is evaluated against.
 *
 * Duration points, the hourly `processed` and `errors` aggregates and the
 * health checks are requested concurrently, then reduced to:
 * - `latencyP99`: `quantile(durationPoints, 0.99)`;
 * - `throughput`: processed total per minute of the range (the range is
 *   treated as at least one minute long);
 * - `availability`: see `calculateAvailability`;
 * - `errorRate`: errors as a percentage of the processed total, `0` when
 *   nothing was processed;
 * - `recoveryTime`: see `calculateRecoveryTime`.
 *
 * @param workerName - Worker to measure.
 * @param range - Object with `start` and `end` dates.
 * @returns A promise of `{ latencyP99, throughput, availability, errorRate, recoveryTime }`.
 */
const collectSlaMetrics = async (workerName, range) => {
    const hourlyAggregate = (metricType) => WorkerMetrics.aggregate({
        workerName,
        metricType,
        granularity: 'hourly',
        startDate: range.start,
        endDate: range.end,
    });
    const [durationPoints, processedAgg, errorAgg, healthChecks] = await Promise.all([
        getMetricPoints(workerName, 'duration', range),
        hourlyAggregate('processed'),
        hourlyAggregate('errors'),
        loadHealthChecks(workerName, range),
    ]);
    const elapsedMs = range.end.getTime() - range.start.getTime();
    const minutes = Math.max(1, elapsedMs / 60000);
    const processedTotal = processedAgg.total;
    return {
        latencyP99: quantile(durationPoints, 0.99),
        throughput: processedTotal / minutes,
        availability: calculateAvailability(healthChecks),
        errorRate: processedTotal > 0 ? (errorAgg.total / processedTotal) * 100 : 0,
        recoveryTime: calculateRecoveryTime(healthChecks),
    };
};
|
|
121
|
+
/**
 * Pair every collected measurement with its configured threshold and grade it.
 *
 * `latencyP99`, `errorRate` and `recoveryTime` are graded as ceilings
 * (`evaluateMaxThreshold`); `throughput` and `availability` as floors
 * (`evaluateMinThreshold`).
 *
 * Availability is a percentage, so 100 is the best reading possible. The floor
 * warning band reaches 10% above the threshold, which for any floor above
 * roughly 90.9 includes 100 and used to grade a fully available worker as
 * `'warn'`. A reading of 100 or more that would only warn is graded `'pass'`.
 *
 * @param metrics - `{ latencyP99, throughput, availability, errorRate, recoveryTime }`.
 * @param config - SLA config; thresholds are read from `config.metrics`.
 * @returns An object keyed by metric name, each entry `{ value, threshold, status }`.
 */
const buildChecks = (metrics, config) => {
    const limits = config.metrics;
    let availabilityStatus = evaluateMinThreshold(metrics.availability, limits.minAvailability);
    if (availabilityStatus === 'warn' && metrics.availability >= 100) {
        // Cannot be "close to breaching" when the metric is at its maximum.
        availabilityStatus = 'pass';
    }
    return {
        latencyP99: {
            value: metrics.latencyP99,
            threshold: limits.maxLatencyP99,
            status: evaluateMaxThreshold(metrics.latencyP99, limits.maxLatencyP99),
        },
        throughput: {
            value: metrics.throughput,
            threshold: limits.minThroughput,
            status: evaluateMinThreshold(metrics.throughput, limits.minThroughput),
        },
        availability: {
            value: metrics.availability,
            threshold: limits.minAvailability,
            status: availabilityStatus,
        },
        errorRate: {
            value: metrics.errorRate,
            threshold: limits.maxErrorRate,
            status: evaluateMaxThreshold(metrics.errorRate, limits.maxErrorRate),
        },
        recoveryTime: {
            value: metrics.recoveryTime,
            threshold: limits.maxRecoveryTime,
            status: evaluateMaxThreshold(metrics.recoveryTime, limits.maxRecoveryTime),
        },
    };
};
|
|
148
|
+
/**
 * Reduce the individual check results to one overall SLA status.
 *
 * @param checks - Object whose values each expose a `status` property.
 * @returns `'breach'` when any check is `'fail'`; otherwise `'warning'` when
 *          any check is `'warn'`; otherwise `'compliant'`.
 */
const buildSlaStatus = (checks) => {
    let sawWarning = false;
    for (const check of Object.values(checks)) {
        if (check.status === 'fail') {
            // A single failure decides the outcome; nothing else can change it.
            return 'breach';
        }
        if (check.status === 'warn') {
            sawWarning = true;
        }
    }
    return sawWarning ? 'warning' : 'compliant';
};
|
|
155
|
+
/**
 * Create a violation record from the supplied details.
 *
 * A fresh id (from `generateUuid`) and the current time are added to the
 * caller-provided fields.
 *
 * @param params - `{ workerName, metric, expected, actual, severity, message }`.
 * @returns The violation record, including `id` and `timestamp`.
 */
const buildViolation = (params) => {
    const id = generateUuid();
    const { workerName, metric, expected, actual, severity, message } = params;
    return {
        id,
        workerName,
        metric,
        expected,
        actual,
        timestamp: new Date(),
        severity,
        message,
    };
};
|
|
165
|
+
/**
 * SLA Monitor - Sealed namespace
 *
 * Frozen object exposing SLA definition, compliance evaluation, violation
 * lookup, reporting and alerting. All state is held in the module-level maps
 * `slaConfigs`, `violationHistory` and `lastAlertAt`.
 */
export const SLAMonitor = Object.freeze({
    /**
     * Define SLA for a worker
     *
     * Stores a shallow copy of `config` under `config.workerName`, replacing
     * any SLA previously stored for that name.
     */
    defineSLA(config) {
        slaConfigs.set(config.workerName, { ...config });
        Logger.info(`SLA defined for worker "${config.workerName}"`);
    },
    /**
     * Check SLA compliance for a worker
     *
     * Evaluates the last `DEFAULT_LOOKBACK_MS` milliseconds. Every check that
     * fails is recorded as a `'critical'` violation and passed to
     * `alertOnViolation`.
     *
     * @returns `{ workerName, status, evaluatedAt, checks }`.
     * @throws A not-found error (from `ErrorFactory`) when no SLA is defined
     *         for `workerName`.
     */
    async checkCompliance(workerName) {
        const config = slaConfigs.get(workerName);
        if (!config) {
            throw ErrorFactory.createNotFoundError(`SLA config not found for worker "${workerName}"`);
        }
        const range = {
            start: new Date(Date.now() - DEFAULT_LOOKBACK_MS),
            end: new Date(),
        };
        const metrics = await collectSlaMetrics(workerName, range);
        const checks = buildChecks(metrics, config);
        const status = buildSlaStatus(checks);
        for (const [metricKey, detail] of Object.entries(checks)) {
            if (detail.status !== 'fail') {
                continue;
            }
            const violation = buildViolation({
                workerName,
                metric: metricKey,
                expected: detail.threshold,
                actual: detail.value,
                severity: 'critical',
                message: `SLA breach for ${metricKey}: ${detail.value} (expected ${detail.threshold})`,
            });
            storeViolation(violation);
            SLAMonitor.alertOnViolation(violation);
        }
        return {
            workerName,
            status,
            evaluatedAt: new Date(),
            checks,
        };
    },
    /**
     * Get SLA violations for a worker
     *
     * @param workerName - Worker whose history is read.
     * @param timeRange - Object with `start` and `end` dates (both inclusive).
     * @returns A new array with the stored violations inside the range.
     */
    getViolations(workerName, timeRange) {
        const history = violationHistory.get(workerName) ?? [];
        return history.filter((violation) => violation.timestamp.getTime() >= timeRange.start.getTime() &&
            violation.timestamp.getTime() <= timeRange.end.getTime());
    },
    /**
     * Get SLA compliance report
     *
     * Rates are nominal: each worker is assumed to contribute five checks (one
     * per metric produced by `buildChecks`), and violations are counted from
     * the stored history inside `timeRange`.
     *
     * The per-worker rate is computed against that worker's five checks; it
     * previously divided by the fleet-wide check count, overstating every
     * worker's compliance as soon as more than one SLA was defined. Because
     * the history can hold more violations than the nominal check count, both
     * rates are floored at 0 instead of going negative.
     */
    getComplianceReport(timeRange) {
        const checksPerWorker = 5;
        const toRate = (checkCount, breachCount) => checkCount > 0 ? Math.max(0, (checkCount - breachCount) / checkCount) : 1;
        const violations = [];
        const perWorker = [];
        for (const workerName of slaConfigs.keys()) {
            const workerViolations = SLAMonitor.getViolations(workerName, timeRange);
            violations.push(...workerViolations);
            const breachCount = workerViolations.length;
            perWorker.push({
                workerName,
                violations: breachCount,
                complianceRate: toRate(checksPerWorker, breachCount),
            });
        }
        const totalChecks = slaConfigs.size * checksPerWorker;
        const totalViolations = violations.length;
        const complianceRate = toRate(totalChecks, totalViolations);
        return {
            generatedAt: new Date(),
            period: timeRange,
            totalWorkers: slaConfigs.size,
            totalChecks,
            totalViolations,
            complianceRate,
            violations,
            perWorker,
        };
    },
    /**
     * Alert on SLA violation
     *
     * Does nothing when the worker has no SLA config, or when the previous
     * alert for the same worker is more recent than `config.alerting.cooldown`
     * minutes. Otherwise records the alert time and logs a warning that
     * includes the configured alert channels.
     */
    alertOnViolation(violation) {
        const config = slaConfigs.get(violation.workerName);
        if (!config)
            return;
        const lastAlert = lastAlertAt.get(violation.workerName);
        // The cooldown is configured in minutes.
        const cooldownMs = config.alerting.cooldown * 60 * 1000;
        if (lastAlert && Date.now() - lastAlert.getTime() < cooldownMs) {
            return;
        }
        lastAlertAt.set(violation.workerName, new Date());
        Logger.warn(`SLA violation for ${violation.workerName}`, {
            metric: violation.metric,
            expected: violation.expected,
            actual: violation.actual,
            severity: violation.severity,
            channels: config.alerting.channels,
        });
    },
});
export default SLAMonitor;
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker Factory
|
|
3
|
+
* Central factory for creating workers with all advanced features
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import { type IDatabase, type RedisConfig } from '@zintrust/core';
|
|
7
|
+
import { Worker, type Job, type WorkerOptions } from 'bullmq';
|
|
8
|
+
import { type AutoScalerConfig } from './AutoScaler';
|
|
9
|
+
import { type ComplianceConfig } from './ComplianceManager';
|
|
10
|
+
import { type RetentionPolicy } from './DeadLetterQueue';
|
|
11
|
+
import { type ObservabilityConfig } from './Observability';
|
|
12
|
+
import { type WorkerRecord } from './storage/WorkerStore';
|
|
13
|
+
/**
 * Named constants for the states a worker can be reported in.
 */
export declare const WorkerCreationStatus: Readonly<{
    CREATING: 'creating';
    CONNECTING: 'connecting';
    STARTING: 'starting';
    RUNNING: 'running';
    FAILED: 'failed';
    STOPPED: 'stopped';
}>;
/**
 * Union of the string values held by the `WorkerCreationStatus` constant.
 */
export type WorkerCreationStatus = (typeof WorkerCreationStatus)[keyof typeof WorkerCreationStatus];
|
|
22
|
+
/**
 * Configuration object accepted by `WorkerFactory.create`.
 */
export type WorkerFactoryConfig = {
    /** Worker name. */
    name: string;
    version?: string;
    /** Name of the queue the worker is attached to. */
    queueName: string;
    /** Function called with a job; its promise carries the job's outcome. */
    processor: (job: Job) => Promise<unknown>;
    /** NOTE(review): presumably a module path for the processor — confirm against `resolveProcessorPath`. */
    processorPath?: string;
    /** BullMQ worker options. */
    options?: WorkerOptions;
    autoStart?: boolean;
    /** Optional per-subsystem settings. */
    infrastructure?: {
        redis?: RedisConfigInput;
        persistence?: WorkerPersistenceConfig;
        deadLetterQueue?: {
            redis?: RedisConfigInput;
            policy: RetentionPolicy;
        };
        compliance?: {
            redis?: RedisConfigInput;
            config?: Partial<ComplianceConfig>;
        };
        observability?: ObservabilityConfigInput;
        autoScaler?: AutoScalerConfig;
    };
    /** Optional boolean feature flags, one per subsystem. */
    features?: Partial<Record<'clustering' | 'metrics' | 'autoScaling' | 'circuitBreaker' | 'deadLetterQueue' | 'resourceMonitoring' | 'compliance' | 'observability' | 'plugins' | 'versioning' | 'datacenterOrchestration', boolean>>;
    /** Optional region placement settings. */
    datacenter?: {
        primaryRegion: string;
        secondaryRegions?: Array<string>;
        affinityRules?: {
            preferLocal?: boolean;
            maxLatency?: number;
            avoidRegions?: Array<string>;
        };
    };
};
|
|
67
|
+
/** Connection states a worker instance may report. */
type WorkerConnectionState = 'disconnected' | 'connecting' | 'connected' | 'error';
/**
 * A worker as returned by `WorkerFactory.get`: the BullMQ worker together with
 * the config it was created from and its status fields.
 */
export type WorkerInstance = {
    /** The BullMQ worker object. */
    worker: Worker;
    /** Configuration associated with this worker. */
    config: WorkerFactoryConfig;
    startedAt: Date;
    status: WorkerCreationStatus;
    lastHealthCheck?: Date;
    connectionState?: WorkerConnectionState;
};
|
|
75
|
+
/**
 * Redis settings marked with `env: true`.
 *
 * NOTE(review): every field, including `port` and `db`, is a string, so these
 * presumably name environment variables rather than carry values — confirm.
 */
type RedisEnvConfig = {
    env: true;
    host?: string;
    port?: string;
    password?: string;
    db?: string;
};
/** Either a full `RedisConfig` or an `env: true` variant ({@link RedisEnvConfig}). */
type RedisConfigInput = RedisEnvConfig | RedisConfig;
|
|
83
|
+
/** Persistence variant selected by `driver: 'memory'`; takes no further options. */
type MemoryPersistenceConfig = {
    driver: 'memory';
};
/** Persistence variant selected by `driver: 'redis'`. */
type RedisPersistenceConfig = {
    driver: 'redis';
    redis?: RedisConfigInput;
    keyPrefix?: string;
};
/** Persistence variant selected by `driver: 'database'`. */
type DatabasePersistenceConfig = {
    driver: 'database';
    client?: IDatabase | string;
    connection?: string;
    table?: string;
};
/**
 * Worker persistence settings, discriminated by the `driver` field.
 */
export type WorkerPersistenceConfig = MemoryPersistenceConfig | RedisPersistenceConfig | DatabasePersistenceConfig;
|
|
95
|
+
/** Observability settings in which every section may be given partially. */
type PartialObservabilityConfig = {
    enabled?: boolean;
    prometheus?: Partial<ObservabilityConfig['prometheus']>;
    openTelemetry?: Partial<ObservabilityConfig['openTelemetry']>;
    datadog?: Partial<ObservabilityConfig['datadog']>;
};
/** Either a complete `ObservabilityConfig` or a partially specified one. */
type ObservabilityConfigInput = ObservabilityConfig | PartialObservabilityConfig;
|
|
101
|
+
/** The processor function type declared on `WorkerFactoryConfig`. */
type Processor = WorkerFactoryConfig['processor'];
/**
 * Callback that maps a name to a processor, synchronously or through a
 * promise; `undefined` is an allowed result.
 */
type ProcessorResolver = (name: string) => Processor | undefined | Promise<Processor | undefined>;
/** Paging and search options accepted by the persisted-worker listings. */
type PersistedListOptions = {
    offset?: number;
    limit?: number;
    search?: string;
};
/**
 * Worker Factory - Sealed namespace
 */
export declare const WorkerFactory: Readonly<{
    registerProcessor: (name: string, processor: Processor) => void;
    registerProcessors: (processors: Record<string, Processor>) => void;
    registerProcessorPaths: (paths: Record<string, string>) => void;
    registerProcessorResolver: (resolver: ProcessorResolver) => void;
    resolveProcessorPath: (modulePath: string) => Promise<Processor | undefined>;
    /**
     * Create new worker with full setup
     */
    create(config: WorkerFactoryConfig): Promise<Worker>;
    /**
     * Get worker instance
     */
    get(name: string): WorkerInstance | null;
    /**
     * Update worker status directly (used by HealthMonitor)
     */
    updateStatus(name: string, status: string, error?: Error | string): Promise<void>;
    /**
     * Stop worker
     */
    stop(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Restart worker
     */
    restart(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Pause worker
     */
    pause(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Resume worker
     */
    resume(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Update auto-start for persisted worker
     */
    setAutoStart(name: string, autoStart: boolean, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Update persisted worker record and in-memory config if running.
     */
    update(name: string, patch: Partial<WorkerRecord> | WorkerRecord, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Start worker
     */
    start(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * List all workers
     */
    list(): string[];
    /**
     * List all persisted workers
     */
    listPersisted(persistenceOverride?: WorkerPersistenceConfig, options?: PersistedListOptions): Promise<string[]>;
    /**
     * List all persisted workers as full records
     */
    listPersistedRecords(persistenceOverride?: WorkerPersistenceConfig, options?: PersistedListOptions): Promise<WorkerRecord[]>;
    /**
     * Start a worker from persisted storage when it is not registered.
     */
    startFromPersisted(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Get persisted worker record
     */
    getPersisted(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<WorkerRecord | null>;
    /**
     * Remove worker
     */
    remove(name: string, persistenceOverride?: WorkerPersistenceConfig): Promise<void>;
    /**
     * Get worker metrics
     */
    getMetrics(name: string): Promise<unknown>;
    /**
     * Get worker health
     */
    getHealth(name: string): Promise<unknown>;
    /**
     * Shutdown all workers
     */
    shutdown(): Promise<void>;
}>;
export {};
|