@zintrust/workers 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +861 -0
- package/dist/AnomalyDetection.d.ts +102 -0
- package/dist/AnomalyDetection.js +321 -0
- package/dist/AutoScaler.d.ts +127 -0
- package/dist/AutoScaler.js +425 -0
- package/dist/BroadcastWorker.d.ts +21 -0
- package/dist/BroadcastWorker.js +24 -0
- package/dist/CanaryController.d.ts +103 -0
- package/dist/CanaryController.js +380 -0
- package/dist/ChaosEngineering.d.ts +79 -0
- package/dist/ChaosEngineering.js +216 -0
- package/dist/CircuitBreaker.d.ts +106 -0
- package/dist/CircuitBreaker.js +374 -0
- package/dist/ClusterLock.d.ts +90 -0
- package/dist/ClusterLock.js +385 -0
- package/dist/ComplianceManager.d.ts +177 -0
- package/dist/ComplianceManager.js +556 -0
- package/dist/DatacenterOrchestrator.d.ts +133 -0
- package/dist/DatacenterOrchestrator.js +404 -0
- package/dist/DeadLetterQueue.d.ts +122 -0
- package/dist/DeadLetterQueue.js +539 -0
- package/dist/HealthMonitor.d.ts +42 -0
- package/dist/HealthMonitor.js +301 -0
- package/dist/MultiQueueWorker.d.ts +89 -0
- package/dist/MultiQueueWorker.js +277 -0
- package/dist/NotificationWorker.d.ts +21 -0
- package/dist/NotificationWorker.js +23 -0
- package/dist/Observability.d.ts +153 -0
- package/dist/Observability.js +530 -0
- package/dist/PluginManager.d.ts +123 -0
- package/dist/PluginManager.js +392 -0
- package/dist/PriorityQueue.d.ts +117 -0
- package/dist/PriorityQueue.js +244 -0
- package/dist/ResourceMonitor.d.ts +164 -0
- package/dist/ResourceMonitor.js +605 -0
- package/dist/SLAMonitor.d.ts +110 -0
- package/dist/SLAMonitor.js +274 -0
- package/dist/WorkerFactory.d.ts +193 -0
- package/dist/WorkerFactory.js +1507 -0
- package/dist/WorkerInit.d.ts +85 -0
- package/dist/WorkerInit.js +223 -0
- package/dist/WorkerMetrics.d.ts +114 -0
- package/dist/WorkerMetrics.js +509 -0
- package/dist/WorkerRegistry.d.ts +145 -0
- package/dist/WorkerRegistry.js +319 -0
- package/dist/WorkerShutdown.d.ts +61 -0
- package/dist/WorkerShutdown.js +159 -0
- package/dist/WorkerVersioning.d.ts +107 -0
- package/dist/WorkerVersioning.js +300 -0
- package/dist/build-manifest.json +462 -0
- package/dist/config/workerConfig.d.ts +3 -0
- package/dist/config/workerConfig.js +19 -0
- package/dist/createQueueWorker.d.ts +23 -0
- package/dist/createQueueWorker.js +113 -0
- package/dist/dashboard/index.d.ts +1 -0
- package/dist/dashboard/index.js +1 -0
- package/dist/dashboard/types.d.ts +117 -0
- package/dist/dashboard/types.js +1 -0
- package/dist/dashboard/workers-api.d.ts +4 -0
- package/dist/dashboard/workers-api.js +638 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +3 -0
- package/dist/dashboard/workers-dashboard-ui.js +1026 -0
- package/dist/dashboard/workers-dashboard.d.ts +4 -0
- package/dist/dashboard/workers-dashboard.js +904 -0
- package/dist/helper/index.d.ts +5 -0
- package/dist/helper/index.js +10 -0
- package/dist/http/WorkerApiController.d.ts +38 -0
- package/dist/http/WorkerApiController.js +312 -0
- package/dist/http/WorkerController.d.ts +374 -0
- package/dist/http/WorkerController.js +1351 -0
- package/dist/http/middleware/CustomValidation.d.ts +92 -0
- package/dist/http/middleware/CustomValidation.js +270 -0
- package/dist/http/middleware/DatacenterValidator.d.ts +3 -0
- package/dist/http/middleware/DatacenterValidator.js +94 -0
- package/dist/http/middleware/EditWorkerValidation.d.ts +7 -0
- package/dist/http/middleware/EditWorkerValidation.js +55 -0
- package/dist/http/middleware/FeaturesValidator.d.ts +3 -0
- package/dist/http/middleware/FeaturesValidator.js +60 -0
- package/dist/http/middleware/InfrastructureValidator.d.ts +31 -0
- package/dist/http/middleware/InfrastructureValidator.js +226 -0
- package/dist/http/middleware/OptionsValidator.d.ts +3 -0
- package/dist/http/middleware/OptionsValidator.js +112 -0
- package/dist/http/middleware/PayloadSanitizer.d.ts +7 -0
- package/dist/http/middleware/PayloadSanitizer.js +42 -0
- package/dist/http/middleware/ProcessorPathSanitizer.d.ts +3 -0
- package/dist/http/middleware/ProcessorPathSanitizer.js +74 -0
- package/dist/http/middleware/QueueNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/QueueNameSanitizer.js +45 -0
- package/dist/http/middleware/ValidateDriver.d.ts +7 -0
- package/dist/http/middleware/ValidateDriver.js +20 -0
- package/dist/http/middleware/VersionSanitizer.d.ts +3 -0
- package/dist/http/middleware/VersionSanitizer.js +25 -0
- package/dist/http/middleware/WorkerNameSanitizer.d.ts +3 -0
- package/dist/http/middleware/WorkerNameSanitizer.js +46 -0
- package/dist/http/middleware/WorkerValidationChain.d.ts +27 -0
- package/dist/http/middleware/WorkerValidationChain.js +185 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.js +48 -0
- package/dist/routes/workers.d.ts +12 -0
- package/dist/routes/workers.js +81 -0
- package/dist/storage/WorkerStore.d.ts +45 -0
- package/dist/storage/WorkerStore.js +195 -0
- package/dist/type.d.ts +76 -0
- package/dist/type.js +1 -0
- package/dist/ui/router/ui.d.ts +3 -0
- package/dist/ui/router/ui.js +83 -0
- package/dist/ui/types/worker-ui.d.ts +229 -0
- package/dist/ui/types/worker-ui.js +5 -0
- package/package.json +53 -0
- package/src/AnomalyDetection.ts +434 -0
- package/src/AutoScaler.ts +654 -0
- package/src/BroadcastWorker.ts +34 -0
- package/src/CanaryController.ts +531 -0
- package/src/ChaosEngineering.ts +301 -0
- package/src/CircuitBreaker.ts +495 -0
- package/src/ClusterLock.ts +499 -0
- package/src/ComplianceManager.ts +815 -0
- package/src/DatacenterOrchestrator.ts +561 -0
- package/src/DeadLetterQueue.ts +733 -0
- package/src/HealthMonitor.ts +390 -0
- package/src/MultiQueueWorker.ts +431 -0
- package/src/NotificationWorker.ts +33 -0
- package/src/Observability.ts +696 -0
- package/src/PluginManager.ts +551 -0
- package/src/PriorityQueue.ts +351 -0
- package/src/ResourceMonitor.ts +769 -0
- package/src/SLAMonitor.ts +408 -0
- package/src/WorkerFactory.ts +2108 -0
- package/src/WorkerInit.ts +313 -0
- package/src/WorkerMetrics.ts +709 -0
- package/src/WorkerRegistry.ts +443 -0
- package/src/WorkerShutdown.ts +210 -0
- package/src/WorkerVersioning.ts +422 -0
- package/src/config/workerConfig.ts +25 -0
- package/src/createQueueWorker.ts +174 -0
- package/src/dashboard/index.ts +6 -0
- package/src/dashboard/types.ts +141 -0
- package/src/dashboard/workers-api.ts +785 -0
- package/src/dashboard/zintrust.svg +30 -0
- package/src/helper/index.ts +11 -0
- package/src/http/WorkerApiController.ts +369 -0
- package/src/http/WorkerController.ts +1512 -0
- package/src/http/middleware/CustomValidation.ts +360 -0
- package/src/http/middleware/DatacenterValidator.ts +124 -0
- package/src/http/middleware/EditWorkerValidation.ts +74 -0
- package/src/http/middleware/FeaturesValidator.ts +82 -0
- package/src/http/middleware/InfrastructureValidator.ts +295 -0
- package/src/http/middleware/OptionsValidator.ts +144 -0
- package/src/http/middleware/PayloadSanitizer.ts +52 -0
- package/src/http/middleware/ProcessorPathSanitizer.ts +86 -0
- package/src/http/middleware/QueueNameSanitizer.ts +55 -0
- package/src/http/middleware/ValidateDriver.ts +29 -0
- package/src/http/middleware/VersionSanitizer.ts +30 -0
- package/src/http/middleware/WorkerNameSanitizer.ts +56 -0
- package/src/http/middleware/WorkerValidationChain.ts +230 -0
- package/src/index.ts +98 -0
- package/src/routes/workers.ts +154 -0
- package/src/storage/WorkerStore.ts +240 -0
- package/src/type.ts +89 -0
- package/src/types/queue-monitor.d.ts +38 -0
- package/src/types/queue-redis.d.ts +38 -0
- package/src/ui/README.md +13 -0
- package/src/ui/components/JsonEditor.js +670 -0
- package/src/ui/components/JsonViewer.js +387 -0
- package/src/ui/components/WorkerCard.js +178 -0
- package/src/ui/components/WorkerExpandPanel.js +257 -0
- package/src/ui/components/fetcher.js +42 -0
- package/src/ui/components/sla-scorecard.js +32 -0
- package/src/ui/components/styles.css +30 -0
- package/src/ui/components/table-expander.js +34 -0
- package/src/ui/integration/worker-ui-integration.js +565 -0
- package/src/ui/router/ui.ts +99 -0
- package/src/ui/services/workerApi.js +240 -0
- package/src/ui/types/worker-ui.ts +283 -0
- package/src/ui/utils/jsonValidator.js +444 -0
- package/src/ui/workers/index.html +202 -0
- package/src/ui/workers/main.js +1781 -0
- package/src/ui/workers/styles.css +1350 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canary Deployment Controller
|
|
3
|
+
* Gradual rollout with traffic percentage control and automatic rollback
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import { ErrorFactory, Logger } from '@zintrust/core';
|
|
7
|
+
import { CircuitBreaker } from './CircuitBreaker';
|
|
8
|
+
// Internal state (module-level, shared by every CanaryController method):
//  - canaryDeployments: worker name -> deployment record created by start().
//  - canaryTimers: pending setTimeout handles, keyed by the worker name (next
//    traffic increment) or by `${workerName}:complete` (end of final monitoring).
//  - MAX_HISTORY: maximum number of history entries kept per deployment.
|
|
9
|
+
const canaryDeployments = new Map();
|
|
10
|
+
const canaryTimers = new Map();
|
|
11
|
+
const MAX_HISTORY = 1000;
|
|
12
|
+
/**
 * Helper: fraction of processed jobs that ended in error.
 *
 * @param {number} processed - Total number of jobs processed.
 * @param {number} errors - Number of those jobs that failed.
 * @returns {number} `errors / processed`, or 0 when nothing has been processed yet.
 */
const calculateErrorRate = (processed, errors) => (processed === 0 ? 0 : errors / processed);
|
|
20
|
+
/**
 * Helper: fraction of processed jobs that did not end in error.
 *
 * @param {number} processed - Total number of jobs processed.
 * @param {number} errors - Number of those jobs that failed.
 * @returns {number} `(processed - errors) / processed`, or 1 when nothing has
 *   been processed yet (no traffic counts as fully successful).
 */
const calculateSuccessRate = (processed, errors) => {
    const succeeded = processed - errors;
    return processed === 0 ? 1 : succeeded / processed;
};
|
|
28
|
+
/**
 * Helper: decide whether the canary version's metrics warrant a rollback.
 *
 * The checks run in this order and the first one that trips wins:
 *   1. error rate above `config.errorThreshold` (a 0..1 fraction)
 *   2. success rate below `config.minSuccessRate` (a 0..1 fraction)
 *   3. average latency above `config.latencyThreshold` (milliseconds)
 *
 * @param {object} deployment - Deployment record; only `config` and
 *   `metrics.canaryVersion` are read.
 * @returns {{ should: boolean, reason?: string }} `reason` is present only
 *   when `should` is true.
 */
const shouldRollback = (deployment) => {
    const { config, metrics } = deployment;
    const { processed, errors, avgLatency } = metrics.canaryVersion;

    // Check error threshold
    const errorRate = calculateErrorRate(processed, errors);
    if (errorRate > config.errorThreshold) {
        return {
            should: true,
            reason: `Error rate ${(errorRate * 100).toFixed(2)}% exceeds threshold ${(config.errorThreshold * 100).toFixed(2)}%`,
        };
    }

    // Check success rate
    const successRate = calculateSuccessRate(processed, errors);
    if (successRate < config.minSuccessRate) {
        return {
            should: true,
            reason: `Success rate ${(successRate * 100).toFixed(2)}% below minimum ${(config.minSuccessRate * 100).toFixed(2)}%`,
        };
    }

    // Check latency threshold. The value tracked per version is the average
    // latency, so the reason must not describe it as a percentile.
    if (avgLatency > config.latencyThreshold) {
        return {
            should: true,
            reason: `Average latency ${avgLatency}ms exceeds threshold ${config.latencyThreshold}ms`,
        };
    }

    return { should: false };
};
|
|
59
|
+
/**
 * Helper: advance a canary deployment by one traffic step.
 *
 * Runs from the timers scheduled by `CanaryController.start`/`resume` and by
 * this function itself. On each run it:
 *   - logs an error and returns when no deployment exists for `workerName`;
 *   - triggers `CanaryController.rollback` (and stops) when the metrics call
 *     for a rollback and `config.autoRollback` is enabled;
 *   - otherwise raises the traffic share by `config.incrementPercent`, capped
 *     at `config.targetTrafficPercent`, and records the step in the history;
 *   - once the target is reached, switches to the 'monitoring' stage and
 *     schedules `CanaryController.complete` after `config.monitoringDuration`
 *     seconds; until then schedules the next step after
 *     `config.incrementInterval` seconds.
 *
 * @param {string} workerName - Name of the worker whose canary is ramping.
 * @returns {void}
 */
const incrementTraffic = (workerName) => {
    const deployment = canaryDeployments.get(workerName);
    if (!deployment) {
        Logger.error('Canary deployment not found', { workerName });
        return;
    }
    const { config } = deployment;

    // Replace whatever timer is registered under `key` with a fresh one.
    const reschedule = (key, callback, delaySeconds) => {
        const pending = canaryTimers.get(key);
        if (pending) {
            clearTimeout(pending);
            canaryTimers.delete(key);
        }
        // eslint-disable-next-line no-restricted-syntax
        const handle = setTimeout(callback, delaySeconds * 1000);
        canaryTimers.set(key, handle);
    };

    // Check if we should rollback
    const verdict = shouldRollback(deployment);
    if (verdict.should && config.autoRollback) {
        Logger.warn('Auto-rollback triggered', { workerName, reason: verdict.reason });
        CanaryController.rollback(workerName, verdict.reason ?? '');
        return;
    }

    // Increment traffic, never overshooting the configured target
    const nextPercent = Math.min(
        deployment.currentTrafficPercent + config.incrementPercent,
        config.targetTrafficPercent,
    );
    deployment.currentTrafficPercent = nextPercent;

    // Record history
    appendHistory(deployment, {
        timestamp: new Date(),
        trafficPercent: nextPercent,
        stage: deployment.stage,
        metrics: { ...deployment.metrics },
        decision: `Traffic increased to ${nextPercent}%`,
    });
    Logger.info('Canary traffic incremented', {
        workerName,
        trafficPercent: nextPercent,
        targetPercent: config.targetTrafficPercent,
    });

    if (nextPercent >= config.targetTrafficPercent) {
        // Target reached: wait for the final monitoring period, then complete
        deployment.stage = 'monitoring';
        reschedule(
            `${workerName}:complete`,
            () => {
                CanaryController.complete(workerName);
            },
            config.monitoringDuration,
        );
        return;
    }

    // Still ramping: schedule next increment
    reschedule(
        workerName,
        () => {
            incrementTraffic(workerName);
        },
        config.incrementInterval,
    );
};
|
|
124
|
+
/**
 * Helper: record a history entry on a deployment, dropping the oldest entry
 * once more than MAX_HISTORY entries are stored.
 *
 * @param {object} deployment - Deployment record whose `history` array is mutated.
 * @param {object} entry - History entry to append.
 * @returns {void}
 */
const appendHistory = (deployment, entry) => {
    const { history } = deployment;
    // push() returns the new length, which is what the cap is checked against.
    if (history.push(entry) > MAX_HISTORY) {
        history.shift();
    }
};
|
|
130
|
+
/**
 * Helper: cancel and forget both timers a deployment may own: the
 * next-increment timer (keyed by the worker name) and the final-monitoring
 * completion timer (keyed by `${workerName}:complete`).
 *
 * @param {string} workerName - Worker whose timers are cleared.
 * @returns {void}
 */
const clearCanaryTimers = (workerName) => {
    for (const key of [workerName, `${workerName}:complete`]) {
        const timer = canaryTimers.get(key);
        if (timer) {
            clearTimeout(timer);
            canaryTimers.delete(key);
        }
    }
};

/**
 * Canary Deployment Controller - Sealed namespace
 *
 * Deployments move through the stages 'initial' -> 'ramping' -> 'monitoring'
 * -> 'completed', or end in 'failed' after a rollback.
 */
export const CanaryController = Object.freeze({
    /**
     * Start canary deployment
     *
     * @param {object} config - Deployment settings. Read here: `workerName`,
     *   `currentVersion`, `canaryVersion`, `initialTrafficPercent`,
     *   `targetTrafficPercent` and `monitoringDuration` (seconds).
     * @throws General error when a deployment already exists for the worker.
     * @throws Validation error when the traffic percentages are out of range.
     */
    start(config) {
        const { workerName } = config;
        if (canaryDeployments.has(workerName)) {
            throw ErrorFactory.createGeneralError(`Canary deployment already in progress for "${workerName}"`);
        }
        // Validate config
        if (config.initialTrafficPercent < 0 || config.initialTrafficPercent > 100) {
            throw ErrorFactory.createValidationError('Initial traffic percent must be between 0 and 100');
        }
        if (config.targetTrafficPercent < config.initialTrafficPercent ||
            config.targetTrafficPercent > 100) {
            throw ErrorFactory.createValidationError('Target traffic percent must be >= initial and <= 100');
        }
        // Create deployment
        const deployment = {
            config,
            currentTrafficPercent: config.initialTrafficPercent,
            stage: 'initial',
            startedAt: new Date(),
            metrics: {
                currentVersion: { processed: 0, errors: 0, avgLatency: 0 },
                canaryVersion: { processed: 0, errors: 0, avgLatency: 0 },
            },
            history: [],
        };
        canaryDeployments.set(workerName, deployment);
        Logger.info('Canary deployment started', {
            workerName,
            currentVersion: config.currentVersion,
            canaryVersion: config.canaryVersion,
            initialTraffic: config.initialTrafficPercent,
        });
        // Start ramping up
        deployment.stage = 'ramping';
        // Schedule first increment. Note the first step waits for
        // monitoringDuration, not incrementInterval.
        clearCanaryTimers(workerName);
        // eslint-disable-next-line no-restricted-syntax
        const timer = setTimeout(() => {
            incrementTraffic(workerName);
        }, config.monitoringDuration * 1000);
        canaryTimers.set(workerName, timer);
    },
    /**
     * Pause canary deployment
     *
     * Cancels every pending timer of the deployment, including the
     * final-monitoring completion timer, so a paused deployment can neither
     * ramp further nor complete on its own. The stage is left untouched.
     *
     * @param {string} workerName
     * @throws Not-found error when no deployment exists for the worker.
     */
    pause(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            throw ErrorFactory.createNotFoundError(`Canary deployment not found for "${workerName}"`);
        }
        clearCanaryTimers(workerName);
        Logger.info('Canary deployment paused', { workerName });
    },
    /**
     * Resume canary deployment
     *
     * Schedules the next traffic step after `config.incrementInterval`
     * seconds. A deployment that already finished ('completed' or 'failed')
     * cannot be resumed, because the next step would silently re-activate it.
     *
     * @param {string} workerName
     * @throws Not-found error when no deployment exists for the worker.
     * @throws Validation error when the deployment has already finished.
     */
    resume(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            throw ErrorFactory.createNotFoundError(`Canary deployment not found for "${workerName}"`);
        }
        if (deployment.stage === 'completed' || deployment.stage === 'failed') {
            throw ErrorFactory.createValidationError(`Cannot resume ${deployment.stage} canary deployment for "${workerName}"`);
        }
        // Resume incrementing
        clearCanaryTimers(workerName);
        // eslint-disable-next-line no-restricted-syntax
        const timer = setTimeout(() => {
            incrementTraffic(workerName);
        }, deployment.config.incrementInterval * 1000);
        canaryTimers.set(workerName, timer);
        Logger.info('Canary deployment resumed', { workerName });
    },
    /**
     * Complete canary deployment
     *
     * @param {string} workerName
     * @throws Not-found error when no deployment exists for the worker.
     */
    complete(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            throw ErrorFactory.createNotFoundError(`Canary deployment not found for "${workerName}"`);
        }
        deployment.stage = 'completed';
        deployment.completedAt = new Date();
        // Clear timers
        clearCanaryTimers(workerName);
        // Record completion in history
        appendHistory(deployment, {
            timestamp: new Date(),
            trafficPercent: deployment.currentTrafficPercent,
            stage: 'completed',
            metrics: { ...deployment.metrics },
            decision: 'Deployment completed successfully',
        });
        Logger.info('Canary deployment completed', {
            workerName,
            duration: deployment.completedAt.getTime() - deployment.startedAt.getTime(),
        });
    },
    /**
     * Rollback canary deployment
     *
     * Sends all traffic back to the current version, marks the deployment
     * 'failed' and force-opens the circuit breaker for the canary version.
     *
     * @param {string} workerName
     * @param {string} reason - Recorded in the history, the log and the breaker.
     * @throws Not-found error when no deployment exists for the worker.
     */
    rollback(workerName, reason) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            throw ErrorFactory.createNotFoundError(`Canary deployment not found for "${workerName}"`);
        }
        deployment.stage = 'rolling-back';
        // Clear timers. The completion timer must go as well: otherwise a
        // rollback during final monitoring is later overwritten by complete().
        clearCanaryTimers(workerName);
        // Roll back traffic to 0%
        deployment.currentTrafficPercent = 0;
        // Record rollback in history
        appendHistory(deployment, {
            timestamp: new Date(),
            trafficPercent: 0,
            stage: 'rolling-back',
            metrics: { ...deployment.metrics },
            decision: `Rollback initiated: ${reason}`,
        });
        deployment.stage = 'failed';
        deployment.completedAt = new Date();
        Logger.error('Canary deployment rolled back', { workerName, reason });
        // Optional: Open circuit breaker for canary version
        CircuitBreaker.forceOpen(workerName, deployment.config.canaryVersion, reason);
    },
    /**
     * Get canary deployment status
     *
     * @param {string} workerName
     * @returns {object|null} A snapshot of the deployment (metrics and history
     *   are copied so callers cannot mutate controller state), or null when
     *   no deployment exists. `config` is the object passed to start().
     */
    getStatus(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            return null;
        }
        const { currentVersion, canaryVersion } = deployment.metrics;
        return {
            ...deployment,
            metrics: {
                ...deployment.metrics,
                currentVersion: { ...currentVersion },
                canaryVersion: { ...canaryVersion },
            },
            history: [...deployment.history],
        };
    },
    /**
     * Update metrics for canary deployment
     *
     * Replaces the stored metrics of whichever version matches `version`;
     * unknown workers and unknown versions are ignored.
     *
     * @param {string} workerName
     * @param {string} version - Either the current or the canary version.
     * @param {number} processed
     * @param {number} errors
     * @param {number} avgLatency
     */
    updateMetrics(workerName, version, processed, errors, avgLatency) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            return;
        }
        if (version === deployment.config.currentVersion) {
            deployment.metrics.currentVersion = { processed, errors, avgLatency };
        }
        else if (version === deployment.config.canaryVersion) {
            deployment.metrics.canaryVersion = { processed, errors, avgLatency };
        }
    },
    /**
     * Route job to version based on traffic percentage
     *
     * @param {string} workerName
     * @returns {string|null} The version that should handle the job, or null
     *   when the worker has no active canary (none, completed or failed).
     */
    routeJob(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment || deployment.stage === 'completed' || deployment.stage === 'failed') {
            return null; // No active canary
        }
        // Random routing based on traffic percentage
        const random = Math.random() * 100; // NOSONAR
        if (random < deployment.currentTrafficPercent) {
            return deployment.config.canaryVersion;
        }
        return deployment.config.currentVersion;
    },
    /**
     * List all canary deployments
     *
     * @returns {string[]} Worker names that currently have a deployment record.
     */
    listDeployments() {
        return Array.from(canaryDeployments.keys());
    },
    /**
     * Get deployment history
     *
     * @param {string} workerName
     * @returns {object[]|null} A copy of the history, or null when no
     *   deployment exists.
     */
    getHistory(workerName) {
        const deployment = canaryDeployments.get(workerName);
        return deployment ? [...deployment.history] : null;
    },
    /**
     * Remove completed/failed deployment
     *
     * @param {string} workerName
     * @throws Not-found error when no deployment exists for the worker.
     * @throws Validation error when the deployment has not finished yet.
     */
    remove(workerName) {
        const deployment = canaryDeployments.get(workerName);
        if (!deployment) {
            throw ErrorFactory.createNotFoundError(`Canary deployment not found for "${workerName}"`);
        }
        if (deployment.stage !== 'completed' && deployment.stage !== 'failed') {
            throw ErrorFactory.createValidationError('Cannot remove active deployment. Pause or complete it first.');
        }
        canaryDeployments.delete(workerName);
        Logger.info('Canary deployment removed', { workerName });
    },
    /**
     * Purge deployment data (force cleanup)
     *
     * Unlike remove(), this works in any stage and never throws for an
     * unknown worker.
     *
     * @param {string} workerName
     */
    purge(workerName) {
        clearCanaryTimers(workerName);
        canaryDeployments.delete(workerName);
        Logger.info('Canary deployment purged', { workerName });
    },
    /**
     * Shutdown all canary deployments
     *
     * Cancels every pending timer and drops all deployment records.
     */
    shutdown() {
        Logger.info('CanaryController shutting down...');
        // Clear all timers
        for (const timer of canaryTimers.values()) {
            clearTimeout(timer);
        }
        canaryTimers.clear();
        canaryDeployments.clear();
        Logger.info('CanaryController shutdown complete');
    },
});
|
|
380
|
+
// Graceful shutdown handled by WorkerShutdown
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chaos Engineering
|
|
3
|
+
* Controlled fault injection experiments for worker resilience testing
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Definition of a chaos experiment, as accepted by
 * `ChaosEngineering.defineExperiment`.
 */
export interface IChaosExperiment {
    /** Experiment name; copied into the experiment status and log messages. */
    name: string;
    /** Free-text description of the experiment. */
    description: string;
    /** Selection of the workers the failure is applied to. */
    target: {
        /** Candidate worker names. When omitted, the currently running workers are used. */
        workers?: Array<string>;
        /** Share of the candidates to target, in percent (treated as 100 when omitted). */
        percentage?: number;
    };
    /** The fault to inject. */
    failure: {
        /** Kind of fault. */
        type: 'crash' | 'latency' | 'error' | 'resource';
        /** Fault-specific settings; shape is not constrained by this type. */
        config: unknown;
    };
    /** How long the experiment runs before it is stopped automatically, in milliseconds. */
    duration: number;
    /** Guard rails applied while running the experiment. */
    safetyLimits: {
        /** Maximum number of experiments allowed to run at the same time. */
        maxConcurrent: number;
        /** When false, stopping the experiment performs no rollback of the injected fault. */
        circuitBreaker: boolean;
        /** NOTE(review): not read by the implementation visible alongside this file; confirm intended meaning. */
        rollbackOn: Array<string>;
    };
}
|
|
24
|
+
/**
 * Runtime status of a defined chaos experiment, as returned by
 * `ChaosEngineering.getExperimentStatus`.
 */
export type IChaosStatus = {
    /** Identifier returned by `defineExperiment`. */
    id: string;
    /** Name taken from the experiment definition. */
    name: string;
    /** Lifecycle state; a newly defined experiment starts as 'defined'. */
    state: 'defined' | 'running' | 'completed' | 'stopped' | 'failed';
    /** When the experiment was started, or null if it never was. */
    startedAt: null | Date;
    /** When the experiment ended, or null if it has not ended. */
    endedAt: null | Date;
    /** Names of the workers selected when the experiment was started. */
    targetedWorkers: Array<string>;
    /** Fault type taken from the experiment definition. */
    failureType: IChaosExperiment['failure']['type'];
};
|
|
33
|
+
/**
 * Summary of one experiment, as returned by `ChaosEngineering.analyzeResilience`.
 */
export type IChaosReport = {
    /** Identifier of the analysed experiment. */
    experimentId: string;
    /** One-line, human-readable outcome. */
    summary: string;
    /** Experiment duration in milliseconds. */
    durationMs: number;
    /** Names of the workers the experiment targeted. */
    targetedWorkers: Array<string>;
    /** When the experiment ended, or null if it has not ended. */
    completedAt: null | Date;
};
|
|
40
|
+
/**
 * Aggregate view over several experiments, as returned by
 * `ChaosEngineering.compareExperiments`.
 */
export type IChaosComparison = {
    /** The experiment ids that were passed in for comparison. */
    comparedIds: Array<string>;
    /** Number of compared experiments in state 'running'. */
    running: number;
    /** Number of compared experiments in state 'completed'. */
    completed: number;
    /** Number of compared experiments in state 'stopped'. */
    stopped: number;
};
|
|
46
|
+
/**
 * Chaos Engineering - Sealed namespace
 *
 * Frozen object exposing the experiment lifecycle (define / start / stop /
 * status), one-off fault injection helpers and reporting helpers.
 */
export declare const ChaosEngineering: Readonly<{
    /**
     * Define a chaos experiment
     *
     * @param config - Experiment definition.
     * @returns The generated experiment id used by the other methods.
     */
    defineExperiment(config: IChaosExperiment): string;
    /**
     * Start a chaos experiment
     *
     * @param experimentId - Id returned by `defineExperiment`.
     * @returns Resolves once the fault has been applied. Rejects when the id
     *   is unknown, the experiment is already running, or the concurrency
     *   limit has been reached.
     */
    startExperiment(experimentId: string): Promise<void>;
    /**
     * Stop a chaos experiment
     *
     * @param experimentId - Id returned by `defineExperiment`.
     * @returns Resolves once the experiment has been stopped. Rejects when
     *   the id is unknown.
     */
    stopExperiment(experimentId: string): Promise<void>;
    /**
     * Get experiment status
     *
     * @param experimentId - Id returned by `defineExperiment`.
     * @returns A copy of the experiment's current status.
     * @throws When the id is unknown.
     */
    getExperimentStatus(experimentId: string): IChaosStatus;
    /**
     * Failure injection helpers
     *
     * Each helper acts on a single worker, outside of any defined experiment.
     */
    injectCrash(workerName: string): void;
    injectLatency(workerName: string, delayMs: number): void;
    injectError(workerName: string, errorRate: number): void;
    injectResourceExhaustion(workerName: string, type: 'cpu' | 'memory'): void;
    /**
     * Analysis helpers
     *
     * `analyzeResilience` reports on one experiment (throws when the id is
     * unknown); `compareExperiments` counts the states of several experiments.
     */
    analyzeResilience(experimentId: string): IChaosReport;
    compareExperiments(ids: Array<string>): IChaosComparison;
}>;
|
|
79
|
+
export default ChaosEngineering;
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chaos Engineering
|
|
3
|
+
* Controlled fault injection experiments for worker resilience testing
|
|
4
|
+
* Sealed namespace for immutability
|
|
5
|
+
*/
|
|
6
|
+
import { ErrorFactory, Logger, generateUuid } from '@zintrust/core';
|
|
7
|
+
import { CircuitBreaker } from './CircuitBreaker';
|
|
8
|
+
import { Observability } from './Observability';
|
|
9
|
+
import { ResourceMonitor } from './ResourceMonitor';
|
|
10
|
+
import { WorkerRegistry } from './WorkerRegistry';
|
|
11
|
+
// Registry of defined experiments: experiment id -> { config, status, timer }.
const experiments = new Map();
|
|
12
|
+
/**
 * Resolve the worker names an experiment should be applied to.
 *
 * Candidates are `config.target.workers`, falling back to the workers the
 * registry reports as running. `config.target.percentage` (default 100)
 * selects the leading share of that list; below 100% at least one worker is
 * always selected as long as there is any candidate.
 *
 * @param {object} config - Experiment definition; only `target` is read.
 * @returns {string[]} A new array of worker names (never the candidate array itself).
 */
const getTargetWorkers = (config) => {
    const pool = config.target.workers ?? WorkerRegistry.listRunning();
    if (pool.length === 0) {
        return [];
    }
    const share = config.target.percentage ?? 100;
    return share >= 100
        ? [...pool]
        : pool.slice(0, Math.max(1, Math.floor((pool.length * share) / 100)));
};
|
|
22
|
+
/**
 * Apply an experiment's fault to the selected workers.
 *
 * Per failure type:
 *   - 'crash':    stops each worker through the registry (in parallel);
 *                 a failing stop is logged and does not abort the others.
 *   - 'latency':  only logs a warning per worker.
 *   - 'error':    force-opens the circuit breaker of each worker the registry
 *                 knows (workers without a status are skipped).
 *   - 'resource': only logs the worker's current resource usage.
 * Any other type applies nothing. Afterwards, when observability is enabled,
 * a zeroed 'chaos' job metric is recorded for every worker.
 *
 * @param {object} config - Experiment definition; only `failure` is read.
 * @param {string[]} workers - Names of the targeted workers.
 * @returns {Promise<void>}
 */
const applyFailure = async (config, workers) => {
    const { failure } = config;

    switch (failure.type) {
        case 'crash': {
            const stopQuietly = async (workerName) => {
                try {
                    await WorkerRegistry.stop(workerName);
                }
                catch (error) {
                    Logger.error(`Failed to inject crash for ${workerName}`, error);
                }
            };
            await Promise.all(workers.map((workerName) => stopQuietly(workerName)));
            break;
        }
        case 'latency':
            for (const workerName of workers) {
                Logger.warn(`Injected latency for ${workerName}`, { config: failure.config });
            }
            break;
        case 'error':
            for (const workerName of workers) {
                const status = WorkerRegistry.status(workerName);
                if (status) {
                    CircuitBreaker.forceOpen(workerName, status.version, 'Chaos experiment error injection');
                }
            }
            break;
        case 'resource':
            for (const workerName of workers) {
                const usage = ResourceMonitor.getCurrentUsage(workerName);
                Logger.warn(`Resource pressure simulated for ${workerName}`, {
                    cpu: usage.cpu,
                    memory: usage.memory.percent,
                    config: failure.config,
                });
            }
            break;
    }

    if (!Observability.isEnabled()) {
        return;
    }
    for (const workerName of workers) {
        Observability.recordJobMetrics(workerName, 'chaos', {
            processed: 0,
            failed: 0,
        });
    }
};
|
|
67
|
+
/**
 * Undo an experiment's fault on the given workers.
 *
 * Only 'error' experiments with `safetyLimits.circuitBreaker` enabled are
 * rolled back: the circuit breaker of every worker the registry knows is
 * reset. All other combinations are left as they are.
 *
 * @param {object} config - Experiment definition (`safetyLimits`, `failure`).
 * @param {string[]} workers - Names of the workers the fault was applied to.
 * @returns {void}
 */
const rollbackFailure = (config, workers) => {
    const reversible = config.safetyLimits.circuitBreaker && config.failure.type === 'error';
    if (!reversible) {
        return;
    }
    for (const workerName of workers) {
        const status = WorkerRegistry.status(workerName);
        if (status) {
            CircuitBreaker.reset(workerName, status.version);
        }
    }
};
|
|
79
|
+
/**
 * Chaos Engineering - Sealed namespace
 *
 * Experiments are defined first, then started; a started experiment stops
 * itself after `config.duration` milliseconds unless stopped earlier.
 */
export const ChaosEngineering = Object.freeze({
    /**
     * Define a chaos experiment
     *
     * @param {object} config - Experiment definition.
     * @returns {string} Generated experiment id.
     */
    defineExperiment(config) {
        const id = generateUuid();
        const status = {
            id,
            name: config.name,
            state: 'defined',
            startedAt: null,
            endedAt: null,
            targetedWorkers: [],
            failureType: config.failure.type,
        };
        experiments.set(id, { config, status, timer: null });
        Logger.info(`Chaos experiment defined: ${config.name}`, { id });
        return id;
    },
    /**
     * Start a chaos experiment
     *
     * If applying the fault fails, the experiment is marked 'failed', a
     * best-effort rollback is attempted and the original error is rethrown,
     * so the experiment no longer occupies a concurrency slot.
     *
     * @param {string} experimentId
     * @returns {Promise<void>} Resolves once the fault has been applied and
     *   the auto-stop timer is armed.
     * @throws Not-found error for an unknown id; worker error when the
     *   experiment is already running or `safetyLimits.maxConcurrent`
     *   experiments are running.
     */
    async startExperiment(experimentId) {
        const record = experiments.get(experimentId);
        if (!record) {
            throw ErrorFactory.createNotFoundError(`Chaos experiment not found: ${experimentId}`);
        }
        if (record.status.state === 'running') {
            throw ErrorFactory.createWorkerError(`Chaos experiment already running: ${experimentId}`);
        }
        const runningCount = Array.from(experiments.values()).filter((exp) => exp.status.state === 'running').length;
        if (runningCount >= record.config.safetyLimits.maxConcurrent) {
            throw ErrorFactory.createWorkerError('Maximum concurrent chaos experiments reached');
        }
        const targets = getTargetWorkers(record.config);
        record.status.state = 'running';
        record.status.startedAt = new Date();
        // A re-started experiment must not keep the end time of its previous run.
        record.status.endedAt = null;
        record.status.targetedWorkers = targets;
        Logger.warn(`Chaos experiment started: ${record.config.name}`, {
            id: experimentId,
            targets,
        });
        try {
            await applyFailure(record.config, targets);
        }
        catch (error) {
            // Without this the experiment would stay 'running' forever with
            // no timer to end it.
            record.status.state = 'failed';
            record.status.endedAt = new Date();
            try {
                rollbackFailure(record.config, targets);
            }
            catch (rollbackError) {
                Logger.error('Failed to roll back chaos experiment after injection failure', rollbackError);
            }
            throw error;
        }
        if (record.status.state !== 'running') {
            // Stopped while the fault was still being applied: nothing to schedule.
            return;
        }
        record.timer = globalThis.setTimeout(() => {
            // The timer has fired; drop the handle before stopping.
            record.timer = null;
            ChaosEngineering.stopExperiment(experimentId).catch((error) => {
                Logger.error('Failed to stop chaos experiment after duration', error);
            });
        }, record.config.duration);
    },
    /**
     * Stop a chaos experiment
     *
     * Cancels the auto-stop timer, rolls the fault back and marks the
     * experiment 'completed'. Stopping an experiment that is not running only
     * clears a leftover timer; its state and end time are left untouched and
     * the rollback is not repeated.
     *
     * @param {string} experimentId
     * @returns {Promise<void>}
     * @throws Not-found error for an unknown id.
     */
    async stopExperiment(experimentId) {
        const record = experiments.get(experimentId);
        if (!record) {
            throw ErrorFactory.createNotFoundError(`Chaos experiment not found: ${experimentId}`);
        }
        if (record.timer) {
            clearTimeout(record.timer);
            record.timer = null;
        }
        if (record.status.state !== 'running') {
            return;
        }
        rollbackFailure(record.config, record.status.targetedWorkers);
        record.status.state = 'completed';
        record.status.endedAt = new Date();
        Logger.info(`Chaos experiment completed: ${record.config.name}`, {
            id: experimentId,
            duration: record.config.duration,
        });
    },
    /**
     * Get experiment status
     *
     * @param {string} experimentId
     * @returns {object} Copy of the status (including a copied worker list).
     * @throws Not-found error for an unknown id.
     */
    getExperimentStatus(experimentId) {
        const record = experiments.get(experimentId);
        if (!record) {
            throw ErrorFactory.createNotFoundError(`Chaos experiment not found: ${experimentId}`);
        }
        return { ...record.status, targetedWorkers: [...record.status.targetedWorkers] };
    },
    /**
     * Failure injection helpers
     */
    /** Stop a worker through the registry; a failure to stop is only logged. */
    injectCrash(workerName) {
        WorkerRegistry.stop(workerName).catch((error) => {
            Logger.error(`Failed to inject crash for ${workerName}`, error);
        });
    },
    /** Log a latency injection for a worker (no delay is applied here). */
    injectLatency(workerName, delayMs) {
        Logger.warn(`Injected latency ${delayMs}ms for ${workerName}`);
    },
    /**
     * Force-open a worker's circuit breaker; `errorRate` is recorded in the
     * reason only.
     *
     * @throws Not-found error when the registry has no status for the worker.
     */
    injectError(workerName, errorRate) {
        const status = WorkerRegistry.status(workerName);
        if (!status) {
            throw ErrorFactory.createNotFoundError(`Worker not found: ${workerName}`);
        }
        CircuitBreaker.forceOpen(workerName, status.version, `Chaos error rate ${errorRate}`);
    },
    /** Log a resource-exhaustion injection together with the worker's current usage. */
    injectResourceExhaustion(workerName, type) {
        const usage = ResourceMonitor.getCurrentUsage(workerName);
        Logger.warn(`Injected resource exhaustion (${type}) for ${workerName}`, {
            cpu: usage.cpu,
            memory: usage.memory.percent,
        });
    },
    /**
     * Analysis helpers
     */
    /**
     * Summarise one experiment.
     *
     * @param {string} experimentId
     * @returns {object} Report; `durationMs` is the configured duration.
     * @throws Not-found error for an unknown id.
     */
    analyzeResilience(experimentId) {
        const record = experiments.get(experimentId);
        if (!record) {
            throw ErrorFactory.createNotFoundError(`Chaos experiment not found: ${experimentId}`);
        }
        return {
            experimentId,
            summary: `Experiment ${record.config.name} finished with ${record.status.state}`,
            durationMs: record.config.duration,
            targetedWorkers: [...record.status.targetedWorkers],
            completedAt: record.status.endedAt,
        };
    },
    /**
     * Count the states of several experiments; unknown ids are ignored in
     * the counts but still echoed in `comparedIds`.
     *
     * @param {string[]} ids
     * @returns {object} `{ comparedIds, running, completed, stopped }`
     */
    compareExperiments(ids) {
        const statuses = ids.map((id) => experiments.get(id)?.status).filter(Boolean);
        return {
            comparedIds: ids,
            running: statuses.filter((s) => s.state === 'running').length,
            completed: statuses.filter((s) => s.state === 'completed').length,
            stopped: statuses.filter((s) => s.state === 'stopped').length,
        };
    },
});
|
|
216
|
+
export default ChaosEngineering;
|