codecruise 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +111 -0
- package/bin/codecruise.js +68 -0
- package/config/CLAUDE.md +107 -0
- package/config/agents/analyst.md +48 -0
- package/config/agents/architect-reviewer.md +161 -0
- package/config/agents/architect.md +119 -0
- package/config/agents/critic.md +63 -0
- package/config/agents/developer.md +96 -0
- package/config/agents/devops.md +81 -0
- package/config/agents/orchestrator.md +91 -0
- package/config/agents/planner.md +139 -0
- package/config/agents/retro.md +52 -0
- package/config/agents/reviewer.md +101 -0
- package/config/agents/security-reviewer.md +57 -0
- package/config/agents/stack/expo/AGENT.md +473 -0
- package/config/agents/stack/expo/rules/critical.md +427 -0
- package/config/agents/stack/expo/rules/native.md +455 -0
- package/config/agents/stack/expo/rules/navigation.md +445 -0
- package/config/agents/stack/expo/rules/performance.md +415 -0
- package/config/agents/stack/fastify/AGENT.md +397 -0
- package/config/agents/stack/fastify/rules/api-design.md +283 -0
- package/config/agents/stack/fastify/rules/critical.md +232 -0
- package/config/agents/stack/fastify/rules/queues.md +303 -0
- package/config/agents/stack/fastify/rules/security.md +384 -0
- package/config/agents/stack/index.yaml +48 -0
- package/config/agents/stack/nextjs/AGENT.md +421 -0
- package/config/agents/stack/nextjs/rules/components.md +413 -0
- package/config/agents/stack/nextjs/rules/critical.md +391 -0
- package/config/agents/stack/nextjs/rules/performance.md +403 -0
- package/config/agents/stack/nextjs/rules/styling.md +334 -0
- package/config/agents/stack/shared-ts/AGENT.md +384 -0
- package/config/agents/stack/shared-ts/rules/critical.md +315 -0
- package/config/agents/stack/shared-ts/rules/patterns.md +384 -0
- package/config/agents/stack/shared-ts/rules/zod.md +427 -0
- package/config/agents/tester.md +79 -0
- package/config/commands/architect-discuss.md +366 -0
- package/config/commands/architect-list.md +160 -0
- package/config/commands/architect-review.md +111 -0
- package/config/commands/architect.md +118 -0
- package/config/commands/compact.md +118 -0
- package/config/commands/companion.md +279 -0
- package/config/commands/dashboard.md +152 -0
- package/config/commands/doctor.md +227 -0
- package/config/commands/dogfood-report.md +101 -0
- package/config/commands/flags/run-autonomous.md +110 -0
- package/config/commands/flags/run-pause.md +80 -0
- package/config/commands/ingest.md +173 -0
- package/config/commands/init.md +128 -0
- package/config/commands/metrics.md +87 -0
- package/config/commands/parallel.md +320 -0
- package/config/commands/pause.md +55 -0
- package/config/commands/plan-review.md +130 -0
- package/config/commands/plan.md +216 -0
- package/config/commands/production-check.md +308 -0
- package/config/commands/refine.md +323 -0
- package/config/commands/resume.md +72 -0
- package/config/commands/retro.md +121 -0
- package/config/commands/retry.md +75 -0
- package/config/commands/role.md +310 -0
- package/config/commands/run.md +417 -0
- package/config/commands/scope.md +85 -0
- package/config/commands/setup-permissions.md +104 -0
- package/config/commands/skip.md +75 -0
- package/config/commands/spec-forge.md +213 -0
- package/config/commands/spec-help.md +194 -0
- package/config/commands/spec-patch.md +342 -0
- package/config/commands/spec-resolve.md +110 -0
- package/config/commands/spec-review.md +153 -0
- package/config/commands/status.md +114 -0
- package/config/commands/sync.md +131 -0
- package/config/commands/task.md +138 -0
- package/config/commands/verify.md +124 -0
- package/config/hooks/README.md +632 -0
- package/config/hooks/activity-log.sh +187 -0
- package/config/hooks/anti-rationalize.sh +52 -0
- package/config/hooks/capture-verification.sh +112 -0
- package/config/hooks/collect-metrics.sh +135 -0
- package/config/hooks/enforce-file-scope.sh +75 -0
- package/config/hooks/enforce-state-machine.sh +161 -0
- package/config/hooks/enforce-tdd.sh +180 -0
- package/config/hooks/format.sh +40 -0
- package/config/hooks/lib/activity-helpers.sh +162 -0
- package/config/hooks/lib/read-settings.sh +71 -0
- package/config/hooks/load-context-skills.sh +95 -0
- package/config/hooks/notify.sh +81 -0
- package/config/hooks/pre-commit.sample +35 -0
- package/config/hooks/protect-files.sh +63 -0
- package/config/hooks/track-agents.sh +41 -0
- package/config/hooks/track-commands.sh +37 -0
- package/config/hooks/track-enforcement.sh +44 -0
- package/config/hooks/track-ooda.sh +77 -0
- package/config/hooks/validate-commit-msg.sh +35 -0
- package/config/hooks/validate-plan.sh +213 -0
- package/config/hooks/verify-criteria.sh +46 -0
- package/config/hooks/verify-todo-completion.sh +140 -0
- package/config/rules/comments.md +25 -0
- package/config/rules/decision-rules.md +308 -0
- package/config/rules/hygiene.md +247 -0
- package/config/rules/pattern-detection.md +372 -0
- package/config/rules/profiles.md +193 -0
- package/config/rules/recovery.md +83 -0
- package/config/rules/scope-detection.md +213 -0
- package/config/rules/standards.md +127 -0
- package/config/rules/workflow.md +121 -0
- package/config/schemas.md +767 -0
- package/config/settings.json +195 -0
- package/config/skills/backend/SKILL.md +734 -0
- package/config/skills/database/SKILL.md +426 -0
- package/config/skills/frontend/SKILL.md +434 -0
- package/config/skills/git/SKILL.md +396 -0
- package/config/skills/index.yaml +36 -0
- package/config/skills/observability/SKILL.md +430 -0
- package/config/skills/package-dev/SKILL.md +498 -0
- package/config/skills/performance/SKILL.md +378 -0
- package/config/skills/resilience/SKILL.md +573 -0
- package/config/skills/testing/SKILL.md +398 -0
- package/config/skills/testing-patterns/SKILL.md +276 -0
- package/config/skills/typescript/SKILL.md +152 -0
- package/config/templates/CLAUDE.md +70 -0
- package/config/templates/README.md +117 -0
- package/config/templates/steering/adr-template.md +102 -0
- package/config/templates/steering/product.md +60 -0
- package/config/templates/steering/rfc-template.md +159 -0
- package/config/templates/steering/structure.md +146 -0
- package/config/templates/steering/tech.md +85 -0
- package/package.json +40 -0
- package/src/install.js +163 -0
- package/src/report.js +310 -0
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: resilience-patterns
|
|
3
|
+
description: Fault tolerance - circuit breakers, retries, timeouts, bulkheads, graceful degradation
|
|
4
|
+
keywords: [resilience, circuit breaker, retry, timeout, bulkhead, fault tolerance, graceful degradation]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Resilience Patterns
|
|
8
|
+
|
|
9
|
+
Fault tolerance, retries, and graceful degradation for production systems.
|
|
10
|
+
|
|
11
|
+
## Core Principles
|
|
12
|
+
|
|
13
|
+
1. **Fail fast, recover gracefully**
|
|
14
|
+
2. **Isolate failures** — one failing dependency shouldn't bring down the system
|
|
15
|
+
3. **Always have a fallback**
|
|
16
|
+
4. **Make failures visible** — log, metric, alert
|
|
17
|
+
|
|
18
|
+
## Circuit Breaker
|
|
19
|
+
|
|
20
|
+
Prevent cascading failures by stopping calls to failing services.
|
|
21
|
+
|
|
22
|
+
### States
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
CLOSED → (consecutive failures reach threshold) → OPEN
|
|
26
|
+
↓
|
|
27
|
+
OPEN → (wait period expires) → HALF-OPEN
|
|
28
|
+
↓
|
|
29
|
+
HALF-OPEN → (successes reach threshold) → CLOSED
|
|
30
|
+
→ (failure) → OPEN
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Implementation
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
/** The three states a circuit breaker moves between. */
enum CircuitState {
  /** Calls are passed through to the dependency. */
  CLOSED = 'CLOSED',
  /** Calls are rejected without reaching the dependency. */
  OPEN = 'OPEN',
  /** Trial calls are passed through to decide whether to close or reopen. */
  HALF_OPEN = 'HALF_OPEN',
}
|
|
41
|
+
|
|
42
|
+
/** Tuning knobs for a circuit breaker. */
interface CircuitBreakerOptions {
  /** Number of failures that trips the circuit. */
  failureThreshold: number;
  /** Number of successes needed to close the circuit again. */
  successThreshold: number;
  /** How long the circuit stays in the OPEN state, in milliseconds. */
  timeout: number;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Circuit breaker guarding calls to one dependency.
 *
 * - CLOSED: calls pass through. Each success resets the failure counter, so
 *   `failureThreshold` consecutive failures open the circuit.
 * - OPEN: calls are rejected with `CircuitOpenError` until `timeout` ms have
 *   elapsed since the last recorded failure; the next call then moves the
 *   circuit to HALF_OPEN.
 * - HALF_OPEN: calls pass through; `successThreshold` successes close the
 *   circuit, and any failure reopens it.
 */
class CircuitBreaker {
  /** Values used for every option the caller does not supply. */
  private static readonly DEFAULT_OPTIONS: CircuitBreakerOptions = {
    failureThreshold: 5,
    successThreshold: 3,
    timeout: 30000,
  };

  private readonly options: CircuitBreakerOptions;
  private state: CircuitState = CircuitState.CLOSED;
  private failureCount = 0;
  private successCount = 0;
  private lastFailureTime = 0;

  /**
   * @param name Identifier used in log entries and in `CircuitOpenError`.
   * @param options Any subset of the options; missing fields fall back to the
   *   defaults (5 failures, 3 successes, 30000 ms).
   */
  constructor(
    private readonly name: string,
    options: Partial<CircuitBreakerOptions> = {}
  ) {
    // Merge instead of replace so a caller can override a single field.
    this.options = { ...CircuitBreaker.DEFAULT_OPTIONS, ...options };
  }

  /**
   * Runs `operation` through the breaker.
   *
   * @returns The value `operation` resolves with.
   * @throws CircuitOpenError when the circuit is OPEN and the wait period has
   *   not elapsed; `operation` is not invoked in that case.
   * @throws Whatever `operation` rejects with, after recording the failure.
   */
  async execute<T>(operation: () => Promise<T>): Promise<T> {
    if (this.state === CircuitState.OPEN) {
      const waited = Date.now() - this.lastFailureTime;
      if (waited < this.options.timeout) {
        throw new CircuitOpenError(this.name);
      }
      // Wait period is over: let trial calls through and start counting successes.
      this.state = CircuitState.HALF_OPEN;
      this.successCount = 0;
    }

    try {
      const result = await operation();
      this.onSuccess();
      return result;
    } catch (error) {
      this.onFailure();
      throw error;
    }
  }

  /** Records a success; closes the circuit once enough trial calls succeeded. */
  private onSuccess(): void {
    this.failureCount = 0;

    if (this.state !== CircuitState.HALF_OPEN) {
      return;
    }

    this.successCount++;
    if (this.successCount >= this.options.successThreshold) {
      this.state = CircuitState.CLOSED;
      logger.info('Circuit closed', { circuit: this.name });
    }
  }

  /** Records a failure; opens (or reopens) the circuit when warranted. */
  private onFailure(): void {
    this.failureCount++;
    this.lastFailureTime = Date.now();

    if (this.state === CircuitState.HALF_OPEN) {
      // A single failed trial call is enough to reopen.
      this.state = CircuitState.OPEN;
      logger.warn('Circuit reopened', { circuit: this.name });
    } else if (this.failureCount >= this.options.failureThreshold) {
      this.state = CircuitState.OPEN;
      logger.warn('Circuit opened', {
        circuit: this.name,
        failures: this.failureCount,
      });
    }
  }

  /** Current state of the circuit. */
  getState(): CircuitState {
    return this.state;
  }
}
|
|
115
|
+
|
|
116
|
+
/** Error raised when a call is rejected because the named circuit is open. */
class CircuitOpenError extends AppError {
  /** @param circuitName Name of the circuit, embedded in the error message. */
  constructor(circuitName: string) {
    const message = `Circuit ${circuitName} is open`;
    // Passes 'CIRCUIT_OPEN' and 503 through to AppError unchanged.
    super('CIRCUIT_OPEN', message, 503);
  }
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Usage
|
|
124
|
+
|
|
125
|
+
```typescript
|
|
126
|
+
// One breaker per external dependency: this one guards the payment service only.
const paymentCircuit = new CircuitBreaker('payment-service', {
  timeout: 30000,
  successThreshold: 3,
  failureThreshold: 5,
});

/** Charges `order` through the payment service, guarded by `paymentCircuit`. */
async function processPayment(order: Order): Promise<PaymentResult> {
  const charge = async () => paymentService.charge(order);
  return paymentCircuit.execute(charge);
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Retry with Backoff
|
|
141
|
+
|
|
142
|
+
### Configuration
|
|
143
|
+
|
|
144
|
+
| Operation | Timeout | Max Retries | Backoff |
|
|
145
|
+
|-----------|---------|-------------|---------|
|
|
146
|
+
| HTTP (external) | 10s | 3 | Exponential |
|
|
147
|
+
| HTTP (internal) | 5s | 2 | Linear |
|
|
148
|
+
| Database | 5s | 2 | Linear |
|
|
149
|
+
| Cache | 1s | 1 | None |
|
|
150
|
+
| Message queue | 30s | 5 | Exponential |
|
|
151
|
+
|
|
152
|
+
### Implementation
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
/** Settings that control how an operation is retried. */
interface RetryOptions {
  /** Number of retries allowed after the first attempt. */
  maxRetries: number;
  /** Base wait between attempts, in milliseconds. */
  baseDelay: number;
  /** Upper bound on the wait between attempts, in milliseconds. */
  maxDelay: number;
  /** How the wait grows from one attempt to the next. */
  backoff: 'linear' | 'exponential' | 'none';
  /** Optional predicate deciding whether a given error should be retried. */
  retryOn?: (error: Error) => boolean;
}
|
|
162
|
+
|
|
163
|
+
/** Retry settings applied when the caller supplies none of their own. */
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
  backoff: 'exponential',
  baseDelay: 1_000, // ms
  maxDelay: 30_000, // ms
  maxRetries: 3,
};
|
|
169
|
+
|
|
170
|
+
/**
 * Runs `operation`, retrying it when it fails with a retryable error.
 *
 * The operation is always attempted at least once and at most
 * `maxRetries + 1` times. Between attempts the function logs a warning and
 * sleeps for the delay returned by `calculateDelay`.
 *
 * @param operation Factory invoked once per attempt.
 * @param options Overrides merged over `DEFAULT_RETRY_OPTIONS`. When `retryOn`
 *   is given it decides retryability; otherwise `isRetryable` does.
 * @returns The value of the first successful attempt.
 * @throws The value thrown by the last attempt, unchanged, once the error is
 *   not retryable or the retries are used up.
 */
async function withRetry<T>(
  operation: () => Promise<T>,
  options: Partial<RetryOptions> = {}
): Promise<T> {
  const opts: RetryOptions = { ...DEFAULT_RETRY_OPTIONS, ...options };

  // A negative or NaN budget must still allow the first attempt; otherwise the
  // loop would never call `operation` and nothing meaningful could be thrown.
  const requested = opts.maxRetries ?? DEFAULT_RETRY_OPTIONS.maxRetries;
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, requested);

  for (let attempt = 1; ; attempt++) {
    try {
      return await operation();
    } catch (thrown: unknown) {
      // Anything can be thrown in JavaScript; normalise so `.message` and the
      // retry predicates are safe to use, but rethrow the original value.
      const error = thrown instanceof Error ? thrown : new Error(String(thrown));

      const shouldRetry = opts.retryOn ? opts.retryOn(error) : isRetryable(error);
      if (!shouldRetry || attempt > maxRetries) {
        throw thrown;
      }

      const delay = calculateDelay(attempt, opts);

      logger.warn('Retrying operation', {
        attempt,
        maxRetries,
        delay,
        error: error.message,
      });

      await sleep(delay);
    }
  }
}
|
|
208
|
+
|
|
209
|
+
/**
 * Computes how long to wait before the next retry.
 *
 * The raw delay depends on `opts.backoff`:
 * - `'none'`: always `baseDelay`
 * - `'linear'`: `baseDelay * attempt`
 * - `'exponential'`: `baseDelay * 2^(attempt - 1)`
 *
 * The raw delay is capped at `maxDelay` first and jittered by ±10% afterwards,
 * so delays that hit the cap still vary between callers instead of all
 * collapsing onto exactly `maxDelay`. The result never exceeds `maxDelay` and
 * is never negative.
 *
 * @param attempt 1-based number of the attempt that just failed.
 * @param opts Retry settings; `baseDelay`, `maxDelay` and `backoff` are read.
 * @param random Source of numbers in [0, 1); defaults to `Math.random`.
 *   Injectable so the jitter can be made deterministic.
 * @returns Delay in milliseconds, rounded to an integer.
 */
function calculateDelay(
  attempt: number,
  opts: RetryOptions,
  random: () => number = Math.random
): number {
  let raw: number;

  switch (opts.backoff) {
    case 'linear':
      raw = opts.baseDelay * attempt;
      break;
    case 'exponential':
      raw = opts.baseDelay * Math.pow(2, attempt - 1);
      break;
    case 'none':
    default:
      // Unknown strategies (possible from untyped callers) behave like 'none'.
      raw = opts.baseDelay;
      break;
  }

  const capped = Math.min(raw, opts.maxDelay);

  // Jitter (±10%) is applied after the cap so capped delays keep their spread.
  const jitter = capped * 0.1 * (random() * 2 - 1);
  const bounded = Math.max(0, Math.min(capped + jitter, opts.maxDelay));

  return Math.round(bounded);
}
|
|
230
|
+
|
|
231
|
+
/**
 * Default policy deciding whether a failed operation is worth retrying.
 *
 * Checks, in order:
 * 1. Network failures: `ECONNREFUSED`, `ETIMEDOUT` or `ENOTFOUND`, found either
 *    in the error's `code` property (where Node.js system errors carry it) or
 *    in its message.
 * 2. `HttpError`: retryable only for status 408, 429, 500, 502, 503 or 504.
 * 3. Database failures: message mentions "deadlock" or "connection",
 *    compared case-insensitively so messages that capitalise the word
 *    (e.g. "Deadlock found ...") are recognised too.
 *
 * @param error The error thrown by the failed attempt.
 * @returns `true` when the error looks transient.
 */
function isRetryable(error: Error): boolean {
  const networkCodes = ['ECONNREFUSED', 'ETIMEDOUT', 'ENOTFOUND'];
  const retryableStatuses = [408, 429, 500, 502, 503, 504];

  const code: unknown = (error as { code?: unknown }).code;
  const message = typeof error.message === 'string' ? error.message : '';

  // Network errors
  if (typeof code === 'string' && networkCodes.includes(code)) return true;
  if (networkCodes.some((networkCode) => message.includes(networkCode))) return true;

  // HTTP errors
  if (error instanceof HttpError) {
    return retryableStatuses.includes(error.statusCode);
  }

  // Database errors
  const lowered = message.toLowerCase();
  return lowered.includes('deadlock') || lowered.includes('connection');
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Usage
|
|
251
|
+
|
|
252
|
+
```typescript
|
|
253
|
+
// Retry the external fetch up to three times, but only for 5xx HTTP errors.
const retryOnServerError = (error: Error): boolean =>
  error instanceof HttpError && error.statusCode >= 500;

const result = await withRetry(() => externalApi.fetch(id), {
  maxRetries: 3,
  backoff: 'exponential',
  retryOn: retryOnServerError,
});
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Timeouts
|
|
264
|
+
|
|
265
|
+
### Configuration
|
|
266
|
+
|
|
267
|
+
```typescript
|
|
268
|
+
/** Timeout budget per kind of operation. All values are in milliseconds. */
const TIMEOUTS = {
  // HTTP requests
  httpExternal: 10_000, // external APIs: 10s
  httpInternal: 5_000, // internal services: 5s

  // Database
  dbQuery: 5_000, // single query: 5s
  dbTransaction: 30_000, // whole transaction: 30s

  // Cache
  cacheRead: 1_000, // 1s
  cacheWrite: 2_000, // 2s

  // Message queue
  queuePublish: 5_000, // 5s
  queueConsume: 30_000, // 30s per message
} as const;
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Implementation
|
|
288
|
+
|
|
289
|
+
```typescript
|
|
290
|
+
/**
 * Waits for `operation`, but gives up after `ms` milliseconds.
 *
 * Only the waiting is bounded: the function races the promise against a timer
 * and has no way to cancel the underlying work, which keeps running after a
 * timeout.
 *
 * @param operation The already-started promise to wait for.
 * @param ms Time limit in milliseconds.
 * @param operationName Label passed to `TimeoutError` for the error message.
 * @returns The value `operation` resolves with, if it settles in time.
 * @throws TimeoutError when the timer fires first.
 * @throws Whatever `operation` rejects with, if it rejects in time.
 */
async function withTimeout<T>(
  operation: Promise<T>,
  ms: number,
  operationName: string
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(new TimeoutError(operationName, ms));
    }, ms);
  });

  try {
    // `await` (not a bare return) so the finally block runs after settlement.
    return await Promise.race([operation, deadline]);
  } finally {
    // Without this the timer stays scheduled for the full `ms` after every
    // successful call, leaking timers and keeping the event loop alive.
    clearTimeout(timer);
  }
}
|
|
303
|
+
|
|
304
|
+
/** Error raised when an operation does not settle within its time limit. */
class TimeoutError extends AppError {
  /**
   * @param operation Name of the operation, embedded in the message.
   * @param timeout Limit that was exceeded, in milliseconds.
   */
  constructor(operation: string, timeout: number) {
    const message = `Operation ${operation} timed out after ${timeout}ms`;
    // Passes 'TIMEOUT' and 504 through to AppError unchanged.
    super('TIMEOUT', message, 504);
  }
}
|
|
313
|
+
|
|
314
|
+
// Usage: bound a single database lookup by the shared query timeout.
const pendingUser = db.user.findUnique({ where: { id } });
const user = await withTimeout(pendingUser, TIMEOUTS.dbQuery, 'findUser');
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
## Graceful Degradation
|
|
323
|
+
|
|
324
|
+
### Fallback Patterns
|
|
325
|
+
|
|
326
|
+
```typescript
|
|
327
|
+
// Pattern 1: Return cached/stale data
/**
 * Fetches a product, falling back to the cached copy when the service fails.
 *
 * Cache access is best-effort in both directions: a failed cache write never
 * discards a product that was fetched successfully, and a failed cache read
 * never hides the original service error.
 *
 * @param id Product identifier; also used to build the cache key.
 * @returns The fresh product, or the cached one with `_stale: true` added.
 * @throws The product service error when there is no usable cached copy.
 */
async function getProductWithFallback(id: string): Promise<Product> {
  const cacheKey = `product:${id}`;

  let product: Product;
  try {
    product = await productService.fetch(id);
  } catch (error) {
    logger.warn('Product service failed, using cache', { id, error });

    let cached;
    try {
      cached = await cache.get(cacheKey);
    } catch (cacheError) {
      logger.warn('Product cache read failed', { id, error: cacheError });
    }

    if (cached) {
      return { ...cached, _stale: true };
    }

    throw error;
  }

  // Outside the fetch try-block on purpose: a cache outage must not turn a
  // successful fetch into a stale response or an error.
  try {
    await cache.set(cacheKey, product, 3600);
  } catch (cacheError) {
    logger.warn('Product cache write failed', { id, error: cacheError });
  }

  return product;
}
|
|
344
|
+
|
|
345
|
+
// Pattern 2: Return default value
/**
 * Returns recommendations for a user, or the default set when the
 * recommendation service fails.
 *
 * @param userId User to fetch recommendations for.
 * @returns Personalised recommendations, or `getDefaultRecommendations()` on failure.
 */
async function getRecommendations(userId: string): Promise<Product[]> {
  try {
    return await recommendationService.getFor(userId);
  } catch (error) {
    // Include the error so the degraded path can be diagnosed from the log.
    logger.warn('Recommendations failed, returning defaults', { userId, error });
    return getDefaultRecommendations();
  }
}
|
|
354
|
+
|
|
355
|
+
// Pattern 3: Disable feature
/**
 * Returns the feature flags for a user, or a fixed safe set when the feature
 * flag service fails.
 *
 * @param userId User to resolve flags for.
 * @returns The service's flags, or the safe defaults on failure.
 */
async function getFeatureFlags(userId: string): Promise<FeatureFlags> {
  try {
    return await featureFlagService.getFor(userId);
  } catch (error) {
    // Include the error so the degraded path can be diagnosed from the log.
    logger.warn('Feature flags unavailable, using safe defaults', { userId, error });

    return {
      newCheckout: false, // Disable experimental features
      darkMode: true, // Keep stable features
      betaFeatures: false,
    };
  }
}
|
|
368
|
+
|
|
369
|
+
// Pattern 4: Queue for later
/**
 * Sends a notification; if sending fails, enqueues it on
 * `notifications:retry` for a later attempt instead of failing the caller.
 *
 * @param notification The notification to deliver.
 * @throws Whatever `queue.add` rejects with, if the fallback enqueue itself fails.
 */
async function sendNotification(notification: Notification): Promise<void> {
  try {
    await notificationService.send(notification);
  } catch (error) {
    // Include the error so the reason for the deferral is visible in the log.
    logger.warn('Notification failed, queuing for retry', { notification, error });

    await queue.add('notifications:retry', notification, {
      delay: 60000,
      attempts: 5,
    });
  }
}
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
## Bulkhead Pattern
|
|
384
|
+
|
|
385
|
+
Isolate resources to prevent one consumer from exhausting them.
|
|
386
|
+
|
|
387
|
+
```typescript
|
|
388
|
+
/**
 * Limits how many operations run at once, queueing the overflow.
 *
 * At most `maxConcurrent` operations run concurrently. Further callers wait in
 * a FIFO queue of up to `maxQueue` entries; beyond that, calls are rejected.
 */
class Bulkhead {
  /** Number of slots currently held (running operations plus handed-over slots). */
  private active = 0;
  /** Resolvers of callers waiting for a slot, oldest first. */
  private queue: Array<() => void> = [];

  /**
   * @param name Identifier passed to `BulkheadFullError`.
   * @param maxConcurrent Maximum number of operations running at once.
   * @param maxQueue Maximum number of waiting callers (default 100).
   */
  constructor(
    private name: string,
    private maxConcurrent: number,
    private maxQueue: number = 100
  ) {}

  /**
   * Runs `operation` once a slot is available.
   *
   * @returns The value `operation` resolves with.
   * @throws BulkheadFullError when all slots are taken and the queue is full.
   * @throws Whatever `operation` rejects with.
   */
  async execute<T>(operation: () => Promise<T>): Promise<T> {
    if (this.active >= this.maxConcurrent) {
      if (this.queue.length >= this.maxQueue) {
        throw new BulkheadFullError(this.name);
      }

      // The finishing operation hands its slot over directly, so `active`
      // already accounts for this caller when the promise resolves.
      await new Promise<void>((resolve) => {
        this.queue.push(resolve);
      });
    } else {
      this.active++;
    }

    try {
      return await operation();
    } finally {
      const next = this.queue.shift();
      if (next) {
        // Transfer the slot without releasing it. Decrementing here and
        // re-incrementing in the waiter would let a new caller grab the slot
        // in between and push concurrency above `maxConcurrent`.
        next();
      } else {
        this.active--;
      }
    }
  }
}
|
|
421
|
+
|
|
422
|
+
// One bulkhead per concern, each with its own concurrency limit.
const dbBulkhead = new Bulkhead('database', 20);
const apiBulkhead = new Bulkhead('external-api', 10);

// Usage: run the lookup inside the database bulkhead.
const findUser = () => db.user.findUnique({ where: { id } });
const user = await dbBulkhead.execute(findUser);
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
## Graceful Shutdown
|
|
433
|
+
|
|
434
|
+
```typescript
|
|
435
|
+
/**
 * Shuts the process down in an orderly way on SIGTERM / SIGINT.
 *
 * Sequence: stop accepting connections, wait for open connections to finish
 * (bounded by the drain grace period), destroy whatever is still open, run the
 * caller's cleanup, then exit with code 0. If the whole sequence takes longer
 * than `timeout`, or any step throws, the process exits with code 1.
 */
class GracefulShutdown {
  private isShuttingDown = false;
  /** Sockets currently open on the server, tracked via 'connection' / 'close'. */
  private connections = new Set<Socket>();

  /**
   * @param server Server whose connections are tracked and closed.
   * @param cleanup Caller-supplied teardown, awaited after connections are drained.
   * @param timeout Hard deadline for the whole shutdown, in ms (default 30000).
   * @param drainTimeout Longest time to wait for open connections to finish on
   *   their own, in ms (default 5000). Capped at half of `timeout` so cleanup
   *   still gets a share of the budget before the hard deadline.
   */
  constructor(
    private server: Server,
    private cleanup: () => Promise<void>,
    private timeout = 30000,
    private drainTimeout = 5000
  ) {
    this.setup();
  }

  /** Registers connection tracking and the signal handlers. */
  private setup(): void {
    // Track connections
    this.server.on('connection', (socket) => {
      this.connections.add(socket);
      socket.on('close', () => this.connections.delete(socket));
    });

    // Handle shutdown signals
    process.on('SIGTERM', () => this.shutdown('SIGTERM'));
    process.on('SIGINT', () => this.shutdown('SIGINT'));
  }

  /** Runs the shutdown sequence once; repeated signals are ignored. */
  private async shutdown(signal: string): Promise<void> {
    if (this.isShuttingDown) return;
    this.isShuttingDown = true;

    logger.info('Shutdown initiated', { signal });

    // Stop accepting new connections. The callback fires once the server has
    // fully closed; it resolves on error too, since either way there is
    // nothing left to wait for.
    const closed = new Promise<void>((resolve) => {
      this.server.close(() => resolve());
    });

    // Set deadline
    const deadline = setTimeout(() => {
      logger.error('Graceful shutdown timed out, forcing exit');
      process.exit(1);
    }, this.timeout);

    try {
      // Wait for existing requests to complete
      await this.drainConnections(closed);

      // Run cleanup (close DB, flush queues, etc.)
      await this.cleanup();

      clearTimeout(deadline);
      logger.info('Graceful shutdown complete');
      process.exit(0);
    } catch (error) {
      clearTimeout(deadline);
      logger.error('Shutdown error', { error });
      process.exit(1);
    }
  }

  /**
   * Waits until the server has closed or the grace period has elapsed,
   * whichever comes first, then destroys any connection still open.
   */
  private async drainConnections(closed: Promise<void>): Promise<void> {
    // A fixed 5s sleep would delay every shutdown even with no connections,
    // and would always overrun the hard deadline when `timeout` is 5s or less.
    const grace = Math.max(0, Math.min(this.drainTimeout, this.timeout / 2));
    await Promise.race([closed, sleep(grace)]);

    // Force close remaining connections
    for (const socket of this.connections) {
      socket.destroy();
    }
  }
}
|
|
500
|
+
|
|
501
|
+
// Usage: release the database, Redis and the queue, in that order, on shutdown.
const releaseResources = async (): Promise<void> => {
  await db.$disconnect();
  await redis.quit();
  await queue.close();
};

const shutdown = new GracefulShutdown(server, releaseResources, 30000);
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
## Health-Based Load Shedding
|
|
514
|
+
|
|
515
|
+
```typescript
|
|
516
|
+
/**
 * Snapshot of process health used for load-shedding decisions.
 *
 * NOTE(review): units are not fixed here; the default LoadShedder thresholds
 * (0.8, 0.85, 100) suggest usage values are 0-1 fractions and lag is in
 * milliseconds. Confirm against the metrics source.
 */
interface HealthMetrics {
  /** CPU usage. */
  cpuUsage: number;
  /** Memory usage. */
  memoryUsage: number;
  /** Event loop lag. */
  eventLoopLag: number;
  /** Number of active connections. */
  activeConnections: number;
}
|
|
522
|
+
|
|
523
|
+
/** Decides whether incoming load should be rejected based on health metrics. */
class LoadShedder {
  /** Limit per metric; a metric strictly above its limit triggers shedding. */
  private readonly thresholds: HealthMetrics;

  /**
   * @param overrides Limits to use instead of the defaults, for any subset of
   *   the metrics. Defaults: cpuUsage 0.8 (80%), memoryUsage 0.85 (85%),
   *   eventLoopLag 100 (ms), activeConnections 1000.
   */
  constructor(overrides: Partial<HealthMetrics> = {}) {
    this.thresholds = {
      cpuUsage: 0.8, // 80%
      memoryUsage: 0.85, // 85%
      eventLoopLag: 100, // 100ms
      activeConnections: 1000,
      ...overrides,
    };
  }

  /**
   * @param metrics Current health snapshot.
   * @returns `true` when at least one metric is strictly above its limit.
   */
  shouldShed(metrics: HealthMetrics): boolean {
    return (
      metrics.cpuUsage > this.thresholds.cpuUsage ||
      metrics.memoryUsage > this.thresholds.memoryUsage ||
      metrics.eventLoopLag > this.thresholds.eventLoopLag ||
      metrics.activeConnections > this.thresholds.activeConnections
    );
  }
}
|
|
540
|
+
|
|
541
|
+
// Middleware
/**
 * Builds a middleware that rejects requests with 503 while the process is
 * overloaded and passes them on otherwise.
 *
 * Health is sampled with `getHealthMetrics()` on every request. A rejected
 * request gets a `Retry-After: 5` header in addition to `retryAfter: 5` in the
 * JSON body, so clients and proxies that only look at headers know when to
 * come back.
 *
 * @param shedder Policy deciding whether the current metrics warrant shedding.
 * @returns The request handler.
 */
function loadSheddingMiddleware(shedder: LoadShedder) {
  const retryAfterSeconds = 5;

  return (req: Request, res: Response, next: NextFunction) => {
    const metrics = getHealthMetrics();

    if (shedder.shouldShed(metrics)) {
      logger.warn('Shedding load', { metrics, path: req.path });

      res.setHeader('Retry-After', String(retryAfterSeconds));
      res.status(503).json({
        code: 'SERVICE_OVERLOADED',
        message: 'Service temporarily unavailable',
        retryAfter: retryAfterSeconds,
      });
      return;
    }

    next();
  };
}
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
## Quality Checklist
|
|
563
|
+
|
|
564
|
+
- [ ] Circuit breakers on all external dependencies
|
|
565
|
+
- [ ] Retry with exponential backoff implemented
|
|
566
|
+
- [ ] Timeouts configured for all operations
|
|
567
|
+
- [ ] Fallback strategies for critical paths
|
|
568
|
+
- [ ] Bulkheads isolate resource pools
|
|
569
|
+
- [ ] Graceful shutdown handles SIGTERM/SIGINT
|
|
570
|
+
- [ ] Health checks expose degraded state
|
|
571
|
+
- [ ] Load shedding under pressure
|
|
572
|
+
- [ ] All failures logged with context
|
|
573
|
+
- [ ] Metrics track circuit state and retry counts
|