codecruise 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +111 -0
- package/bin/codecruise.js +68 -0
- package/config/CLAUDE.md +107 -0
- package/config/agents/analyst.md +48 -0
- package/config/agents/architect-reviewer.md +161 -0
- package/config/agents/architect.md +119 -0
- package/config/agents/critic.md +63 -0
- package/config/agents/developer.md +96 -0
- package/config/agents/devops.md +81 -0
- package/config/agents/orchestrator.md +91 -0
- package/config/agents/planner.md +139 -0
- package/config/agents/retro.md +52 -0
- package/config/agents/reviewer.md +101 -0
- package/config/agents/security-reviewer.md +57 -0
- package/config/agents/stack/expo/AGENT.md +473 -0
- package/config/agents/stack/expo/rules/critical.md +427 -0
- package/config/agents/stack/expo/rules/native.md +455 -0
- package/config/agents/stack/expo/rules/navigation.md +445 -0
- package/config/agents/stack/expo/rules/performance.md +415 -0
- package/config/agents/stack/fastify/AGENT.md +397 -0
- package/config/agents/stack/fastify/rules/api-design.md +283 -0
- package/config/agents/stack/fastify/rules/critical.md +232 -0
- package/config/agents/stack/fastify/rules/queues.md +303 -0
- package/config/agents/stack/fastify/rules/security.md +384 -0
- package/config/agents/stack/index.yaml +48 -0
- package/config/agents/stack/nextjs/AGENT.md +421 -0
- package/config/agents/stack/nextjs/rules/components.md +413 -0
- package/config/agents/stack/nextjs/rules/critical.md +391 -0
- package/config/agents/stack/nextjs/rules/performance.md +403 -0
- package/config/agents/stack/nextjs/rules/styling.md +334 -0
- package/config/agents/stack/shared-ts/AGENT.md +384 -0
- package/config/agents/stack/shared-ts/rules/critical.md +315 -0
- package/config/agents/stack/shared-ts/rules/patterns.md +384 -0
- package/config/agents/stack/shared-ts/rules/zod.md +427 -0
- package/config/agents/tester.md +79 -0
- package/config/commands/architect-discuss.md +366 -0
- package/config/commands/architect-list.md +160 -0
- package/config/commands/architect-review.md +111 -0
- package/config/commands/architect.md +118 -0
- package/config/commands/compact.md +118 -0
- package/config/commands/companion.md +279 -0
- package/config/commands/dashboard.md +152 -0
- package/config/commands/doctor.md +227 -0
- package/config/commands/dogfood-report.md +101 -0
- package/config/commands/flags/run-autonomous.md +110 -0
- package/config/commands/flags/run-pause.md +80 -0
- package/config/commands/ingest.md +173 -0
- package/config/commands/init.md +128 -0
- package/config/commands/metrics.md +87 -0
- package/config/commands/parallel.md +320 -0
- package/config/commands/pause.md +55 -0
- package/config/commands/plan-review.md +130 -0
- package/config/commands/plan.md +216 -0
- package/config/commands/production-check.md +308 -0
- package/config/commands/refine.md +323 -0
- package/config/commands/resume.md +72 -0
- package/config/commands/retro.md +121 -0
- package/config/commands/retry.md +75 -0
- package/config/commands/role.md +310 -0
- package/config/commands/run.md +417 -0
- package/config/commands/scope.md +85 -0
- package/config/commands/setup-permissions.md +104 -0
- package/config/commands/skip.md +75 -0
- package/config/commands/spec-forge.md +213 -0
- package/config/commands/spec-help.md +194 -0
- package/config/commands/spec-patch.md +342 -0
- package/config/commands/spec-resolve.md +110 -0
- package/config/commands/spec-review.md +153 -0
- package/config/commands/status.md +114 -0
- package/config/commands/sync.md +131 -0
- package/config/commands/task.md +138 -0
- package/config/commands/verify.md +124 -0
- package/config/hooks/README.md +632 -0
- package/config/hooks/activity-log.sh +187 -0
- package/config/hooks/anti-rationalize.sh +52 -0
- package/config/hooks/capture-verification.sh +112 -0
- package/config/hooks/collect-metrics.sh +135 -0
- package/config/hooks/enforce-file-scope.sh +75 -0
- package/config/hooks/enforce-state-machine.sh +161 -0
- package/config/hooks/enforce-tdd.sh +180 -0
- package/config/hooks/format.sh +40 -0
- package/config/hooks/lib/activity-helpers.sh +162 -0
- package/config/hooks/lib/read-settings.sh +71 -0
- package/config/hooks/load-context-skills.sh +95 -0
- package/config/hooks/notify.sh +81 -0
- package/config/hooks/pre-commit.sample +35 -0
- package/config/hooks/protect-files.sh +63 -0
- package/config/hooks/track-agents.sh +41 -0
- package/config/hooks/track-commands.sh +37 -0
- package/config/hooks/track-enforcement.sh +44 -0
- package/config/hooks/track-ooda.sh +77 -0
- package/config/hooks/validate-commit-msg.sh +35 -0
- package/config/hooks/validate-plan.sh +213 -0
- package/config/hooks/verify-criteria.sh +46 -0
- package/config/hooks/verify-todo-completion.sh +140 -0
- package/config/rules/comments.md +25 -0
- package/config/rules/decision-rules.md +308 -0
- package/config/rules/hygiene.md +247 -0
- package/config/rules/pattern-detection.md +372 -0
- package/config/rules/profiles.md +193 -0
- package/config/rules/recovery.md +83 -0
- package/config/rules/scope-detection.md +213 -0
- package/config/rules/standards.md +127 -0
- package/config/rules/workflow.md +121 -0
- package/config/schemas.md +767 -0
- package/config/settings.json +195 -0
- package/config/skills/backend/SKILL.md +734 -0
- package/config/skills/database/SKILL.md +426 -0
- package/config/skills/frontend/SKILL.md +434 -0
- package/config/skills/git/SKILL.md +396 -0
- package/config/skills/index.yaml +36 -0
- package/config/skills/observability/SKILL.md +430 -0
- package/config/skills/package-dev/SKILL.md +498 -0
- package/config/skills/performance/SKILL.md +378 -0
- package/config/skills/resilience/SKILL.md +573 -0
- package/config/skills/testing/SKILL.md +398 -0
- package/config/skills/testing-patterns/SKILL.md +276 -0
- package/config/skills/typescript/SKILL.md +152 -0
- package/config/templates/CLAUDE.md +70 -0
- package/config/templates/README.md +117 -0
- package/config/templates/steering/adr-template.md +102 -0
- package/config/templates/steering/product.md +60 -0
- package/config/templates/steering/rfc-template.md +159 -0
- package/config/templates/steering/structure.md +146 -0
- package/config/templates/steering/tech.md +85 -0
- package/package.json +40 -0
- package/src/install.js +163 -0
- package/src/report.js +310 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: observability-patterns
|
|
3
|
+
description: Logging, metrics, tracing, health checks, alerting
|
|
4
|
+
keywords: [observability, logging, metrics, tracing, health check, prometheus, correlation id, pii redaction]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Observability Standards
|
|
8
|
+
|
|
9
|
+
Logging, metrics, and tracing for production systems.
|
|
10
|
+
|
|
11
|
+
## Core Principle
|
|
12
|
+
|
|
13
|
+
**Observability must never block, crash, or slow down the application.**
|
|
14
|
+
|
|
15
|
+
Any exception inside logging/metrics logic must be isolated and safely handled.
|
|
16
|
+
|
|
17
|
+
## Structured Logging
|
|
18
|
+
|
|
19
|
+
### No `console.log`
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
// ❌ NEVER
|
|
23
|
+
console.log('User created:', user);
|
|
24
|
+
console.error('Failed to process:', error);
|
|
25
|
+
|
|
26
|
+
// ✅ ALWAYS use structured logger
|
|
27
|
+
logger.info('User created', { userId: user.id, email: redact(user.email) });
|
|
28
|
+
logger.error('Failed to process', { error: serializeError(error), context });
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Required Log Fields
|
|
32
|
+
|
|
33
|
+
Every log entry must include:
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
/** Allowed log severities: debug, info, warn, error. */
type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** Fields that every emitted log entry carries. */
interface LogEntry {
  /** ISO8601 timestamp. */
  timestamp: string;
  /** Severity of the entry. */
  level: LogLevel;
  /** Request/trace ID. */
  correlationId: string;
  /** Service name. */
  service: string;
  /** Human-readable message. */
  message: string;
  /** Structured data (sanitized). */
  context?: object;
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Logger Implementation
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
import { randomUUID } from 'crypto';
|
|
52
|
+
|
|
53
|
+
/** Minimal structured-logging contract used throughout the examples. */
interface Logger {
  debug(message: string, context?: object): void;
  info(message: string, context?: object): void;
  warn(message: string, context?: object): void;
  error(message: string, context?: object): void;
  /** Returns a logger whose entries also carry `context`. */
  child(context: object): Logger;
}

/**
 * Logger that writes one JSON object per line to stdout.
 *
 * Each entry carries the service name, a correlation ID and the merged
 * base + per-call context. Failures while building or writing an entry are
 * swallowed on purpose so that logging can never crash the caller.
 */
class StructuredLogger implements Logger {
  private baseContext: object;

  /**
   * @param service       Service name stamped on every entry.
   * @param correlationId Request/trace ID; a random UUID when omitted.
   * @param context       Base context merged into every entry.
   */
  constructor(
    private service: string,
    private correlationId: string = randomUUID(),
    context: object = {}
  ) {
    this.baseContext = context;
  }

  /** Builds the entry and writes it as a single JSON line. */
  private log(level: LogLevel, message: string, context?: object): void {
    try {
      // Always merge the base context: entries logged without a per-call
      // context must still carry the fields supplied via the constructor
      // or child(). Per-call keys win on conflict.
      const mergedContext = { ...this.baseContext, ...context };

      const entry: LogEntry = {
        timestamp: new Date().toISOString(),
        level,
        correlationId: this.correlationId,
        service: this.service,
        message,
      };

      // Keep the entry compact when there is nothing to report.
      if (Object.keys(mergedContext).length > 0) {
        entry.context = mergedContext;
      }

      // NOTE(review): process.stdout.write may be synchronous or asynchronous
      // depending on what stdout is attached to (file, TTY, pipe) — confirm
      // against the deployment target before relying on it being non-blocking.
      process.stdout.write(JSON.stringify(entry) + '\n');
    } catch {
      // Never let logging crash the app (e.g. circular context, closed stdout).
    }
  }

  /** Emitted only when LOG_LEVEL is exactly 'debug'. */
  debug(message: string, context?: object): void {
    if (process.env.LOG_LEVEL === 'debug') {
      this.log('debug', message, context);
    }
  }

  info(message: string, context?: object): void {
    this.log('info', message, context);
  }

  warn(message: string, context?: object): void {
    this.log('warn', message, context);
  }

  error(message: string, context?: object): void {
    this.log('error', message, context);
  }

  /** Creates a logger sharing service and correlation ID, with extra base context. */
  child(context: object): Logger {
    return new StructuredLogger(
      this.service,
      this.correlationId,
      { ...this.baseContext, ...context }
    );
  }
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Correlation IDs
|
|
119
|
+
|
|
120
|
+
Every request gets a correlation ID that flows through all operations.
|
|
121
|
+
|
|
122
|
+
```typescript
|
|
123
|
+
// Middleware to extract/generate correlation ID
|
|
124
|
+
/**
 * Reuses the caller's `x-correlation-id` header when present, otherwise
 * generates a fresh UUID, then exposes the ID on the request, echoes it on
 * the response and attaches a request-scoped logger.
 *
 * NOTE(review): `req.correlationId` and `req.logger` are presumably declared
 * via a Request type augmentation elsewhere — confirm.
 */
function correlationMiddleware(req: Request, res: Response, next: NextFunction) {
  // The header is client-controlled and ends up in every log line and in the
  // response, so oversized values are discarded instead of propagated.
  const maxInboundLength = 128;

  // Header values are typed string | string[] | undefined; narrow explicitly
  // instead of casting.
  const headerValue = req.headers['x-correlation-id'];
  const inboundId = Array.isArray(headerValue) ? headerValue[0] : headerValue;

  const correlationId =
    inboundId && inboundId.length <= maxInboundLength ? inboundId : randomUUID();

  // Attach to request
  req.correlationId = correlationId;

  // Include in response
  res.setHeader('x-correlation-id', correlationId);

  // Create request-scoped logger
  req.logger = new StructuredLogger('api', correlationId, {
    method: req.method,
    path: req.path,
    userAgent: req.headers['user-agent'],
  });

  next();
}
|
|
142
|
+
|
|
143
|
+
// Pass through to downstream services
|
|
144
|
+
/**
 * POSTs `payload` as JSON to the external endpoint, forwarding the request's
 * correlation ID so the downstream service can log under the same ID.
 */
async function callExternalService(req: Request, payload: unknown) {
  const outboundHeaders = {
    'Content-Type': 'application/json',
    'x-correlation-id': req.correlationId, // Propagate!
  };
  const serializedBody = JSON.stringify(payload);

  return fetch('https://api.external.com/endpoint', {
    method: 'POST',
    headers: outboundHeaders,
    body: serializedBody,
  });
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Log Levels
|
|
157
|
+
|
|
158
|
+
| Level | When to Use |
|
|
159
|
+
|-------|-------------|
|
|
160
|
+
| `debug` | Development troubleshooting, detailed flow |
|
|
161
|
+
| `info` | Business events, state changes, milestones |
|
|
162
|
+
| `warn` | Recoverable issues, degraded state, approaching limits |
|
|
163
|
+
| `error` | Failures requiring attention, unhandled exceptions |
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
// DEBUG: Detailed internal state
|
|
167
|
+
logger.debug('Cache lookup', { key, hit: !!result });
|
|
168
|
+
|
|
169
|
+
// INFO: Business events
|
|
170
|
+
logger.info('Order created', { orderId, userId, total });
|
|
171
|
+
logger.info('Payment processed', { orderId, amount, provider });
|
|
172
|
+
|
|
173
|
+
// WARN: Recoverable issues
|
|
174
|
+
logger.warn('Rate limit approaching', { current: 980, limit: 1000 });
|
|
175
|
+
logger.warn('Retry succeeded', { attempt: 3, operation: 'sendEmail' });
|
|
176
|
+
|
|
177
|
+
// ERROR: Failures
|
|
178
|
+
logger.error('Payment failed', { orderId, error: serializeError(err) });
|
|
179
|
+
logger.error('Database connection lost', { error: serializeError(err) });
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## PII Redaction
|
|
183
|
+
|
|
184
|
+
**Never log sensitive data.**
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
// Substrings that mark a key as sensitive. Matching is case-insensitive.
const SENSITIVE_FIELDS = ['password', 'token', 'secret', 'apiKey', 'ssn', 'creditCard'];

/**
 * Masks a string, keeping only its first and last two characters.
 * Empty input is returned unchanged; strings of four characters or fewer
 * are fully masked.
 */
function redact(value: string): string {
  if (!value) return value;
  if (value.length <= 4) return '****';
  return value.slice(0, 2) + '****' + value.slice(-2);
}

/**
 * Returns a copy of `obj` that is safe to log.
 *
 * - Keys containing a SENSITIVE_FIELDS entry (case-insensitive) become '[REDACTED]'.
 * - Keys containing "email" are partially masked with redact().
 * - Nested objects and arrays are sanitized recursively; arrays stay arrays.
 * - Non-object input is returned unchanged.
 */
function sanitize(obj: unknown): unknown {
  if (typeof obj !== 'object' || obj === null) return obj;

  // Preserve array shape instead of turning it into an index-keyed object.
  if (Array.isArray(obj)) return obj.map((item) => sanitize(item));

  const sanitized: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(obj)) {
    const lowerKey = key.toLowerCase();

    // Lower-case both sides: the list contains camelCase entries such as
    // 'apiKey', which would otherwise never match a lower-cased key.
    if (SENSITIVE_FIELDS.some((field) => lowerKey.includes(field.toLowerCase()))) {
      sanitized[key] = '[REDACTED]';
    } else if (lowerKey.includes('email')) {
      // Leave absent values alone rather than masking the text "undefined"/"null".
      sanitized[key] = value == null ? value : redact(String(value));
    } else if (typeof value === 'object') {
      sanitized[key] = sanitize(value);
    } else {
      sanitized[key] = value;
    }
  }

  return sanitized;
}
|
|
214
|
+
|
|
215
|
+
// Usage
|
|
216
|
+
logger.info('User updated', sanitize({ userId, email, password }));
|
|
217
|
+
// Output: { userId: '123', email: 'jo****om', password: '[REDACTED]' }
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Error Serialization
|
|
221
|
+
|
|
222
|
+
```typescript
|
|
223
|
+
/**
 * Converts any thrown value into a plain object suitable for logging.
 *
 * AppError instances keep their code, status code and context; other Error
 * instances keep name and message; anything else is stringified. Stack
 * traces are included only when NODE_ENV is not 'production'.
 */
function serializeError(error: unknown): object {
  // Stack traces are omitted in production.
  const stackFor = (err: { stack?: string }) =>
    process.env.NODE_ENV !== 'production' ? err.stack : undefined;

  if (error instanceof AppError) {
    const { name, code, message, statusCode, context } = error;
    return { name, code, message, statusCode, context, stack: stackFor(error) };
  }

  if (error instanceof Error) {
    const { name, message } = error;
    return { name, message, stack: stackFor(error) };
  }

  return { message: String(error) };
}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Metrics
|
|
248
|
+
|
|
249
|
+
### Key Metrics to Track
|
|
250
|
+
|
|
251
|
+
| Category | Metrics |
|
|
252
|
+
|----------|---------|
|
|
253
|
+
| **HTTP** | Request count, latency histogram, error rate by status |
|
|
254
|
+
| **Database** | Query count, latency, connection pool usage |
|
|
255
|
+
| **Cache** | Hit/miss ratio, latency |
|
|
256
|
+
| **Queue** | Queue depth, processing time, error rate |
|
|
257
|
+
| **Business** | Signups, orders, payments, conversions |
|
|
258
|
+
|
|
259
|
+
### Implementation Pattern
|
|
260
|
+
|
|
261
|
+
```typescript
|
|
262
|
+
/** Minimal metrics backend contract used by the examples. */
interface MetricsClient {
  increment(metric: string, tags?: Record<string, string>): void;
  gauge(metric: string, value: number, tags?: Record<string, string>): void;
  histogram(metric: string, value: number, tags?: Record<string, string>): void;
  timing(metric: string, duration: number, tags?: Record<string, string>): void;
}

/**
 * Request timing middleware.
 *
 * Records duration and count for every request once the response finishes,
 * plus an error counter for 5xx responses. Metric names:
 * `http.request.duration`, `http.request.count`, `http.request.error`.
 *
 * @param metrics Client that receives the measurements.
 * @returns Middleware that registers a 'finish' listener and calls `next()`.
 */
function metricsMiddleware(metrics: MetricsClient) {
  return (req: Request, res: Response, next: NextFunction) => {
    const start = Date.now();

    res.on('finish', () => {
      // An exception thrown inside an event listener would surface as an
      // uncaught exception; a failing metrics client must never do that.
      try {
        const duration = Date.now() - start;
        const tags = {
          method: req.method,
          // Use the route pattern, not the raw URL, to keep tag cardinality bounded.
          path: req.route?.path || 'unknown',
          status: String(res.statusCode),
        };

        metrics.timing('http.request.duration', duration, tags);
        metrics.increment('http.request.count', tags);

        if (res.statusCode >= 500) {
          metrics.increment('http.request.error', tags);
        }
      } catch {
        // Observability must never crash the application.
      }
    });

    next();
  };
}
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## Distributed Tracing
|
|
296
|
+
|
|
297
|
+
### Trace Context Propagation
|
|
298
|
+
|
|
299
|
+
```typescript
|
|
300
|
+
/** Identifiers describing the current span within a trace. */
interface TraceContext {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
}

/**
 * Builds the trace context for an incoming request.
 *
 * A valid W3C `traceparent` header (`version-traceid-parentid-flags`)
 * continues the caller's trace with a new span whose parent is the caller's
 * span. A missing or malformed header starts a new trace, so garbage from
 * the wire never ends up in `traceId`.
 *
 * @param headers Incoming request headers, keyed by lower-case name.
 * @returns Context with a freshly generated 16-hex-character span ID.
 */
function extractTraceContext(headers: Record<string, string>): TraceContext {
  // W3C Trace Context format: 2-hex version, 32-hex trace ID, 16-hex parent
  // ID, 2-hex flags; future versions may append further fields.
  const traceparentPattern =
    /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-[0-9a-f]{2}(-.*)?$/;
  const allZeros = /^0+$/;

  const newSpanId = () => randomUUID().replace(/-/g, '').slice(0, 16);

  const traceparent = headers['traceparent'];
  const match = traceparent ? traceparentPattern.exec(traceparent) : null;

  if (match) {
    const [, version, traceId, parentSpanId, trailing] = match;
    const isValid =
      version !== 'ff' && // reserved, always invalid
      !(version === '00' && trailing !== undefined) && // v00 has exactly four fields
      !allZeros.test(traceId) &&
      !allZeros.test(parentSpanId);

    if (isValid) {
      return {
        traceId,
        spanId: newSpanId(),
        parentSpanId,
      };
    }
  }

  // No usable inbound context: start a new trace.
  return {
    traceId: randomUUID().replace(/-/g, ''),
    spanId: newSpanId(),
  };
}
|
|
324
|
+
|
|
325
|
+
/**
 * Produces the outbound headers that carry `ctx` to a downstream service:
 * a `traceparent` value with version `00` and flags `01`.
 */
function injectTraceContext(ctx: TraceContext): Record<string, string> {
  const { traceId, spanId } = ctx;
  const traceparent = `00-${traceId}-${spanId}-01`;
  return { traceparent };
}
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### Sampling
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
// Fraction of requests to trace, keyed by NODE_ENV.
const SAMPLE_RATE = {
  production: 0.1,  // 10%
  staging: 1.0,     // 100%
  development: 1.0, // 100%
};

/**
 * Decides whether the current request should be traced.
 *
 * Uses the rate configured for NODE_ENV and falls back to 10% when the
 * environment is not listed.
 */
function shouldSample(): boolean {
  // `??` rather than `||`: a configured rate of 0 means "never sample" and
  // must not be replaced by the fallback.
  const rate = SAMPLE_RATE[process.env.NODE_ENV as keyof typeof SAMPLE_RATE] ?? 0.1;
  return Math.random() < rate;
}
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Health Checks
|
|
348
|
+
|
|
349
|
+
```typescript
|
|
350
|
+
/** A named dependency probe; `check` resolves with the probe outcome. */
interface HealthCheck {
  name: string;
  check: () => Promise<{ healthy: boolean; message?: string }>;
}

// Registered dependency probes. Each one converts a failure into an
// unhealthy result with a fixed message instead of throwing.
const healthChecks: HealthCheck[] = [
  {
    name: 'database',
    check: async () => {
      let healthy = true;
      try {
        await db.raw('SELECT 1');
      } catch {
        healthy = false;
      }
      return healthy ? { healthy } : { healthy, message: 'Database unreachable' };
    },
  },
  {
    name: 'redis',
    check: async () => {
      let healthy = true;
      try {
        await redis.ping();
      } catch {
        healthy = false;
      }
      return healthy ? { healthy } : { healthy, message: 'Redis unreachable' };
    },
  },
];
|
|
379
|
+
|
|
380
|
+
/**
 * Runs every registered health check and sends the aggregate report:
 * 200 when all checks are healthy, 503 otherwise.
 *
 * A check that throws or rejects is reported as unhealthy instead of
 * rejecting the whole handler, which would leave the request without a
 * response.
 */
async function sendHealthReport(
  res: { status(code: number): { json(body: unknown): unknown } }
): Promise<void> {
  const results = await Promise.all(
    healthChecks.map(async (hc) => {
      try {
        return { name: hc.name, ...await hc.check() };
      } catch {
        // Generic message: error details are not exposed to the caller.
        return { name: hc.name, healthy: false, message: 'Health check failed' };
      }
    })
  );

  const healthy = results.every(r => r.healthy);

  res.status(healthy ? 200 : 503).json({
    status: healthy ? 'healthy' : 'unhealthy',
    checks: results,
    timestamp: new Date().toISOString(),
  });
}

// Health endpoint
app.get('/health', async (req, res) => {
  await sendHealthReport(res);
});

// Liveness probe (basic): answers without touching any dependency
app.get('/health/live', (req, res) => {
  res.status(200).json({ status: 'alive' });
});

// Readiness probe (full): same report as /health
app.get('/health/ready', async (req, res) => {
  await sendHealthReport(res);
});
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
## Alerting Thresholds
|
|
410
|
+
|
|
411
|
+
| Metric | Warning | Critical |
|
|
412
|
+
|--------|---------|----------|
|
|
413
|
+
| Error rate | > 1% | > 5% |
|
|
414
|
+
| P99 latency | > 500ms | > 2s |
|
|
415
|
+
| CPU usage | > 70% | > 90% |
|
|
416
|
+
| Memory usage | > 70% | > 90% |
|
|
417
|
+
| Queue depth | > 1000 | > 5000 |
|
|
418
|
+
| DB connections | > 70% pool | > 90% pool |
|
|
419
|
+
|
|
420
|
+
## Quality Checklist
|
|
421
|
+
|
|
422
|
+
- [ ] Structured logger used (no console.log)
|
|
423
|
+
- [ ] Correlation IDs on all requests
|
|
424
|
+
- [ ] PII redacted from logs
|
|
425
|
+
- [ ] Log levels used appropriately
|
|
426
|
+
- [ ] Key metrics tracked
|
|
427
|
+
- [ ] Health check endpoints exist
|
|
428
|
+
- [ ] Trace context propagated
|
|
429
|
+
- [ ] Errors serialized properly
|
|
430
|
+
- [ ] Logging never blocks main thread
|