proagents 1.6.17 → 1.6.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +169 -0
- package/COMMANDS.md +595 -0
- package/README.md +13 -23
- package/package.json +2 -7
- package/.proagents/ai-models/README.md +0 -141
- package/.proagents/ai-models/cost-management.md +0 -362
- package/.proagents/ai-models/fallbacks.md +0 -342
- package/.proagents/ai-models/model-config.md +0 -318
- package/.proagents/ai-models/task-routing.md +0 -503
- package/.proagents/ai-training/README.md +0 -155
- package/.proagents/ai-training/continuous-learning.md +0 -413
- package/.proagents/ai-training/domain-knowledge.md +0 -378
- package/.proagents/ai-training/pattern-learning.md +0 -455
- package/.proagents/ai-training/training-data.md +0 -337
- package/.proagents/ai-training/user-preferences.md +0 -346
- package/.proagents/approval-workflows/README.md +0 -146
- package/.proagents/approval-workflows/approval-config.md +0 -332
- package/.proagents/approval-workflows/approval-stages.md +0 -503
- package/.proagents/approval-workflows/emergency-bypass.md +0 -351
- package/.proagents/approval-workflows/examples.md +0 -859
- package/.proagents/approval-workflows/notifications.md +0 -320
- package/.proagents/compliance/README.md +0 -206
- package/.proagents/compliance/access-control.md +0 -310
- package/.proagents/compliance/audit-logging.md +0 -444
- package/.proagents/compliance/compliance-frameworks.md +0 -429
- package/.proagents/compliance/reports.md +0 -491
- package/.proagents/compliance/retention-policies.md +0 -454
- package/.proagents/config-versioning/README.md +0 -120
- package/.proagents/config-versioning/changelog.md +0 -300
- package/.proagents/config-versioning/rollback.md +0 -283
- package/.proagents/config-versioning/versioning.md +0 -330
- package/.proagents/contract-testing/README.md +0 -223
- package/.proagents/contract-testing/contract-testing.md +0 -614
- package/.proagents/contract-testing/pact-integration.md +0 -507
- package/.proagents/contract-testing/schema-validation.md +0 -565
- package/.proagents/dependency-management/README.md +0 -140
- package/.proagents/dependency-management/automation.md +0 -363
- package/.proagents/dependency-management/compatibility.md +0 -319
- package/.proagents/dependency-management/security-scanning.md +0 -413
- package/.proagents/dependency-management/update-policies.md +0 -374
- package/.proagents/disaster-recovery/README.md +0 -247
- package/.proagents/disaster-recovery/automation.md +0 -366
- package/.proagents/disaster-recovery/backup-recovery.md +0 -571
- package/.proagents/disaster-recovery/incident-response.md +0 -565
- package/.proagents/disaster-recovery/rollback-procedures.md +0 -499
- package/.proagents/disaster-recovery/runbooks.md +0 -603
- package/.proagents/disaster-recovery/scenarios.md +0 -892
- package/.proagents/disaster-recovery/testing.md +0 -438
- package/.proagents/environments/README.md +0 -244
- package/.proagents/environments/configuration.md +0 -437
- package/.proagents/environments/promotion.md +0 -434
- package/.proagents/environments/setup.md +0 -420
- package/.proagents/examples/README.md +0 -55
- package/.proagents/examples/backend-nodejs/README.md +0 -188
- package/.proagents/examples/backend-nodejs/complete-conversation.md +0 -601
- package/.proagents/examples/backend-nodejs/proagents.config.yaml +0 -415
- package/.proagents/examples/backend-nodejs/workflow-example.md +0 -909
- package/.proagents/examples/fullstack-nextjs/README.md +0 -155
- package/.proagents/examples/fullstack-nextjs/complete-conversation.md +0 -604
- package/.proagents/examples/fullstack-nextjs/proagents.config.yaml +0 -287
- package/.proagents/examples/fullstack-nextjs/workflow-example.md +0 -553
- package/.proagents/examples/mobile-react-native/README.md +0 -171
- package/.proagents/examples/mobile-react-native/complete-conversation.md +0 -825
- package/.proagents/examples/mobile-react-native/proagents.config.yaml +0 -330
- package/.proagents/examples/mobile-react-native/workflow-example.md +0 -723
- package/.proagents/examples/web-frontend-react/README.md +0 -125
- package/.proagents/examples/web-frontend-react/complete-conversation.md +0 -556
- package/.proagents/examples/web-frontend-react/proagents.config.yaml +0 -183
- package/.proagents/examples/web-frontend-react/workflow-example.md +0 -603
- package/.proagents/existing-projects/README.md +0 -65
- package/.proagents/existing-projects/challenges.md +0 -861
- package/.proagents/existing-projects/coexistence-mode.md +0 -483
- package/.proagents/existing-projects/compatibility-assessment.md +0 -541
- package/.proagents/existing-projects/gradual-adoption.md +0 -515
- package/.proagents/existing-projects/migration-strategies.md +0 -788
- package/.proagents/existing-projects/pattern-reconciliation.md +0 -489
- package/.proagents/existing-projects/team-onboarding.md +0 -617
- package/.proagents/existing-projects/technical-debt-handling.md +0 -644
- package/.proagents/feature-flags/README.md +0 -263
- package/.proagents/feature-flags/ab-testing.md +0 -413
- package/.proagents/feature-flags/configuration.md +0 -420
- package/.proagents/feature-flags/kill-switches.md +0 -444
- package/.proagents/feature-flags/rollout-strategies.md +0 -392
- package/.proagents/history.log +0 -12
- package/.proagents/i18n/README.md +0 -133
- package/.proagents/i18n/extraction.md +0 -433
- package/.proagents/i18n/tms-integration.md +0 -332
- package/.proagents/i18n/translation-workflow.md +0 -413
- package/.proagents/i18n/validation.md +0 -355
- package/.proagents/logging/README.md +0 -276
- package/.proagents/logging/aggregation.md +0 -475
- package/.proagents/logging/log-levels.md +0 -376
- package/.proagents/logging/sensitive-data.md +0 -423
- package/.proagents/logging/structured-logging.md +0 -406
- package/.proagents/metrics/README.md +0 -69
- package/.proagents/metrics/code-quality-kpis.md +0 -461
- package/.proagents/metrics/deployment-metrics.md +0 -517
- package/.proagents/metrics/developer-productivity.md +0 -368
- package/.proagents/metrics/learning-effectiveness.md +0 -478
- package/.proagents/migrations/README.md +0 -77
- package/.proagents/migrations/from-claude-projects.md +0 -313
- package/.proagents/migrations/from-cursor-rules.md +0 -345
- package/.proagents/migrations/from-custom-workflows.md +0 -410
- package/.proagents/monitoring/README.md +0 -308
- package/.proagents/monitoring/alerting.md +0 -449
- package/.proagents/monitoring/dashboards.md +0 -454
- package/.proagents/monitoring/health-checks.md +0 -436
- package/.proagents/monitoring/metrics.md +0 -434
- package/.proagents/multi-project/README.md +0 -170
- package/.proagents/multi-project/coordinated-deploy.md +0 -510
- package/.proagents/multi-project/cross-project-deps.md +0 -395
- package/.proagents/multi-project/unified-changelog.md +0 -477
- package/.proagents/multi-project/walkthroughs/monorepo-setup.md +0 -787
- package/.proagents/multi-project/workspace-config.md +0 -408
- package/.proagents/notifications/README.md +0 -151
- package/.proagents/notifications/channels.md +0 -457
- package/.proagents/notifications/preferences.md +0 -415
- package/.proagents/notifications/routing.md +0 -449
- package/.proagents/notifications/scheduling.md +0 -425
- package/.proagents/notifications/templates.md +0 -446
- package/.proagents/offline-mode/README.md +0 -145
- package/.proagents/offline-mode/caching.md +0 -344
- package/.proagents/offline-mode/offline-operations.md +0 -312
- package/.proagents/offline-mode/queue-specifications.md +0 -679
- package/.proagents/offline-mode/sync.md +0 -475
- package/.proagents/parallel-features/README.md +0 -85
- package/.proagents/parallel-features/conflict-detection.md +0 -226
- package/.proagents/parallel-features/dependency-management.md +0 -392
- package/.proagents/parallel-features/merge-coordination.md +0 -506
- package/.proagents/parallel-features/tracking-system.md +0 -416
- package/.proagents/performance/README.md +0 -59
- package/.proagents/performance/bundle-analysis.md +0 -375
- package/.proagents/performance/load-testing.md +0 -563
- package/.proagents/performance/runtime-metrics.md +0 -489
- package/.proagents/performance/web-vitals.md +0 -425
- package/.proagents/plugins/README.md +0 -139
- package/.proagents/plugins/creating-plugins.md +0 -504
- package/.proagents/plugins/plugin-api.md +0 -467
- package/.proagents/plugins/plugin-registry.md +0 -276
- package/.proagents/reporting/README.md +0 -158
- package/.proagents/reporting/dashboards.md +0 -366
- package/.proagents/reporting/exports.md +0 -524
- package/.proagents/reporting/quality-metrics.md +0 -385
- package/.proagents/reporting/templates/README.md +0 -56
- package/.proagents/reporting/templates/dashboard-config.json +0 -187
- package/.proagents/reporting/templates/metrics-queries.md +0 -427
- package/.proagents/reporting/templates/react-dashboard.tsx +0 -544
- package/.proagents/reporting/templates/widgets.md +0 -451
- package/.proagents/reporting/velocity-metrics.md +0 -340
- package/.proagents/reverse-engineering/README.md +0 -151
- package/.proagents/reverse-engineering/architecture-extraction.md +0 -325
- package/.proagents/reverse-engineering/code-analysis.md +0 -377
- package/.proagents/reverse-engineering/dependency-mapping.md +0 -567
- package/.proagents/reverse-engineering/diagram-generation.md +0 -586
- package/.proagents/reverse-engineering/documentation-generation.md +0 -468
- package/.proagents/reverse-engineering/pattern-detection.md +0 -569
- package/.proagents/reverse-engineering/quality-assessment.md +0 -733
- package/.proagents/secrets/README.md +0 -278
- package/.proagents/secrets/access-control.md +0 -443
- package/.proagents/secrets/rotation.md +0 -403
- package/.proagents/secrets/scanning.md +0 -487
- package/.proagents/secrets/storage.md +0 -394
- package/.proagents/webhooks/README.md +0 -126
- package/.proagents/webhooks/endpoints.md +0 -298
- package/.proagents/webhooks/events.md +0 -316
- package/.proagents/webhooks/payloads.md +0 -325
- package/.proagents/webhooks/reliability.md +0 -363
- package/.proagents/webhooks/security.md +0 -380
|
@@ -1,436 +0,0 @@
|
|
|
1
|
-
# Health Checks
|
|
2
|
-
|
|
3
|
-
This guide covers monitoring application and service health with liveness, readiness, startup, and deep checks.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Health Check Types
|
|
8
|
-
|
|
9
|
-
| Type | Purpose | Frequency | Timeout |
|
|
10
|
-
|------|---------|-----------|---------|
|
|
11
|
-
| **Liveness** | Is the service running? | 10s | 1s |
|
|
12
|
-
| **Readiness** | Can it accept traffic? | 5s | 3s |
|
|
13
|
-
| **Startup** | Has it finished initializing? | 5s | 30s |
|
|
14
|
-
| **Deep** | Are all dependencies healthy? | 30s | 10s |
|
|
15
|
-
|
|
16
|
-
---
|
|
17
|
-
|
|
18
|
-
## Configuration
|
|
19
|
-
|
|
20
|
-
### Basic Health Endpoints
|
|
21
|
-
|
|
22
|
-
```yaml
|
|
23
|
-
# proagents.config.yaml
|
|
24
|
-
monitoring:
|
|
25
|
-
health_checks:
|
|
26
|
-
enabled: true
|
|
27
|
-
|
|
28
|
-
endpoints:
|
|
29
|
-
# Simple liveness check
|
|
30
|
-
liveness:
|
|
31
|
-
path: "/health/live"
|
|
32
|
-
method: "GET"
|
|
33
|
-
success_codes: [200]
|
|
34
|
-
|
|
35
|
-
# Readiness with dependencies
|
|
36
|
-
readiness:
|
|
37
|
-
path: "/health/ready"
|
|
38
|
-
method: "GET"
|
|
39
|
-
success_codes: [200]
|
|
40
|
-
check_dependencies: true
|
|
41
|
-
|
|
42
|
-
# Detailed health status
|
|
43
|
-
deep:
|
|
44
|
-
path: "/health"
|
|
45
|
-
method: "GET"
|
|
46
|
-
success_codes: [200]
|
|
47
|
-
include_details: true
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Dependency Checks
|
|
51
|
-
|
|
52
|
-
```yaml
|
|
53
|
-
monitoring:
|
|
54
|
-
health_checks:
|
|
55
|
-
dependencies:
|
|
56
|
-
# Database
|
|
57
|
-
database:
|
|
58
|
-
type: "postgres"
|
|
59
|
-
connection_string_env: "DATABASE_URL"
|
|
60
|
-
timeout: "3s"
|
|
61
|
-
query: "SELECT 1"
|
|
62
|
-
critical: true
|
|
63
|
-
|
|
64
|
-
# Redis
|
|
65
|
-
cache:
|
|
66
|
-
type: "redis"
|
|
67
|
-
url_env: "REDIS_URL"
|
|
68
|
-
timeout: "1s"
|
|
69
|
-
command: "PING"
|
|
70
|
-
critical: true
|
|
71
|
-
|
|
72
|
-
# External API
|
|
73
|
-
payment_api:
|
|
74
|
-
type: "http"
|
|
75
|
-
url: "https://api.stripe.com/v1/health"
|
|
76
|
-
timeout: "5s"
|
|
77
|
-
expected_status: 200
|
|
78
|
-
critical: false
|
|
79
|
-
|
|
80
|
-
# Message queue
|
|
81
|
-
message_queue:
|
|
82
|
-
type: "rabbitmq"
|
|
83
|
-
url_env: "RABBITMQ_URL"
|
|
84
|
-
timeout: "2s"
|
|
85
|
-
critical: true
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
---
|
|
89
|
-
|
|
90
|
-
## Health Response Format
|
|
91
|
-
|
|
92
|
-
### Simple Response
|
|
93
|
-
|
|
94
|
-
```json
|
|
95
|
-
{
|
|
96
|
-
"status": "healthy"
|
|
97
|
-
}
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
### Detailed Response
|
|
101
|
-
|
|
102
|
-
```json
|
|
103
|
-
{
|
|
104
|
-
"status": "healthy",
|
|
105
|
-
"timestamp": "2024-01-15T10:30:00Z",
|
|
106
|
-
"version": "1.2.3",
|
|
107
|
-
"uptime": "3d 4h 25m",
|
|
108
|
-
"checks": {
|
|
109
|
-
"database": {
|
|
110
|
-
"status": "healthy",
|
|
111
|
-
"latency_ms": 5,
|
|
112
|
-
"details": {
|
|
113
|
-
"connections_active": 10,
|
|
114
|
-
"connections_max": 50
|
|
115
|
-
}
|
|
116
|
-
},
|
|
117
|
-
"cache": {
|
|
118
|
-
"status": "healthy",
|
|
119
|
-
"latency_ms": 1,
|
|
120
|
-
"details": {
|
|
121
|
-
"memory_used": "128MB",
|
|
122
|
-
"hit_rate": "95%"
|
|
123
|
-
}
|
|
124
|
-
},
|
|
125
|
-
"payment_api": {
|
|
126
|
-
"status": "degraded",
|
|
127
|
-
"latency_ms": 850,
|
|
128
|
-
"message": "Elevated latency"
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
### Unhealthy Response
|
|
135
|
-
|
|
136
|
-
```json
|
|
137
|
-
{
|
|
138
|
-
"status": "unhealthy",
|
|
139
|
-
"timestamp": "2024-01-15T10:30:00Z",
|
|
140
|
-
"checks": {
|
|
141
|
-
"database": {
|
|
142
|
-
"status": "unhealthy",
|
|
143
|
-
"error": "Connection refused",
|
|
144
|
-
"last_healthy": "2024-01-15T10:25:00Z"
|
|
145
|
-
},
|
|
146
|
-
"cache": {
|
|
147
|
-
"status": "healthy"
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
---
|
|
154
|
-
|
|
155
|
-
## Kubernetes Integration
|
|
156
|
-
|
|
157
|
-
### Pod Health Probes
|
|
158
|
-
|
|
159
|
-
```yaml
|
|
160
|
-
monitoring:
|
|
161
|
-
health_checks:
|
|
162
|
-
kubernetes:
|
|
163
|
-
# Liveness probe
|
|
164
|
-
liveness_probe:
|
|
165
|
-
http_get:
|
|
166
|
-
path: "/health/live"
|
|
167
|
-
port: 8080
|
|
168
|
-
initial_delay_seconds: 10
|
|
169
|
-
period_seconds: 10
|
|
170
|
-
timeout_seconds: 1
|
|
171
|
-
failure_threshold: 3
|
|
172
|
-
|
|
173
|
-
# Readiness probe
|
|
174
|
-
readiness_probe:
|
|
175
|
-
http_get:
|
|
176
|
-
path: "/health/ready"
|
|
177
|
-
port: 8080
|
|
178
|
-
initial_delay_seconds: 5
|
|
179
|
-
period_seconds: 5
|
|
180
|
-
timeout_seconds: 3
|
|
181
|
-
failure_threshold: 3
|
|
182
|
-
|
|
183
|
-
# Startup probe
|
|
184
|
-
startup_probe:
|
|
185
|
-
http_get:
|
|
186
|
-
path: "/health/startup"
|
|
187
|
-
port: 8080
|
|
188
|
-
initial_delay_seconds: 0
|
|
189
|
-
period_seconds: 5
|
|
190
|
-
timeout_seconds: 30
|
|
191
|
-
failure_threshold: 30
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
### Generated Kubernetes YAML
|
|
195
|
-
|
|
196
|
-
```yaml
|
|
197
|
-
apiVersion: v1
|
|
198
|
-
kind: Pod
|
|
199
|
-
spec:
|
|
200
|
-
containers:
|
|
201
|
-
- name: app
|
|
202
|
-
livenessProbe:
|
|
203
|
-
httpGet:
|
|
204
|
-
path: /health/live
|
|
205
|
-
port: 8080
|
|
206
|
-
initialDelaySeconds: 10
|
|
207
|
-
periodSeconds: 10
|
|
208
|
-
timeoutSeconds: 1
|
|
209
|
-
failureThreshold: 3
|
|
210
|
-
|
|
211
|
-
readinessProbe:
|
|
212
|
-
httpGet:
|
|
213
|
-
path: /health/ready
|
|
214
|
-
port: 8080
|
|
215
|
-
initialDelaySeconds: 5
|
|
216
|
-
periodSeconds: 5
|
|
217
|
-
timeoutSeconds: 3
|
|
218
|
-
failureThreshold: 3
|
|
219
|
-
|
|
220
|
-
startupProbe:
|
|
221
|
-
httpGet:
|
|
222
|
-
path: /health/startup
|
|
223
|
-
port: 8080
|
|
224
|
-
periodSeconds: 5
|
|
225
|
-
failureThreshold: 30
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
---
|
|
229
|
-
|
|
230
|
-
## Custom Health Checks
|
|
231
|
-
|
|
232
|
-
### Application-Level Checks
|
|
233
|
-
|
|
234
|
-
```yaml
|
|
235
|
-
monitoring:
|
|
236
|
-
health_checks:
|
|
237
|
-
custom:
|
|
238
|
-
# Memory usage
|
|
239
|
-
memory:
|
|
240
|
-
type: "resource"
|
|
241
|
-
threshold:
|
|
242
|
-
warning: "80%"
|
|
243
|
-
critical: "95%"
|
|
244
|
-
|
|
245
|
-
# Disk space
|
|
246
|
-
disk:
|
|
247
|
-
type: "resource"
|
|
248
|
-
path: "/data"
|
|
249
|
-
threshold:
|
|
250
|
-
warning: "80%"
|
|
251
|
-
critical: "95%"
|
|
252
|
-
|
|
253
|
-
# Queue depth
|
|
254
|
-
queue_depth:
|
|
255
|
-
type: "metric"
|
|
256
|
-
metric: "queue.messages.count"
|
|
257
|
-
threshold:
|
|
258
|
-
warning: 1000
|
|
259
|
-
critical: 5000
|
|
260
|
-
|
|
261
|
-
# Error rate
|
|
262
|
-
error_rate:
|
|
263
|
-
type: "metric"
|
|
264
|
-
metric: "http.errors.rate"
|
|
265
|
-
window: "5m"
|
|
266
|
-
threshold:
|
|
267
|
-
warning: "1%"
|
|
268
|
-
critical: "5%"
|
|
269
|
-
```
|
|
270
|
-
|
|
271
|
-
### Code Implementation
|
|
272
|
-
|
|
273
|
-
```typescript
|
|
274
|
-
import { HealthChecker, HealthStatus } from '@proagents/monitoring';
|
|
275
|
-
|
|
276
|
-
const healthChecker = new HealthChecker();
|
|
277
|
-
|
|
278
|
-
// Add database check
|
|
279
|
-
healthChecker.addCheck('database', async () => {
|
|
280
|
-
try {
|
|
281
|
-
const start = Date.now();
|
|
282
|
-
await db.query('SELECT 1');
|
|
283
|
-
return {
|
|
284
|
-
status: HealthStatus.HEALTHY,
|
|
285
|
-
latency_ms: Date.now() - start,
|
|
286
|
-
};
|
|
287
|
-
} catch (error) {
|
|
288
|
-
return {
|
|
289
|
-
status: HealthStatus.UNHEALTHY,
|
|
290
|
-
error: error.message,
|
|
291
|
-
};
|
|
292
|
-
}
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
// Add custom business logic check
|
|
296
|
-
healthChecker.addCheck('order_processing', async () => {
|
|
297
|
-
const pendingOrders = await getStaleOrders();
|
|
298
|
-
if (pendingOrders > 100) {
|
|
299
|
-
return {
|
|
300
|
-
status: HealthStatus.DEGRADED,
|
|
301
|
-
message: `${pendingOrders} orders pending`,
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
return { status: HealthStatus.HEALTHY };
|
|
305
|
-
});
|
|
306
|
-
|
|
307
|
-
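// Express endpoint: responds 200 only when the overall status is 'healthy'; any other status (including degraded) responds 503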
|
|
308
|
-
app.get('/health', async (req, res) => {
|
|
309
|
-
const health = await healthChecker.check();
|
|
310
|
-
const statusCode = health.status === 'healthy' ? 200 : 503;
|
|
311
|
-
res.status(statusCode).json(health);
|
|
312
|
-
});
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
---
|
|
316
|
-
|
|
317
|
-
## Health Check Aggregation
|
|
318
|
-
|
|
319
|
-
### Service Mesh Health
|
|
320
|
-
|
|
321
|
-
```yaml
|
|
322
|
-
monitoring:
|
|
323
|
-
health_checks:
|
|
324
|
-
aggregation:
|
|
325
|
-
# Aggregate health from multiple services
|
|
326
|
-
services:
|
|
327
|
-
- name: "api-gateway"
|
|
328
|
-
url: "http://api-gateway:8080/health"
|
|
329
|
-
weight: 1.0
|
|
330
|
-
|
|
331
|
-
- name: "auth-service"
|
|
332
|
-
url: "http://auth-service:8080/health"
|
|
333
|
-
weight: 1.0
|
|
334
|
-
critical: true
|
|
335
|
-
|
|
336
|
-
- name: "user-service"
|
|
337
|
-
url: "http://user-service:8080/health"
|
|
338
|
-
weight: 0.8
|
|
339
|
-
|
|
340
|
-
# Aggregation rules
|
|
341
|
-
rules:
|
|
342
|
-
healthy: "all_critical_healthy AND healthy_percentage >= 80"
|
|
343
|
-
degraded: "all_critical_healthy AND healthy_percentage >= 50"
|
|
344
|
-
unhealthy: "any_critical_unhealthy OR healthy_percentage < 50"
|
|
345
|
-
```
|
|
346
|
-
|
|
347
|
-
### Dashboard Health Summary
|
|
348
|
-
|
|
349
|
-
```yaml
|
|
350
|
-
monitoring:
|
|
351
|
-
health_checks:
|
|
352
|
-
dashboard:
|
|
353
|
-
# Overall system health
|
|
354
|
-
system_health:
|
|
355
|
-
endpoint: "/health/system"
|
|
356
|
-
components:
|
|
357
|
-
- "api"
|
|
358
|
-
- "database"
|
|
359
|
-
- "cache"
|
|
360
|
-
- "queue"
|
|
361
|
-
|
|
362
|
-
# Per-environment health
|
|
363
|
-
environments:
|
|
364
|
-
production:
|
|
365
|
-
services: ["api-prod", "worker-prod"]
|
|
366
|
-
staging:
|
|
367
|
-
services: ["api-staging", "worker-staging"]
|
|
368
|
-
```
|
|
369
|
-
|
|
370
|
-
---
|
|
371
|
-
|
|
372
|
-
## Alerting on Health
|
|
373
|
-
|
|
374
|
-
### Health-Based Alerts
|
|
375
|
-
|
|
376
|
-
```yaml
|
|
377
|
-
monitoring:
|
|
378
|
-
health_checks:
|
|
379
|
-
alerts:
|
|
380
|
-
# Service unhealthy
|
|
381
|
-
- name: "Service Unhealthy"
|
|
382
|
-
condition: "status == unhealthy"
|
|
383
|
-
duration: "1m"
|
|
384
|
-
severity: "critical"
|
|
385
|
-
notify: ["pagerduty", "#incidents"]
|
|
386
|
-
|
|
387
|
-
# Service degraded
|
|
388
|
-
- name: "Service Degraded"
|
|
389
|
-
condition: "status == degraded"
|
|
390
|
-
duration: "5m"
|
|
391
|
-
severity: "warning"
|
|
392
|
-
notify: ["#alerts"]
|
|
393
|
-
|
|
394
|
-
# Dependency failing
|
|
395
|
-
- name: "Database Unhealthy"
|
|
396
|
-
condition: "checks.database.status == unhealthy"
|
|
397
|
-
duration: "30s"
|
|
398
|
-
severity: "critical"
|
|
399
|
-
notify: ["pagerduty", "#database-alerts"]
|
|
400
|
-
```
|
|
401
|
-
|
|
402
|
-
---
|
|
403
|
-
|
|
404
|
-
## Commands
|
|
405
|
-
|
|
406
|
-
```bash
|
|
407
|
-
# Check health
|
|
408
|
-
proagents health check
|
|
409
|
-
|
|
410
|
-
# Check specific service
|
|
411
|
-
proagents health check --service api
|
|
412
|
-
|
|
413
|
-
# Check all dependencies
|
|
414
|
-
proagents health check --deep
|
|
415
|
-
|
|
416
|
-
# View health history
|
|
417
|
-
proagents health history --last 24h
|
|
418
|
-
|
|
419
|
-
# Test health endpoints
|
|
420
|
-
proagents health test --endpoint /health/ready
|
|
421
|
-
|
|
422
|
-
# Generate Kubernetes probes
|
|
423
|
-
proagents health generate-k8s
|
|
424
|
-
```
|
|
425
|
-
|
|
426
|
-
---
|
|
427
|
-
|
|
428
|
-
## Best Practices
|
|
429
|
-
|
|
430
|
-
1. **Fast Liveness**: Keep liveness checks simple and fast
|
|
431
|
-
2. **Meaningful Readiness**: Include dependency checks in readiness so traffic is only routed to instances that can actually serve it
|
|
432
|
-
3. **Appropriate Timeouts**: Set realistic timeouts for each check
|
|
433
|
-
4. **Graceful Degradation**: Report degraded status, not just healthy/unhealthy
|
|
434
|
-
5. **Don't Over-Check**: Balance thoroughness with performance
|
|
435
|
-
6. **Include Context**: Add details to help diagnose issues
|
|
436
|
-
7. **Version the Response**: Include the app version in the health response
|