proagents 1.6.17 → 1.6.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +169 -0
- package/.proagents/AGENTS.md +2 -0
- package/.proagents/AI_INSTRUCTIONS.md +13 -0
- package/.proagents/ANTIGRAVITY.md +2 -0
- package/.proagents/BOLT.md +2 -0
- package/.proagents/CHATGPT.md +2 -0
- package/.proagents/CLAUDE.md +2 -0
- package/.proagents/GEMINI.md +2 -0
- package/.proagents/GROQ.md +2 -0
- package/.proagents/KIRO.md +2 -0
- package/.proagents/LOVABLE.md +2 -0
- package/.proagents/PROAGENTS.md +2 -0
- package/.proagents/REPLIT.md +2 -0
- package/.proagents/prompts/00-project-setup.md +878 -0
- package/.proagents/prompts/04-planning.md +38 -0
- package/.proagents/prompts/12-rnd.md +957 -0
- package/.proagents/workflow-modes/entry-modes.md +27 -0
- package/.proagents/worklog/_context.template.md +47 -0
- package/COMMANDS.md +654 -0
- package/README.md +16 -24
- package/package.json +2 -7
- package/.proagents/ai-models/README.md +0 -141
- package/.proagents/ai-models/cost-management.md +0 -362
- package/.proagents/ai-models/fallbacks.md +0 -342
- package/.proagents/ai-models/model-config.md +0 -318
- package/.proagents/ai-models/task-routing.md +0 -503
- package/.proagents/ai-training/README.md +0 -155
- package/.proagents/ai-training/continuous-learning.md +0 -413
- package/.proagents/ai-training/domain-knowledge.md +0 -378
- package/.proagents/ai-training/pattern-learning.md +0 -455
- package/.proagents/ai-training/training-data.md +0 -337
- package/.proagents/ai-training/user-preferences.md +0 -346
- package/.proagents/approval-workflows/README.md +0 -146
- package/.proagents/approval-workflows/approval-config.md +0 -332
- package/.proagents/approval-workflows/approval-stages.md +0 -503
- package/.proagents/approval-workflows/emergency-bypass.md +0 -351
- package/.proagents/approval-workflows/examples.md +0 -859
- package/.proagents/approval-workflows/notifications.md +0 -320
- package/.proagents/compliance/README.md +0 -206
- package/.proagents/compliance/access-control.md +0 -310
- package/.proagents/compliance/audit-logging.md +0 -444
- package/.proagents/compliance/compliance-frameworks.md +0 -429
- package/.proagents/compliance/reports.md +0 -491
- package/.proagents/compliance/retention-policies.md +0 -454
- package/.proagents/config-versioning/README.md +0 -120
- package/.proagents/config-versioning/changelog.md +0 -300
- package/.proagents/config-versioning/rollback.md +0 -283
- package/.proagents/config-versioning/versioning.md +0 -330
- package/.proagents/contract-testing/README.md +0 -223
- package/.proagents/contract-testing/contract-testing.md +0 -614
- package/.proagents/contract-testing/pact-integration.md +0 -507
- package/.proagents/contract-testing/schema-validation.md +0 -565
- package/.proagents/dependency-management/README.md +0 -140
- package/.proagents/dependency-management/automation.md +0 -363
- package/.proagents/dependency-management/compatibility.md +0 -319
- package/.proagents/dependency-management/security-scanning.md +0 -413
- package/.proagents/dependency-management/update-policies.md +0 -374
- package/.proagents/disaster-recovery/README.md +0 -247
- package/.proagents/disaster-recovery/automation.md +0 -366
- package/.proagents/disaster-recovery/backup-recovery.md +0 -571
- package/.proagents/disaster-recovery/incident-response.md +0 -565
- package/.proagents/disaster-recovery/rollback-procedures.md +0 -499
- package/.proagents/disaster-recovery/runbooks.md +0 -603
- package/.proagents/disaster-recovery/scenarios.md +0 -892
- package/.proagents/disaster-recovery/testing.md +0 -438
- package/.proagents/environments/README.md +0 -244
- package/.proagents/environments/configuration.md +0 -437
- package/.proagents/environments/promotion.md +0 -434
- package/.proagents/environments/setup.md +0 -420
- package/.proagents/examples/README.md +0 -55
- package/.proagents/examples/backend-nodejs/README.md +0 -188
- package/.proagents/examples/backend-nodejs/complete-conversation.md +0 -601
- package/.proagents/examples/backend-nodejs/proagents.config.yaml +0 -415
- package/.proagents/examples/backend-nodejs/workflow-example.md +0 -909
- package/.proagents/examples/fullstack-nextjs/README.md +0 -155
- package/.proagents/examples/fullstack-nextjs/complete-conversation.md +0 -604
- package/.proagents/examples/fullstack-nextjs/proagents.config.yaml +0 -287
- package/.proagents/examples/fullstack-nextjs/workflow-example.md +0 -553
- package/.proagents/examples/mobile-react-native/README.md +0 -171
- package/.proagents/examples/mobile-react-native/complete-conversation.md +0 -825
- package/.proagents/examples/mobile-react-native/proagents.config.yaml +0 -330
- package/.proagents/examples/mobile-react-native/workflow-example.md +0 -723
- package/.proagents/examples/web-frontend-react/README.md +0 -125
- package/.proagents/examples/web-frontend-react/complete-conversation.md +0 -556
- package/.proagents/examples/web-frontend-react/proagents.config.yaml +0 -183
- package/.proagents/examples/web-frontend-react/workflow-example.md +0 -603
- package/.proagents/existing-projects/README.md +0 -65
- package/.proagents/existing-projects/challenges.md +0 -861
- package/.proagents/existing-projects/coexistence-mode.md +0 -483
- package/.proagents/existing-projects/compatibility-assessment.md +0 -541
- package/.proagents/existing-projects/gradual-adoption.md +0 -515
- package/.proagents/existing-projects/migration-strategies.md +0 -788
- package/.proagents/existing-projects/pattern-reconciliation.md +0 -489
- package/.proagents/existing-projects/team-onboarding.md +0 -617
- package/.proagents/existing-projects/technical-debt-handling.md +0 -644
- package/.proagents/feature-flags/README.md +0 -263
- package/.proagents/feature-flags/ab-testing.md +0 -413
- package/.proagents/feature-flags/configuration.md +0 -420
- package/.proagents/feature-flags/kill-switches.md +0 -444
- package/.proagents/feature-flags/rollout-strategies.md +0 -392
- package/.proagents/history.log +0 -12
- package/.proagents/i18n/README.md +0 -133
- package/.proagents/i18n/extraction.md +0 -433
- package/.proagents/i18n/tms-integration.md +0 -332
- package/.proagents/i18n/translation-workflow.md +0 -413
- package/.proagents/i18n/validation.md +0 -355
- package/.proagents/logging/README.md +0 -276
- package/.proagents/logging/aggregation.md +0 -475
- package/.proagents/logging/log-levels.md +0 -376
- package/.proagents/logging/sensitive-data.md +0 -423
- package/.proagents/logging/structured-logging.md +0 -406
- package/.proagents/metrics/README.md +0 -69
- package/.proagents/metrics/code-quality-kpis.md +0 -461
- package/.proagents/metrics/deployment-metrics.md +0 -517
- package/.proagents/metrics/developer-productivity.md +0 -368
- package/.proagents/metrics/learning-effectiveness.md +0 -478
- package/.proagents/migrations/README.md +0 -77
- package/.proagents/migrations/from-claude-projects.md +0 -313
- package/.proagents/migrations/from-cursor-rules.md +0 -345
- package/.proagents/migrations/from-custom-workflows.md +0 -410
- package/.proagents/monitoring/README.md +0 -308
- package/.proagents/monitoring/alerting.md +0 -449
- package/.proagents/monitoring/dashboards.md +0 -454
- package/.proagents/monitoring/health-checks.md +0 -436
- package/.proagents/monitoring/metrics.md +0 -434
- package/.proagents/multi-project/README.md +0 -170
- package/.proagents/multi-project/coordinated-deploy.md +0 -510
- package/.proagents/multi-project/cross-project-deps.md +0 -395
- package/.proagents/multi-project/unified-changelog.md +0 -477
- package/.proagents/multi-project/walkthroughs/monorepo-setup.md +0 -787
- package/.proagents/multi-project/workspace-config.md +0 -408
- package/.proagents/notifications/README.md +0 -151
- package/.proagents/notifications/channels.md +0 -457
- package/.proagents/notifications/preferences.md +0 -415
- package/.proagents/notifications/routing.md +0 -449
- package/.proagents/notifications/scheduling.md +0 -425
- package/.proagents/notifications/templates.md +0 -446
- package/.proagents/offline-mode/README.md +0 -145
- package/.proagents/offline-mode/caching.md +0 -344
- package/.proagents/offline-mode/offline-operations.md +0 -312
- package/.proagents/offline-mode/queue-specifications.md +0 -679
- package/.proagents/offline-mode/sync.md +0 -475
- package/.proagents/parallel-features/README.md +0 -85
- package/.proagents/parallel-features/conflict-detection.md +0 -226
- package/.proagents/parallel-features/dependency-management.md +0 -392
- package/.proagents/parallel-features/merge-coordination.md +0 -506
- package/.proagents/parallel-features/tracking-system.md +0 -416
- package/.proagents/performance/README.md +0 -59
- package/.proagents/performance/bundle-analysis.md +0 -375
- package/.proagents/performance/load-testing.md +0 -563
- package/.proagents/performance/runtime-metrics.md +0 -489
- package/.proagents/performance/web-vitals.md +0 -425
- package/.proagents/plugins/README.md +0 -139
- package/.proagents/plugins/creating-plugins.md +0 -504
- package/.proagents/plugins/plugin-api.md +0 -467
- package/.proagents/plugins/plugin-registry.md +0 -276
- package/.proagents/reporting/README.md +0 -158
- package/.proagents/reporting/dashboards.md +0 -366
- package/.proagents/reporting/exports.md +0 -524
- package/.proagents/reporting/quality-metrics.md +0 -385
- package/.proagents/reporting/templates/README.md +0 -56
- package/.proagents/reporting/templates/dashboard-config.json +0 -187
- package/.proagents/reporting/templates/metrics-queries.md +0 -427
- package/.proagents/reporting/templates/react-dashboard.tsx +0 -544
- package/.proagents/reporting/templates/widgets.md +0 -451
- package/.proagents/reporting/velocity-metrics.md +0 -340
- package/.proagents/reverse-engineering/README.md +0 -151
- package/.proagents/reverse-engineering/architecture-extraction.md +0 -325
- package/.proagents/reverse-engineering/code-analysis.md +0 -377
- package/.proagents/reverse-engineering/dependency-mapping.md +0 -567
- package/.proagents/reverse-engineering/diagram-generation.md +0 -586
- package/.proagents/reverse-engineering/documentation-generation.md +0 -468
- package/.proagents/reverse-engineering/pattern-detection.md +0 -569
- package/.proagents/reverse-engineering/quality-assessment.md +0 -733
- package/.proagents/secrets/README.md +0 -278
- package/.proagents/secrets/access-control.md +0 -443
- package/.proagents/secrets/rotation.md +0 -403
- package/.proagents/secrets/scanning.md +0 -487
- package/.proagents/secrets/storage.md +0 -394
- package/.proagents/webhooks/README.md +0 -126
- package/.proagents/webhooks/endpoints.md +0 -298
- package/.proagents/webhooks/events.md +0 -316
- package/.proagents/webhooks/payloads.md +0 -325
- package/.proagents/webhooks/reliability.md +0 -363
- package/.proagents/webhooks/security.md +0 -380
|
@@ -1,436 +0,0 @@
|
|
|
1
|
-
# Health Checks
|
|
2
|
-
|
|
3
|
-
Monitoring application and service health.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Health Check Types
|
|
8
|
-
|
|
9
|
-
| Type | Purpose | Frequency | Timeout |
|
|
10
|
-
|------|---------|-----------|---------|
|
|
11
|
-
| **Liveness** | Is the service running? | 10s | 1s |
|
|
12
|
-
| **Readiness** | Can it accept traffic? | 5s | 3s |
|
|
13
|
-
| **Startup** | Has it finished initializing? | 5s | 30s |
|
|
14
|
-
| **Deep** | Are all dependencies healthy? | 30s | 10s |
|
|
15
|
-
|
|
16
|
-
---
|
|
17
|
-
|
|
18
|
-
## Configuration
|
|
19
|
-
|
|
20
|
-
### Basic Health Endpoints
|
|
21
|
-
|
|
22
|
-
```yaml
|
|
23
|
-
# proagents.config.yaml
|
|
24
|
-
monitoring:
|
|
25
|
-
health_checks:
|
|
26
|
-
enabled: true
|
|
27
|
-
|
|
28
|
-
endpoints:
|
|
29
|
-
# Simple liveness check
|
|
30
|
-
liveness:
|
|
31
|
-
path: "/health/live"
|
|
32
|
-
method: "GET"
|
|
33
|
-
success_codes: [200]
|
|
34
|
-
|
|
35
|
-
# Readiness with dependencies
|
|
36
|
-
readiness:
|
|
37
|
-
path: "/health/ready"
|
|
38
|
-
method: "GET"
|
|
39
|
-
success_codes: [200]
|
|
40
|
-
check_dependencies: true
|
|
41
|
-
|
|
42
|
-
# Detailed health status
|
|
43
|
-
deep:
|
|
44
|
-
path: "/health"
|
|
45
|
-
method: "GET"
|
|
46
|
-
success_codes: [200]
|
|
47
|
-
include_details: true
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Dependency Checks
|
|
51
|
-
|
|
52
|
-
```yaml
|
|
53
|
-
monitoring:
|
|
54
|
-
health_checks:
|
|
55
|
-
dependencies:
|
|
56
|
-
# Database
|
|
57
|
-
database:
|
|
58
|
-
type: "postgres"
|
|
59
|
-
connection_string_env: "DATABASE_URL"
|
|
60
|
-
timeout: "3s"
|
|
61
|
-
query: "SELECT 1"
|
|
62
|
-
critical: true
|
|
63
|
-
|
|
64
|
-
# Redis
|
|
65
|
-
cache:
|
|
66
|
-
type: "redis"
|
|
67
|
-
url_env: "REDIS_URL"
|
|
68
|
-
timeout: "1s"
|
|
69
|
-
command: "PING"
|
|
70
|
-
critical: true
|
|
71
|
-
|
|
72
|
-
# External API
|
|
73
|
-
payment_api:
|
|
74
|
-
type: "http"
|
|
75
|
-
url: "https://api.stripe.com/v1/health"
|
|
76
|
-
timeout: "5s"
|
|
77
|
-
expected_status: 200
|
|
78
|
-
critical: false
|
|
79
|
-
|
|
80
|
-
# Message queue
|
|
81
|
-
message_queue:
|
|
82
|
-
type: "rabbitmq"
|
|
83
|
-
url_env: "RABBITMQ_URL"
|
|
84
|
-
timeout: "2s"
|
|
85
|
-
critical: true
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
---
|
|
89
|
-
|
|
90
|
-
## Health Response Format
|
|
91
|
-
|
|
92
|
-
### Simple Response
|
|
93
|
-
|
|
94
|
-
```json
|
|
95
|
-
{
|
|
96
|
-
"status": "healthy"
|
|
97
|
-
}
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
### Detailed Response
|
|
101
|
-
|
|
102
|
-
```json
|
|
103
|
-
{
|
|
104
|
-
"status": "healthy",
|
|
105
|
-
"timestamp": "2024-01-15T10:30:00Z",
|
|
106
|
-
"version": "1.2.3",
|
|
107
|
-
"uptime": "3d 4h 25m",
|
|
108
|
-
"checks": {
|
|
109
|
-
"database": {
|
|
110
|
-
"status": "healthy",
|
|
111
|
-
"latency_ms": 5,
|
|
112
|
-
"details": {
|
|
113
|
-
"connections_active": 10,
|
|
114
|
-
"connections_max": 50
|
|
115
|
-
}
|
|
116
|
-
},
|
|
117
|
-
"cache": {
|
|
118
|
-
"status": "healthy",
|
|
119
|
-
"latency_ms": 1,
|
|
120
|
-
"details": {
|
|
121
|
-
"memory_used": "128MB",
|
|
122
|
-
"hit_rate": "95%"
|
|
123
|
-
}
|
|
124
|
-
},
|
|
125
|
-
"payment_api": {
|
|
126
|
-
"status": "degraded",
|
|
127
|
-
"latency_ms": 850,
|
|
128
|
-
"message": "Elevated latency"
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
### Unhealthy Response
|
|
135
|
-
|
|
136
|
-
```json
|
|
137
|
-
{
|
|
138
|
-
"status": "unhealthy",
|
|
139
|
-
"timestamp": "2024-01-15T10:30:00Z",
|
|
140
|
-
"checks": {
|
|
141
|
-
"database": {
|
|
142
|
-
"status": "unhealthy",
|
|
143
|
-
"error": "Connection refused",
|
|
144
|
-
"last_healthy": "2024-01-15T10:25:00Z"
|
|
145
|
-
},
|
|
146
|
-
"cache": {
|
|
147
|
-
"status": "healthy"
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
---
|
|
154
|
-
|
|
155
|
-
## Kubernetes Integration
|
|
156
|
-
|
|
157
|
-
### Pod Health Probes
|
|
158
|
-
|
|
159
|
-
```yaml
|
|
160
|
-
monitoring:
|
|
161
|
-
health_checks:
|
|
162
|
-
kubernetes:
|
|
163
|
-
# Liveness probe
|
|
164
|
-
liveness_probe:
|
|
165
|
-
http_get:
|
|
166
|
-
path: "/health/live"
|
|
167
|
-
port: 8080
|
|
168
|
-
initial_delay_seconds: 10
|
|
169
|
-
period_seconds: 10
|
|
170
|
-
timeout_seconds: 1
|
|
171
|
-
failure_threshold: 3
|
|
172
|
-
|
|
173
|
-
# Readiness probe
|
|
174
|
-
readiness_probe:
|
|
175
|
-
http_get:
|
|
176
|
-
path: "/health/ready"
|
|
177
|
-
port: 8080
|
|
178
|
-
initial_delay_seconds: 5
|
|
179
|
-
period_seconds: 5
|
|
180
|
-
timeout_seconds: 3
|
|
181
|
-
failure_threshold: 3
|
|
182
|
-
|
|
183
|
-
# Startup probe
|
|
184
|
-
startup_probe:
|
|
185
|
-
http_get:
|
|
186
|
-
path: "/health/startup"
|
|
187
|
-
port: 8080
|
|
188
|
-
initial_delay_seconds: 0
|
|
189
|
-
period_seconds: 5
|
|
190
|
-
timeout_seconds: 30
|
|
191
|
-
failure_threshold: 30
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
### Generated Kubernetes YAML
|
|
195
|
-
|
|
196
|
-
```yaml
|
|
197
|
-
apiVersion: v1
|
|
198
|
-
kind: Pod
|
|
199
|
-
spec:
|
|
200
|
-
containers:
|
|
201
|
-
- name: app
|
|
202
|
-
livenessProbe:
|
|
203
|
-
httpGet:
|
|
204
|
-
path: /health/live
|
|
205
|
-
port: 8080
|
|
206
|
-
initialDelaySeconds: 10
|
|
207
|
-
periodSeconds: 10
|
|
208
|
-
timeoutSeconds: 1
|
|
209
|
-
failureThreshold: 3
|
|
210
|
-
|
|
211
|
-
readinessProbe:
|
|
212
|
-
httpGet:
|
|
213
|
-
path: /health/ready
|
|
214
|
-
port: 8080
|
|
215
|
-
initialDelaySeconds: 5
|
|
216
|
-
periodSeconds: 5
|
|
217
|
-
timeoutSeconds: 3
|
|
218
|
-
failureThreshold: 3
|
|
219
|
-
|
|
220
|
-
startupProbe:
|
|
221
|
-
httpGet:
|
|
222
|
-
path: /health/startup
|
|
223
|
-
port: 8080
|
|
224
|
-
periodSeconds: 5
|
|
225
|
-
failureThreshold: 30
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
---
|
|
229
|
-
|
|
230
|
-
## Custom Health Checks
|
|
231
|
-
|
|
232
|
-
### Application-Level Checks
|
|
233
|
-
|
|
234
|
-
```yaml
|
|
235
|
-
monitoring:
|
|
236
|
-
health_checks:
|
|
237
|
-
custom:
|
|
238
|
-
# Memory usage
|
|
239
|
-
memory:
|
|
240
|
-
type: "resource"
|
|
241
|
-
threshold:
|
|
242
|
-
warning: "80%"
|
|
243
|
-
critical: "95%"
|
|
244
|
-
|
|
245
|
-
# Disk space
|
|
246
|
-
disk:
|
|
247
|
-
type: "resource"
|
|
248
|
-
path: "/data"
|
|
249
|
-
threshold:
|
|
250
|
-
warning: "80%"
|
|
251
|
-
critical: "95%"
|
|
252
|
-
|
|
253
|
-
# Queue depth
|
|
254
|
-
queue_depth:
|
|
255
|
-
type: "metric"
|
|
256
|
-
metric: "queue.messages.count"
|
|
257
|
-
threshold:
|
|
258
|
-
warning: 1000
|
|
259
|
-
critical: 5000
|
|
260
|
-
|
|
261
|
-
# Error rate
|
|
262
|
-
error_rate:
|
|
263
|
-
type: "metric"
|
|
264
|
-
metric: "http.errors.rate"
|
|
265
|
-
window: "5m"
|
|
266
|
-
threshold:
|
|
267
|
-
warning: "1%"
|
|
268
|
-
critical: "5%"
|
|
269
|
-
```
|
|
270
|
-
|
|
271
|
-
### Code Implementation
|
|
272
|
-
|
|
273
|
-
```typescript
|
|
274
|
-
import { HealthChecker, HealthStatus } from '@proagents/monitoring';
|
|
275
|
-
|
|
276
|
-
const healthChecker = new HealthChecker();
|
|
277
|
-
|
|
278
|
-
// Add database check
|
|
279
|
-
healthChecker.addCheck('database', async () => {
|
|
280
|
-
try {
|
|
281
|
-
const start = Date.now();
|
|
282
|
-
await db.query('SELECT 1');
|
|
283
|
-
return {
|
|
284
|
-
status: HealthStatus.HEALTHY,
|
|
285
|
-
latency_ms: Date.now() - start,
|
|
286
|
-
};
|
|
287
|
-
} catch (error) {
|
|
288
|
-
return {
|
|
289
|
-
status: HealthStatus.UNHEALTHY,
|
|
290
|
-
error: error instanceof Error ? error.message : String(error),
|
|
291
|
-
};
|
|
292
|
-
}
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
// Add custom business logic check
|
|
296
|
-
healthChecker.addCheck('order_processing', async () => {
|
|
297
|
-
const pendingOrders = await getStaleOrders();
|
|
298
|
-
if (pendingOrders > 100) {
|
|
299
|
-
return {
|
|
300
|
-
status: HealthStatus.DEGRADED,
|
|
301
|
-
message: `${pendingOrders} orders pending`,
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
return { status: HealthStatus.HEALTHY };
|
|
305
|
-
});
|
|
306
|
-
|
|
307
|
-
// Express endpoint
|
|
308
|
-
app.get('/health', async (req, res) => {
|
|
309
|
-
const health = await healthChecker.check();
|
|
310
|
-
const statusCode = health.status === 'healthy' ? 200 : 503;
|
|
311
|
-
res.status(statusCode).json(health);
|
|
312
|
-
});
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
---
|
|
316
|
-
|
|
317
|
-
## Health Check Aggregation
|
|
318
|
-
|
|
319
|
-
### Service Mesh Health
|
|
320
|
-
|
|
321
|
-
```yaml
|
|
322
|
-
monitoring:
|
|
323
|
-
health_checks:
|
|
324
|
-
aggregation:
|
|
325
|
-
# Aggregate health from multiple services
|
|
326
|
-
services:
|
|
327
|
-
- name: "api-gateway"
|
|
328
|
-
url: "http://api-gateway:8080/health"
|
|
329
|
-
weight: 1.0
|
|
330
|
-
|
|
331
|
-
- name: "auth-service"
|
|
332
|
-
url: "http://auth-service:8080/health"
|
|
333
|
-
weight: 1.0
|
|
334
|
-
critical: true
|
|
335
|
-
|
|
336
|
-
- name: "user-service"
|
|
337
|
-
url: "http://user-service:8080/health"
|
|
338
|
-
weight: 0.8
|
|
339
|
-
|
|
340
|
-
# Aggregation rules
|
|
341
|
-
rules:
|
|
342
|
-
healthy: "all_critical_healthy AND healthy_percentage >= 80"
|
|
343
|
-
degraded: "all_critical_healthy AND healthy_percentage >= 50 AND healthy_percentage < 80"
|
|
344
|
-
unhealthy: "any_critical_unhealthy OR healthy_percentage < 50"
|
|
345
|
-
```
|
|
346
|
-
|
|
347
|
-
### Dashboard Health Summary
|
|
348
|
-
|
|
349
|
-
```yaml
|
|
350
|
-
monitoring:
|
|
351
|
-
health_checks:
|
|
352
|
-
dashboard:
|
|
353
|
-
# Overall system health
|
|
354
|
-
system_health:
|
|
355
|
-
endpoint: "/health/system"
|
|
356
|
-
components:
|
|
357
|
-
- "api"
|
|
358
|
-
- "database"
|
|
359
|
-
- "cache"
|
|
360
|
-
- "queue"
|
|
361
|
-
|
|
362
|
-
# Per-environment health
|
|
363
|
-
environments:
|
|
364
|
-
production:
|
|
365
|
-
services: ["api-prod", "worker-prod"]
|
|
366
|
-
staging:
|
|
367
|
-
services: ["api-staging", "worker-staging"]
|
|
368
|
-
```
|
|
369
|
-
|
|
370
|
-
---
|
|
371
|
-
|
|
372
|
-
## Alerting on Health
|
|
373
|
-
|
|
374
|
-
### Health-Based Alerts
|
|
375
|
-
|
|
376
|
-
```yaml
|
|
377
|
-
monitoring:
|
|
378
|
-
health_checks:
|
|
379
|
-
alerts:
|
|
380
|
-
# Service unhealthy
|
|
381
|
-
- name: "Service Unhealthy"
|
|
382
|
-
condition: "status == unhealthy"
|
|
383
|
-
duration: "1m"
|
|
384
|
-
severity: "critical"
|
|
385
|
-
notify: ["pagerduty", "#incidents"]
|
|
386
|
-
|
|
387
|
-
# Service degraded
|
|
388
|
-
- name: "Service Degraded"
|
|
389
|
-
condition: "status == degraded"
|
|
390
|
-
duration: "5m"
|
|
391
|
-
severity: "warning"
|
|
392
|
-
notify: ["#alerts"]
|
|
393
|
-
|
|
394
|
-
# Dependency failing
|
|
395
|
-
- name: "Database Unhealthy"
|
|
396
|
-
condition: "checks.database.status == unhealthy"
|
|
397
|
-
duration: "30s"
|
|
398
|
-
severity: "critical"
|
|
399
|
-
notify: ["pagerduty", "#database-alerts"]
|
|
400
|
-
```
|
|
401
|
-
|
|
402
|
-
---
|
|
403
|
-
|
|
404
|
-
## Commands
|
|
405
|
-
|
|
406
|
-
```bash
|
|
407
|
-
# Check health
|
|
408
|
-
proagents health check
|
|
409
|
-
|
|
410
|
-
# Check specific service
|
|
411
|
-
proagents health check --service api
|
|
412
|
-
|
|
413
|
-
# Check all dependencies
|
|
414
|
-
proagents health check --deep
|
|
415
|
-
|
|
416
|
-
# View health history
|
|
417
|
-
proagents health history --last 24h
|
|
418
|
-
|
|
419
|
-
# Test health endpoints
|
|
420
|
-
proagents health test --endpoint /health/ready
|
|
421
|
-
|
|
422
|
-
# Generate Kubernetes probes
|
|
423
|
-
proagents health generate-k8s
|
|
424
|
-
```
|
|
425
|
-
|
|
426
|
-
---
|
|
427
|
-
|
|
428
|
-
## Best Practices
|
|
429
|
-
|
|
430
|
-
1. **Fast Liveness**: Keep liveness checks simple and fast
|
|
431
|
-
2. **Meaningful Readiness**: Include checks for critical dependencies in readiness, so traffic is withheld while they are unavailable
|
|
432
|
-
3. **Appropriate Timeouts**: Set realistic timeouts for each check
|
|
433
|
-
4. **Graceful Degradation**: Report degraded status, not just healthy/unhealthy
|
|
434
|
-
5. **Don't Over-Check**: Balance thoroughness with performance
|
|
435
|
-
6. **Include Context**: Add details to help diagnose issues
|
|
436
|
-
7. **Version the Response**: Include app version in health response
|