@checkstack/healthcheck-backend 0.12.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/drizzle/0010_colorful_shinobi_shaw.sql +8 -0
- package/drizzle/meta/0010_snapshot.json +469 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +14 -13
- package/src/hooks.ts +10 -0
- package/src/index.ts +18 -4
- package/src/queue-executor.ts +24 -0
- package/src/realtime-aggregation.ts +12 -0
- package/src/router.test.ts +6 -5
- package/src/router.ts +44 -5
- package/src/schema.ts +31 -1
- package/src/service.ts +215 -27
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,70 @@
|
|
|
1
1
|
# @checkstack/healthcheck-backend
|
|
2
2
|
|
|
3
|
+
## 0.13.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 26d8bae: Distributed satellite health checks and Assignment IDE page
|
|
8
|
+
|
|
9
|
+
**Satellite System**
|
|
10
|
+
|
|
11
|
+
- New `satellite-backend`, `satellite-common`, `satellite-frontend`, and `satellite` agent packages for distributed health check execution
|
|
12
|
+
- WebSocket-based satellite connectivity with authentication, heartbeats, and live configuration push
|
|
13
|
+
- Satellite management UI with create dialog, status badges, and list page
|
|
14
|
+
|
|
15
|
+
**Live Configuration Updates**
|
|
16
|
+
|
|
17
|
+
- Added `assignmentChanged` hook to `healthcheck-backend` for cross-plugin communication
|
|
18
|
+
- `satellite-backend` subscribes to assignment changes and pushes config updates to connected satellites in real-time
|
|
19
|
+
|
|
20
|
+
**Assignment IDE Page**
|
|
21
|
+
|
|
22
|
+
- Replaced the 1028-line modal-based `SystemHealthCheckAssignment` component with a full-page IDE layout
|
|
23
|
+
- New modular components: `AssignmentTree`, `GeneralPanel`, `ThresholdsPanel`, `RetentionPanel`, `ExecutionPanel`
|
|
24
|
+
- Added unassign capability and sorted assignment lists for stable ordering
|
|
25
|
+
|
|
26
|
+
**Shared IDE Primitives**
|
|
27
|
+
|
|
28
|
+
- Extracted `IDETreeNode`, `IDETreeSection`, `IDEStatusBar`, `IDELayout` to `@checkstack/ui` for cross-plugin reuse
|
|
29
|
+
- Migrated existing health check IDE editor to use shared primitives
|
|
30
|
+
|
|
31
|
+
**Infrastructure**
|
|
32
|
+
|
|
33
|
+
- Added `Dockerfile.satellite` for containerized satellite deployment
|
|
34
|
+
- WebSocket route registry in `@checkstack/backend` and `@checkstack/backend-api`
|
|
35
|
+
|
|
36
|
+
- 26d8bae: Source attribution and filtering for satellite health checks
|
|
37
|
+
|
|
38
|
+
**Source Attribution**
|
|
39
|
+
|
|
40
|
+
- Fixed satellite result attribution: runs from satellites now correctly display their source instead of defaulting to "Local"
|
|
41
|
+
- Added `sourceId` and `sourceLabel` to both public and detailed history API responses
|
|
42
|
+
|
|
43
|
+
**Source Filtering**
|
|
44
|
+
|
|
45
|
+
- Added `sourceFilter` parameter to `getHistory`, `getDetailedHistory`, and `getDetailedAggregatedHistory` RPC endpoints
|
|
46
|
+
- Source filter supports "local" (core-only), specific satellite UUID, or all sources
|
|
47
|
+
- Filter applies to all three aggregation tiers (raw, hourly, daily)
|
|
48
|
+
|
|
49
|
+
**Frontend**
|
|
50
|
+
|
|
51
|
+
- System detail accordion shows source filter buttons (All / Local / per-satellite) next to date range filter
|
|
52
|
+
- Filter applies to both charts and recent runs table
|
|
53
|
+
- Source column added to the recent runs table with Local/Remote badges
|
|
54
|
+
- Health check history detail page includes per-satellite source filter buttons
|
|
55
|
+
|
|
56
|
+
### Patch Changes
|
|
57
|
+
|
|
58
|
+
- Updated dependencies [26d8bae]
|
|
59
|
+
- Updated dependencies [26d8bae]
|
|
60
|
+
- @checkstack/healthcheck-common@0.11.0
|
|
61
|
+
- @checkstack/satellite-backend@0.2.0
|
|
62
|
+
- @checkstack/backend-api@0.12.0
|
|
63
|
+
- @checkstack/catalog-backend@0.2.24
|
|
64
|
+
- @checkstack/command-backend@0.1.19
|
|
65
|
+
- @checkstack/integration-backend@0.1.19
|
|
66
|
+
- @checkstack/queue-api@0.2.13
|
|
67
|
+
|
|
3
68
|
## 0.12.1
|
|
4
69
|
|
|
5
70
|
### Patch Changes
|
|
package/drizzle/0010_colorful_shinobi_shaw.sql
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
DROP INDEX "health_check_aggregates_bucket_unique";--> statement-breakpoint
|
|
2
|
+
ALTER TABLE "health_check_aggregates" ADD COLUMN "source_id" text;--> statement-breakpoint
|
|
3
|
+
ALTER TABLE "health_check_aggregates" ADD COLUMN "source_label" text;--> statement-breakpoint
|
|
4
|
+
ALTER TABLE "health_check_runs" ADD COLUMN "source_id" text;--> statement-breakpoint
|
|
5
|
+
ALTER TABLE "health_check_runs" ADD COLUMN "source_label" text;--> statement-breakpoint
|
|
6
|
+
ALTER TABLE "system_health_checks" ADD COLUMN "satellite_ids" jsonb;--> statement-breakpoint
|
|
7
|
+
ALTER TABLE "system_health_checks" ADD COLUMN "include_local" boolean DEFAULT true NOT NULL;--> statement-breakpoint
|
|
8
|
+
CREATE UNIQUE INDEX "health_check_aggregates_bucket_unique" ON "health_check_aggregates" USING btree ("configuration_id","system_id","bucket_start","bucket_size","source_id");
|
|
package/drizzle/meta/0010_snapshot.json
ADDED
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "743f0b98-66a5-462e-8c35-4b2eb3093010",
|
|
3
|
+
"prevId": "b297253c-1c34-49b0-ad7e-4e06aff71d2d",
|
|
4
|
+
"version": "7",
|
|
5
|
+
"dialect": "postgresql",
|
|
6
|
+
"tables": {
|
|
7
|
+
"public.health_check_aggregates": {
|
|
8
|
+
"name": "health_check_aggregates",
|
|
9
|
+
"schema": "",
|
|
10
|
+
"columns": {
|
|
11
|
+
"id": {
|
|
12
|
+
"name": "id",
|
|
13
|
+
"type": "uuid",
|
|
14
|
+
"primaryKey": true,
|
|
15
|
+
"notNull": true,
|
|
16
|
+
"default": "gen_random_uuid()"
|
|
17
|
+
},
|
|
18
|
+
"configuration_id": {
|
|
19
|
+
"name": "configuration_id",
|
|
20
|
+
"type": "uuid",
|
|
21
|
+
"primaryKey": false,
|
|
22
|
+
"notNull": true
|
|
23
|
+
},
|
|
24
|
+
"system_id": {
|
|
25
|
+
"name": "system_id",
|
|
26
|
+
"type": "text",
|
|
27
|
+
"primaryKey": false,
|
|
28
|
+
"notNull": true
|
|
29
|
+
},
|
|
30
|
+
"bucket_start": {
|
|
31
|
+
"name": "bucket_start",
|
|
32
|
+
"type": "timestamp",
|
|
33
|
+
"primaryKey": false,
|
|
34
|
+
"notNull": true
|
|
35
|
+
},
|
|
36
|
+
"bucket_size": {
|
|
37
|
+
"name": "bucket_size",
|
|
38
|
+
"type": "bucket_size",
|
|
39
|
+
"typeSchema": "public",
|
|
40
|
+
"primaryKey": false,
|
|
41
|
+
"notNull": true
|
|
42
|
+
},
|
|
43
|
+
"run_count": {
|
|
44
|
+
"name": "run_count",
|
|
45
|
+
"type": "integer",
|
|
46
|
+
"primaryKey": false,
|
|
47
|
+
"notNull": true
|
|
48
|
+
},
|
|
49
|
+
"healthy_count": {
|
|
50
|
+
"name": "healthy_count",
|
|
51
|
+
"type": "integer",
|
|
52
|
+
"primaryKey": false,
|
|
53
|
+
"notNull": true
|
|
54
|
+
},
|
|
55
|
+
"degraded_count": {
|
|
56
|
+
"name": "degraded_count",
|
|
57
|
+
"type": "integer",
|
|
58
|
+
"primaryKey": false,
|
|
59
|
+
"notNull": true
|
|
60
|
+
},
|
|
61
|
+
"unhealthy_count": {
|
|
62
|
+
"name": "unhealthy_count",
|
|
63
|
+
"type": "integer",
|
|
64
|
+
"primaryKey": false,
|
|
65
|
+
"notNull": true
|
|
66
|
+
},
|
|
67
|
+
"latency_sum_ms": {
|
|
68
|
+
"name": "latency_sum_ms",
|
|
69
|
+
"type": "integer",
|
|
70
|
+
"primaryKey": false,
|
|
71
|
+
"notNull": false
|
|
72
|
+
},
|
|
73
|
+
"avg_latency_ms": {
|
|
74
|
+
"name": "avg_latency_ms",
|
|
75
|
+
"type": "integer",
|
|
76
|
+
"primaryKey": false,
|
|
77
|
+
"notNull": false
|
|
78
|
+
},
|
|
79
|
+
"min_latency_ms": {
|
|
80
|
+
"name": "min_latency_ms",
|
|
81
|
+
"type": "integer",
|
|
82
|
+
"primaryKey": false,
|
|
83
|
+
"notNull": false
|
|
84
|
+
},
|
|
85
|
+
"max_latency_ms": {
|
|
86
|
+
"name": "max_latency_ms",
|
|
87
|
+
"type": "integer",
|
|
88
|
+
"primaryKey": false,
|
|
89
|
+
"notNull": false
|
|
90
|
+
},
|
|
91
|
+
"p95_latency_ms": {
|
|
92
|
+
"name": "p95_latency_ms",
|
|
93
|
+
"type": "integer",
|
|
94
|
+
"primaryKey": false,
|
|
95
|
+
"notNull": false
|
|
96
|
+
},
|
|
97
|
+
"aggregated_result": {
|
|
98
|
+
"name": "aggregated_result",
|
|
99
|
+
"type": "jsonb",
|
|
100
|
+
"primaryKey": false,
|
|
101
|
+
"notNull": false
|
|
102
|
+
},
|
|
103
|
+
"tdigest_state": {
|
|
104
|
+
"name": "tdigest_state",
|
|
105
|
+
"type": "jsonb",
|
|
106
|
+
"primaryKey": false,
|
|
107
|
+
"notNull": false
|
|
108
|
+
},
|
|
109
|
+
"source_id": {
|
|
110
|
+
"name": "source_id",
|
|
111
|
+
"type": "text",
|
|
112
|
+
"primaryKey": false,
|
|
113
|
+
"notNull": false
|
|
114
|
+
},
|
|
115
|
+
"source_label": {
|
|
116
|
+
"name": "source_label",
|
|
117
|
+
"type": "text",
|
|
118
|
+
"primaryKey": false,
|
|
119
|
+
"notNull": false
|
|
120
|
+
}
|
|
121
|
+
},
|
|
122
|
+
"indexes": {
|
|
123
|
+
"health_check_aggregates_bucket_unique": {
|
|
124
|
+
"name": "health_check_aggregates_bucket_unique",
|
|
125
|
+
"columns": [
|
|
126
|
+
{
|
|
127
|
+
"expression": "configuration_id",
|
|
128
|
+
"isExpression": false,
|
|
129
|
+
"asc": true,
|
|
130
|
+
"nulls": "last"
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"expression": "system_id",
|
|
134
|
+
"isExpression": false,
|
|
135
|
+
"asc": true,
|
|
136
|
+
"nulls": "last"
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"expression": "bucket_start",
|
|
140
|
+
"isExpression": false,
|
|
141
|
+
"asc": true,
|
|
142
|
+
"nulls": "last"
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
"expression": "bucket_size",
|
|
146
|
+
"isExpression": false,
|
|
147
|
+
"asc": true,
|
|
148
|
+
"nulls": "last"
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
"expression": "source_id",
|
|
152
|
+
"isExpression": false,
|
|
153
|
+
"asc": true,
|
|
154
|
+
"nulls": "last"
|
|
155
|
+
}
|
|
156
|
+
],
|
|
157
|
+
"isUnique": true,
|
|
158
|
+
"concurrently": false,
|
|
159
|
+
"method": "btree",
|
|
160
|
+
"with": {}
|
|
161
|
+
}
|
|
162
|
+
},
|
|
163
|
+
"foreignKeys": {
|
|
164
|
+
"health_check_aggregates_configuration_id_health_check_configurations_id_fk": {
|
|
165
|
+
"name": "health_check_aggregates_configuration_id_health_check_configurations_id_fk",
|
|
166
|
+
"tableFrom": "health_check_aggregates",
|
|
167
|
+
"tableTo": "health_check_configurations",
|
|
168
|
+
"columnsFrom": [
|
|
169
|
+
"configuration_id"
|
|
170
|
+
],
|
|
171
|
+
"columnsTo": [
|
|
172
|
+
"id"
|
|
173
|
+
],
|
|
174
|
+
"onDelete": "cascade",
|
|
175
|
+
"onUpdate": "no action"
|
|
176
|
+
}
|
|
177
|
+
},
|
|
178
|
+
"compositePrimaryKeys": {},
|
|
179
|
+
"uniqueConstraints": {},
|
|
180
|
+
"policies": {},
|
|
181
|
+
"checkConstraints": {},
|
|
182
|
+
"isRLSEnabled": false
|
|
183
|
+
},
|
|
184
|
+
"public.health_check_configurations": {
|
|
185
|
+
"name": "health_check_configurations",
|
|
186
|
+
"schema": "",
|
|
187
|
+
"columns": {
|
|
188
|
+
"id": {
|
|
189
|
+
"name": "id",
|
|
190
|
+
"type": "uuid",
|
|
191
|
+
"primaryKey": true,
|
|
192
|
+
"notNull": true,
|
|
193
|
+
"default": "gen_random_uuid()"
|
|
194
|
+
},
|
|
195
|
+
"name": {
|
|
196
|
+
"name": "name",
|
|
197
|
+
"type": "text",
|
|
198
|
+
"primaryKey": false,
|
|
199
|
+
"notNull": true
|
|
200
|
+
},
|
|
201
|
+
"strategy_id": {
|
|
202
|
+
"name": "strategy_id",
|
|
203
|
+
"type": "text",
|
|
204
|
+
"primaryKey": false,
|
|
205
|
+
"notNull": true
|
|
206
|
+
},
|
|
207
|
+
"config": {
|
|
208
|
+
"name": "config",
|
|
209
|
+
"type": "jsonb",
|
|
210
|
+
"primaryKey": false,
|
|
211
|
+
"notNull": true
|
|
212
|
+
},
|
|
213
|
+
"collectors": {
|
|
214
|
+
"name": "collectors",
|
|
215
|
+
"type": "jsonb",
|
|
216
|
+
"primaryKey": false,
|
|
217
|
+
"notNull": false
|
|
218
|
+
},
|
|
219
|
+
"interval_seconds": {
|
|
220
|
+
"name": "interval_seconds",
|
|
221
|
+
"type": "integer",
|
|
222
|
+
"primaryKey": false,
|
|
223
|
+
"notNull": true
|
|
224
|
+
},
|
|
225
|
+
"is_template": {
|
|
226
|
+
"name": "is_template",
|
|
227
|
+
"type": "boolean",
|
|
228
|
+
"primaryKey": false,
|
|
229
|
+
"notNull": false,
|
|
230
|
+
"default": false
|
|
231
|
+
},
|
|
232
|
+
"paused": {
|
|
233
|
+
"name": "paused",
|
|
234
|
+
"type": "boolean",
|
|
235
|
+
"primaryKey": false,
|
|
236
|
+
"notNull": true,
|
|
237
|
+
"default": false
|
|
238
|
+
},
|
|
239
|
+
"created_at": {
|
|
240
|
+
"name": "created_at",
|
|
241
|
+
"type": "timestamp",
|
|
242
|
+
"primaryKey": false,
|
|
243
|
+
"notNull": true,
|
|
244
|
+
"default": "now()"
|
|
245
|
+
},
|
|
246
|
+
"updated_at": {
|
|
247
|
+
"name": "updated_at",
|
|
248
|
+
"type": "timestamp",
|
|
249
|
+
"primaryKey": false,
|
|
250
|
+
"notNull": true,
|
|
251
|
+
"default": "now()"
|
|
252
|
+
}
|
|
253
|
+
},
|
|
254
|
+
"indexes": {},
|
|
255
|
+
"foreignKeys": {},
|
|
256
|
+
"compositePrimaryKeys": {},
|
|
257
|
+
"uniqueConstraints": {},
|
|
258
|
+
"policies": {},
|
|
259
|
+
"checkConstraints": {},
|
|
260
|
+
"isRLSEnabled": false
|
|
261
|
+
},
|
|
262
|
+
"public.health_check_runs": {
|
|
263
|
+
"name": "health_check_runs",
|
|
264
|
+
"schema": "",
|
|
265
|
+
"columns": {
|
|
266
|
+
"id": {
|
|
267
|
+
"name": "id",
|
|
268
|
+
"type": "uuid",
|
|
269
|
+
"primaryKey": true,
|
|
270
|
+
"notNull": true,
|
|
271
|
+
"default": "gen_random_uuid()"
|
|
272
|
+
},
|
|
273
|
+
"configuration_id": {
|
|
274
|
+
"name": "configuration_id",
|
|
275
|
+
"type": "uuid",
|
|
276
|
+
"primaryKey": false,
|
|
277
|
+
"notNull": true
|
|
278
|
+
},
|
|
279
|
+
"system_id": {
|
|
280
|
+
"name": "system_id",
|
|
281
|
+
"type": "text",
|
|
282
|
+
"primaryKey": false,
|
|
283
|
+
"notNull": true
|
|
284
|
+
},
|
|
285
|
+
"status": {
|
|
286
|
+
"name": "status",
|
|
287
|
+
"type": "health_check_status",
|
|
288
|
+
"typeSchema": "public",
|
|
289
|
+
"primaryKey": false,
|
|
290
|
+
"notNull": true
|
|
291
|
+
},
|
|
292
|
+
"latency_ms": {
|
|
293
|
+
"name": "latency_ms",
|
|
294
|
+
"type": "integer",
|
|
295
|
+
"primaryKey": false,
|
|
296
|
+
"notNull": false
|
|
297
|
+
},
|
|
298
|
+
"result": {
|
|
299
|
+
"name": "result",
|
|
300
|
+
"type": "jsonb",
|
|
301
|
+
"primaryKey": false,
|
|
302
|
+
"notNull": false
|
|
303
|
+
},
|
|
304
|
+
"source_id": {
|
|
305
|
+
"name": "source_id",
|
|
306
|
+
"type": "text",
|
|
307
|
+
"primaryKey": false,
|
|
308
|
+
"notNull": false
|
|
309
|
+
},
|
|
310
|
+
"source_label": {
|
|
311
|
+
"name": "source_label",
|
|
312
|
+
"type": "text",
|
|
313
|
+
"primaryKey": false,
|
|
314
|
+
"notNull": false
|
|
315
|
+
},
|
|
316
|
+
"timestamp": {
|
|
317
|
+
"name": "timestamp",
|
|
318
|
+
"type": "timestamp",
|
|
319
|
+
"primaryKey": false,
|
|
320
|
+
"notNull": true,
|
|
321
|
+
"default": "now()"
|
|
322
|
+
}
|
|
323
|
+
},
|
|
324
|
+
"indexes": {},
|
|
325
|
+
"foreignKeys": {
|
|
326
|
+
"health_check_runs_configuration_id_health_check_configurations_id_fk": {
|
|
327
|
+
"name": "health_check_runs_configuration_id_health_check_configurations_id_fk",
|
|
328
|
+
"tableFrom": "health_check_runs",
|
|
329
|
+
"tableTo": "health_check_configurations",
|
|
330
|
+
"columnsFrom": [
|
|
331
|
+
"configuration_id"
|
|
332
|
+
],
|
|
333
|
+
"columnsTo": [
|
|
334
|
+
"id"
|
|
335
|
+
],
|
|
336
|
+
"onDelete": "cascade",
|
|
337
|
+
"onUpdate": "no action"
|
|
338
|
+
}
|
|
339
|
+
},
|
|
340
|
+
"compositePrimaryKeys": {},
|
|
341
|
+
"uniqueConstraints": {},
|
|
342
|
+
"policies": {},
|
|
343
|
+
"checkConstraints": {},
|
|
344
|
+
"isRLSEnabled": false
|
|
345
|
+
},
|
|
346
|
+
"public.system_health_checks": {
|
|
347
|
+
"name": "system_health_checks",
|
|
348
|
+
"schema": "",
|
|
349
|
+
"columns": {
|
|
350
|
+
"system_id": {
|
|
351
|
+
"name": "system_id",
|
|
352
|
+
"type": "text",
|
|
353
|
+
"primaryKey": false,
|
|
354
|
+
"notNull": true
|
|
355
|
+
},
|
|
356
|
+
"configuration_id": {
|
|
357
|
+
"name": "configuration_id",
|
|
358
|
+
"type": "uuid",
|
|
359
|
+
"primaryKey": false,
|
|
360
|
+
"notNull": true
|
|
361
|
+
},
|
|
362
|
+
"enabled": {
|
|
363
|
+
"name": "enabled",
|
|
364
|
+
"type": "boolean",
|
|
365
|
+
"primaryKey": false,
|
|
366
|
+
"notNull": true,
|
|
367
|
+
"default": true
|
|
368
|
+
},
|
|
369
|
+
"state_thresholds": {
|
|
370
|
+
"name": "state_thresholds",
|
|
371
|
+
"type": "jsonb",
|
|
372
|
+
"primaryKey": false,
|
|
373
|
+
"notNull": false
|
|
374
|
+
},
|
|
375
|
+
"retention_config": {
|
|
376
|
+
"name": "retention_config",
|
|
377
|
+
"type": "jsonb",
|
|
378
|
+
"primaryKey": false,
|
|
379
|
+
"notNull": false
|
|
380
|
+
},
|
|
381
|
+
"satellite_ids": {
|
|
382
|
+
"name": "satellite_ids",
|
|
383
|
+
"type": "jsonb",
|
|
384
|
+
"primaryKey": false,
|
|
385
|
+
"notNull": false
|
|
386
|
+
},
|
|
387
|
+
"include_local": {
|
|
388
|
+
"name": "include_local",
|
|
389
|
+
"type": "boolean",
|
|
390
|
+
"primaryKey": false,
|
|
391
|
+
"notNull": true,
|
|
392
|
+
"default": true
|
|
393
|
+
},
|
|
394
|
+
"created_at": {
|
|
395
|
+
"name": "created_at",
|
|
396
|
+
"type": "timestamp",
|
|
397
|
+
"primaryKey": false,
|
|
398
|
+
"notNull": true,
|
|
399
|
+
"default": "now()"
|
|
400
|
+
},
|
|
401
|
+
"updated_at": {
|
|
402
|
+
"name": "updated_at",
|
|
403
|
+
"type": "timestamp",
|
|
404
|
+
"primaryKey": false,
|
|
405
|
+
"notNull": true,
|
|
406
|
+
"default": "now()"
|
|
407
|
+
}
|
|
408
|
+
},
|
|
409
|
+
"indexes": {},
|
|
410
|
+
"foreignKeys": {
|
|
411
|
+
"system_health_checks_configuration_id_health_check_configurations_id_fk": {
|
|
412
|
+
"name": "system_health_checks_configuration_id_health_check_configurations_id_fk",
|
|
413
|
+
"tableFrom": "system_health_checks",
|
|
414
|
+
"tableTo": "health_check_configurations",
|
|
415
|
+
"columnsFrom": [
|
|
416
|
+
"configuration_id"
|
|
417
|
+
],
|
|
418
|
+
"columnsTo": [
|
|
419
|
+
"id"
|
|
420
|
+
],
|
|
421
|
+
"onDelete": "cascade",
|
|
422
|
+
"onUpdate": "no action"
|
|
423
|
+
}
|
|
424
|
+
},
|
|
425
|
+
"compositePrimaryKeys": {
|
|
426
|
+
"system_health_checks_system_id_configuration_id_pk": {
|
|
427
|
+
"name": "system_health_checks_system_id_configuration_id_pk",
|
|
428
|
+
"columns": [
|
|
429
|
+
"system_id",
|
|
430
|
+
"configuration_id"
|
|
431
|
+
]
|
|
432
|
+
}
|
|
433
|
+
},
|
|
434
|
+
"uniqueConstraints": {},
|
|
435
|
+
"policies": {},
|
|
436
|
+
"checkConstraints": {},
|
|
437
|
+
"isRLSEnabled": false
|
|
438
|
+
}
|
|
439
|
+
},
|
|
440
|
+
"enums": {
|
|
441
|
+
"public.bucket_size": {
|
|
442
|
+
"name": "bucket_size",
|
|
443
|
+
"schema": "public",
|
|
444
|
+
"values": [
|
|
445
|
+
"hourly",
|
|
446
|
+
"daily"
|
|
447
|
+
]
|
|
448
|
+
},
|
|
449
|
+
"public.health_check_status": {
|
|
450
|
+
"name": "health_check_status",
|
|
451
|
+
"schema": "public",
|
|
452
|
+
"values": [
|
|
453
|
+
"healthy",
|
|
454
|
+
"unhealthy",
|
|
455
|
+
"degraded"
|
|
456
|
+
]
|
|
457
|
+
}
|
|
458
|
+
},
|
|
459
|
+
"schemas": {},
|
|
460
|
+
"sequences": {},
|
|
461
|
+
"roles": {},
|
|
462
|
+
"policies": {},
|
|
463
|
+
"views": {},
|
|
464
|
+
"_meta": {
|
|
465
|
+
"columns": {},
|
|
466
|
+
"schemas": {},
|
|
467
|
+
"tables": {}
|
|
468
|
+
}
|
|
469
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@checkstack/healthcheck-backend",
|
|
3
|
-
"version": "0.12.1",
|
|
3
|
+
"version": "0.13.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "src/index.ts",
|
|
6
6
|
"checkstack": {
|
|
@@ -13,17 +13,18 @@
|
|
|
13
13
|
"lint:code": "eslint . --max-warnings 0"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
-
"@checkstack/backend-api": "0.11.
|
|
17
|
-
"@checkstack/catalog-backend": "0.2.
|
|
18
|
-
"@checkstack/catalog-common": "1.3.
|
|
19
|
-
"@checkstack/command-backend": "0.1.
|
|
20
|
-
"@checkstack/common": "0.6.
|
|
21
|
-
"@checkstack/healthcheck-common": "0.10.
|
|
22
|
-
"@checkstack/incident-common": "0.4.
|
|
23
|
-
"@checkstack/integration-backend": "0.1.
|
|
24
|
-
"@checkstack/maintenance-common": "0.4.
|
|
25
|
-
"@checkstack/queue-api": "0.2.
|
|
26
|
-
"@checkstack/
|
|
16
|
+
"@checkstack/backend-api": "0.11.1",
|
|
17
|
+
"@checkstack/catalog-backend": "0.2.23",
|
|
18
|
+
"@checkstack/catalog-common": "1.3.1",
|
|
19
|
+
"@checkstack/command-backend": "0.1.18",
|
|
20
|
+
"@checkstack/common": "0.6.5",
|
|
21
|
+
"@checkstack/healthcheck-common": "0.10.1",
|
|
22
|
+
"@checkstack/incident-common": "0.4.7",
|
|
23
|
+
"@checkstack/integration-backend": "0.1.18",
|
|
24
|
+
"@checkstack/maintenance-common": "0.4.9",
|
|
25
|
+
"@checkstack/queue-api": "0.2.12",
|
|
26
|
+
"@checkstack/satellite-backend": "0.1.0",
|
|
27
|
+
"@checkstack/signal-common": "0.1.9",
|
|
27
28
|
"@hono/zod-validator": "^0.7.6",
|
|
28
29
|
"drizzle-orm": "^0.45.0",
|
|
29
30
|
"hono": "^4.12.14",
|
|
@@ -34,7 +35,7 @@
|
|
|
34
35
|
"devDependencies": {
|
|
35
36
|
"@checkstack/drizzle-helper": "0.0.4",
|
|
36
37
|
"@checkstack/scripts": "0.1.2",
|
|
37
|
-
"@checkstack/test-utils-backend": "0.1.
|
|
38
|
+
"@checkstack/test-utils-backend": "0.1.18",
|
|
38
39
|
"@checkstack/tsconfig": "0.0.5",
|
|
39
40
|
"@types/bun": "^1.0.0",
|
|
40
41
|
"@types/tdigest": "^0.1.5",
|
package/src/hooks.ts
CHANGED
|
@@ -32,4 +32,14 @@ export const healthCheckHooks = {
|
|
|
32
32
|
totalChecks: number;
|
|
33
33
|
timestamp: string;
|
|
34
34
|
}>("healthcheck.system.healthy"),
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Emitted when a health check ↔ system association changes.
|
|
38
|
+
* Subscribers (e.g., satellite-backend) can use this to push
|
|
39
|
+
* updated assignments to connected satellites.
|
|
40
|
+
*/
|
|
41
|
+
assignmentChanged: createHook<{
|
|
42
|
+
systemId: string;
|
|
43
|
+
configurationId: string;
|
|
44
|
+
}>("healthcheck.assignment.changed"),
|
|
35
45
|
} as const;
|
package/src/index.ts
CHANGED
|
@@ -22,6 +22,7 @@ import { z } from "zod";
|
|
|
22
22
|
import { createHealthCheckRouter } from "./router";
|
|
23
23
|
import { HealthCheckService } from "./service";
|
|
24
24
|
import { catalogHooks } from "@checkstack/catalog-backend";
|
|
25
|
+
import { satelliteHooks } from "@checkstack/satellite-backend";
|
|
25
26
|
import { CatalogApi } from "@checkstack/catalog-common";
|
|
26
27
|
import { MaintenanceApi } from "@checkstack/maintenance-common";
|
|
27
28
|
import { IncidentApi } from "@checkstack/incident-common";
|
|
@@ -142,11 +143,12 @@ export default createBackendPlugin({
|
|
|
142
143
|
queueManager,
|
|
143
144
|
});
|
|
144
145
|
|
|
145
|
-
const healthCheckRouter = createHealthCheckRouter(
|
|
146
|
-
database as SafeDatabase<typeof schema>,
|
|
147
|
-
healthCheckRegistry,
|
|
146
|
+
const healthCheckRouter = createHealthCheckRouter({
|
|
147
|
+
database: database as SafeDatabase<typeof schema>,
|
|
148
|
+
registry: healthCheckRegistry,
|
|
148
149
|
collectorRegistry,
|
|
149
|
-
);
|
|
150
|
+
getEmitHook: () => storedEmitHook,
|
|
151
|
+
});
|
|
150
152
|
rpc.registerRouter(healthCheckRouter, healthCheckContract);
|
|
151
153
|
|
|
152
154
|
// Register command palette commands
|
|
@@ -212,6 +214,18 @@ export default createBackendPlugin({
|
|
|
212
214
|
{ mode: "work-queue", workerGroup: "system-cleanup" },
|
|
213
215
|
);
|
|
214
216
|
|
|
217
|
+
// Subscribe to satellite deletion to scrub satellite IDs from associations
|
|
218
|
+
onHook(
|
|
219
|
+
satelliteHooks.satelliteRemoved,
|
|
220
|
+
async (payload) => {
|
|
221
|
+
logger.debug(
|
|
222
|
+
`Scrubbing satellite ${payload.satelliteId} from health check associations`,
|
|
223
|
+
);
|
|
224
|
+
await service.scrubSatelliteFromAssociations(payload.satelliteId);
|
|
225
|
+
},
|
|
226
|
+
{ mode: "work-queue", workerGroup: "satellite-cleanup" },
|
|
227
|
+
);
|
|
228
|
+
|
|
215
229
|
logger.debug("✅ Health Check Backend afterPluginsReady complete.");
|
|
216
230
|
},
|
|
217
231
|
});
|
package/src/queue-executor.ts
CHANGED
|
@@ -259,6 +259,8 @@ async function executeHealthCheckJob(props: {
|
|
|
259
259
|
interval: healthCheckConfigurations.intervalSeconds,
|
|
260
260
|
enabled: systemHealthChecks.enabled,
|
|
261
261
|
paused: healthCheckConfigurations.paused,
|
|
262
|
+
includeLocal: systemHealthChecks.includeLocal,
|
|
263
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
262
264
|
})
|
|
263
265
|
.from(systemHealthChecks)
|
|
264
266
|
.innerJoin(
|
|
@@ -289,6 +291,19 @@ async function executeHealthCheckJob(props: {
|
|
|
289
291
|
return;
|
|
290
292
|
}
|
|
291
293
|
|
|
294
|
+
// If includeLocal is false and satellites are assigned, skip local execution
|
|
295
|
+
// (satellites handle execution, local core doesn't run this check)
|
|
296
|
+
if (
|
|
297
|
+
!configRow.includeLocal &&
|
|
298
|
+
configRow.satelliteIds &&
|
|
299
|
+
configRow.satelliteIds.length > 0
|
|
300
|
+
) {
|
|
301
|
+
logger.debug(
|
|
302
|
+
`Health check ${configId} for system ${systemId} is satellite-only, skipping local execution`,
|
|
303
|
+
);
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
|
|
292
307
|
// Fetch system name for signal payload
|
|
293
308
|
let systemName = systemId;
|
|
294
309
|
try {
|
|
@@ -486,6 +501,8 @@ async function executeHealthCheckJob(props: {
|
|
|
486
501
|
status: result.status,
|
|
487
502
|
latencyMs: result.latencyMs,
|
|
488
503
|
result: { ...result } as Record<string, unknown>,
|
|
504
|
+
sourceId: undefined,
|
|
505
|
+
sourceLabel: "Local",
|
|
489
506
|
});
|
|
490
507
|
|
|
491
508
|
await incrementHourlyAggregate({
|
|
@@ -497,6 +514,7 @@ async function executeHealthCheckJob(props: {
|
|
|
497
514
|
runTimestamp: new Date(),
|
|
498
515
|
result: { ...result } as Record<string, unknown>,
|
|
499
516
|
collectorRegistry,
|
|
517
|
+
sourceLabel: "Local",
|
|
500
518
|
});
|
|
501
519
|
|
|
502
520
|
logger.debug(
|
|
@@ -560,6 +578,8 @@ async function executeHealthCheckJob(props: {
|
|
|
560
578
|
status: result.status,
|
|
561
579
|
latencyMs: result.latencyMs,
|
|
562
580
|
result: { ...result } as Record<string, unknown>,
|
|
581
|
+
sourceId: undefined,
|
|
582
|
+
sourceLabel: "Local",
|
|
563
583
|
});
|
|
564
584
|
|
|
565
585
|
// Trigger incremental hourly aggregation
|
|
@@ -572,6 +592,7 @@ async function executeHealthCheckJob(props: {
|
|
|
572
592
|
runTimestamp: new Date(),
|
|
573
593
|
result: { ...result } as Record<string, unknown>,
|
|
574
594
|
collectorRegistry,
|
|
595
|
+
sourceLabel: "Local",
|
|
575
596
|
});
|
|
576
597
|
|
|
577
598
|
logger.debug(
|
|
@@ -660,6 +681,8 @@ async function executeHealthCheckJob(props: {
|
|
|
660
681
|
systemId,
|
|
661
682
|
status: "unhealthy",
|
|
662
683
|
result: { error: String(error) } as Record<string, unknown>,
|
|
684
|
+
sourceId: undefined,
|
|
685
|
+
sourceLabel: "Local",
|
|
663
686
|
});
|
|
664
687
|
|
|
665
688
|
// Trigger incremental hourly aggregation
|
|
@@ -672,6 +695,7 @@ async function executeHealthCheckJob(props: {
|
|
|
672
695
|
runTimestamp: new Date(),
|
|
673
696
|
// No collector data for error cases
|
|
674
697
|
collectorRegistry,
|
|
698
|
+
sourceLabel: "Local",
|
|
675
699
|
});
|
|
676
700
|
|
|
677
701
|
// Try to fetch names for the enriched signal (best-effort)
|
|
package/src/realtime-aggregation.ts
CHANGED
|
@@ -67,6 +67,10 @@ interface IncrementHourlyAggregateParams {
|
|
|
67
67
|
result?: Record<string, unknown>;
|
|
68
68
|
/** Collector registry for aggregating collector data via mergeResult */
|
|
69
69
|
collectorRegistry?: CollectorRegistry;
|
|
70
|
+
/** Source identifier: undefined = local core, string = satellite ID */
|
|
71
|
+
sourceId?: string;
|
|
72
|
+
/** Human-readable source label for display */
|
|
73
|
+
sourceLabel?: string;
|
|
70
74
|
}
|
|
71
75
|
|
|
72
76
|
/**
|
|
@@ -88,6 +92,8 @@ export async function incrementHourlyAggregate(
|
|
|
88
92
|
runTimestamp,
|
|
89
93
|
result,
|
|
90
94
|
collectorRegistry,
|
|
95
|
+
sourceId,
|
|
96
|
+
sourceLabel,
|
|
91
97
|
} = params;
|
|
92
98
|
|
|
93
99
|
const bucketStart = getHourBucketStart(runTimestamp);
|
|
@@ -107,6 +113,9 @@ export async function incrementHourlyAggregate(
|
|
|
107
113
|
eq(healthCheckAggregates.configurationId, configurationId),
|
|
108
114
|
eq(healthCheckAggregates.bucketStart, bucketStart),
|
|
109
115
|
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
116
|
+
sourceId
|
|
117
|
+
? eq(healthCheckAggregates.sourceId, sourceId)
|
|
118
|
+
: sql`${healthCheckAggregates.sourceId} IS NULL`,
|
|
110
119
|
),
|
|
111
120
|
)
|
|
112
121
|
.limit(1);
|
|
@@ -181,6 +190,8 @@ export async function incrementHourlyAggregate(
|
|
|
181
190
|
p95LatencyMs: latencyUpdate?.p95,
|
|
182
191
|
tdigestState: latencyUpdate?.tdigestState,
|
|
183
192
|
aggregatedResult,
|
|
193
|
+
sourceId: sourceId ?? undefined,
|
|
194
|
+
sourceLabel: sourceLabel ?? undefined,
|
|
184
195
|
})
|
|
185
196
|
.onConflictDoUpdate({
|
|
186
197
|
target: [
|
|
@@ -188,6 +199,7 @@ export async function incrementHourlyAggregate(
|
|
|
188
199
|
healthCheckAggregates.systemId,
|
|
189
200
|
healthCheckAggregates.bucketStart,
|
|
190
201
|
healthCheckAggregates.bucketSize,
|
|
202
|
+
healthCheckAggregates.sourceId,
|
|
191
203
|
],
|
|
192
204
|
set: {
|
|
193
205
|
runCount: sql`${healthCheckAggregates.runCount} + 1`,
|
package/src/router.test.ts
CHANGED
|
@@ -50,11 +50,12 @@ describe("HealthCheck Router", () => {
|
|
|
50
50
|
getCollectorsForPlugin: mock(() => []),
|
|
51
51
|
};
|
|
52
52
|
|
|
53
|
-
const router = createHealthCheckRouter(
|
|
54
|
-
mockDb as never,
|
|
55
|
-
mockRegistry,
|
|
56
|
-
mockCollectorRegistry as never,
|
|
57
|
-
);
|
|
53
|
+
const router = createHealthCheckRouter({
|
|
54
|
+
database: mockDb as never,
|
|
55
|
+
registry: mockRegistry,
|
|
56
|
+
collectorRegistry: mockCollectorRegistry as never,
|
|
57
|
+
getEmitHook: () => undefined,
|
|
58
|
+
});
|
|
58
59
|
|
|
59
60
|
it("getStrategies returns strategies from registry", async () => {
|
|
60
61
|
const context = createMockRpcContext({
|
package/src/router.ts
CHANGED
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
import { healthCheckContract } from "@checkstack/healthcheck-common";
|
|
11
11
|
import type { StrategyCategory } from "@checkstack/healthcheck-common";
|
|
12
12
|
import { HealthCheckService } from "./service";
|
|
13
|
+
import { healthCheckHooks } from "./hooks";
|
|
13
14
|
import * as schema from "./schema";
|
|
14
15
|
import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
15
16
|
|
|
@@ -19,11 +20,13 @@ import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
|
19
20
|
* Auth and access rules are automatically enforced via autoAuthMiddleware
|
|
20
21
|
* based on the contract's meta.userType and meta.access.
|
|
21
22
|
*/
|
|
22
|
-
export const createHealthCheckRouter = (
|
|
23
|
-
database: SafeDatabase<typeof schema>,
|
|
24
|
-
registry: HealthCheckRegistry,
|
|
25
|
-
collectorRegistry: CollectorRegistry,
|
|
26
|
-
) => {
|
|
23
|
+
export const createHealthCheckRouter = (opts: {
|
|
24
|
+
database: SafeDatabase<typeof schema>;
|
|
25
|
+
registry: HealthCheckRegistry;
|
|
26
|
+
collectorRegistry: CollectorRegistry;
|
|
27
|
+
getEmitHook: () => ((hook: { id: string }, payload: Record<string, unknown>) => Promise<void>) | undefined;
|
|
28
|
+
}) => {
|
|
29
|
+
const { database, registry, collectorRegistry, getEmitHook } = opts;
|
|
27
30
|
// Create service instance once - shared across all handlers
|
|
28
31
|
const service = new HealthCheckService(database, registry, collectorRegistry);
|
|
29
32
|
|
|
@@ -137,6 +140,8 @@ export const createHealthCheckRouter = (
|
|
|
137
140
|
configurationId: input.body.configurationId,
|
|
138
141
|
enabled: input.body.enabled,
|
|
139
142
|
stateThresholds: input.body.stateThresholds,
|
|
143
|
+
satelliteIds: input.body.satelliteIds,
|
|
144
|
+
includeLocal: input.body.includeLocal,
|
|
140
145
|
});
|
|
141
146
|
|
|
142
147
|
// If enabling the health check, schedule it immediately
|
|
@@ -156,10 +161,28 @@ export const createHealthCheckRouter = (
|
|
|
156
161
|
});
|
|
157
162
|
}
|
|
158
163
|
}
|
|
164
|
+
|
|
165
|
+
// Notify subscribers (e.g., satellite-backend) that assignments changed
|
|
166
|
+
const emitHook = getEmitHook();
|
|
167
|
+
if (emitHook) {
|
|
168
|
+
await emitHook(healthCheckHooks.assignmentChanged, {
|
|
169
|
+
systemId: input.systemId,
|
|
170
|
+
configurationId: input.body.configurationId,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
159
173
|
}),
|
|
160
174
|
|
|
161
175
|
disassociateSystem: os.disassociateSystem.handler(async ({ input }) => {
|
|
162
176
|
await service.disassociateSystem(input.systemId, input.configId);
|
|
177
|
+
|
|
178
|
+
// Notify subscribers that assignments changed
|
|
179
|
+
const emitHook = getEmitHook();
|
|
180
|
+
if (emitHook) {
|
|
181
|
+
await emitHook(healthCheckHooks.assignmentChanged, {
|
|
182
|
+
systemId: input.systemId,
|
|
183
|
+
configurationId: input.configId,
|
|
184
|
+
});
|
|
185
|
+
}
|
|
163
186
|
}),
|
|
164
187
|
|
|
165
188
|
getRetentionConfig: os.getRetentionConfig.handler(async ({ input }) => {
|
|
@@ -230,6 +253,22 @@ export const createHealthCheckRouter = (
|
|
|
230
253
|
return service.getSystemHealthOverview(input.systemId);
|
|
231
254
|
},
|
|
232
255
|
),
|
|
256
|
+
|
|
257
|
+
// ========================================================================
|
|
258
|
+
// SERVICE INTERFACE (S2S — satellite-backend)
|
|
259
|
+
// ========================================================================
|
|
260
|
+
|
|
261
|
+
getAssignmentsForSatellite: os.getAssignmentsForSatellite.handler(
|
|
262
|
+
async ({ input }) => {
|
|
263
|
+
return service.getAssignmentsForSatellite(input.satelliteId);
|
|
264
|
+
},
|
|
265
|
+
),
|
|
266
|
+
|
|
267
|
+
ingestSatelliteResult: os.ingestSatelliteResult.handler(
|
|
268
|
+
async ({ input }) => {
|
|
269
|
+
await service.ingestSatelliteResult(input);
|
|
270
|
+
},
|
|
271
|
+
),
|
|
233
272
|
});
|
|
234
273
|
};
|
|
235
274
|
|
package/src/schema.ts
CHANGED
|
@@ -90,6 +90,16 @@ export const systemHealthChecks = pgTable(
|
|
|
90
90
|
* Null means use default retention settings.
|
|
91
91
|
*/
|
|
92
92
|
retentionConfig: jsonb("retention_config").$type<RetentionConfig>(),
|
|
93
|
+
/**
|
|
94
|
+
* IDs of satellites assigned to execute this health check.
|
|
95
|
+
* When set, the check runs on these satellite nodes in addition to (or instead of) the core.
|
|
96
|
+
*/
|
|
97
|
+
satelliteIds: jsonb("satellite_ids").$type<string[]>(),
|
|
98
|
+
/**
|
|
99
|
+
* Whether to also run this check locally on the core instance.
|
|
100
|
+
* Defaults to true. Only relevant when satelliteIds is set.
|
|
101
|
+
*/
|
|
102
|
+
includeLocal: boolean("include_local").default(true).notNull(),
|
|
93
103
|
createdAt: timestamp("created_at").defaultNow().notNull(),
|
|
94
104
|
updatedAt: timestamp("updated_at").defaultNow().notNull(),
|
|
95
105
|
},
|
|
@@ -108,6 +118,16 @@ export const healthCheckRuns = pgTable("health_check_runs", {
|
|
|
108
118
|
/** Execution duration in milliseconds */
|
|
109
119
|
latencyMs: integer("latency_ms"),
|
|
110
120
|
result: jsonb("result").$type<Record<string, unknown>>(),
|
|
121
|
+
/**
|
|
122
|
+
* Source identifier for result attribution.
|
|
123
|
+
* null = local core execution, UUID = satellite ID.
|
|
124
|
+
*/
|
|
125
|
+
sourceId: text("source_id"),
|
|
126
|
+
/**
|
|
127
|
+
* Human-readable source label for UI display.
|
|
128
|
+
* e.g. "Local" or "EU West (eu-west-1)".
|
|
129
|
+
*/
|
|
130
|
+
sourceLabel: text("source_label"),
|
|
111
131
|
timestamp: timestamp("timestamp").defaultNow().notNull(),
|
|
112
132
|
});
|
|
113
133
|
|
|
@@ -151,14 +171,24 @@ export const healthCheckAggregates = pgTable(
|
|
|
151
171
|
jsonb("aggregated_result").$type<Record<string, unknown>>(),
|
|
152
172
|
/** Serialized t-digest state for incremental p95 calculation */
|
|
153
173
|
tdigestState: jsonb("tdigest_state").$type<number[]>(),
|
|
174
|
+
/**
|
|
175
|
+
* Source identifier for per-region aggregation.
|
|
176
|
+
* null = local core execution, UUID = satellite ID.
|
|
177
|
+
*/
|
|
178
|
+
sourceId: text("source_id"),
|
|
179
|
+
/**
|
|
180
|
+
* Human-readable source label for UI display.
|
|
181
|
+
*/
|
|
182
|
+
sourceLabel: text("source_label"),
|
|
154
183
|
},
|
|
155
184
|
(t) => ({
|
|
156
|
-
// Unique constraint for
|
|
185
|
+
// Unique constraint includes sourceId for per-region aggregation
|
|
157
186
|
bucketUnique: uniqueIndex("health_check_aggregates_bucket_unique").on(
|
|
158
187
|
t.configurationId,
|
|
159
188
|
t.systemId,
|
|
160
189
|
t.bucketStart,
|
|
161
190
|
t.bucketSize,
|
|
191
|
+
t.sourceId,
|
|
162
192
|
),
|
|
163
193
|
}),
|
|
164
194
|
);
|
package/src/service.ts
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
StateThresholds,
|
|
6
6
|
HealthCheckStatus,
|
|
7
7
|
RetentionConfig,
|
|
8
|
+
type HealthCheckRunResult,
|
|
8
9
|
} from "@checkstack/healthcheck-common";
|
|
9
10
|
import {
|
|
10
11
|
healthCheckConfigurations,
|
|
@@ -14,10 +15,11 @@ import {
|
|
|
14
15
|
VersionedStateThresholds,
|
|
15
16
|
} from "./schema";
|
|
16
17
|
import * as schema from "./schema";
|
|
17
|
-
import { eq, and, InferSelectModel, desc, gte, lte } from "drizzle-orm";
|
|
18
|
+
import { eq, and, InferSelectModel, desc, gte, lte, isNull } from "drizzle-orm";
|
|
18
19
|
import { ORPCError } from "@orpc/server";
|
|
19
20
|
import { evaluateHealthStatus } from "./state-evaluator";
|
|
20
21
|
import { stateThresholds } from "./state-thresholds-migrations";
|
|
22
|
+
import { incrementHourlyAggregate } from "./realtime-aggregation";
|
|
21
23
|
import type {
|
|
22
24
|
HealthCheckRegistry,
|
|
23
25
|
SafeDatabase,
|
|
@@ -129,12 +131,16 @@ export class HealthCheckService {
|
|
|
129
131
|
configurationId: string;
|
|
130
132
|
enabled?: boolean;
|
|
131
133
|
stateThresholds?: StateThresholds;
|
|
134
|
+
satelliteIds?: string[];
|
|
135
|
+
includeLocal?: boolean;
|
|
132
136
|
}) {
|
|
133
137
|
const {
|
|
134
138
|
systemId,
|
|
135
139
|
configurationId,
|
|
136
140
|
enabled = true,
|
|
137
141
|
stateThresholds: stateThresholds_,
|
|
142
|
+
satelliteIds,
|
|
143
|
+
includeLocal = true,
|
|
138
144
|
} = props;
|
|
139
145
|
|
|
140
146
|
// Wrap thresholds in versioned config if provided
|
|
@@ -148,6 +154,8 @@ export class HealthCheckService {
|
|
|
148
154
|
configurationId,
|
|
149
155
|
enabled,
|
|
150
156
|
stateThresholds: versionedThresholds,
|
|
157
|
+
satelliteIds: satelliteIds ?? undefined,
|
|
158
|
+
includeLocal,
|
|
151
159
|
})
|
|
152
160
|
.onConflictDoUpdate({
|
|
153
161
|
target: [
|
|
@@ -157,6 +165,8 @@ export class HealthCheckService {
|
|
|
157
165
|
set: {
|
|
158
166
|
enabled,
|
|
159
167
|
stateThresholds: versionedThresholds,
|
|
168
|
+
satelliteIds: satelliteIds ?? undefined,
|
|
169
|
+
includeLocal,
|
|
160
170
|
updatedAt: new Date(),
|
|
161
171
|
},
|
|
162
172
|
});
|
|
@@ -270,6 +280,8 @@ export class HealthCheckService {
|
|
|
270
280
|
configName: healthCheckConfigurations.name,
|
|
271
281
|
enabled: systemHealthChecks.enabled,
|
|
272
282
|
stateThresholds: systemHealthChecks.stateThresholds,
|
|
283
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
284
|
+
includeLocal: systemHealthChecks.includeLocal,
|
|
273
285
|
})
|
|
274
286
|
.from(systemHealthChecks)
|
|
275
287
|
.innerJoin(
|
|
@@ -290,6 +302,8 @@ export class HealthCheckService {
|
|
|
290
302
|
configurationName: row.configName,
|
|
291
303
|
enabled: row.enabled,
|
|
292
304
|
stateThresholds: thresholds,
|
|
305
|
+
satelliteIds: row.satelliteIds ?? undefined,
|
|
306
|
+
includeLocal: row.includeLocal,
|
|
293
307
|
});
|
|
294
308
|
}
|
|
295
309
|
return results;
|
|
@@ -474,6 +488,7 @@ export class HealthCheckService {
|
|
|
474
488
|
configurationId?: string;
|
|
475
489
|
startDate?: Date;
|
|
476
490
|
endDate?: Date;
|
|
491
|
+
sourceFilter?: string;
|
|
477
492
|
limit?: number;
|
|
478
493
|
offset?: number;
|
|
479
494
|
sortOrder: "asc" | "desc";
|
|
@@ -483,6 +498,7 @@ export class HealthCheckService {
|
|
|
483
498
|
configurationId,
|
|
484
499
|
startDate,
|
|
485
500
|
endDate,
|
|
501
|
+
sourceFilter,
|
|
486
502
|
limit = 10,
|
|
487
503
|
offset = 0,
|
|
488
504
|
sortOrder,
|
|
@@ -495,6 +511,13 @@ export class HealthCheckService {
|
|
|
495
511
|
if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
|
|
496
512
|
if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
|
|
497
513
|
|
|
514
|
+
// Source filtering: "local" = no sourceId, UUID = specific satellite
|
|
515
|
+
if (sourceFilter === "local") {
|
|
516
|
+
conditions.push(isNull(healthCheckRuns.sourceId));
|
|
517
|
+
} else if (sourceFilter) {
|
|
518
|
+
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
519
|
+
}
|
|
520
|
+
|
|
498
521
|
// Build where clause
|
|
499
522
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
500
523
|
|
|
@@ -522,6 +545,8 @@ export class HealthCheckService {
|
|
|
522
545
|
status: run.status,
|
|
523
546
|
timestamp: run.timestamp,
|
|
524
547
|
latencyMs: run.latencyMs ?? undefined,
|
|
548
|
+
sourceId: run.sourceId ?? undefined,
|
|
549
|
+
sourceLabel: run.sourceLabel ?? undefined,
|
|
525
550
|
})),
|
|
526
551
|
total,
|
|
527
552
|
};
|
|
@@ -537,6 +562,7 @@ export class HealthCheckService {
|
|
|
537
562
|
configurationId?: string;
|
|
538
563
|
startDate?: Date;
|
|
539
564
|
endDate?: Date;
|
|
565
|
+
sourceFilter?: string;
|
|
540
566
|
limit?: number;
|
|
541
567
|
offset?: number;
|
|
542
568
|
sortOrder: "asc" | "desc";
|
|
@@ -546,6 +572,7 @@ export class HealthCheckService {
|
|
|
546
572
|
configurationId,
|
|
547
573
|
startDate,
|
|
548
574
|
endDate,
|
|
575
|
+
sourceFilter,
|
|
549
576
|
limit = 10,
|
|
550
577
|
offset = 0,
|
|
551
578
|
sortOrder,
|
|
@@ -558,6 +585,13 @@ export class HealthCheckService {
|
|
|
558
585
|
if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
|
|
559
586
|
if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
|
|
560
587
|
|
|
588
|
+
// Source filtering: "local" = no sourceId, UUID = specific satellite
|
|
589
|
+
if (sourceFilter === "local") {
|
|
590
|
+
conditions.push(isNull(healthCheckRuns.sourceId));
|
|
591
|
+
} else if (sourceFilter) {
|
|
592
|
+
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
593
|
+
}
|
|
594
|
+
|
|
561
595
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
562
596
|
const total = await this.db.$count(healthCheckRuns, whereClause);
|
|
563
597
|
|
|
@@ -582,6 +616,8 @@ export class HealthCheckService {
|
|
|
582
616
|
result: run.result ?? {},
|
|
583
617
|
timestamp: run.timestamp,
|
|
584
618
|
latencyMs: run.latencyMs ?? undefined,
|
|
619
|
+
sourceId: run.sourceId ?? undefined,
|
|
620
|
+
sourceLabel: run.sourceLabel ?? undefined,
|
|
585
621
|
})),
|
|
586
622
|
total,
|
|
587
623
|
};
|
|
@@ -610,6 +646,8 @@ export class HealthCheckService {
|
|
|
610
646
|
result: r.result ?? {},
|
|
611
647
|
timestamp: r.timestamp,
|
|
612
648
|
latencyMs: r.latencyMs ?? undefined,
|
|
649
|
+
sourceId: r.sourceId ?? undefined,
|
|
650
|
+
sourceLabel: r.sourceLabel ?? undefined,
|
|
613
651
|
};
|
|
614
652
|
}
|
|
615
653
|
|
|
@@ -624,6 +662,7 @@ export class HealthCheckService {
|
|
|
624
662
|
configurationId: string;
|
|
625
663
|
startDate: Date;
|
|
626
664
|
endDate: Date;
|
|
665
|
+
sourceFilter?: string;
|
|
627
666
|
targetPoints?: number;
|
|
628
667
|
},
|
|
629
668
|
options: { includeAggregatedResult: boolean },
|
|
@@ -633,6 +672,7 @@ export class HealthCheckService {
|
|
|
633
672
|
configurationId,
|
|
634
673
|
startDate,
|
|
635
674
|
endDate,
|
|
675
|
+
sourceFilter,
|
|
636
676
|
targetPoints = 500,
|
|
637
677
|
} = props;
|
|
638
678
|
|
|
@@ -655,48 +695,66 @@ export class HealthCheckService {
|
|
|
655
695
|
? this.registry.getStrategy(config.strategyId)
|
|
656
696
|
: undefined;
|
|
657
697
|
|
|
698
|
+
// Build source condition for raw runs
|
|
699
|
+
const rawConditions = [
|
|
700
|
+
eq(healthCheckRuns.systemId, systemId),
|
|
701
|
+
eq(healthCheckRuns.configurationId, configurationId),
|
|
702
|
+
gte(healthCheckRuns.timestamp, startDate),
|
|
703
|
+
lte(healthCheckRuns.timestamp, endDate),
|
|
704
|
+
...(sourceFilter === "local"
|
|
705
|
+
? [isNull(healthCheckRuns.sourceId)]
|
|
706
|
+
: sourceFilter
|
|
707
|
+
? [eq(healthCheckRuns.sourceId, sourceFilter)]
|
|
708
|
+
: []),
|
|
709
|
+
];
|
|
710
|
+
|
|
711
|
+
// Build source condition for hourly aggregates
|
|
712
|
+
const hourlyConditions = [
|
|
713
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
714
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
715
|
+
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
716
|
+
gte(healthCheckAggregates.bucketStart, startDate),
|
|
717
|
+
lte(healthCheckAggregates.bucketStart, endDate),
|
|
718
|
+
...(sourceFilter === "local"
|
|
719
|
+
? [isNull(healthCheckAggregates.sourceId)]
|
|
720
|
+
: sourceFilter
|
|
721
|
+
? [eq(healthCheckAggregates.sourceId, sourceFilter)]
|
|
722
|
+
: []),
|
|
723
|
+
];
|
|
724
|
+
|
|
725
|
+
// Build source condition for daily aggregates
|
|
726
|
+
const dailyConditions = [
|
|
727
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
728
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
729
|
+
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
730
|
+
gte(healthCheckAggregates.bucketStart, startDate),
|
|
731
|
+
lte(healthCheckAggregates.bucketStart, endDate),
|
|
732
|
+
...(sourceFilter === "local"
|
|
733
|
+
? [isNull(healthCheckAggregates.sourceId)]
|
|
734
|
+
: sourceFilter
|
|
735
|
+
? [eq(healthCheckAggregates.sourceId, sourceFilter)]
|
|
736
|
+
: []),
|
|
737
|
+
];
|
|
738
|
+
|
|
658
739
|
// Query all three tiers in parallel
|
|
659
740
|
const [rawRuns, hourlyAggregates, dailyAggregates] = await Promise.all([
|
|
660
741
|
// Raw runs
|
|
661
742
|
this.db
|
|
662
743
|
.select()
|
|
663
744
|
.from(healthCheckRuns)
|
|
664
|
-
.where(
|
|
665
|
-
and(
|
|
666
|
-
eq(healthCheckRuns.systemId, systemId),
|
|
667
|
-
eq(healthCheckRuns.configurationId, configurationId),
|
|
668
|
-
gte(healthCheckRuns.timestamp, startDate),
|
|
669
|
-
lte(healthCheckRuns.timestamp, endDate),
|
|
670
|
-
),
|
|
671
|
-
)
|
|
745
|
+
.where(and(...rawConditions))
|
|
672
746
|
.orderBy(healthCheckRuns.timestamp),
|
|
673
747
|
// Hourly aggregates
|
|
674
748
|
this.db
|
|
675
749
|
.select()
|
|
676
750
|
.from(healthCheckAggregates)
|
|
677
|
-
.where(
|
|
678
|
-
and(
|
|
679
|
-
eq(healthCheckAggregates.systemId, systemId),
|
|
680
|
-
eq(healthCheckAggregates.configurationId, configurationId),
|
|
681
|
-
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
682
|
-
gte(healthCheckAggregates.bucketStart, startDate),
|
|
683
|
-
lte(healthCheckAggregates.bucketStart, endDate),
|
|
684
|
-
),
|
|
685
|
-
)
|
|
751
|
+
.where(and(...hourlyConditions))
|
|
686
752
|
.orderBy(healthCheckAggregates.bucketStart),
|
|
687
753
|
// Daily aggregates
|
|
688
754
|
this.db
|
|
689
755
|
.select()
|
|
690
756
|
.from(healthCheckAggregates)
|
|
691
|
-
.where(
|
|
692
|
-
and(
|
|
693
|
-
eq(healthCheckAggregates.systemId, systemId),
|
|
694
|
-
eq(healthCheckAggregates.configurationId, configurationId),
|
|
695
|
-
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
696
|
-
gte(healthCheckAggregates.bucketStart, startDate),
|
|
697
|
-
lte(healthCheckAggregates.bucketStart, endDate),
|
|
698
|
-
),
|
|
699
|
-
)
|
|
757
|
+
.where(and(...dailyConditions))
|
|
700
758
|
.orderBy(healthCheckAggregates.bucketStart),
|
|
701
759
|
]);
|
|
702
760
|
|
|
@@ -953,4 +1011,134 @@ export class HealthCheckService {
|
|
|
953
1011
|
updatedAt: row.updatedAt,
|
|
954
1012
|
};
|
|
955
1013
|
}
|
|
1014
|
+
|
|
1015
|
+
/**
|
|
1016
|
+
* Remove a satellite ID from all systemHealthChecks.satelliteIds arrays.
|
|
1017
|
+
* Called when a satellite is deleted via the satellite.removed hook.
|
|
1018
|
+
*/
|
|
1019
|
+
async scrubSatelliteFromAssociations(satelliteId: string): Promise<void> {
|
|
1020
|
+
// Get all associations that reference this satellite
|
|
1021
|
+
const associations = await this.db
|
|
1022
|
+
.select({
|
|
1023
|
+
systemId: systemHealthChecks.systemId,
|
|
1024
|
+
configurationId: systemHealthChecks.configurationId,
|
|
1025
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
1026
|
+
})
|
|
1027
|
+
.from(systemHealthChecks);
|
|
1028
|
+
|
|
1029
|
+
// Update each association that contains this satellite ID
|
|
1030
|
+
for (const assoc of associations) {
|
|
1031
|
+
if (!assoc.satelliteIds?.includes(satelliteId)) continue;
|
|
1032
|
+
|
|
1033
|
+
const updated = assoc.satelliteIds.filter((id) => id !== satelliteId);
|
|
1034
|
+
await this.db
|
|
1035
|
+
.update(systemHealthChecks)
|
|
1036
|
+
.set({
|
|
1037
|
+
satelliteIds: updated.length > 0 ? updated : undefined,
|
|
1038
|
+
updatedAt: new Date(),
|
|
1039
|
+
})
|
|
1040
|
+
.where(
|
|
1041
|
+
and(
|
|
1042
|
+
eq(systemHealthChecks.systemId, assoc.systemId),
|
|
1043
|
+
eq(systemHealthChecks.configurationId, assoc.configurationId),
|
|
1044
|
+
),
|
|
1045
|
+
);
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
/**
|
|
1050
|
+
* Get all health check assignments for a specific satellite.
|
|
1051
|
+
* Returns the full configuration payload needed for the satellite to execute checks.
|
|
1052
|
+
*/
|
|
1053
|
+
async getAssignmentsForSatellite(satelliteId: string) {
|
|
1054
|
+
// Get all associations that reference this satellite
|
|
1055
|
+
const associations = await this.db
|
|
1056
|
+
.select({
|
|
1057
|
+
systemId: systemHealthChecks.systemId,
|
|
1058
|
+
configurationId: systemHealthChecks.configurationId,
|
|
1059
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
1060
|
+
enabled: systemHealthChecks.enabled,
|
|
1061
|
+
})
|
|
1062
|
+
.from(systemHealthChecks);
|
|
1063
|
+
|
|
1064
|
+
// Filter to associations that include this satellite and are enabled
|
|
1065
|
+
const matchingAssociations = associations.filter(
|
|
1066
|
+
(a) => a.enabled && a.satelliteIds?.includes(satelliteId),
|
|
1067
|
+
);
|
|
1068
|
+
|
|
1069
|
+
if (matchingAssociations.length === 0) return [];
|
|
1070
|
+
|
|
1071
|
+
// Get configurations for each matching association
|
|
1072
|
+
const assignments = [];
|
|
1073
|
+
for (const assoc of matchingAssociations) {
|
|
1074
|
+
const [config] = await this.db
|
|
1075
|
+
.select()
|
|
1076
|
+
.from(healthCheckConfigurations)
|
|
1077
|
+
.where(eq(healthCheckConfigurations.id, assoc.configurationId));
|
|
1078
|
+
|
|
1079
|
+
if (!config || config.paused) continue;
|
|
1080
|
+
|
|
1081
|
+
assignments.push({
|
|
1082
|
+
configId: config.id,
|
|
1083
|
+
systemId: assoc.systemId,
|
|
1084
|
+
strategyId: config.strategyId,
|
|
1085
|
+
config: config.config,
|
|
1086
|
+
collectors: config.collectors ?? undefined,
|
|
1087
|
+
intervalSeconds: config.intervalSeconds,
|
|
1088
|
+
});
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
return assignments;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
/**
|
|
1095
|
+
* Ingest a health check result from a satellite.
|
|
1096
|
+
* Stores the run with source attribution (sourceId + sourceLabel)
|
|
1097
|
+
* and triggers incremental aggregation to keep charts/availability current.
|
|
1098
|
+
*/
|
|
1099
|
+
async ingestSatelliteResult(props: {
|
|
1100
|
+
configId: string;
|
|
1101
|
+
systemId: string;
|
|
1102
|
+
status: HealthCheckStatus;
|
|
1103
|
+
latencyMs?: number;
|
|
1104
|
+
result?: HealthCheckRunResult;
|
|
1105
|
+
executedAt: string;
|
|
1106
|
+
sourceId: string;
|
|
1107
|
+
sourceLabel: string;
|
|
1108
|
+
}) {
|
|
1109
|
+
const {
|
|
1110
|
+
configId,
|
|
1111
|
+
systemId,
|
|
1112
|
+
status,
|
|
1113
|
+
latencyMs,
|
|
1114
|
+
result,
|
|
1115
|
+
sourceId,
|
|
1116
|
+
sourceLabel,
|
|
1117
|
+
} = props;
|
|
1118
|
+
|
|
1119
|
+
const resultRecord = result ? { ...result } as Record<string, unknown> : {};
|
|
1120
|
+
|
|
1121
|
+
await this.db.insert(healthCheckRuns).values({
|
|
1122
|
+
configurationId: configId,
|
|
1123
|
+
systemId,
|
|
1124
|
+
status,
|
|
1125
|
+
latencyMs,
|
|
1126
|
+
result: resultRecord,
|
|
1127
|
+
sourceId,
|
|
1128
|
+
sourceLabel,
|
|
1129
|
+
});
|
|
1130
|
+
|
|
1131
|
+
// Trigger incremental hourly aggregation — same as local executor
|
|
1132
|
+
await incrementHourlyAggregate({
|
|
1133
|
+
db: this.db,
|
|
1134
|
+
systemId,
|
|
1135
|
+
configurationId: configId,
|
|
1136
|
+
status,
|
|
1137
|
+
latencyMs,
|
|
1138
|
+
runTimestamp: new Date(props.executedAt),
|
|
1139
|
+
result: resultRecord,
|
|
1140
|
+
collectorRegistry: this.collectorRegistry,
|
|
1141
|
+
sourceLabel,
|
|
1142
|
+
});
|
|
1143
|
+
}
|
|
956
1144
|
}
|