@checkstack/healthcheck-backend 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,43 @@
1
1
  # @checkstack/healthcheck-backend
2
2
 
3
+ ## 0.8.3
4
+
5
+ ### Patch Changes
6
+
7
+ - 48c2080: Migrate aggregation from batch to incremental (`mergeResult`)
8
+
9
+ ### Breaking Changes (Internal)
10
+
11
+ - Replaced the batch `aggregateResult(runs[])` interface with the incremental `mergeResult(existing, run)` interface across all `HealthCheckStrategy` and `CollectorStrategy` implementations
12
+
13
+ ### New Features
14
+
15
+ - Added incremental aggregation utilities in `@checkstack/backend-api`:
16
+ - `mergeCounter()` - track occurrences
17
+ - `mergeAverage()` - track sum/count, compute avg
18
+ - `mergeRate()` - track success/total, compute the success percentage
19
+ - `mergeMinMax()` - track min/max values
20
+ - Exported Zod schemas for internal state: `averageStateSchema`, `rateStateSchema`, `minMaxStateSchema`, `counterStateSchema`
21
+
22
+ ### Improvements
23
+
24
+ - Keeps storage overhead at O(1) by maintaining incremental aggregation state
25
+ - Prepares for real-time hourly aggregation without batch accumulation
26
+
27
+ - Updated dependencies [f676e11]
28
+ - Updated dependencies [48c2080]
29
+ - @checkstack/common@0.6.2
30
+ - @checkstack/backend-api@0.6.0
31
+ - @checkstack/catalog-backend@0.2.11
32
+ - @checkstack/catalog-common@1.2.7
33
+ - @checkstack/command-backend@0.1.9
34
+ - @checkstack/healthcheck-common@0.8.2
35
+ - @checkstack/incident-common@0.4.3
36
+ - @checkstack/integration-backend@0.1.9
37
+ - @checkstack/maintenance-common@0.4.5
38
+ - @checkstack/signal-common@0.1.6
39
+ - @checkstack/queue-api@0.2.3
40
+
3
41
  ## 0.8.2
4
42
 
5
43
  ### Patch Changes
@@ -0,0 +1 @@
1
+ ALTER TABLE "health_check_aggregates" ADD COLUMN "tdigest_state" jsonb;
@@ -0,0 +1,426 @@
1
+ {
2
+ "id": "b297253c-1c34-49b0-ad7e-4e06aff71d2d",
3
+ "prevId": "86171dc8-efcc-4246-a95a-665fdefb1a1f",
4
+ "version": "7",
5
+ "dialect": "postgresql",
6
+ "tables": {
7
+ "public.health_check_aggregates": {
8
+ "name": "health_check_aggregates",
9
+ "schema": "",
10
+ "columns": {
11
+ "id": {
12
+ "name": "id",
13
+ "type": "uuid",
14
+ "primaryKey": true,
15
+ "notNull": true,
16
+ "default": "gen_random_uuid()"
17
+ },
18
+ "configuration_id": {
19
+ "name": "configuration_id",
20
+ "type": "uuid",
21
+ "primaryKey": false,
22
+ "notNull": true
23
+ },
24
+ "system_id": {
25
+ "name": "system_id",
26
+ "type": "text",
27
+ "primaryKey": false,
28
+ "notNull": true
29
+ },
30
+ "bucket_start": {
31
+ "name": "bucket_start",
32
+ "type": "timestamp",
33
+ "primaryKey": false,
34
+ "notNull": true
35
+ },
36
+ "bucket_size": {
37
+ "name": "bucket_size",
38
+ "type": "bucket_size",
39
+ "typeSchema": "public",
40
+ "primaryKey": false,
41
+ "notNull": true
42
+ },
43
+ "run_count": {
44
+ "name": "run_count",
45
+ "type": "integer",
46
+ "primaryKey": false,
47
+ "notNull": true
48
+ },
49
+ "healthy_count": {
50
+ "name": "healthy_count",
51
+ "type": "integer",
52
+ "primaryKey": false,
53
+ "notNull": true
54
+ },
55
+ "degraded_count": {
56
+ "name": "degraded_count",
57
+ "type": "integer",
58
+ "primaryKey": false,
59
+ "notNull": true
60
+ },
61
+ "unhealthy_count": {
62
+ "name": "unhealthy_count",
63
+ "type": "integer",
64
+ "primaryKey": false,
65
+ "notNull": true
66
+ },
67
+ "latency_sum_ms": {
68
+ "name": "latency_sum_ms",
69
+ "type": "integer",
70
+ "primaryKey": false,
71
+ "notNull": false
72
+ },
73
+ "avg_latency_ms": {
74
+ "name": "avg_latency_ms",
75
+ "type": "integer",
76
+ "primaryKey": false,
77
+ "notNull": false
78
+ },
79
+ "min_latency_ms": {
80
+ "name": "min_latency_ms",
81
+ "type": "integer",
82
+ "primaryKey": false,
83
+ "notNull": false
84
+ },
85
+ "max_latency_ms": {
86
+ "name": "max_latency_ms",
87
+ "type": "integer",
88
+ "primaryKey": false,
89
+ "notNull": false
90
+ },
91
+ "p95_latency_ms": {
92
+ "name": "p95_latency_ms",
93
+ "type": "integer",
94
+ "primaryKey": false,
95
+ "notNull": false
96
+ },
97
+ "aggregated_result": {
98
+ "name": "aggregated_result",
99
+ "type": "jsonb",
100
+ "primaryKey": false,
101
+ "notNull": false
102
+ },
103
+ "tdigest_state": {
104
+ "name": "tdigest_state",
105
+ "type": "jsonb",
106
+ "primaryKey": false,
107
+ "notNull": false
108
+ }
109
+ },
110
+ "indexes": {
111
+ "health_check_aggregates_bucket_unique": {
112
+ "name": "health_check_aggregates_bucket_unique",
113
+ "columns": [
114
+ {
115
+ "expression": "configuration_id",
116
+ "isExpression": false,
117
+ "asc": true,
118
+ "nulls": "last"
119
+ },
120
+ {
121
+ "expression": "system_id",
122
+ "isExpression": false,
123
+ "asc": true,
124
+ "nulls": "last"
125
+ },
126
+ {
127
+ "expression": "bucket_start",
128
+ "isExpression": false,
129
+ "asc": true,
130
+ "nulls": "last"
131
+ },
132
+ {
133
+ "expression": "bucket_size",
134
+ "isExpression": false,
135
+ "asc": true,
136
+ "nulls": "last"
137
+ }
138
+ ],
139
+ "isUnique": true,
140
+ "concurrently": false,
141
+ "method": "btree",
142
+ "with": {}
143
+ }
144
+ },
145
+ "foreignKeys": {
146
+ "health_check_aggregates_configuration_id_health_check_configurations_id_fk": {
147
+ "name": "health_check_aggregates_configuration_id_health_check_configurations_id_fk",
148
+ "tableFrom": "health_check_aggregates",
149
+ "tableTo": "health_check_configurations",
150
+ "columnsFrom": [
151
+ "configuration_id"
152
+ ],
153
+ "columnsTo": [
154
+ "id"
155
+ ],
156
+ "onDelete": "cascade",
157
+ "onUpdate": "no action"
158
+ }
159
+ },
160
+ "compositePrimaryKeys": {},
161
+ "uniqueConstraints": {},
162
+ "policies": {},
163
+ "checkConstraints": {},
164
+ "isRLSEnabled": false
165
+ },
166
+ "public.health_check_configurations": {
167
+ "name": "health_check_configurations",
168
+ "schema": "",
169
+ "columns": {
170
+ "id": {
171
+ "name": "id",
172
+ "type": "uuid",
173
+ "primaryKey": true,
174
+ "notNull": true,
175
+ "default": "gen_random_uuid()"
176
+ },
177
+ "name": {
178
+ "name": "name",
179
+ "type": "text",
180
+ "primaryKey": false,
181
+ "notNull": true
182
+ },
183
+ "strategy_id": {
184
+ "name": "strategy_id",
185
+ "type": "text",
186
+ "primaryKey": false,
187
+ "notNull": true
188
+ },
189
+ "config": {
190
+ "name": "config",
191
+ "type": "jsonb",
192
+ "primaryKey": false,
193
+ "notNull": true
194
+ },
195
+ "collectors": {
196
+ "name": "collectors",
197
+ "type": "jsonb",
198
+ "primaryKey": false,
199
+ "notNull": false
200
+ },
201
+ "interval_seconds": {
202
+ "name": "interval_seconds",
203
+ "type": "integer",
204
+ "primaryKey": false,
205
+ "notNull": true
206
+ },
207
+ "is_template": {
208
+ "name": "is_template",
209
+ "type": "boolean",
210
+ "primaryKey": false,
211
+ "notNull": false,
212
+ "default": false
213
+ },
214
+ "paused": {
215
+ "name": "paused",
216
+ "type": "boolean",
217
+ "primaryKey": false,
218
+ "notNull": true,
219
+ "default": false
220
+ },
221
+ "created_at": {
222
+ "name": "created_at",
223
+ "type": "timestamp",
224
+ "primaryKey": false,
225
+ "notNull": true,
226
+ "default": "now()"
227
+ },
228
+ "updated_at": {
229
+ "name": "updated_at",
230
+ "type": "timestamp",
231
+ "primaryKey": false,
232
+ "notNull": true,
233
+ "default": "now()"
234
+ }
235
+ },
236
+ "indexes": {},
237
+ "foreignKeys": {},
238
+ "compositePrimaryKeys": {},
239
+ "uniqueConstraints": {},
240
+ "policies": {},
241
+ "checkConstraints": {},
242
+ "isRLSEnabled": false
243
+ },
244
+ "public.health_check_runs": {
245
+ "name": "health_check_runs",
246
+ "schema": "",
247
+ "columns": {
248
+ "id": {
249
+ "name": "id",
250
+ "type": "uuid",
251
+ "primaryKey": true,
252
+ "notNull": true,
253
+ "default": "gen_random_uuid()"
254
+ },
255
+ "configuration_id": {
256
+ "name": "configuration_id",
257
+ "type": "uuid",
258
+ "primaryKey": false,
259
+ "notNull": true
260
+ },
261
+ "system_id": {
262
+ "name": "system_id",
263
+ "type": "text",
264
+ "primaryKey": false,
265
+ "notNull": true
266
+ },
267
+ "status": {
268
+ "name": "status",
269
+ "type": "health_check_status",
270
+ "typeSchema": "public",
271
+ "primaryKey": false,
272
+ "notNull": true
273
+ },
274
+ "latency_ms": {
275
+ "name": "latency_ms",
276
+ "type": "integer",
277
+ "primaryKey": false,
278
+ "notNull": false
279
+ },
280
+ "result": {
281
+ "name": "result",
282
+ "type": "jsonb",
283
+ "primaryKey": false,
284
+ "notNull": false
285
+ },
286
+ "timestamp": {
287
+ "name": "timestamp",
288
+ "type": "timestamp",
289
+ "primaryKey": false,
290
+ "notNull": true,
291
+ "default": "now()"
292
+ }
293
+ },
294
+ "indexes": {},
295
+ "foreignKeys": {
296
+ "health_check_runs_configuration_id_health_check_configurations_id_fk": {
297
+ "name": "health_check_runs_configuration_id_health_check_configurations_id_fk",
298
+ "tableFrom": "health_check_runs",
299
+ "tableTo": "health_check_configurations",
300
+ "columnsFrom": [
301
+ "configuration_id"
302
+ ],
303
+ "columnsTo": [
304
+ "id"
305
+ ],
306
+ "onDelete": "cascade",
307
+ "onUpdate": "no action"
308
+ }
309
+ },
310
+ "compositePrimaryKeys": {},
311
+ "uniqueConstraints": {},
312
+ "policies": {},
313
+ "checkConstraints": {},
314
+ "isRLSEnabled": false
315
+ },
316
+ "public.system_health_checks": {
317
+ "name": "system_health_checks",
318
+ "schema": "",
319
+ "columns": {
320
+ "system_id": {
321
+ "name": "system_id",
322
+ "type": "text",
323
+ "primaryKey": false,
324
+ "notNull": true
325
+ },
326
+ "configuration_id": {
327
+ "name": "configuration_id",
328
+ "type": "uuid",
329
+ "primaryKey": false,
330
+ "notNull": true
331
+ },
332
+ "enabled": {
333
+ "name": "enabled",
334
+ "type": "boolean",
335
+ "primaryKey": false,
336
+ "notNull": true,
337
+ "default": true
338
+ },
339
+ "state_thresholds": {
340
+ "name": "state_thresholds",
341
+ "type": "jsonb",
342
+ "primaryKey": false,
343
+ "notNull": false
344
+ },
345
+ "retention_config": {
346
+ "name": "retention_config",
347
+ "type": "jsonb",
348
+ "primaryKey": false,
349
+ "notNull": false
350
+ },
351
+ "created_at": {
352
+ "name": "created_at",
353
+ "type": "timestamp",
354
+ "primaryKey": false,
355
+ "notNull": true,
356
+ "default": "now()"
357
+ },
358
+ "updated_at": {
359
+ "name": "updated_at",
360
+ "type": "timestamp",
361
+ "primaryKey": false,
362
+ "notNull": true,
363
+ "default": "now()"
364
+ }
365
+ },
366
+ "indexes": {},
367
+ "foreignKeys": {
368
+ "system_health_checks_configuration_id_health_check_configurations_id_fk": {
369
+ "name": "system_health_checks_configuration_id_health_check_configurations_id_fk",
370
+ "tableFrom": "system_health_checks",
371
+ "tableTo": "health_check_configurations",
372
+ "columnsFrom": [
373
+ "configuration_id"
374
+ ],
375
+ "columnsTo": [
376
+ "id"
377
+ ],
378
+ "onDelete": "cascade",
379
+ "onUpdate": "no action"
380
+ }
381
+ },
382
+ "compositePrimaryKeys": {
383
+ "system_health_checks_system_id_configuration_id_pk": {
384
+ "name": "system_health_checks_system_id_configuration_id_pk",
385
+ "columns": [
386
+ "system_id",
387
+ "configuration_id"
388
+ ]
389
+ }
390
+ },
391
+ "uniqueConstraints": {},
392
+ "policies": {},
393
+ "checkConstraints": {},
394
+ "isRLSEnabled": false
395
+ }
396
+ },
397
+ "enums": {
398
+ "public.bucket_size": {
399
+ "name": "bucket_size",
400
+ "schema": "public",
401
+ "values": [
402
+ "hourly",
403
+ "daily"
404
+ ]
405
+ },
406
+ "public.health_check_status": {
407
+ "name": "health_check_status",
408
+ "schema": "public",
409
+ "values": [
410
+ "healthy",
411
+ "unhealthy",
412
+ "degraded"
413
+ ]
414
+ }
415
+ },
416
+ "schemas": {},
417
+ "sequences": {},
418
+ "roles": {},
419
+ "policies": {},
420
+ "views": {},
421
+ "_meta": {
422
+ "columns": {},
423
+ "schemas": {},
424
+ "tables": {}
425
+ }
426
+ }
@@ -64,6 +64,13 @@
64
64
  "when": 1768934529918,
65
65
  "tag": "0008_broad_black_tom",
66
66
  "breakpoints": true
67
+ },
68
+ {
69
+ "idx": 9,
70
+ "version": "7",
71
+ "when": 1769077338943,
72
+ "tag": "0009_late_argent",
73
+ "breakpoints": true
67
74
  }
68
75
  ]
69
76
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/healthcheck-backend",
3
- "version": "0.8.2",
3
+ "version": "0.8.3",
4
4
  "type": "module",
5
5
  "main": "src/index.ts",
6
6
  "scripts": {
@@ -11,20 +11,21 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "@checkstack/backend-api": "0.5.2",
14
- "@checkstack/catalog-backend": "0.2.9",
15
- "@checkstack/catalog-common": "1.2.5",
14
+ "@checkstack/catalog-backend": "0.2.10",
15
+ "@checkstack/catalog-common": "1.2.6",
16
+ "@checkstack/command-backend": "0.1.8",
17
+ "@checkstack/common": "0.6.1",
16
18
  "@checkstack/healthcheck-common": "0.8.1",
19
+ "@checkstack/incident-common": "0.4.2",
17
20
  "@checkstack/integration-backend": "0.1.8",
18
- "@checkstack/maintenance-common": "0.4.3",
19
- "@checkstack/incident-common": "0.4.1",
21
+ "@checkstack/maintenance-common": "0.4.4",
20
22
  "@checkstack/queue-api": "0.2.2",
21
23
  "@checkstack/signal-common": "0.1.5",
22
- "@checkstack/command-backend": "0.1.8",
23
24
  "@hono/zod-validator": "^0.7.6",
24
25
  "drizzle-orm": "^0.45.1",
25
26
  "hono": "^4.0.0",
26
- "zod": "^4.2.1",
27
- "@checkstack/common": "0.6.1"
27
+ "tdigest": "^0.1.2",
28
+ "zod": "^4.2.1"
28
29
  },
29
30
  "devDependencies": {
30
31
  "@checkstack/drizzle-helper": "0.0.3",
@@ -33,6 +34,7 @@
33
34
  "@checkstack/tsconfig": "0.0.3",
34
35
  "@orpc/server": "^1.13.2",
35
36
  "@types/bun": "^1.0.0",
37
+ "@types/tdigest": "^0.1.5",
36
38
  "date-fns": "^4.1.0",
37
39
  "drizzle-kit": "^0.31.8",
38
40
  "typescript": "^5.0.0"
@@ -100,8 +100,8 @@ export function extractLatencies(
100
100
  // ===== Collector Aggregation =====
101
101
 
102
102
  /**
103
- * Aggregate collector data from runs in a bucket.
104
- * Groups by collector UUID and calls each collector's aggregateResult.
103
+ * Aggregate collector data from runs in a bucket using incremental mergeResult.
104
+ * Groups by collector UUID and calls each collector's mergeResult for each run.
105
105
  */
106
106
  export function aggregateCollectorData(
107
107
  runs: Array<{
@@ -111,12 +111,13 @@ export function aggregateCollectorData(
111
111
  }>,
112
112
  collectorRegistry: CollectorRegistry,
113
113
  ): Record<string, unknown> {
114
- // Group collector data by UUID
115
- const collectorDataByUuid = new Map<
114
+ // Track aggregated results by collector UUID
115
+ const aggregatedByUuid = new Map<
116
116
  string,
117
- { collectorId: string; metadata: Record<string, unknown>[] }
117
+ { collectorId: string; aggregated: Record<string, unknown> }
118
118
  >();
119
119
 
120
+ // Process each run incrementally
120
121
  for (const run of runs) {
121
122
  const collectors = run.metadata?.collectors as
122
123
  | Record<string, Record<string, unknown>>
@@ -127,30 +128,32 @@ export function aggregateCollectorData(
127
128
  const collectorId = data._collectorId as string | undefined;
128
129
  if (!collectorId) continue;
129
130
 
130
- if (!collectorDataByUuid.has(uuid)) {
131
- collectorDataByUuid.set(uuid, { collectorId, metadata: [] });
132
- }
131
+ const registered = collectorRegistry.getCollector(collectorId);
132
+ if (!registered?.collector.mergeResult) continue;
133
133
 
134
- // Add metadata without internal fields
135
- const { _collectorId, _assertionFailed, ...rest } = data;
136
- collectorDataByUuid.get(uuid)!.metadata.push(rest);
137
- }
138
- }
134
+ // Get existing aggregate for this UUID (or undefined for first run)
135
+ const existing = aggregatedByUuid.get(uuid)?.aggregated;
139
136
 
140
- // Call aggregateResult for each collector
141
- const result: Record<string, unknown> = {};
137
+ // Strip internal fields from collector data
138
+ const { _collectorId, _assertionFailed, ...collectorMetadata } = data;
142
139
 
143
- for (const [uuid, { collectorId, metadata }] of collectorDataByUuid) {
144
- const registered = collectorRegistry.getCollector(collectorId);
145
- if (!registered?.collector.aggregateResult) continue;
140
+ // Call mergeResult to incrementally aggregate
141
+ const merged = registered.collector.mergeResult(existing, {
142
+ status: run.status as "healthy" | "unhealthy" | "degraded",
143
+ latencyMs: run.latencyMs,
144
+ metadata: collectorMetadata,
145
+ });
146
146
 
147
- // Transform metadata to the format expected by aggregateResult
148
- const runsForAggregation = metadata.map((m) => ({
149
- status: "healthy" as const,
150
- metadata: m,
151
- }));
147
+ aggregatedByUuid.set(uuid, {
148
+ collectorId,
149
+ aggregated: merged as Record<string, unknown>,
150
+ });
151
+ }
152
+ }
152
153
 
153
- const aggregated = registered.collector.aggregateResult(runsForAggregation);
154
+ // Build final result
155
+ const result: Record<string, unknown> = {};
156
+ for (const [uuid, { collectorId, aggregated }] of aggregatedByUuid) {
154
157
  result[uuid] = {
155
158
  _collectorId: collectorId,
156
159
  ...aggregated,
@@ -62,8 +62,8 @@ describe("HealthCheckService.getAggregatedHistory", () => {
62
62
  config: { version: 1, schema: {} },
63
63
  aggregatedResult: { version: 1, schema: {} },
64
64
  execute: mock(),
65
- aggregateResult: mock((runs: unknown[]) => ({
66
- totalRuns: runs.length,
65
+ mergeResult: mock((existing: { totalRuns?: number } | undefined) => ({
66
+ totalRuns: (existing?.totalRuns ?? 0) + 1,
67
67
  customMetric: "aggregated",
68
68
  })),
69
69
  })),
@@ -243,7 +243,7 @@ describe("HealthCheckService.getAggregatedHistory", () => {
243
243
  });
244
244
 
245
245
  describe("strategy metadata aggregation", () => {
246
- it("calls strategy.aggregateResult for each bucket", async () => {
246
+ it("calls strategy.mergeResult for each run in bucket", async () => {
247
247
  const runs = [
248
248
  {
249
249
  id: "run-1",
@@ -412,18 +412,23 @@ describe("HealthCheckService.getAggregatedHistory", () => {
412
412
  if (collectorId === "healthcheck-http.request") {
413
413
  return {
414
414
  collector: {
415
- aggregateResult: (
416
- runsData: Array<{
415
+ mergeResult: (
416
+ existing: Record<string, unknown> | undefined,
417
+ newRun: {
417
418
  status: string;
418
419
  metadata?: Record<string, unknown>;
419
- }>,
420
+ },
420
421
  ) => {
421
- const times = runsData
422
- .map((r) => r.metadata?.responseTimeMs as number)
423
- .filter((v) => typeof v === "number");
422
+ const prevSum = (existing?.sumResponseTimeMs as number) ?? 0;
423
+ const prevCount = (existing?.count as number) ?? 0;
424
+ const responseTime =
425
+ (newRun.metadata?.responseTimeMs as number) ?? 0;
426
+ const newSum = prevSum + responseTime;
427
+ const newCount = prevCount + 1;
424
428
  return {
425
- avgResponseTimeMs:
426
- times.reduce((a, b) => a + b, 0) / times.length,
429
+ sumResponseTimeMs: newSum,
430
+ count: newCount,
431
+ avgResponseTimeMs: newSum / newCount,
427
432
  successRate: 100,
428
433
  };
429
434
  },