@checkstack/healthcheck-backend 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # @checkstack/healthcheck-backend
2
2
 
3
+ ## 1.0.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 2a749d3: fix: run afterPluginsReady in topological order; merge daily rollups on conflict
8
+
9
+ Two resilience fixes for the dependency chain:
10
+
11
+ 1. **Plugin loader**: Phase 3 (`afterPluginsReady`) now iterates plugins
12
+ in the same topologically-sorted order as Phase 2 (`init`). Previously
13
+ it iterated `pendingInits` in registration order, which ignored
14
+ subscription-spec dependencies — catalog's afterPluginsReady registers
15
+ `catalog.system` and `catalog.group` notification targets, and emitting
16
+ plugins (incident, maintenance, …) call `registerSubscriptionSpec`
17
+ against those targets in their own afterPluginsReady. With registration
18
+ order, an emitter could run before catalog and hit
19
+ `Target type catalog.group is not registered`. Sorted order encodes
20
+ the dependency via `spec.target.ownerPlugin`, so the emitter now
21
+ always runs after the target owner.
22
+
23
+ 2. **Healthcheck retention job**: the daily rollup now upserts
24
+ `health_check_aggregates` with `ON CONFLICT DO UPDATE` instead of a
25
+ plain insert. Previously, late-arriving hourly aggregates (e.g. from
26
+ a satellite that was offline when the prior rollup ran) would crash
27
+ the rollup with a unique-constraint violation on
28
+ `(configuration_id, system_id, bucket_start, bucket_size, source_id)`.
29
+ The merge sums counts, recomputes the average latency, and folds min/max/p95 (keeping the larger p95) into the existing daily
30
+ row.
31
+
32
+ - @checkstack/satellite-backend@0.2.19
33
+
3
34
  ## 1.0.0
4
35
 
5
36
  ### Major Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/healthcheck-backend",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "type": "module",
5
5
  "main": "src/index.ts",
6
6
  "checkstack": {
@@ -6,7 +6,7 @@ import {
6
6
  healthCheckAggregates,
7
7
  DEFAULT_RETENTION_CONFIG,
8
8
  } from "./schema";
9
- import { eq, and, lt } from "drizzle-orm";
9
+ import { eq, and, lt, sql } from "drizzle-orm";
10
10
  import type { QueueManager } from "@checkstack/queue-api";
11
11
 
12
12
  type Db = SafeDatabase<typeof schema>;
@@ -228,23 +228,59 @@ async function rollupHourlyAggregates(params: RollupParams) {
228
228
  const p95LatencyMs =
229
229
  p95Values.length > 0 ? Math.max(...p95Values) : undefined;
230
230
 
231
- // Insert daily aggregate
232
- await db.insert(healthCheckAggregates).values({
233
- configurationId,
234
- systemId,
235
- bucketStart: bucket.bucketStart,
236
- bucketSize: "daily",
237
- runCount,
238
- healthyCount,
239
- degradedCount,
240
- unhealthyCount,
241
- latencySumMs: latencySumMs > 0 ? latencySumMs : undefined,
242
- avgLatencyMs,
243
- minLatencyMs,
244
- maxLatencyMs,
245
- p95LatencyMs,
246
- aggregatedResult: undefined, // Cannot combine result across hours
247
- });
231
+ // Upsert the daily aggregate. A row may already exist for this
232
+ // (configurationId, systemId, day, daily, sourceId=null) tuple if a
233
+ // prior rollup ran and then late-arriving hourly buckets (e.g. from
234
+ // a satellite that was offline) were rolled up afterwards. Merge in
235
+ // that case rather than crashing — sums add, min/max/p95 fold.
236
+ const newLatencySum = latencySumMs > 0 ? latencySumMs : undefined;
237
+ await db
238
+ .insert(healthCheckAggregates)
239
+ .values({
240
+ configurationId,
241
+ systemId,
242
+ bucketStart: bucket.bucketStart,
243
+ bucketSize: "daily",
244
+ runCount,
245
+ healthyCount,
246
+ degradedCount,
247
+ unhealthyCount,
248
+ latencySumMs: newLatencySum,
249
+ avgLatencyMs,
250
+ minLatencyMs,
251
+ maxLatencyMs,
252
+ p95LatencyMs,
253
+ aggregatedResult: undefined, // Cannot combine result across hours
254
+ })
255
+ .onConflictDoUpdate({
256
+ target: [
257
+ healthCheckAggregates.configurationId,
258
+ healthCheckAggregates.systemId,
259
+ healthCheckAggregates.bucketStart,
260
+ healthCheckAggregates.bucketSize,
261
+ healthCheckAggregates.sourceId,
262
+ ],
263
+ set: {
264
+ runCount: sql`${healthCheckAggregates.runCount} + ${runCount}`,
265
+ healthyCount: sql`${healthCheckAggregates.healthyCount} + ${healthyCount}`,
266
+ degradedCount: sql`${healthCheckAggregates.degradedCount} + ${degradedCount}`,
267
+ unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${unhealthyCount}`,
268
+ latencySumMs: sql`COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}`,
269
+ avgLatencyMs: sql`CASE WHEN (${healthCheckAggregates.runCount} + ${runCount}) > 0 THEN (COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}) / (${healthCheckAggregates.runCount} + ${runCount}) ELSE ${healthCheckAggregates.avgLatencyMs} END`,
270
+ minLatencyMs:
271
+ minLatencyMs === undefined
272
+ ? sql`${healthCheckAggregates.minLatencyMs}`
273
+ : sql`LEAST(COALESCE(${healthCheckAggregates.minLatencyMs}, ${minLatencyMs}), ${minLatencyMs})`,
274
+ maxLatencyMs:
275
+ maxLatencyMs === undefined
276
+ ? sql`${healthCheckAggregates.maxLatencyMs}`
277
+ : sql`GREATEST(COALESCE(${healthCheckAggregates.maxLatencyMs}, ${maxLatencyMs}), ${maxLatencyMs})`,
278
+ p95LatencyMs:
279
+ p95LatencyMs === undefined
280
+ ? sql`${healthCheckAggregates.p95LatencyMs}`
281
+ : sql`GREATEST(COALESCE(${healthCheckAggregates.p95LatencyMs}, ${p95LatencyMs}), ${p95LatencyMs})`,
282
+ },
283
+ });
248
284
 
249
285
  // Delete processed hourly aggregates
250
286
  for (const hourly of bucket.aggregates) {