@checkstack/healthcheck-backend 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/service.ts CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  StateThresholds,
6
6
  HealthCheckStatus,
7
7
  RetentionConfig,
8
+ type HealthCheckRunResult,
8
9
  } from "@checkstack/healthcheck-common";
9
10
  import {
10
11
  healthCheckConfigurations,
@@ -12,13 +13,13 @@ import {
12
13
  healthCheckRuns,
13
14
  healthCheckAggregates,
14
15
  VersionedStateThresholds,
15
- DEFAULT_RETENTION_CONFIG,
16
16
  } from "./schema";
17
17
  import * as schema from "./schema";
18
- import { eq, and, InferSelectModel, desc, gte, lte, lt } from "drizzle-orm";
18
+ import { eq, and, InferSelectModel, desc, gte, lte, isNull } from "drizzle-orm";
19
19
  import { ORPCError } from "@orpc/server";
20
20
  import { evaluateHealthStatus } from "./state-evaluator";
21
21
  import { stateThresholds } from "./state-thresholds-migrations";
22
+ import { incrementHourlyAggregate } from "./realtime-aggregation";
22
23
  import type {
23
24
  HealthCheckRegistry,
24
25
  SafeDatabase,
@@ -130,12 +131,16 @@ export class HealthCheckService {
130
131
  configurationId: string;
131
132
  enabled?: boolean;
132
133
  stateThresholds?: StateThresholds;
134
+ satelliteIds?: string[];
135
+ includeLocal?: boolean;
133
136
  }) {
134
137
  const {
135
138
  systemId,
136
139
  configurationId,
137
140
  enabled = true,
138
141
  stateThresholds: stateThresholds_,
142
+ satelliteIds,
143
+ includeLocal = true,
139
144
  } = props;
140
145
 
141
146
  // Wrap thresholds in versioned config if provided
@@ -149,6 +154,8 @@ export class HealthCheckService {
149
154
  configurationId,
150
155
  enabled,
151
156
  stateThresholds: versionedThresholds,
157
+ satelliteIds: satelliteIds ?? undefined,
158
+ includeLocal,
152
159
  })
153
160
  .onConflictDoUpdate({
154
161
  target: [
@@ -158,6 +165,8 @@ export class HealthCheckService {
158
165
  set: {
159
166
  enabled,
160
167
  stateThresholds: versionedThresholds,
168
+ satelliteIds: satelliteIds ?? undefined,
169
+ includeLocal,
161
170
  updatedAt: new Date(),
162
171
  },
163
172
  });
@@ -271,6 +280,8 @@ export class HealthCheckService {
271
280
  configName: healthCheckConfigurations.name,
272
281
  enabled: systemHealthChecks.enabled,
273
282
  stateThresholds: systemHealthChecks.stateThresholds,
283
+ satelliteIds: systemHealthChecks.satelliteIds,
284
+ includeLocal: systemHealthChecks.includeLocal,
274
285
  })
275
286
  .from(systemHealthChecks)
276
287
  .innerJoin(
@@ -291,6 +302,8 @@ export class HealthCheckService {
291
302
  configurationName: row.configName,
292
303
  enabled: row.enabled,
293
304
  stateThresholds: thresholds,
305
+ satelliteIds: row.satelliteIds ?? undefined,
306
+ includeLocal: row.includeLocal,
294
307
  });
295
308
  }
296
309
  return results;
@@ -475,6 +488,7 @@ export class HealthCheckService {
475
488
  configurationId?: string;
476
489
  startDate?: Date;
477
490
  endDate?: Date;
491
+ sourceFilter?: string;
478
492
  limit?: number;
479
493
  offset?: number;
480
494
  sortOrder: "asc" | "desc";
@@ -484,6 +498,7 @@ export class HealthCheckService {
484
498
  configurationId,
485
499
  startDate,
486
500
  endDate,
501
+ sourceFilter,
487
502
  limit = 10,
488
503
  offset = 0,
489
504
  sortOrder,
@@ -496,6 +511,13 @@ export class HealthCheckService {
496
511
  if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
497
512
  if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
498
513
 
514
+ // Source filtering: "local" = no sourceId, UUID = specific satellite
515
+ if (sourceFilter === "local") {
516
+ conditions.push(isNull(healthCheckRuns.sourceId));
517
+ } else if (sourceFilter) {
518
+ conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
519
+ }
520
+
499
521
  // Build where clause
500
522
  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
501
523
 
@@ -523,6 +545,8 @@ export class HealthCheckService {
523
545
  status: run.status,
524
546
  timestamp: run.timestamp,
525
547
  latencyMs: run.latencyMs ?? undefined,
548
+ sourceId: run.sourceId ?? undefined,
549
+ sourceLabel: run.sourceLabel ?? undefined,
526
550
  })),
527
551
  total,
528
552
  };
@@ -538,6 +562,7 @@ export class HealthCheckService {
538
562
  configurationId?: string;
539
563
  startDate?: Date;
540
564
  endDate?: Date;
565
+ sourceFilter?: string;
541
566
  limit?: number;
542
567
  offset?: number;
543
568
  sortOrder: "asc" | "desc";
@@ -547,6 +572,7 @@ export class HealthCheckService {
547
572
  configurationId,
548
573
  startDate,
549
574
  endDate,
575
+ sourceFilter,
550
576
  limit = 10,
551
577
  offset = 0,
552
578
  sortOrder,
@@ -559,6 +585,13 @@ export class HealthCheckService {
559
585
  if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
560
586
  if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
561
587
 
588
+ // Source filtering: "local" = no sourceId, UUID = specific satellite
589
+ if (sourceFilter === "local") {
590
+ conditions.push(isNull(healthCheckRuns.sourceId));
591
+ } else if (sourceFilter) {
592
+ conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
593
+ }
594
+
562
595
  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
563
596
  const total = await this.db.$count(healthCheckRuns, whereClause);
564
597
 
@@ -583,6 +616,8 @@ export class HealthCheckService {
583
616
  result: run.result ?? {},
584
617
  timestamp: run.timestamp,
585
618
  latencyMs: run.latencyMs ?? undefined,
619
+ sourceId: run.sourceId ?? undefined,
620
+ sourceLabel: run.sourceLabel ?? undefined,
586
621
  })),
587
622
  total,
588
623
  };
@@ -611,6 +646,8 @@ export class HealthCheckService {
611
646
  result: r.result ?? {},
612
647
  timestamp: r.timestamp,
613
648
  latencyMs: r.latencyMs ?? undefined,
649
+ sourceId: r.sourceId ?? undefined,
650
+ sourceLabel: r.sourceLabel ?? undefined,
614
651
  };
615
652
  }
616
653
 
@@ -625,6 +662,7 @@ export class HealthCheckService {
625
662
  configurationId: string;
626
663
  startDate: Date;
627
664
  endDate: Date;
665
+ sourceFilter?: string;
628
666
  targetPoints?: number;
629
667
  },
630
668
  options: { includeAggregatedResult: boolean },
@@ -634,6 +672,7 @@ export class HealthCheckService {
634
672
  configurationId,
635
673
  startDate,
636
674
  endDate,
675
+ sourceFilter,
637
676
  targetPoints = 500,
638
677
  } = props;
639
678
 
@@ -656,48 +695,66 @@ export class HealthCheckService {
656
695
  ? this.registry.getStrategy(config.strategyId)
657
696
  : undefined;
658
697
 
698
+ // Build source condition for raw runs
699
+ const rawConditions = [
700
+ eq(healthCheckRuns.systemId, systemId),
701
+ eq(healthCheckRuns.configurationId, configurationId),
702
+ gte(healthCheckRuns.timestamp, startDate),
703
+ lte(healthCheckRuns.timestamp, endDate),
704
+ ...(sourceFilter === "local"
705
+ ? [isNull(healthCheckRuns.sourceId)]
706
+ : sourceFilter
707
+ ? [eq(healthCheckRuns.sourceId, sourceFilter)]
708
+ : []),
709
+ ];
710
+
711
+ // Build source condition for hourly aggregates
712
+ const hourlyConditions = [
713
+ eq(healthCheckAggregates.systemId, systemId),
714
+ eq(healthCheckAggregates.configurationId, configurationId),
715
+ eq(healthCheckAggregates.bucketSize, "hourly"),
716
+ gte(healthCheckAggregates.bucketStart, startDate),
717
+ lte(healthCheckAggregates.bucketStart, endDate),
718
+ ...(sourceFilter === "local"
719
+ ? [isNull(healthCheckAggregates.sourceId)]
720
+ : sourceFilter
721
+ ? [eq(healthCheckAggregates.sourceId, sourceFilter)]
722
+ : []),
723
+ ];
724
+
725
+ // Build source condition for daily aggregates
726
+ const dailyConditions = [
727
+ eq(healthCheckAggregates.systemId, systemId),
728
+ eq(healthCheckAggregates.configurationId, configurationId),
729
+ eq(healthCheckAggregates.bucketSize, "daily"),
730
+ gte(healthCheckAggregates.bucketStart, startDate),
731
+ lte(healthCheckAggregates.bucketStart, endDate),
732
+ ...(sourceFilter === "local"
733
+ ? [isNull(healthCheckAggregates.sourceId)]
734
+ : sourceFilter
735
+ ? [eq(healthCheckAggregates.sourceId, sourceFilter)]
736
+ : []),
737
+ ];
738
+
659
739
  // Query all three tiers in parallel
660
740
  const [rawRuns, hourlyAggregates, dailyAggregates] = await Promise.all([
661
741
  // Raw runs
662
742
  this.db
663
743
  .select()
664
744
  .from(healthCheckRuns)
665
- .where(
666
- and(
667
- eq(healthCheckRuns.systemId, systemId),
668
- eq(healthCheckRuns.configurationId, configurationId),
669
- gte(healthCheckRuns.timestamp, startDate),
670
- lte(healthCheckRuns.timestamp, endDate),
671
- ),
672
- )
745
+ .where(and(...rawConditions))
673
746
  .orderBy(healthCheckRuns.timestamp),
674
747
  // Hourly aggregates
675
748
  this.db
676
749
  .select()
677
750
  .from(healthCheckAggregates)
678
- .where(
679
- and(
680
- eq(healthCheckAggregates.systemId, systemId),
681
- eq(healthCheckAggregates.configurationId, configurationId),
682
- eq(healthCheckAggregates.bucketSize, "hourly"),
683
- gte(healthCheckAggregates.bucketStart, startDate),
684
- lte(healthCheckAggregates.bucketStart, endDate),
685
- ),
686
- )
751
+ .where(and(...hourlyConditions))
687
752
  .orderBy(healthCheckAggregates.bucketStart),
688
753
  // Daily aggregates
689
754
  this.db
690
755
  .select()
691
756
  .from(healthCheckAggregates)
692
- .where(
693
- and(
694
- eq(healthCheckAggregates.systemId, systemId),
695
- eq(healthCheckAggregates.configurationId, configurationId),
696
- eq(healthCheckAggregates.bucketSize, "daily"),
697
- gte(healthCheckAggregates.bucketStart, startDate),
698
- lte(healthCheckAggregates.bucketStart, endDate),
699
- ),
700
- )
757
+ .where(and(...dailyConditions))
701
758
  .orderBy(healthCheckAggregates.bucketStart),
702
759
  ]);
703
760
 
@@ -928,122 +985,6 @@ export class HealthCheckService {
928
985
  * Calculate bucket start time for dynamic interval sizing.
929
986
  * Aligns buckets to the query start time.
930
987
  */
931
- /**
932
- * Get availability statistics for a health check over 31-day and 365-day periods.
933
- * Availability is calculated as (healthyCount / totalRunCount) * 100.
934
- *
935
- * With incremental real-time aggregation, hourly aggregates are always up-to-date
936
- * (updated immediately on every run), so we don't need to query raw runs.
937
- */
938
- async getAvailabilityStats(props: {
939
- systemId: string;
940
- configurationId: string;
941
- }): Promise<{
942
- availability31Days: number | null;
943
- availability365Days: number | null;
944
- totalRuns31Days: number;
945
- totalRuns365Days: number;
946
- }> {
947
- const { systemId, configurationId } = props;
948
- const now = new Date();
949
-
950
- // Get retention config to determine what data tiers are available
951
- const { retentionConfig } = await this.getRetentionConfig(
952
- systemId,
953
- configurationId,
954
- );
955
- const config = retentionConfig ?? DEFAULT_RETENTION_CONFIG;
956
-
957
- // Calculate cutoff dates
958
- const cutoff31Days = new Date(now.getTime() - 31 * 24 * 60 * 60 * 1000);
959
- const cutoff365Days = new Date(now.getTime() - 365 * 24 * 60 * 60 * 1000);
960
-
961
- // Cutoff for hourly aggregates based on retention config
962
- const hourlyCutoff = new Date(
963
- now.getTime() - config.hourlyRetentionDays * 24 * 60 * 60 * 1000,
964
- );
965
-
966
- // Query hourly aggregates for the period they cover (up to hourlyRetentionDays)
967
- // These are always up-to-date due to incremental real-time aggregation
968
- const hourlyAggregates = await this.db
969
- .select({
970
- bucketStart: healthCheckAggregates.bucketStart,
971
- runCount: healthCheckAggregates.runCount,
972
- healthyCount: healthCheckAggregates.healthyCount,
973
- })
974
- .from(healthCheckAggregates)
975
- .where(
976
- and(
977
- eq(healthCheckAggregates.systemId, systemId),
978
- eq(healthCheckAggregates.configurationId, configurationId),
979
- eq(healthCheckAggregates.bucketSize, "hourly"),
980
- gte(healthCheckAggregates.bucketStart, hourlyCutoff),
981
- ),
982
- );
983
-
984
- // Query daily aggregates for data beyond hourly retention
985
- const dailyAggregates = await this.db
986
- .select({
987
- bucketStart: healthCheckAggregates.bucketStart,
988
- runCount: healthCheckAggregates.runCount,
989
- healthyCount: healthCheckAggregates.healthyCount,
990
- })
991
- .from(healthCheckAggregates)
992
- .where(
993
- and(
994
- eq(healthCheckAggregates.systemId, systemId),
995
- eq(healthCheckAggregates.configurationId, configurationId),
996
- eq(healthCheckAggregates.bucketSize, "daily"),
997
- gte(healthCheckAggregates.bucketStart, cutoff365Days),
998
- lt(healthCheckAggregates.bucketStart, hourlyCutoff),
999
- ),
1000
- );
1001
-
1002
- // Aggregate counts
1003
- let totalRuns31Days = 0;
1004
- let healthyRuns31Days = 0;
1005
- let totalRuns365Days = 0;
1006
- let healthyRuns365Days = 0;
1007
-
1008
- // Process hourly aggregates (fresh data within hourlyRetentionDays)
1009
- for (const agg of hourlyAggregates) {
1010
- totalRuns365Days += agg.runCount;
1011
- healthyRuns365Days += agg.healthyCount;
1012
-
1013
- if (agg.bucketStart >= cutoff31Days) {
1014
- totalRuns31Days += agg.runCount;
1015
- healthyRuns31Days += agg.healthyCount;
1016
- }
1017
- }
1018
-
1019
- // Process daily aggregates (older data beyond hourly retention)
1020
- for (const agg of dailyAggregates) {
1021
- totalRuns365Days += agg.runCount;
1022
- healthyRuns365Days += agg.healthyCount;
1023
-
1024
- if (agg.bucketStart >= cutoff31Days) {
1025
- totalRuns31Days += agg.runCount;
1026
- healthyRuns31Days += agg.healthyCount;
1027
- }
1028
- }
1029
-
1030
- // Calculate availability percentages
1031
- const availability31Days =
1032
- // eslint-disable-next-line unicorn/no-null -- RPC contract uses nullable()
1033
- totalRuns31Days > 0 ? (healthyRuns31Days / totalRuns31Days) * 100 : null;
1034
- const availability365Days =
1035
- totalRuns365Days > 0
1036
- ? (healthyRuns365Days / totalRuns365Days) * 100
1037
- : // eslint-disable-next-line unicorn/no-null -- RPC contract uses nullable()
1038
- null;
1039
-
1040
- return {
1041
- availability31Days,
1042
- availability365Days,
1043
- totalRuns31Days,
1044
- totalRuns365Days,
1045
- };
1046
- }
1047
988
 
1048
989
  private getBucketStartDynamic(
1049
990
  timestamp: Date,
@@ -1070,4 +1011,134 @@ export class HealthCheckService {
1070
1011
  updatedAt: row.updatedAt,
1071
1012
  };
1072
1013
  }
1014
+
1015
+ /**
1016
+ * Remove a satellite ID from all systemHealthChecks.satelliteIds arrays.
1017
+ * Called when a satellite is deleted via the satellite.removed hook.
1018
+ */
1019
+ async scrubSatelliteFromAssociations(satelliteId: string): Promise<void> {
1020
+ // Get all associations that reference this satellite
1021
+ const associations = await this.db
1022
+ .select({
1023
+ systemId: systemHealthChecks.systemId,
1024
+ configurationId: systemHealthChecks.configurationId,
1025
+ satelliteIds: systemHealthChecks.satelliteIds,
1026
+ })
1027
+ .from(systemHealthChecks);
1028
+
1029
+ // Update each association that contains this satellite ID
1030
+ for (const assoc of associations) {
1031
+ if (!assoc.satelliteIds?.includes(satelliteId)) continue;
1032
+
1033
+ const updated = assoc.satelliteIds.filter((id) => id !== satelliteId);
1034
+ await this.db
1035
+ .update(systemHealthChecks)
1036
+ .set({
1037
+ satelliteIds: updated.length > 0 ? updated : undefined,
1038
+ updatedAt: new Date(),
1039
+ })
1040
+ .where(
1041
+ and(
1042
+ eq(systemHealthChecks.systemId, assoc.systemId),
1043
+ eq(systemHealthChecks.configurationId, assoc.configurationId),
1044
+ ),
1045
+ );
1046
+ }
1047
+ }
1048
+
1049
+ /**
1050
+ * Get all health check assignments for a specific satellite.
1051
+ * Returns the full configuration payload needed for the satellite to execute checks.
1052
+ */
1053
+ async getAssignmentsForSatellite(satelliteId: string) {
1054
+ // Get all associations that reference this satellite
1055
+ const associations = await this.db
1056
+ .select({
1057
+ systemId: systemHealthChecks.systemId,
1058
+ configurationId: systemHealthChecks.configurationId,
1059
+ satelliteIds: systemHealthChecks.satelliteIds,
1060
+ enabled: systemHealthChecks.enabled,
1061
+ })
1062
+ .from(systemHealthChecks);
1063
+
1064
+ // Filter to associations that include this satellite and are enabled
1065
+ const matchingAssociations = associations.filter(
1066
+ (a) => a.enabled && a.satelliteIds?.includes(satelliteId),
1067
+ );
1068
+
1069
+ if (matchingAssociations.length === 0) return [];
1070
+
1071
+ // Get configurations for each matching association
1072
+ const assignments = [];
1073
+ for (const assoc of matchingAssociations) {
1074
+ const [config] = await this.db
1075
+ .select()
1076
+ .from(healthCheckConfigurations)
1077
+ .where(eq(healthCheckConfigurations.id, assoc.configurationId));
1078
+
1079
+ if (!config || config.paused) continue;
1080
+
1081
+ assignments.push({
1082
+ configId: config.id,
1083
+ systemId: assoc.systemId,
1084
+ strategyId: config.strategyId,
1085
+ config: config.config,
1086
+ collectors: config.collectors ?? undefined,
1087
+ intervalSeconds: config.intervalSeconds,
1088
+ });
1089
+ }
1090
+
1091
+ return assignments;
1092
+ }
1093
+
1094
+ /**
1095
+ * Ingest a health check result from a satellite.
1096
+ * Stores the run with source attribution (sourceId + sourceLabel)
1097
+ * and triggers incremental aggregation to keep charts/availability current.
1098
+ */
1099
+ async ingestSatelliteResult(props: {
1100
+ configId: string;
1101
+ systemId: string;
1102
+ status: HealthCheckStatus;
1103
+ latencyMs?: number;
1104
+ result?: HealthCheckRunResult;
1105
+ executedAt: string;
1106
+ sourceId: string;
1107
+ sourceLabel: string;
1108
+ }) {
1109
+ const {
1110
+ configId,
1111
+ systemId,
1112
+ status,
1113
+ latencyMs,
1114
+ result,
1115
+ sourceId,
1116
+ sourceLabel,
1117
+ } = props;
1118
+
1119
+ const resultRecord = result ? { ...result } as Record<string, unknown> : {};
1120
+
1121
+ await this.db.insert(healthCheckRuns).values({
1122
+ configurationId: configId,
1123
+ systemId,
1124
+ status,
1125
+ latencyMs,
1126
+ result: resultRecord,
1127
+ sourceId,
1128
+ sourceLabel,
1129
+ });
1130
+
1131
+ // Trigger incremental hourly aggregation — same as local executor
1132
+ await incrementHourlyAggregate({
1133
+ db: this.db,
1134
+ systemId,
1135
+ configurationId: configId,
1136
+ status,
1137
+ latencyMs,
1138
+ runTimestamp: new Date(props.executedAt),
1139
+ result: resultRecord,
1140
+ collectorRegistry: this.collectorRegistry,
1141
+ sourceLabel,
1142
+ });
1143
+ }
1073
1144
  }