@checkstack/healthcheck-backend 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/service.ts CHANGED
@@ -10,6 +10,7 @@ import {
10
10
  healthCheckConfigurations,
11
11
  systemHealthChecks,
12
12
  healthCheckRuns,
13
+ healthCheckAggregates,
13
14
  VersionedStateThresholds,
14
15
  } from "./schema";
15
16
  import * as schema from "./schema";
@@ -20,7 +21,17 @@ import { stateThresholds } from "./state-thresholds-migrations";
20
21
  import type {
21
22
  HealthCheckRegistry,
22
23
  SafeDatabase,
24
+ CollectorRegistry,
23
25
  } from "@checkstack/backend-api";
26
+ import {
27
+ aggregateCollectorData,
28
+ extractLatencies,
29
+ mergeTieredBuckets,
30
+ reaggregateBuckets,
31
+ countStatuses,
32
+ calculateLatencyStats,
33
+ type NormalizedBucket,
34
+ } from "./aggregation-utils";
24
35
 
25
36
  // Drizzle type helper - uses SafeDatabase to prevent relational query API usage
26
37
  type Db = SafeDatabase<typeof schema>;
@@ -43,6 +54,7 @@ export class HealthCheckService {
43
54
  constructor(
44
55
  private db: Db,
45
56
  private registry?: HealthCheckRegistry,
57
+ private collectorRegistry?: CollectorRegistry,
46
58
  ) {}
47
59
 
48
60
  async createConfiguration(
@@ -93,6 +105,20 @@ export class HealthCheckService {
93
105
  .where(eq(healthCheckConfigurations.id, id));
94
106
  }
95
107
 
108
+ async pauseConfiguration(id: string): Promise<void> {
109
+ await this.db
110
+ .update(healthCheckConfigurations)
111
+ .set({ paused: true, updatedAt: new Date() })
112
+ .where(eq(healthCheckConfigurations.id, id));
113
+ }
114
+
115
+ async resumeConfiguration(id: string): Promise<void> {
116
+ await this.db
117
+ .update(healthCheckConfigurations)
118
+ .set({ paused: false, updatedAt: new Date() })
119
+ .where(eq(healthCheckConfigurations.id, id));
120
+ }
121
+
96
122
  async getConfigurations(): Promise<HealthCheckConfiguration[]> {
97
123
  const configs = await this.db.select().from(healthCheckConfigurations);
98
124
  return configs.map((c) => this.mapConfig(c));
@@ -551,9 +577,9 @@ export class HealthCheckService {
551
577
  }
552
578
 
553
579
  /**
554
- * Get aggregated health check history with bucketed metrics.
555
- * Currently aggregates raw data on-the-fly. Will merge with stored aggregates
556
- * once the retention job populates historical data.
580
+ * Get aggregated health check history with dynamically-sized buckets.
581
+ * Queries all three tiers (raw, hourly, daily) and merges with priority.
582
+ * Bucket interval is calculated as (endDate - startDate) / targetPoints.
557
583
  */
558
584
  async getAggregatedHistory(
559
585
  props: {
@@ -561,23 +587,25 @@ export class HealthCheckService {
561
587
  configurationId: string;
562
588
  startDate: Date;
563
589
  endDate: Date;
564
- bucketSize: "hourly" | "daily" | "auto";
590
+ targetPoints?: number;
565
591
  },
566
592
  options: { includeAggregatedResult: boolean },
567
593
  ) {
568
- const { systemId, configurationId, startDate, endDate } = props;
569
- let bucketSize = props.bucketSize;
570
-
571
- // Auto-select bucket size based on range
572
- if (bucketSize === "auto") {
573
- const diffDays =
574
- (endDate.getTime() - startDate.getTime()) / (1000 * 60 * 60 * 24);
575
- bucketSize = diffDays > 7 ? "daily" : "hourly";
576
- }
594
+ const {
595
+ systemId,
596
+ configurationId,
597
+ startDate,
598
+ endDate,
599
+ targetPoints = 500,
600
+ } = props;
601
+
602
+ // Calculate dynamic bucket interval
603
+ const rangeMs = endDate.getTime() - startDate.getTime();
604
+ const MIN_INTERVAL_MS = 1000; // 1 second minimum
605
+ const bucketIntervalMs = Math.max(rangeMs / targetPoints, MIN_INTERVAL_MS);
606
+ const bucketIntervalSeconds = Math.round(bucketIntervalMs / 1000);
577
607
 
578
608
  // Get the configuration to find the strategy
579
- // Note: Using standard select instead of relational query API
580
- // as the relational API is blocked by the scoped database proxy
581
609
  const [config] = await this.db
582
610
  .select()
583
611
  .from(healthCheckConfigurations)
@@ -590,21 +618,175 @@ export class HealthCheckService {
590
618
  ? this.registry.getStrategy(config.strategyId)
591
619
  : undefined;
592
620
 
593
- // Query raw runs within the date range (including result for metadata)
594
- const runs = await this.db
595
- .select()
596
- .from(healthCheckRuns)
597
- .where(
598
- and(
599
- eq(healthCheckRuns.systemId, systemId),
600
- eq(healthCheckRuns.configurationId, configurationId),
601
- gte(healthCheckRuns.timestamp, startDate),
602
- lte(healthCheckRuns.timestamp, endDate),
603
- ),
604
- )
605
- .orderBy(healthCheckRuns.timestamp);
621
+ // Query all three tiers in parallel
622
+ const [rawRuns, hourlyAggregates, dailyAggregates] = await Promise.all([
623
+ // Raw runs
624
+ this.db
625
+ .select()
626
+ .from(healthCheckRuns)
627
+ .where(
628
+ and(
629
+ eq(healthCheckRuns.systemId, systemId),
630
+ eq(healthCheckRuns.configurationId, configurationId),
631
+ gte(healthCheckRuns.timestamp, startDate),
632
+ lte(healthCheckRuns.timestamp, endDate),
633
+ ),
634
+ )
635
+ .orderBy(healthCheckRuns.timestamp),
636
+ // Hourly aggregates
637
+ this.db
638
+ .select()
639
+ .from(healthCheckAggregates)
640
+ .where(
641
+ and(
642
+ eq(healthCheckAggregates.systemId, systemId),
643
+ eq(healthCheckAggregates.configurationId, configurationId),
644
+ eq(healthCheckAggregates.bucketSize, "hourly"),
645
+ gte(healthCheckAggregates.bucketStart, startDate),
646
+ lte(healthCheckAggregates.bucketStart, endDate),
647
+ ),
648
+ )
649
+ .orderBy(healthCheckAggregates.bucketStart),
650
+ // Daily aggregates
651
+ this.db
652
+ .select()
653
+ .from(healthCheckAggregates)
654
+ .where(
655
+ and(
656
+ eq(healthCheckAggregates.systemId, systemId),
657
+ eq(healthCheckAggregates.configurationId, configurationId),
658
+ eq(healthCheckAggregates.bucketSize, "daily"),
659
+ gte(healthCheckAggregates.bucketStart, startDate),
660
+ lte(healthCheckAggregates.bucketStart, endDate),
661
+ ),
662
+ )
663
+ .orderBy(healthCheckAggregates.bucketStart),
664
+ ]);
665
+
666
+ // Normalize raw runs to buckets using target interval for proper aggregation
667
+ // This ensures aggregatedResult is computed per target bucket, not per sub-bucket
668
+ const rawBuckets = this.normalizeRawRunsToBuckets({
669
+ runs: rawRuns,
670
+ bucketIntervalMs: bucketIntervalMs,
671
+ rangeStart: startDate,
672
+ strategy,
673
+ });
674
+
675
+ // Normalize hourly and daily aggregates to NormalizedBucket format
676
+ const HOURLY_MS = 60 * 60 * 1000;
677
+ const DAILY_MS = 24 * 60 * 60 * 1000;
678
+
679
+ const hourlyBuckets: NormalizedBucket[] = hourlyAggregates.map((agg) => ({
680
+ bucketStart: agg.bucketStart,
681
+ bucketEndMs: agg.bucketStart.getTime() + HOURLY_MS,
682
+ runCount: agg.runCount,
683
+ healthyCount: agg.healthyCount,
684
+ degradedCount: agg.degradedCount,
685
+ unhealthyCount: agg.unhealthyCount,
686
+ latencySumMs: agg.latencySumMs ?? undefined,
687
+ minLatencyMs: agg.minLatencyMs ?? undefined,
688
+ maxLatencyMs: agg.maxLatencyMs ?? undefined,
689
+ p95LatencyMs: agg.p95LatencyMs ?? undefined,
690
+ aggregatedResult: agg.aggregatedResult ?? undefined,
691
+ sourceTier: "hourly" as const,
692
+ }));
693
+
694
+ const dailyBuckets: NormalizedBucket[] = dailyAggregates.map((agg) => ({
695
+ bucketStart: agg.bucketStart,
696
+ bucketEndMs: agg.bucketStart.getTime() + DAILY_MS,
697
+ runCount: agg.runCount,
698
+ healthyCount: agg.healthyCount,
699
+ degradedCount: agg.degradedCount,
700
+ unhealthyCount: agg.unhealthyCount,
701
+ latencySumMs: agg.latencySumMs ?? undefined,
702
+ minLatencyMs: agg.minLatencyMs ?? undefined,
703
+ maxLatencyMs: agg.maxLatencyMs ?? undefined,
704
+ p95LatencyMs: agg.p95LatencyMs ?? undefined,
705
+ aggregatedResult: agg.aggregatedResult ?? undefined,
706
+ sourceTier: "daily" as const,
707
+ }));
708
+
709
+ // Merge all tiers with priority (raw > hourly > daily)
710
+ const mergedBuckets = mergeTieredBuckets({
711
+ rawBuckets,
712
+ hourlyBuckets,
713
+ dailyBuckets,
714
+ });
715
+
716
+ // Re-aggregate to target bucket interval
717
+ const targetBuckets = reaggregateBuckets({
718
+ sourceBuckets: mergedBuckets,
719
+ targetIntervalMs: bucketIntervalMs,
720
+ rangeStart: startDate,
721
+ });
722
+
723
+ // Convert to output format
724
+ const buckets = targetBuckets.map((bucket) => {
725
+ const successRate =
726
+ bucket.runCount > 0 ? bucket.healthyCount / bucket.runCount : 0;
727
+ const avgLatencyMs =
728
+ bucket.latencySumMs !== undefined && bucket.runCount > 0
729
+ ? Math.round(bucket.latencySumMs / bucket.runCount)
730
+ : undefined;
606
731
 
607
- // Group runs into buckets (with full result for metadata aggregation)
732
+ const baseBucket = {
733
+ bucketStart: bucket.bucketStart,
734
+ bucketIntervalSeconds,
735
+ runCount: bucket.runCount,
736
+ healthyCount: bucket.healthyCount,
737
+ degradedCount: bucket.degradedCount,
738
+ unhealthyCount: bucket.unhealthyCount,
739
+ successRate,
740
+ avgLatencyMs,
741
+ minLatencyMs: bucket.minLatencyMs,
742
+ maxLatencyMs: bucket.maxLatencyMs,
743
+ p95LatencyMs: bucket.p95LatencyMs,
744
+ };
745
+
746
+ // Include aggregatedResult if available (only from raw data)
747
+ if (options.includeAggregatedResult && bucket.aggregatedResult) {
748
+ return {
749
+ ...baseBucket,
750
+ aggregatedResult: bucket.aggregatedResult,
751
+ };
752
+ }
753
+
754
+ return baseBucket;
755
+ });
756
+
757
+ return { buckets, bucketIntervalSeconds };
758
+ }
759
+
760
+ /**
761
+ * Normalize raw runs into buckets for merging with aggregate tiers.
762
+ */
763
+ private normalizeRawRunsToBuckets(params: {
764
+ runs: Array<{
765
+ id: string;
766
+ status: "healthy" | "unhealthy" | "degraded";
767
+ timestamp: Date;
768
+ latencyMs: number | null;
769
+ result: Record<string, unknown> | null;
770
+ }>;
771
+ bucketIntervalMs: number;
772
+ rangeStart: Date;
773
+ strategy?: {
774
+ aggregateResult: (
775
+ runs: Array<{
776
+ status: "healthy" | "unhealthy" | "degraded";
777
+ latencyMs?: number;
778
+ metadata?: unknown;
779
+ }>,
780
+ ) => unknown;
781
+ };
782
+ }): NormalizedBucket[] {
783
+ const { runs, bucketIntervalMs, rangeStart, strategy } = params;
784
+
785
+ if (runs.length === 0) {
786
+ return [];
787
+ }
788
+
789
+ // Group runs by bucket
608
790
  const bucketMap = new Map<
609
791
  string,
610
792
  {
@@ -618,17 +800,21 @@ export class HealthCheckService {
618
800
  >();
619
801
 
620
802
  for (const run of runs) {
621
- const bucketStart = this.getBucketStart(run.timestamp, bucketSize);
803
+ const bucketStart = this.getBucketStartDynamic(
804
+ run.timestamp,
805
+ rangeStart,
806
+ bucketIntervalMs,
807
+ );
622
808
  const key = bucketStart.toISOString();
623
809
 
624
810
  if (!bucketMap.has(key)) {
625
811
  bucketMap.set(key, { bucketStart, runs: [] });
626
812
  }
627
- // run.result is StoredHealthCheckResult: { status, latencyMs, message, metadata }
628
- // Strategy's aggregateResult expects metadata to be the strategy-specific fields
813
+
629
814
  const storedResult = run.result as {
630
815
  metadata?: Record<string, unknown>;
631
816
  } | null;
817
+
632
818
  bucketMap.get(key)!.runs.push({
633
819
  status: run.status,
634
820
  latencyMs: run.latencyMs ?? undefined,
@@ -636,85 +822,70 @@ export class HealthCheckService {
636
822
  });
637
823
  }
638
824
 
639
- // Calculate metrics for each bucket
640
- const buckets = [...bucketMap.values()].map((bucket) => {
641
- const runCount = bucket.runs.length;
642
- const healthyCount = bucket.runs.filter(
643
- (r) => r.status === "healthy",
644
- ).length;
645
- const degradedCount = bucket.runs.filter(
646
- (r) => r.status === "degraded",
647
- ).length;
648
- const unhealthyCount = bucket.runs.filter(
649
- (r) => r.status === "unhealthy",
650
- ).length;
651
- const successRate = runCount > 0 ? healthyCount / runCount : 0;
652
-
653
- const latencies = bucket.runs
654
- .map((r) => r.latencyMs)
655
- .filter((l): l is number => typeof l === "number");
656
- const avgLatencyMs =
657
- latencies.length > 0
658
- ? Math.round(latencies.reduce((a, b) => a + b, 0) / latencies.length)
659
- : undefined;
660
- const minLatencyMs =
661
- latencies.length > 0 ? Math.min(...latencies) : undefined;
662
- const maxLatencyMs =
663
- latencies.length > 0 ? Math.max(...latencies) : undefined;
664
- const p95LatencyMs =
665
- latencies.length > 0
666
- ? this.calculatePercentile(latencies, 95)
667
- : undefined;
825
+ // Convert to NormalizedBucket format
826
+ const result: NormalizedBucket[] = [];
668
827
 
669
- // Build base bucket (always included)
670
- const baseBucket = {
828
+ for (const [, bucket] of bucketMap) {
829
+ const { healthyCount, degradedCount, unhealthyCount } = countStatuses(
830
+ bucket.runs,
831
+ );
832
+ const latencies = extractLatencies(bucket.runs);
833
+ const latencyStats = calculateLatencyStats(latencies);
834
+
835
+ // Compute aggregatedResult if strategy is available
836
+ let aggregatedResult: Record<string, unknown> | undefined;
837
+ if (strategy) {
838
+ const strategyResult = strategy.aggregateResult(bucket.runs) as Record<
839
+ string,
840
+ unknown
841
+ >;
842
+
843
+ // Aggregate collector data if collector registry is available
844
+ let collectorsAggregated: Record<string, unknown> | undefined;
845
+ if (this.collectorRegistry) {
846
+ collectorsAggregated = aggregateCollectorData(
847
+ bucket.runs,
848
+ this.collectorRegistry,
849
+ );
850
+ }
851
+
852
+ aggregatedResult = {
853
+ ...strategyResult,
854
+ ...(collectorsAggregated ? { collectors: collectorsAggregated } : {}),
855
+ };
856
+ }
857
+
858
+ result.push({
671
859
  bucketStart: bucket.bucketStart,
672
- bucketSize: bucketSize as "hourly" | "daily",
673
- runCount,
860
+ bucketEndMs: bucket.bucketStart.getTime() + bucketIntervalMs,
861
+ runCount: bucket.runs.length,
674
862
  healthyCount,
675
863
  degradedCount,
676
864
  unhealthyCount,
677
- successRate,
678
- avgLatencyMs,
679
- minLatencyMs,
680
- maxLatencyMs,
681
- p95LatencyMs,
682
- };
683
-
684
- // Only include aggregatedResult if requested and strategy is available
685
- if (options.includeAggregatedResult && strategy) {
686
- return {
687
- ...baseBucket,
688
- aggregatedResult: strategy.aggregateResult(bucket.runs) as Record<
689
- string,
690
- unknown
691
- >,
692
- };
693
- }
694
-
695
- return baseBucket;
696
- });
865
+ latencySumMs: latencyStats.latencySumMs,
866
+ minLatencyMs: latencyStats.minLatencyMs,
867
+ maxLatencyMs: latencyStats.maxLatencyMs,
868
+ p95LatencyMs: latencyStats.p95LatencyMs,
869
+ aggregatedResult,
870
+ sourceTier: "raw",
871
+ });
872
+ }
697
873
 
698
- return { buckets };
874
+ return result;
699
875
  }
700
876
 
701
- private getBucketStart(
877
+ /**
878
+ * Calculate bucket start time for dynamic interval sizing.
879
+ * Aligns buckets to the query start time.
880
+ */
881
+ private getBucketStartDynamic(
702
882
  timestamp: Date,
703
- bucketSize: "hourly" | "daily",
883
+ rangeStart: Date,
884
+ intervalMs: number,
704
885
  ): Date {
705
- const date = new Date(timestamp);
706
- if (bucketSize === "daily") {
707
- date.setHours(0, 0, 0, 0);
708
- } else {
709
- date.setMinutes(0, 0, 0);
710
- }
711
- return date;
712
- }
713
-
714
- private calculatePercentile(values: number[], percentile: number): number {
715
- const sorted = values.toSorted((a, b) => a - b);
716
- const index = Math.ceil((percentile / 100) * sorted.length) - 1;
717
- return sorted[Math.max(0, index)];
886
+ const offsetMs = timestamp.getTime() - rangeStart.getTime();
887
+ const bucketIndex = Math.floor(offsetMs / intervalMs);
888
+ return new Date(rangeStart.getTime() + bucketIndex * intervalMs);
718
889
  }
719
890
 
720
891
  private mapConfig(
@@ -727,6 +898,7 @@ export class HealthCheckService {
727
898
  config: row.config,
728
899
  collectors: row.collectors ?? undefined,
729
900
  intervalSeconds: row.intervalSeconds,
901
+ paused: row.paused,
730
902
  createdAt: row.createdAt,
731
903
  updatedAt: row.updatedAt,
732
904
  };