@checkstack/healthcheck-backend 0.8.2 → 0.8.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/drizzle/0009_late_argent.sql +1 -0
- package/drizzle/meta/0009_snapshot.json +426 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +10 -8
- package/src/aggregation-utils.ts +27 -24
- package/src/aggregation.test.ts +16 -11
- package/src/availability.test.ts +107 -96
- package/src/index.ts +0 -2
- package/src/queue-executor.test.ts +1 -1
- package/src/queue-executor.ts +37 -0
- package/src/realtime-aggregation.test.ts +466 -0
- package/src/realtime-aggregation.ts +289 -0
- package/src/retention-job.ts +13 -165
- package/src/schema.ts +2 -0
- package/src/service.ts +57 -48
package/src/service.ts
CHANGED
|
@@ -12,9 +12,10 @@ import {
|
|
|
12
12
|
healthCheckRuns,
|
|
13
13
|
healthCheckAggregates,
|
|
14
14
|
VersionedStateThresholds,
|
|
15
|
+
DEFAULT_RETENTION_CONFIG,
|
|
15
16
|
} from "./schema";
|
|
16
17
|
import * as schema from "./schema";
|
|
17
|
-
import { eq, and, InferSelectModel, desc, gte, lte } from "drizzle-orm";
|
|
18
|
+
import { eq, and, InferSelectModel, desc, gte, lte, lt } from "drizzle-orm";
|
|
18
19
|
import { ORPCError } from "@orpc/server";
|
|
19
20
|
import { evaluateHealthStatus } from "./state-evaluator";
|
|
20
21
|
import { stateThresholds } from "./state-thresholds-migrations";
|
|
@@ -649,7 +650,7 @@ export class HealthCheckService {
|
|
|
649
650
|
.where(eq(healthCheckConfigurations.id, configurationId))
|
|
650
651
|
.limit(1);
|
|
651
652
|
|
|
652
|
-
// Look up strategy for
|
|
653
|
+
// Look up strategy for mergeResult function (only if needed)
|
|
653
654
|
const strategy =
|
|
654
655
|
options.includeAggregatedResult && config && this.registry
|
|
655
656
|
? this.registry.getStrategy(config.strategyId)
|
|
@@ -810,12 +811,13 @@ export class HealthCheckService {
|
|
|
810
811
|
bucketIntervalMs: number;
|
|
811
812
|
rangeStart: Date;
|
|
812
813
|
strategy?: {
|
|
813
|
-
|
|
814
|
-
|
|
814
|
+
mergeResult: (
|
|
815
|
+
existing: Record<string, unknown> | undefined,
|
|
816
|
+
newRun: {
|
|
815
817
|
status: "healthy" | "unhealthy" | "degraded";
|
|
816
818
|
latencyMs?: number;
|
|
817
819
|
metadata?: unknown;
|
|
818
|
-
}
|
|
820
|
+
},
|
|
819
821
|
) => unknown;
|
|
820
822
|
};
|
|
821
823
|
}): NormalizedBucket[] {
|
|
@@ -871,13 +873,17 @@ export class HealthCheckService {
|
|
|
871
873
|
const latencies = extractLatencies(bucket.runs);
|
|
872
874
|
const latencyStats = calculateLatencyStats(latencies);
|
|
873
875
|
|
|
874
|
-
// Compute aggregatedResult if strategy is available
|
|
876
|
+
// Compute aggregatedResult if strategy is available (using incremental mergeResult)
|
|
875
877
|
let aggregatedResult: Record<string, unknown> | undefined;
|
|
876
878
|
if (strategy) {
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
879
|
+
// Incrementally merge each run's result
|
|
880
|
+
let strategyResult: Record<string, unknown> | undefined;
|
|
881
|
+
for (const run of bucket.runs) {
|
|
882
|
+
strategyResult = strategy.mergeResult(strategyResult, run) as Record<
|
|
883
|
+
string,
|
|
884
|
+
unknown
|
|
885
|
+
>;
|
|
886
|
+
}
|
|
881
887
|
|
|
882
888
|
// Aggregate collector data if collector registry is available
|
|
883
889
|
let collectorsAggregated: Record<string, unknown> | undefined;
|
|
@@ -920,6 +926,9 @@ export class HealthCheckService {
|
|
|
920
926
|
/**
|
|
921
927
|
* Get availability statistics for a health check over 31-day and 365-day periods.
|
|
922
928
|
* Availability is calculated as (healthyCount / totalRunCount) * 100.
|
|
929
|
+
*
|
|
930
|
+
* With incremental real-time aggregation, hourly aggregates are always up-to-date
|
|
931
|
+
* (updated immediately on every run), so we don't need to query raw runs.
|
|
923
932
|
*/
|
|
924
933
|
async getAvailabilityStats(props: {
|
|
925
934
|
systemId: string;
|
|
@@ -933,12 +942,25 @@ export class HealthCheckService {
|
|
|
933
942
|
const { systemId, configurationId } = props;
|
|
934
943
|
const now = new Date();
|
|
935
944
|
|
|
945
|
+
// Get retention config to determine what data tiers are available
|
|
946
|
+
const { retentionConfig } = await this.getRetentionConfig(
|
|
947
|
+
systemId,
|
|
948
|
+
configurationId,
|
|
949
|
+
);
|
|
950
|
+
const config = retentionConfig ?? DEFAULT_RETENTION_CONFIG;
|
|
951
|
+
|
|
936
952
|
// Calculate cutoff dates
|
|
937
953
|
const cutoff31Days = new Date(now.getTime() - 31 * 24 * 60 * 60 * 1000);
|
|
938
954
|
const cutoff365Days = new Date(now.getTime() - 365 * 24 * 60 * 60 * 1000);
|
|
939
955
|
|
|
940
|
-
//
|
|
941
|
-
const
|
|
956
|
+
// Cutoff for hourly aggregates based on retention config
|
|
957
|
+
const hourlyCutoff = new Date(
|
|
958
|
+
now.getTime() - config.hourlyRetentionDays * 24 * 60 * 60 * 1000,
|
|
959
|
+
);
|
|
960
|
+
|
|
961
|
+
// Query hourly aggregates for the period they cover (up to hourlyRetentionDays)
|
|
962
|
+
// These are always up-to-date due to incremental real-time aggregation
|
|
963
|
+
const hourlyAggregates = await this.db
|
|
942
964
|
.select({
|
|
943
965
|
bucketStart: healthCheckAggregates.bucketStart,
|
|
944
966
|
runCount: healthCheckAggregates.runCount,
|
|
@@ -949,34 +971,37 @@ export class HealthCheckService {
|
|
|
949
971
|
and(
|
|
950
972
|
eq(healthCheckAggregates.systemId, systemId),
|
|
951
973
|
eq(healthCheckAggregates.configurationId, configurationId),
|
|
952
|
-
eq(healthCheckAggregates.bucketSize, "
|
|
953
|
-
gte(healthCheckAggregates.bucketStart,
|
|
974
|
+
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
975
|
+
gte(healthCheckAggregates.bucketStart, hourlyCutoff),
|
|
954
976
|
),
|
|
955
977
|
);
|
|
956
978
|
|
|
957
|
-
//
|
|
958
|
-
const
|
|
979
|
+
// Query daily aggregates for data beyond hourly retention
|
|
980
|
+
const dailyAggregates = await this.db
|
|
959
981
|
.select({
|
|
960
|
-
|
|
961
|
-
|
|
982
|
+
bucketStart: healthCheckAggregates.bucketStart,
|
|
983
|
+
runCount: healthCheckAggregates.runCount,
|
|
984
|
+
healthyCount: healthCheckAggregates.healthyCount,
|
|
962
985
|
})
|
|
963
|
-
.from(
|
|
986
|
+
.from(healthCheckAggregates)
|
|
964
987
|
.where(
|
|
965
988
|
and(
|
|
966
|
-
eq(
|
|
967
|
-
eq(
|
|
968
|
-
|
|
989
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
990
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
991
|
+
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
992
|
+
gte(healthCheckAggregates.bucketStart, cutoff365Days),
|
|
993
|
+
lt(healthCheckAggregates.bucketStart, hourlyCutoff),
|
|
969
994
|
),
|
|
970
995
|
);
|
|
971
996
|
|
|
972
|
-
//
|
|
997
|
+
// Aggregate counts
|
|
973
998
|
let totalRuns31Days = 0;
|
|
974
999
|
let healthyRuns31Days = 0;
|
|
975
1000
|
let totalRuns365Days = 0;
|
|
976
1001
|
let healthyRuns365Days = 0;
|
|
977
1002
|
|
|
978
|
-
// Process
|
|
979
|
-
for (const agg of
|
|
1003
|
+
// Process hourly aggregates (fresh data within hourlyRetentionDays)
|
|
1004
|
+
for (const agg of hourlyAggregates) {
|
|
980
1005
|
totalRuns365Days += agg.runCount;
|
|
981
1006
|
healthyRuns365Days += agg.healthyCount;
|
|
982
1007
|
|
|
@@ -986,30 +1011,14 @@ export class HealthCheckService {
|
|
|
986
1011
|
}
|
|
987
1012
|
}
|
|
988
1013
|
|
|
989
|
-
// Process
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
);
|
|
994
|
-
|
|
995
|
-
for (const run of recentRuns) {
|
|
996
|
-
// Calculate which daily bucket this run would belong to
|
|
997
|
-
const runBucketStart = new Date(run.timestamp);
|
|
998
|
-
runBucketStart.setUTCHours(0, 0, 0, 0);
|
|
999
|
-
|
|
1000
|
-
// Only count if this bucket isn't already in aggregates
|
|
1001
|
-
if (!aggregateBucketStarts.has(runBucketStart.getTime())) {
|
|
1002
|
-
totalRuns365Days += 1;
|
|
1003
|
-
if (run.status === "healthy") {
|
|
1004
|
-
healthyRuns365Days += 1;
|
|
1005
|
-
}
|
|
1014
|
+
// Process daily aggregates (older data beyond hourly retention)
|
|
1015
|
+
for (const agg of dailyAggregates) {
|
|
1016
|
+
totalRuns365Days += agg.runCount;
|
|
1017
|
+
healthyRuns365Days += agg.healthyCount;
|
|
1006
1018
|
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
healthyRuns31Days += 1;
|
|
1011
|
-
}
|
|
1012
|
-
}
|
|
1019
|
+
if (agg.bucketStart >= cutoff31Days) {
|
|
1020
|
+
totalRuns31Days += agg.runCount;
|
|
1021
|
+
healthyRuns31Days += agg.healthyCount;
|
|
1013
1022
|
}
|
|
1014
1023
|
}
|
|
1015
1024
|
|