@checkstack/healthcheck-backend 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +116 -0
- package/drizzle/0007_tense_misty_knight.sql +1 -0
- package/drizzle/0008_broad_black_tom.sql +1 -0
- package/drizzle/meta/0007_snapshot.json +413 -0
- package/drizzle/meta/0008_snapshot.json +420 -0
- package/drizzle/meta/_journal.json +14 -0
- package/package.json +2 -1
- package/src/aggregation-utils.test.ts +644 -0
- package/src/aggregation-utils.ts +399 -0
- package/src/aggregation.test.ts +222 -79
- package/src/index.ts +16 -0
- package/src/queue-executor.test.ts +133 -0
- package/src/queue-executor.ts +48 -2
- package/src/retention-job.ts +72 -43
- package/src/router.test.ts +14 -3
- package/src/router.ts +14 -1
- package/src/schema.ts +8 -4
- package/src/service-pause.test.ts +50 -0
- package/src/service.ts +273 -101
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
import type { CollectorRegistry } from "@checkstack/backend-api";
|
|
2
|
+
|
|
3
|
+
// ===== Percentile Calculation =====
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Calculate a percentile from a list of values.
|
|
7
|
+
*/
|
|
8
|
+
export function calculatePercentile(
|
|
9
|
+
values: number[],
|
|
10
|
+
percentile: number,
|
|
11
|
+
): number {
|
|
12
|
+
const sorted = values.toSorted((a, b) => a - b);
|
|
13
|
+
const index = Math.ceil((percentile / 100) * sorted.length) - 1;
|
|
14
|
+
return sorted[Math.max(0, index)];
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// ===== Status Counting =====
|
|
18
|
+
|
|
19
|
+
export interface StatusCounts {
|
|
20
|
+
healthyCount: number;
|
|
21
|
+
degradedCount: number;
|
|
22
|
+
unhealthyCount: number;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Count statuses from a list of runs.
|
|
27
|
+
*/
|
|
28
|
+
export function countStatuses(
|
|
29
|
+
runs: Array<{ status: "healthy" | "degraded" | "unhealthy" | string }>,
|
|
30
|
+
): StatusCounts {
|
|
31
|
+
let healthyCount = 0;
|
|
32
|
+
let degradedCount = 0;
|
|
33
|
+
let unhealthyCount = 0;
|
|
34
|
+
|
|
35
|
+
for (const r of runs) {
|
|
36
|
+
switch (r.status) {
|
|
37
|
+
case "healthy": {
|
|
38
|
+
healthyCount++;
|
|
39
|
+
break;
|
|
40
|
+
}
|
|
41
|
+
case "degraded": {
|
|
42
|
+
degradedCount++;
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
case "unhealthy": {
|
|
46
|
+
unhealthyCount++;
|
|
47
|
+
break;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
return { healthyCount, degradedCount, unhealthyCount };
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// ===== Latency Statistics =====
|
|
56
|
+
|
|
57
|
+
export interface LatencyStats {
|
|
58
|
+
latencySumMs: number | undefined;
|
|
59
|
+
avgLatencyMs: number | undefined;
|
|
60
|
+
minLatencyMs: number | undefined;
|
|
61
|
+
maxLatencyMs: number | undefined;
|
|
62
|
+
p95LatencyMs: number | undefined;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Calculate latency statistics from a list of latency values.
|
|
67
|
+
*/
|
|
68
|
+
export function calculateLatencyStats(latencies: number[]): LatencyStats {
|
|
69
|
+
if (latencies.length === 0) {
|
|
70
|
+
return {
|
|
71
|
+
latencySumMs: undefined,
|
|
72
|
+
avgLatencyMs: undefined,
|
|
73
|
+
minLatencyMs: undefined,
|
|
74
|
+
maxLatencyMs: undefined,
|
|
75
|
+
p95LatencyMs: undefined,
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const sum = latencies.reduce((a, b) => a + b, 0);
|
|
80
|
+
return {
|
|
81
|
+
latencySumMs: sum,
|
|
82
|
+
avgLatencyMs: Math.round(sum / latencies.length),
|
|
83
|
+
minLatencyMs: Math.min(...latencies),
|
|
84
|
+
maxLatencyMs: Math.max(...latencies),
|
|
85
|
+
p95LatencyMs: calculatePercentile(latencies, 95),
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Extract latencies from runs, filtering out undefined values.
|
|
91
|
+
*/
|
|
92
|
+
export function extractLatencies(
|
|
93
|
+
runs: Array<{ latencyMs?: number }>,
|
|
94
|
+
): number[] {
|
|
95
|
+
return runs
|
|
96
|
+
.map((r) => r.latencyMs)
|
|
97
|
+
.filter((l): l is number => l !== undefined);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// ===== Collector Aggregation =====
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Aggregate collector data from runs in a bucket.
|
|
104
|
+
* Groups by collector UUID and calls each collector's aggregateResult.
|
|
105
|
+
*/
|
|
106
|
+
export function aggregateCollectorData(
|
|
107
|
+
runs: Array<{
|
|
108
|
+
status: string;
|
|
109
|
+
latencyMs?: number;
|
|
110
|
+
metadata?: Record<string, unknown>;
|
|
111
|
+
}>,
|
|
112
|
+
collectorRegistry: CollectorRegistry,
|
|
113
|
+
): Record<string, unknown> {
|
|
114
|
+
// Group collector data by UUID
|
|
115
|
+
const collectorDataByUuid = new Map<
|
|
116
|
+
string,
|
|
117
|
+
{ collectorId: string; metadata: Record<string, unknown>[] }
|
|
118
|
+
>();
|
|
119
|
+
|
|
120
|
+
for (const run of runs) {
|
|
121
|
+
const collectors = run.metadata?.collectors as
|
|
122
|
+
| Record<string, Record<string, unknown>>
|
|
123
|
+
| undefined;
|
|
124
|
+
if (!collectors) continue;
|
|
125
|
+
|
|
126
|
+
for (const [uuid, data] of Object.entries(collectors)) {
|
|
127
|
+
const collectorId = data._collectorId as string | undefined;
|
|
128
|
+
if (!collectorId) continue;
|
|
129
|
+
|
|
130
|
+
if (!collectorDataByUuid.has(uuid)) {
|
|
131
|
+
collectorDataByUuid.set(uuid, { collectorId, metadata: [] });
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Add metadata without internal fields
|
|
135
|
+
const { _collectorId, _assertionFailed, ...rest } = data;
|
|
136
|
+
collectorDataByUuid.get(uuid)!.metadata.push(rest);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Call aggregateResult for each collector
|
|
141
|
+
const result: Record<string, unknown> = {};
|
|
142
|
+
|
|
143
|
+
for (const [uuid, { collectorId, metadata }] of collectorDataByUuid) {
|
|
144
|
+
const registered = collectorRegistry.getCollector(collectorId);
|
|
145
|
+
if (!registered?.collector.aggregateResult) continue;
|
|
146
|
+
|
|
147
|
+
// Transform metadata to the format expected by aggregateResult
|
|
148
|
+
const runsForAggregation = metadata.map((m) => ({
|
|
149
|
+
status: "healthy" as const,
|
|
150
|
+
metadata: m,
|
|
151
|
+
}));
|
|
152
|
+
|
|
153
|
+
const aggregated = registered.collector.aggregateResult(runsForAggregation);
|
|
154
|
+
result[uuid] = {
|
|
155
|
+
_collectorId: collectorId,
|
|
156
|
+
...aggregated,
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return result;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// ===== Cross-Tier Aggregation =====

/**
 * A normalized bucket that can come from any tier.
 * Used as the common format for merging and re-aggregating.
 */
export interface NormalizedBucket {
  // Start of the bucket's time range.
  bucketStart: Date;
  bucketEndMs: number; // bucketStart.getTime() + bucket duration in ms
  // Total number of runs recorded in this bucket.
  runCount: number;
  // Number of runs observed with each known status.
  healthyCount: number;
  degradedCount: number;
  unhealthyCount: number;
  // Latency statistics; undefined when the bucket has no latency data.
  latencySumMs: number | undefined;
  minLatencyMs: number | undefined;
  maxLatencyMs: number | undefined;
  p95LatencyMs: number | undefined;
  // Collector aggregation output carried through from the source data, if any.
  aggregatedResult?: Record<string, unknown>;
  // Which storage tier this bucket came from; "raw" is the most granular.
  sourceTier: "raw" | "hourly" | "daily";
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Priority order for tiers (lower index = higher priority).
|
|
186
|
+
*/
|
|
187
|
+
const TIER_PRIORITY: Record<NormalizedBucket["sourceTier"], number> = {
|
|
188
|
+
raw: 0,
|
|
189
|
+
hourly: 1,
|
|
190
|
+
daily: 2,
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Merge buckets from different tiers, preferring most granular data.
|
|
195
|
+
* For overlapping time periods, uses priority: raw > hourly > daily.
|
|
196
|
+
*/
|
|
197
|
+
export function mergeTieredBuckets(params: {
|
|
198
|
+
rawBuckets: NormalizedBucket[];
|
|
199
|
+
hourlyBuckets: NormalizedBucket[];
|
|
200
|
+
dailyBuckets: NormalizedBucket[];
|
|
201
|
+
}): NormalizedBucket[] {
|
|
202
|
+
const { rawBuckets, hourlyBuckets, dailyBuckets } = params;
|
|
203
|
+
|
|
204
|
+
// Combine all buckets
|
|
205
|
+
const allBuckets = [...rawBuckets, ...hourlyBuckets, ...dailyBuckets];
|
|
206
|
+
|
|
207
|
+
if (allBuckets.length === 0) {
|
|
208
|
+
return [];
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Sort by start time, then by tier priority (most granular first)
|
|
212
|
+
allBuckets.sort((a, b) => {
|
|
213
|
+
const timeDiff = a.bucketStart.getTime() - b.bucketStart.getTime();
|
|
214
|
+
if (timeDiff !== 0) return timeDiff;
|
|
215
|
+
return TIER_PRIORITY[a.sourceTier] - TIER_PRIORITY[b.sourceTier];
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
// Merge overlapping buckets, keeping the most granular tier
|
|
219
|
+
const result: NormalizedBucket[] = [];
|
|
220
|
+
let coveredUntil = 0; // Timestamp up to which we have data
|
|
221
|
+
|
|
222
|
+
for (const bucket of allBuckets) {
|
|
223
|
+
const bucketStartMs = bucket.bucketStart.getTime();
|
|
224
|
+
|
|
225
|
+
// Skip if this bucket's time range is already covered by higher-priority data
|
|
226
|
+
if (bucketStartMs < coveredUntil) {
|
|
227
|
+
// Check if this bucket extends beyond current coverage
|
|
228
|
+
if (bucket.bucketEndMs > coveredUntil) {
|
|
229
|
+
// Partial overlap - for simplicity, we skip partially overlapping lower-priority buckets
|
|
230
|
+
// This is acceptable because we prefer raw data which is more granular
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
233
|
+
continue;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
result.push(bucket);
|
|
237
|
+
coveredUntil = bucket.bucketEndMs;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
return result;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Combine multiple buckets into a single bucket.
|
|
245
|
+
* Used when re-aggregating smaller buckets into larger target buckets.
|
|
246
|
+
*/
|
|
247
|
+
export function combineBuckets(params: {
|
|
248
|
+
buckets: NormalizedBucket[];
|
|
249
|
+
targetBucketStart: Date;
|
|
250
|
+
targetBucketEndMs: number;
|
|
251
|
+
}): NormalizedBucket {
|
|
252
|
+
const { buckets, targetBucketStart, targetBucketEndMs } = params;
|
|
253
|
+
|
|
254
|
+
if (buckets.length === 0) {
|
|
255
|
+
return {
|
|
256
|
+
bucketStart: targetBucketStart,
|
|
257
|
+
bucketEndMs: targetBucketEndMs,
|
|
258
|
+
runCount: 0,
|
|
259
|
+
healthyCount: 0,
|
|
260
|
+
degradedCount: 0,
|
|
261
|
+
unhealthyCount: 0,
|
|
262
|
+
latencySumMs: undefined,
|
|
263
|
+
minLatencyMs: undefined,
|
|
264
|
+
maxLatencyMs: undefined,
|
|
265
|
+
p95LatencyMs: undefined,
|
|
266
|
+
aggregatedResult: undefined,
|
|
267
|
+
sourceTier: "raw", // Will be overridden below
|
|
268
|
+
};
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Aggregate counts (additive)
|
|
272
|
+
let runCount = 0;
|
|
273
|
+
let healthyCount = 0;
|
|
274
|
+
let degradedCount = 0;
|
|
275
|
+
let unhealthyCount = 0;
|
|
276
|
+
let latencySumMs = 0;
|
|
277
|
+
let hasLatencyData = false;
|
|
278
|
+
|
|
279
|
+
const minValues: number[] = [];
|
|
280
|
+
const maxValues: number[] = [];
|
|
281
|
+
const p95Values: number[] = [];
|
|
282
|
+
|
|
283
|
+
// Track which tier the data primarily comes from
|
|
284
|
+
let lowestPriorityTier: NormalizedBucket["sourceTier"] = "raw";
|
|
285
|
+
|
|
286
|
+
// Track aggregatedResults - only preserve if single bucket or all from raw
|
|
287
|
+
const aggregatedResults: Array<Record<string, unknown> | undefined> = [];
|
|
288
|
+
|
|
289
|
+
for (const bucket of buckets) {
|
|
290
|
+
runCount += bucket.runCount;
|
|
291
|
+
healthyCount += bucket.healthyCount;
|
|
292
|
+
degradedCount += bucket.degradedCount;
|
|
293
|
+
unhealthyCount += bucket.unhealthyCount;
|
|
294
|
+
|
|
295
|
+
if (bucket.latencySumMs !== undefined) {
|
|
296
|
+
latencySumMs += bucket.latencySumMs;
|
|
297
|
+
hasLatencyData = true;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
if (bucket.minLatencyMs !== undefined) {
|
|
301
|
+
minValues.push(bucket.minLatencyMs);
|
|
302
|
+
}
|
|
303
|
+
if (bucket.maxLatencyMs !== undefined) {
|
|
304
|
+
maxValues.push(bucket.maxLatencyMs);
|
|
305
|
+
}
|
|
306
|
+
if (bucket.p95LatencyMs !== undefined) {
|
|
307
|
+
p95Values.push(bucket.p95LatencyMs);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Track lowest priority (highest number) tier
|
|
311
|
+
if (TIER_PRIORITY[bucket.sourceTier] > TIER_PRIORITY[lowestPriorityTier]) {
|
|
312
|
+
lowestPriorityTier = bucket.sourceTier;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
aggregatedResults.push(bucket.aggregatedResult);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// Preserve aggregatedResult if there's exactly one bucket (no re-aggregation needed)
|
|
319
|
+
// or if there's exactly one non-undefined result and all buckets are raw
|
|
320
|
+
let preservedAggregatedResult: Record<string, unknown> | undefined;
|
|
321
|
+
if (buckets.length === 1) {
|
|
322
|
+
preservedAggregatedResult = buckets[0].aggregatedResult;
|
|
323
|
+
} else if (
|
|
324
|
+
lowestPriorityTier === "raw" &&
|
|
325
|
+
aggregatedResults.filter((r) => r !== undefined).length === 1
|
|
326
|
+
) {
|
|
327
|
+
// All raw buckets, and exactly one has aggregatedResult
|
|
328
|
+
preservedAggregatedResult = aggregatedResults.find((r) => r !== undefined);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
return {
|
|
332
|
+
bucketStart: targetBucketStart,
|
|
333
|
+
bucketEndMs: targetBucketEndMs,
|
|
334
|
+
runCount,
|
|
335
|
+
healthyCount,
|
|
336
|
+
degradedCount,
|
|
337
|
+
unhealthyCount,
|
|
338
|
+
latencySumMs: hasLatencyData ? latencySumMs : undefined,
|
|
339
|
+
minLatencyMs: minValues.length > 0 ? Math.min(...minValues) : undefined,
|
|
340
|
+
maxLatencyMs: maxValues.length > 0 ? Math.max(...maxValues) : undefined,
|
|
341
|
+
// Use max of p95s as conservative upper-bound approximation
|
|
342
|
+
p95LatencyMs: p95Values.length > 0 ? Math.max(...p95Values) : undefined,
|
|
343
|
+
// Preserve aggregatedResult only when no actual re-aggregation is needed
|
|
344
|
+
aggregatedResult: preservedAggregatedResult,
|
|
345
|
+
sourceTier: lowestPriorityTier,
|
|
346
|
+
};
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
/**
|
|
350
|
+
* Re-aggregate a list of normalized buckets into target-sized buckets.
|
|
351
|
+
* Groups source buckets by target bucket boundaries and combines them.
|
|
352
|
+
*/
|
|
353
|
+
export function reaggregateBuckets(params: {
|
|
354
|
+
sourceBuckets: NormalizedBucket[];
|
|
355
|
+
targetIntervalMs: number;
|
|
356
|
+
rangeStart: Date;
|
|
357
|
+
}): NormalizedBucket[] {
|
|
358
|
+
const { sourceBuckets, targetIntervalMs, rangeStart } = params;
|
|
359
|
+
|
|
360
|
+
if (sourceBuckets.length === 0) {
|
|
361
|
+
return [];
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const rangeStartMs = rangeStart.getTime();
|
|
365
|
+
|
|
366
|
+
// Group source buckets by target bucket index
|
|
367
|
+
const bucketGroups = new Map<number, NormalizedBucket[]>();
|
|
368
|
+
|
|
369
|
+
for (const bucket of sourceBuckets) {
|
|
370
|
+
const offsetMs = bucket.bucketStart.getTime() - rangeStartMs;
|
|
371
|
+
const targetIndex = Math.floor(offsetMs / targetIntervalMs);
|
|
372
|
+
|
|
373
|
+
if (!bucketGroups.has(targetIndex)) {
|
|
374
|
+
bucketGroups.set(targetIndex, []);
|
|
375
|
+
}
|
|
376
|
+
bucketGroups.get(targetIndex)!.push(bucket);
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Combine each group into a single target bucket
|
|
380
|
+
const result: NormalizedBucket[] = [];
|
|
381
|
+
|
|
382
|
+
for (const [index, buckets] of bucketGroups) {
|
|
383
|
+
const targetBucketStart = new Date(rangeStartMs + index * targetIntervalMs);
|
|
384
|
+
const targetBucketEndMs = targetBucketStart.getTime() + targetIntervalMs;
|
|
385
|
+
|
|
386
|
+
result.push(
|
|
387
|
+
combineBuckets({
|
|
388
|
+
buckets,
|
|
389
|
+
targetBucketStart,
|
|
390
|
+
targetBucketEndMs,
|
|
391
|
+
}),
|
|
392
|
+
);
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// Sort by bucket start time
|
|
396
|
+
result.sort((a, b) => a.bucketStart.getTime() - b.bucketStart.getTime());
|
|
397
|
+
|
|
398
|
+
return result;
|
|
399
|
+
}
|