@checkstack/healthcheck-backend 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +116 -0
- package/drizzle/0007_tense_misty_knight.sql +1 -0
- package/drizzle/meta/0007_snapshot.json +413 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +1 -1
- package/src/aggregation-utils.test.ts +644 -0
- package/src/aggregation-utils.ts +399 -0
- package/src/aggregation.test.ts +250 -144
- package/src/index.ts +13 -2
- package/src/queue-executor.ts +10 -3
- package/src/retention-job.ts +93 -61
- package/src/router.test.ts +14 -3
- package/src/router.ts +21 -16
- package/src/schema.ts +6 -4
- package/src/service.ts +298 -132
package/src/retention-job.ts
CHANGED
|
@@ -1,23 +1,32 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type {
|
|
2
|
+
HealthCheckRegistry,
|
|
3
|
+
Logger,
|
|
4
|
+
SafeDatabase,
|
|
5
|
+
CollectorRegistry,
|
|
6
|
+
} from "@checkstack/backend-api";
|
|
2
7
|
import * as schema from "./schema";
|
|
3
8
|
import {
|
|
4
9
|
healthCheckRuns,
|
|
5
10
|
systemHealthChecks,
|
|
6
11
|
healthCheckAggregates,
|
|
12
|
+
healthCheckConfigurations,
|
|
7
13
|
DEFAULT_RETENTION_CONFIG,
|
|
8
14
|
} from "./schema";
|
|
9
15
|
import { eq, and, lt, sql } from "drizzle-orm";
|
|
10
|
-
import type {
|
|
11
|
-
HealthCheckRegistry,
|
|
12
|
-
Logger,
|
|
13
|
-
} from "@checkstack/backend-api";
|
|
14
16
|
import type { QueueManager } from "@checkstack/queue-api";
|
|
17
|
+
import {
|
|
18
|
+
aggregateCollectorData,
|
|
19
|
+
calculateLatencyStats,
|
|
20
|
+
countStatuses,
|
|
21
|
+
extractLatencies,
|
|
22
|
+
} from "./aggregation-utils";
|
|
15
23
|
|
|
16
|
-
type Db =
|
|
24
|
+
type Db = SafeDatabase<typeof schema>;
|
|
17
25
|
|
|
18
26
|
interface RetentionJobDeps {
|
|
19
27
|
db: Db;
|
|
20
28
|
registry: HealthCheckRegistry;
|
|
29
|
+
collectorRegistry: CollectorRegistry;
|
|
21
30
|
logger: Logger;
|
|
22
31
|
queueManager: QueueManager;
|
|
23
32
|
}
|
|
@@ -35,7 +44,7 @@ interface RetentionJobPayload {
|
|
|
35
44
|
* 3. Deletes expired daily aggregates
|
|
36
45
|
*/
|
|
37
46
|
export async function setupRetentionJob(deps: RetentionJobDeps) {
|
|
38
|
-
const { queueManager, logger, db, registry } = deps;
|
|
47
|
+
const { queueManager, logger, db, registry, collectorRegistry } = deps;
|
|
39
48
|
|
|
40
49
|
const queue = queueManager.getQueue<RetentionJobPayload>(RETENTION_QUEUE);
|
|
41
50
|
|
|
@@ -43,10 +52,16 @@ export async function setupRetentionJob(deps: RetentionJobDeps) {
|
|
|
43
52
|
await queue.consume(
|
|
44
53
|
async () => {
|
|
45
54
|
logger.info("Starting health check retention job");
|
|
46
|
-
await runRetentionJob({
|
|
55
|
+
await runRetentionJob({
|
|
56
|
+
db,
|
|
57
|
+
registry,
|
|
58
|
+
collectorRegistry,
|
|
59
|
+
logger,
|
|
60
|
+
queueManager,
|
|
61
|
+
});
|
|
47
62
|
logger.info("Completed health check retention job");
|
|
48
63
|
},
|
|
49
|
-
{ consumerGroup: "retention-worker" }
|
|
64
|
+
{ consumerGroup: "retention-worker" },
|
|
50
65
|
);
|
|
51
66
|
|
|
52
67
|
// Schedule daily retention run (86400 seconds = 24 hours)
|
|
@@ -55,7 +70,7 @@ export async function setupRetentionJob(deps: RetentionJobDeps) {
|
|
|
55
70
|
{
|
|
56
71
|
jobId: "health-check-retention-daily",
|
|
57
72
|
intervalSeconds: 24 * 60 * 60, // Daily (24 hours)
|
|
58
|
-
}
|
|
73
|
+
},
|
|
59
74
|
);
|
|
60
75
|
|
|
61
76
|
logger.info("Health check retention job scheduled (runs daily)");
|
|
@@ -65,7 +80,7 @@ export async function setupRetentionJob(deps: RetentionJobDeps) {
|
|
|
65
80
|
* Main retention job logic
|
|
66
81
|
*/
|
|
67
82
|
export async function runRetentionJob(deps: RetentionJobDeps) {
|
|
68
|
-
const { db, registry, logger } = deps;
|
|
83
|
+
const { db, registry, collectorRegistry, logger } = deps;
|
|
69
84
|
|
|
70
85
|
// Get all unique system-config assignments
|
|
71
86
|
const assignments = await db.select().from(systemHealthChecks);
|
|
@@ -79,6 +94,7 @@ export async function runRetentionJob(deps: RetentionJobDeps) {
|
|
|
79
94
|
await aggregateRawRuns({
|
|
80
95
|
db,
|
|
81
96
|
registry,
|
|
97
|
+
collectorRegistry,
|
|
82
98
|
systemId: assignment.systemId,
|
|
83
99
|
configurationId: assignment.configurationId,
|
|
84
100
|
rawRetentionDays: retentionConfig.rawRetentionDays,
|
|
@@ -102,7 +118,7 @@ export async function runRetentionJob(deps: RetentionJobDeps) {
|
|
|
102
118
|
} catch (error) {
|
|
103
119
|
logger.error(
|
|
104
120
|
`Retention job failed for ${assignment.systemId}/${assignment.configurationId}`,
|
|
105
|
-
{ error }
|
|
121
|
+
{ error },
|
|
106
122
|
);
|
|
107
123
|
}
|
|
108
124
|
}
|
|
@@ -111,6 +127,7 @@ export async function runRetentionJob(deps: RetentionJobDeps) {
|
|
|
111
127
|
interface AggregateRawRunsParams {
|
|
112
128
|
db: Db;
|
|
113
129
|
registry: HealthCheckRegistry;
|
|
130
|
+
collectorRegistry: CollectorRegistry;
|
|
114
131
|
systemId: string;
|
|
115
132
|
configurationId: string;
|
|
116
133
|
rawRetentionDays: number;
|
|
@@ -120,16 +137,25 @@ interface AggregateRawRunsParams {
|
|
|
120
137
|
* Aggregates raw runs older than retention period into hourly buckets
|
|
121
138
|
*/
|
|
122
139
|
async function aggregateRawRuns(params: AggregateRawRunsParams) {
|
|
123
|
-
const {
|
|
140
|
+
const {
|
|
141
|
+
db,
|
|
142
|
+
registry,
|
|
143
|
+
collectorRegistry,
|
|
144
|
+
systemId,
|
|
145
|
+
configurationId,
|
|
146
|
+
rawRetentionDays,
|
|
147
|
+
} = params;
|
|
124
148
|
|
|
125
149
|
const cutoffDate = new Date();
|
|
126
150
|
cutoffDate.setDate(cutoffDate.getDate() - rawRetentionDays);
|
|
127
151
|
cutoffDate.setHours(cutoffDate.getHours(), 0, 0, 0); // Round to hour
|
|
128
152
|
|
|
129
153
|
// Get strategy for metadata aggregation
|
|
130
|
-
const config = await db
|
|
131
|
-
|
|
132
|
-
|
|
154
|
+
const [config] = await db
|
|
155
|
+
.select()
|
|
156
|
+
.from(healthCheckConfigurations)
|
|
157
|
+
.where(eq(healthCheckConfigurations.id, configurationId))
|
|
158
|
+
.limit(1);
|
|
133
159
|
const strategy = config ? registry.getStrategy(config.strategyId) : undefined;
|
|
134
160
|
|
|
135
161
|
// Query raw runs older than cutoff, grouped by hour
|
|
@@ -140,8 +166,8 @@ async function aggregateRawRuns(params: AggregateRawRunsParams) {
|
|
|
140
166
|
and(
|
|
141
167
|
eq(healthCheckRuns.systemId, systemId),
|
|
142
168
|
eq(healthCheckRuns.configurationId, configurationId),
|
|
143
|
-
lt(healthCheckRuns.timestamp, cutoffDate)
|
|
144
|
-
)
|
|
169
|
+
lt(healthCheckRuns.timestamp, cutoffDate),
|
|
170
|
+
),
|
|
145
171
|
)
|
|
146
172
|
.orderBy(healthCheckRuns.timestamp);
|
|
147
173
|
|
|
@@ -180,42 +206,43 @@ async function aggregateRawRuns(params: AggregateRawRunsParams) {
|
|
|
180
206
|
|
|
181
207
|
// Create aggregates and delete raw runs
|
|
182
208
|
for (const [, bucket] of buckets) {
|
|
183
|
-
// Calculate metrics
|
|
184
209
|
const runCount = bucket.runs.length;
|
|
185
|
-
let healthyCount = 0;
|
|
186
|
-
let degradedCount = 0;
|
|
187
|
-
let unhealthyCount = 0;
|
|
188
|
-
for (const r of bucket.runs) {
|
|
189
|
-
if (r.status === "healthy") healthyCount++;
|
|
190
|
-
if (r.status === "degraded") degradedCount++;
|
|
191
|
-
if (r.status === "unhealthy") unhealthyCount++;
|
|
192
|
-
}
|
|
193
210
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
.
|
|
197
|
-
|
|
198
|
-
let avgLatencyMs: number | undefined;
|
|
199
|
-
let minLatencyMs: number | undefined;
|
|
200
|
-
let maxLatencyMs: number | undefined;
|
|
201
|
-
let p95LatencyMs: number | undefined;
|
|
202
|
-
|
|
203
|
-
if (latencies.length > 0) {
|
|
204
|
-
let sum = 0;
|
|
205
|
-
for (const l of latencies) sum += l;
|
|
206
|
-
avgLatencyMs = Math.round(sum / latencies.length);
|
|
207
|
-
minLatencyMs = Math.min(...latencies);
|
|
208
|
-
maxLatencyMs = Math.max(...latencies);
|
|
209
|
-
p95LatencyMs = calculatePercentile(latencies, 95);
|
|
210
|
-
}
|
|
211
|
+
// Calculate status counts
|
|
212
|
+
const { healthyCount, degradedCount, unhealthyCount } = countStatuses(
|
|
213
|
+
bucket.runs,
|
|
214
|
+
);
|
|
211
215
|
|
|
212
|
-
//
|
|
216
|
+
// Calculate latency stats
|
|
217
|
+
const latencies = extractLatencies(bucket.runs);
|
|
218
|
+
const {
|
|
219
|
+
latencySumMs,
|
|
220
|
+
avgLatencyMs,
|
|
221
|
+
minLatencyMs,
|
|
222
|
+
maxLatencyMs,
|
|
223
|
+
p95LatencyMs,
|
|
224
|
+
} = calculateLatencyStats(latencies);
|
|
225
|
+
|
|
226
|
+
// Aggregate strategy result
|
|
213
227
|
let aggregatedResult: Record<string, unknown> | undefined;
|
|
214
228
|
if (strategy) {
|
|
215
|
-
|
|
229
|
+
const strategyResult = strategy.aggregateResult(bucket.runs) as Record<
|
|
216
230
|
string,
|
|
217
231
|
unknown
|
|
218
232
|
>;
|
|
233
|
+
|
|
234
|
+
// Aggregate collector data
|
|
235
|
+
const collectorsAggregated = aggregateCollectorData(
|
|
236
|
+
bucket.runs,
|
|
237
|
+
collectorRegistry,
|
|
238
|
+
);
|
|
239
|
+
|
|
240
|
+
aggregatedResult = {
|
|
241
|
+
...strategyResult,
|
|
242
|
+
...(Object.keys(collectorsAggregated).length > 0
|
|
243
|
+
? { collectors: collectorsAggregated }
|
|
244
|
+
: {}),
|
|
245
|
+
};
|
|
219
246
|
}
|
|
220
247
|
|
|
221
248
|
// Insert or update aggregate
|
|
@@ -230,6 +257,7 @@ async function aggregateRawRuns(params: AggregateRawRunsParams) {
|
|
|
230
257
|
healthyCount,
|
|
231
258
|
degradedCount,
|
|
232
259
|
unhealthyCount,
|
|
260
|
+
latencySumMs,
|
|
233
261
|
avgLatencyMs,
|
|
234
262
|
minLatencyMs,
|
|
235
263
|
maxLatencyMs,
|
|
@@ -284,8 +312,8 @@ async function rollupHourlyAggregates(params: RollupParams) {
|
|
|
284
312
|
eq(healthCheckAggregates.systemId, systemId),
|
|
285
313
|
eq(healthCheckAggregates.configurationId, configurationId),
|
|
286
314
|
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
287
|
-
lt(healthCheckAggregates.bucketStart, cutoffDate)
|
|
288
|
-
)
|
|
315
|
+
lt(healthCheckAggregates.bucketStart, cutoffDate),
|
|
316
|
+
),
|
|
289
317
|
);
|
|
290
318
|
|
|
291
319
|
if (oldHourly.length === 0) return;
|
|
@@ -316,20 +344,23 @@ async function rollupHourlyAggregates(params: RollupParams) {
|
|
|
316
344
|
let healthyCount = 0;
|
|
317
345
|
let degradedCount = 0;
|
|
318
346
|
let unhealthyCount = 0;
|
|
319
|
-
let
|
|
347
|
+
let latencySumMs = 0;
|
|
320
348
|
|
|
321
349
|
for (const a of bucket.aggregates) {
|
|
322
350
|
runCount += a.runCount;
|
|
323
351
|
healthyCount += a.healthyCount;
|
|
324
352
|
degradedCount += a.degradedCount;
|
|
325
353
|
unhealthyCount += a.unhealthyCount;
|
|
326
|
-
if
|
|
327
|
-
|
|
354
|
+
// Use latencySumMs if available, fallback to avg*count approximation
|
|
355
|
+
if (a.latencySumMs !== null) {
|
|
356
|
+
latencySumMs += a.latencySumMs;
|
|
357
|
+
} else if (a.avgLatencyMs !== null) {
|
|
358
|
+
latencySumMs += a.avgLatencyMs * a.runCount;
|
|
328
359
|
}
|
|
329
360
|
}
|
|
330
361
|
|
|
331
362
|
const avgLatencyMs =
|
|
332
|
-
runCount > 0 ? Math.round(
|
|
363
|
+
runCount > 0 ? Math.round(latencySumMs / runCount) : undefined;
|
|
333
364
|
|
|
334
365
|
// Min/max across all hourly buckets
|
|
335
366
|
const minValues = bucket.aggregates
|
|
@@ -338,10 +369,16 @@ async function rollupHourlyAggregates(params: RollupParams) {
|
|
|
338
369
|
const maxValues = bucket.aggregates
|
|
339
370
|
.map((a) => a.maxLatencyMs)
|
|
340
371
|
.filter((v): v is number => v !== null);
|
|
372
|
+
const p95Values = bucket.aggregates
|
|
373
|
+
.map((a) => a.p95LatencyMs)
|
|
374
|
+
.filter((v): v is number => v !== null);
|
|
341
375
|
const minLatencyMs =
|
|
342
376
|
minValues.length > 0 ? Math.min(...minValues) : undefined;
|
|
343
377
|
const maxLatencyMs =
|
|
344
378
|
maxValues.length > 0 ? Math.max(...maxValues) : undefined;
|
|
379
|
+
// Use max of hourly p95s as upper bound approximation
|
|
380
|
+
const p95LatencyMs =
|
|
381
|
+
p95Values.length > 0 ? Math.max(...p95Values) : undefined;
|
|
345
382
|
|
|
346
383
|
// Insert daily aggregate
|
|
347
384
|
await db.insert(healthCheckAggregates).values({
|
|
@@ -353,10 +390,11 @@ async function rollupHourlyAggregates(params: RollupParams) {
|
|
|
353
390
|
healthyCount,
|
|
354
391
|
degradedCount,
|
|
355
392
|
unhealthyCount,
|
|
393
|
+
latencySumMs: latencySumMs > 0 ? latencySumMs : undefined,
|
|
356
394
|
avgLatencyMs,
|
|
357
395
|
minLatencyMs,
|
|
358
396
|
maxLatencyMs,
|
|
359
|
-
p95LatencyMs
|
|
397
|
+
p95LatencyMs,
|
|
360
398
|
aggregatedResult: undefined, // Cannot combine result across hours
|
|
361
399
|
});
|
|
362
400
|
|
|
@@ -392,13 +430,7 @@ async function deleteExpiredAggregates(params: DeleteExpiredParams) {
|
|
|
392
430
|
eq(healthCheckAggregates.systemId, systemId),
|
|
393
431
|
eq(healthCheckAggregates.configurationId, configurationId),
|
|
394
432
|
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
395
|
-
lt(healthCheckAggregates.bucketStart, cutoffDate)
|
|
396
|
-
)
|
|
433
|
+
lt(healthCheckAggregates.bucketStart, cutoffDate),
|
|
434
|
+
),
|
|
397
435
|
);
|
|
398
436
|
}
|
|
399
|
-
|
|
400
|
-
function calculatePercentile(values: number[], percentile: number): number {
|
|
401
|
-
const sorted = values.toSorted((a, b) => a - b);
|
|
402
|
-
const index = Math.ceil((percentile / 100) * sorted.length) - 1;
|
|
403
|
-
return sorted[Math.max(0, index)];
|
|
404
|
-
}
|
package/src/router.test.ts
CHANGED
|
@@ -43,7 +43,18 @@ describe("HealthCheck Router", () => {
|
|
|
43
43
|
getStrategiesWithMeta: mock(() => []),
|
|
44
44
|
};
|
|
45
45
|
|
|
46
|
-
const
|
|
46
|
+
const mockCollectorRegistry = {
|
|
47
|
+
register: mock(),
|
|
48
|
+
getCollector: mock(),
|
|
49
|
+
getCollectors: mock(() => []),
|
|
50
|
+
getCollectorsForPlugin: mock(() => []),
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
const router = createHealthCheckRouter(
|
|
54
|
+
mockDb as never,
|
|
55
|
+
mockRegistry,
|
|
56
|
+
mockCollectorRegistry as never,
|
|
57
|
+
);
|
|
47
58
|
|
|
48
59
|
it("getStrategies returns strategies from registry", async () => {
|
|
49
60
|
const context = createMockRpcContext({
|
|
@@ -119,7 +130,7 @@ describe("HealthCheck Router", () => {
|
|
|
119
130
|
const result = await call(
|
|
120
131
|
router.getCollectors,
|
|
121
132
|
{ strategyId: "healthcheck-ssh" },
|
|
122
|
-
{ context }
|
|
133
|
+
{ context },
|
|
123
134
|
);
|
|
124
135
|
expect(result).toHaveLength(1);
|
|
125
136
|
expect(result[0].id).toBe("collector-hardware.cpu");
|
|
@@ -139,7 +150,7 @@ describe("HealthCheck Router", () => {
|
|
|
139
150
|
const result = await call(
|
|
140
151
|
router.getCollectors,
|
|
141
152
|
{ strategyId: "unknown" },
|
|
142
|
-
{ context }
|
|
153
|
+
{ context },
|
|
143
154
|
);
|
|
144
155
|
expect(result).toHaveLength(0);
|
|
145
156
|
});
|
package/src/router.ts
CHANGED
|
@@ -4,10 +4,11 @@ import {
|
|
|
4
4
|
toJsonSchema,
|
|
5
5
|
type RpcContext,
|
|
6
6
|
type HealthCheckRegistry,
|
|
7
|
+
type SafeDatabase,
|
|
8
|
+
type CollectorRegistry,
|
|
7
9
|
} from "@checkstack/backend-api";
|
|
8
10
|
import { healthCheckContract } from "@checkstack/healthcheck-common";
|
|
9
11
|
import { HealthCheckService } from "./service";
|
|
10
|
-
import { NodePgDatabase } from "drizzle-orm/node-postgres";
|
|
11
12
|
import * as schema from "./schema";
|
|
12
13
|
import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
13
14
|
|
|
@@ -18,11 +19,12 @@ import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
|
18
19
|
* based on the contract's meta.userType and meta.access.
|
|
19
20
|
*/
|
|
20
21
|
export const createHealthCheckRouter = (
|
|
21
|
-
database:
|
|
22
|
-
registry: HealthCheckRegistry
|
|
22
|
+
database: SafeDatabase<typeof schema>,
|
|
23
|
+
registry: HealthCheckRegistry,
|
|
24
|
+
collectorRegistry: CollectorRegistry,
|
|
23
25
|
) => {
|
|
24
26
|
// Create service instance once - shared across all handlers
|
|
25
|
-
const service = new HealthCheckService(database, registry);
|
|
27
|
+
const service = new HealthCheckService(database, registry, collectorRegistry);
|
|
26
28
|
|
|
27
29
|
// Create contract implementer with context type AND auto auth middleware
|
|
28
30
|
const os = implement(healthCheckContract)
|
|
@@ -40,7 +42,7 @@ export const createHealthCheckRouter = (
|
|
|
40
42
|
? toJsonSchemaWithChartMeta(r.strategy.result.schema)
|
|
41
43
|
: undefined,
|
|
42
44
|
aggregatedResultSchema: toJsonSchemaWithChartMeta(
|
|
43
|
-
r.strategy.aggregatedResult.schema
|
|
45
|
+
r.strategy.aggregatedResult.schema,
|
|
44
46
|
),
|
|
45
47
|
}));
|
|
46
48
|
}),
|
|
@@ -48,7 +50,7 @@ export const createHealthCheckRouter = (
|
|
|
48
50
|
getCollectors: os.getCollectors.handler(async ({ input, context }) => {
|
|
49
51
|
// Get strategy to verify it exists
|
|
50
52
|
const strategy = context.healthCheckRegistry.getStrategy(
|
|
51
|
-
input.strategyId
|
|
53
|
+
input.strategyId,
|
|
52
54
|
);
|
|
53
55
|
if (!strategy) {
|
|
54
56
|
return [];
|
|
@@ -74,6 +76,9 @@ export const createHealthCheckRouter = (
|
|
|
74
76
|
description: collector.description,
|
|
75
77
|
configSchema: toJsonSchema(collector.config.schema),
|
|
76
78
|
resultSchema: toJsonSchemaWithChartMeta(collector.result.schema),
|
|
79
|
+
aggregatedResultSchema: collector.aggregatedResult
|
|
80
|
+
? toJsonSchemaWithChartMeta(collector.aggregatedResult.schema)
|
|
81
|
+
: undefined,
|
|
77
82
|
allowMultiple: collector.allowMultiple ?? false,
|
|
78
83
|
}));
|
|
79
84
|
}),
|
|
@@ -103,13 +108,13 @@ export const createHealthCheckRouter = (
|
|
|
103
108
|
getSystemConfigurations: os.getSystemConfigurations.handler(
|
|
104
109
|
async ({ input }) => {
|
|
105
110
|
return service.getSystemConfigurations(input);
|
|
106
|
-
}
|
|
111
|
+
},
|
|
107
112
|
),
|
|
108
113
|
|
|
109
114
|
getSystemAssociations: os.getSystemAssociations.handler(
|
|
110
115
|
async ({ input }) => {
|
|
111
116
|
return service.getSystemAssociations(input.systemId);
|
|
112
|
-
}
|
|
117
|
+
},
|
|
113
118
|
),
|
|
114
119
|
|
|
115
120
|
associateSystem: os.associateSystem.handler(async ({ input, context }) => {
|
|
@@ -123,7 +128,7 @@ export const createHealthCheckRouter = (
|
|
|
123
128
|
// If enabling the health check, schedule it immediately
|
|
124
129
|
if (input.body.enabled) {
|
|
125
130
|
const config = await service.getConfiguration(
|
|
126
|
-
input.body.configurationId
|
|
131
|
+
input.body.configurationId,
|
|
127
132
|
);
|
|
128
133
|
if (config) {
|
|
129
134
|
const { scheduleHealthCheck } = await import("./queue-executor");
|
|
@@ -152,9 +157,9 @@ export const createHealthCheckRouter = (
|
|
|
152
157
|
await service.updateRetentionConfig(
|
|
153
158
|
input.systemId,
|
|
154
159
|
input.configurationId,
|
|
155
|
-
input.retentionConfig
|
|
160
|
+
input.retentionConfig,
|
|
156
161
|
);
|
|
157
|
-
}
|
|
162
|
+
},
|
|
158
163
|
),
|
|
159
164
|
|
|
160
165
|
getHistory: os.getHistory.handler(async ({ input }) => {
|
|
@@ -176,12 +181,12 @@ export const createHealthCheckRouter = (
|
|
|
176
181
|
return service.getAggregatedHistory(input, {
|
|
177
182
|
includeAggregatedResult: true,
|
|
178
183
|
});
|
|
179
|
-
}
|
|
184
|
+
},
|
|
180
185
|
),
|
|
181
186
|
getSystemHealthStatus: os.getSystemHealthStatus.handler(
|
|
182
187
|
async ({ input }) => {
|
|
183
188
|
return service.getSystemHealthStatus(input.systemId);
|
|
184
|
-
}
|
|
189
|
+
},
|
|
185
190
|
),
|
|
186
191
|
|
|
187
192
|
getBulkSystemHealthStatus: os.getBulkSystemHealthStatus.handler(
|
|
@@ -195,17 +200,17 @@ export const createHealthCheckRouter = (
|
|
|
195
200
|
await Promise.all(
|
|
196
201
|
input.systemIds.map(async (systemId) => {
|
|
197
202
|
statuses[systemId] = await service.getSystemHealthStatus(systemId);
|
|
198
|
-
})
|
|
203
|
+
}),
|
|
199
204
|
);
|
|
200
205
|
|
|
201
206
|
return { statuses };
|
|
202
|
-
}
|
|
207
|
+
},
|
|
203
208
|
),
|
|
204
209
|
|
|
205
210
|
getSystemHealthOverview: os.getSystemHealthOverview.handler(
|
|
206
211
|
async ({ input }) => {
|
|
207
212
|
return service.getSystemHealthOverview(input.systemId);
|
|
208
|
-
}
|
|
213
|
+
},
|
|
209
214
|
),
|
|
210
215
|
});
|
|
211
216
|
};
|
package/src/schema.ts
CHANGED
|
@@ -47,7 +47,7 @@ export const healthCheckConfigurations = pgTable(
|
|
|
47
47
|
isTemplate: boolean("is_template").default(false),
|
|
48
48
|
createdAt: timestamp("created_at").defaultNow().notNull(),
|
|
49
49
|
updatedAt: timestamp("updated_at").defaultNow().notNull(),
|
|
50
|
-
}
|
|
50
|
+
},
|
|
51
51
|
);
|
|
52
52
|
|
|
53
53
|
/**
|
|
@@ -93,7 +93,7 @@ export const systemHealthChecks = pgTable(
|
|
|
93
93
|
},
|
|
94
94
|
(t) => ({
|
|
95
95
|
pk: primaryKey({ columns: [t.systemId, t.configurationId] }),
|
|
96
|
-
})
|
|
96
|
+
}),
|
|
97
97
|
);
|
|
98
98
|
|
|
99
99
|
export const healthCheckRuns = pgTable("health_check_runs", {
|
|
@@ -137,6 +137,8 @@ export const healthCheckAggregates = pgTable(
|
|
|
137
137
|
healthyCount: integer("healthy_count").notNull(),
|
|
138
138
|
degradedCount: integer("degraded_count").notNull(),
|
|
139
139
|
unhealthyCount: integer("unhealthy_count").notNull(),
|
|
140
|
+
/** Sum of all latencies in this bucket (for accurate averaging when combining) */
|
|
141
|
+
latencySumMs: integer("latency_sum_ms"),
|
|
140
142
|
avgLatencyMs: integer("avg_latency_ms"),
|
|
141
143
|
minLatencyMs: integer("min_latency_ms"),
|
|
142
144
|
maxLatencyMs: integer("max_latency_ms"),
|
|
@@ -152,7 +154,7 @@ export const healthCheckAggregates = pgTable(
|
|
|
152
154
|
t.configurationId,
|
|
153
155
|
t.systemId,
|
|
154
156
|
t.bucketStart,
|
|
155
|
-
t.bucketSize
|
|
157
|
+
t.bucketSize,
|
|
156
158
|
),
|
|
157
|
-
})
|
|
159
|
+
}),
|
|
158
160
|
);
|