@checkstack/healthcheck-backend 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ import { NodePgDatabase } from "drizzle-orm/node-postgres";
2
+ import * as schema from "./schema";
3
+ import {
4
+ healthCheckRuns,
5
+ systemHealthChecks,
6
+ healthCheckAggregates,
7
+ DEFAULT_RETENTION_CONFIG,
8
+ } from "./schema";
9
+ import { eq, and, lt, sql } from "drizzle-orm";
10
+ import type {
11
+ HealthCheckRegistry,
12
+ Logger,
13
+ } from "@checkstack/backend-api";
14
+ import type { QueueManager } from "@checkstack/queue-api";
15
+
16
/** Drizzle Postgres handle typed with this package's table schema. */
type Db = NodePgDatabase<typeof schema>;

/** Collaborators required to register and run the retention job. */
interface RetentionJobDeps {
  /** Database holding raw runs, aggregates and system assignments. */
  db: Db;
  /** Registry used to resolve the strategy of a configuration. */
  registry: HealthCheckRegistry;
  /** Logger for job progress and per-assignment failures. */
  logger: Logger;
  /** Source of the queue the job is consumed from and scheduled on. */
  queueManager: QueueManager;
}

/** Name of the queue that carries retention job messages. */
const RETENTION_QUEUE = "health-check-retention";

/** Message placed on the retention queue; the only trigger is the schedule. */
interface RetentionJobPayload {
  trigger: "scheduled";
}
30
+
31
/**
 * Registers the retention consumer and schedules the recurring retention run.
 *
 * A consumer in the "retention-worker" group executes `runRetentionJob` for
 * every message on the retention queue, and a recurring message is scheduled
 * every 24 hours under the job id "health-check-retention-daily". Each run:
 * 1. Aggregates old raw runs into hourly buckets
 * 2. Rolls up old hourly aggregates into daily
 * 3. Deletes expired daily aggregates
 *
 * @param deps Database, registry, logger and queue manager used by the job.
 */
export async function setupRetentionJob(deps: RetentionJobDeps) {
  const { db, registry, logger, queueManager } = deps;
  const secondsPerDay = 24 * 60 * 60;

  const retentionQueue =
    queueManager.getQueue<RetentionJobPayload>(RETENTION_QUEUE);

  // Invoked once per retention message; the payload content is not inspected.
  const processRetentionMessage = async () => {
    logger.info("Starting health check retention job");
    await runRetentionJob({ db, registry, logger, queueManager });
    logger.info("Completed health check retention job");
  };

  await retentionQueue.consume(processRetentionMessage, {
    consumerGroup: "retention-worker",
  });

  // One recurring trigger per day.
  await retentionQueue.scheduleRecurring(
    { trigger: "scheduled" },
    {
      jobId: "health-check-retention-daily",
      intervalSeconds: secondsPerDay,
    }
  );

  logger.info("Health check retention job scheduled (runs daily)");
}
63
+
64
/**
 * Runs one retention pass over every system/configuration assignment.
 *
 * For each assignment the three retention steps run in order (raw -> hourly,
 * hourly -> daily, expire daily), using the assignment's own retention config
 * or `DEFAULT_RETENTION_CONFIG` when none is set. A failure in any step is
 * logged and skips the remaining steps for that assignment only; the loop then
 * continues with the next assignment.
 *
 * @param deps Database, registry and logger used by the retention steps.
 */
export async function runRetentionJob(deps: RetentionJobDeps) {
  const { db, registry, logger } = deps;

  const assignments = await db.select().from(systemHealthChecks);

  for (const assignment of assignments) {
    const { systemId, configurationId } = assignment;
    const retention = assignment.retentionConfig ?? DEFAULT_RETENTION_CONFIG;
    const scope = { db, systemId, configurationId };

    try {
      // Step 1: raw runs -> hourly buckets
      await aggregateRawRuns({
        ...scope,
        registry,
        rawRetentionDays: retention.rawRetentionDays,
      });

      // Step 2: hourly buckets -> daily buckets
      await rollupHourlyAggregates({
        ...scope,
        hourlyRetentionDays: retention.hourlyRetentionDays,
      });

      // Step 3: drop daily buckets past their retention
      await deleteExpiredAggregates({
        ...scope,
        dailyRetentionDays: retention.dailyRetentionDays,
      });
    } catch (error) {
      logger.error(
        `Retention job failed for ${systemId}/${configurationId}`,
        { error }
      );
    }
  }
}
110
+
111
/** Inputs for aggregating one assignment's raw runs into hourly buckets. */
interface AggregateRawRunsParams {
  db: Db;
  registry: HealthCheckRegistry;
  systemId: string;
  configurationId: string;
  /** Raw runs older than this many days are aggregated and deleted. */
  rawRetentionDays: number;
}

/**
 * Aggregates raw runs older than the retention period into hourly buckets.
 *
 * Loads every raw run of the given system/configuration whose timestamp is
 * before "now minus `rawRetentionDays`" (rounded down to the hour), groups the
 * runs by the hour they fall in, writes one hourly aggregate per group and
 * deletes the raw runs that went into it.
 *
 * The aggregate write and the raw-run deletes of a bucket run inside a single
 * transaction. Without that, a failure between the two leaves raw runs behind
 * that the next pass aggregates again, and because the upsert ADDS counts on
 * conflict the bucket would be double-counted.
 *
 * On conflict with an existing hourly row only the four counters are merged;
 * the latency figures and `aggregatedResult` of the existing row are kept.
 *
 * @param params Database, registry, assignment identifiers and retention days.
 */
async function aggregateRawRuns(params: AggregateRawRunsParams) {
  const { db, registry, systemId, configurationId, rawRetentionDays } = params;

  const cutoffDate = new Date();
  cutoffDate.setDate(cutoffDate.getDate() - rawRetentionDays);
  cutoffDate.setHours(cutoffDate.getHours(), 0, 0, 0); // Round down to the hour

  // The strategy (if the configuration still exists) aggregates run metadata.
  const config = await db.query.healthCheckConfigurations.findFirst({
    where: eq(schema.healthCheckConfigurations.id, configurationId),
  });
  const strategy = config ? registry.getStrategy(config.strategyId) : undefined;

  // Raw runs older than the cutoff, oldest first.
  const oldRuns = await db
    .select()
    .from(healthCheckRuns)
    .where(
      and(
        eq(healthCheckRuns.systemId, systemId),
        eq(healthCheckRuns.configurationId, configurationId),
        lt(healthCheckRuns.timestamp, cutoffDate)
      )
    )
    .orderBy(healthCheckRuns.timestamp);

  if (oldRuns.length === 0) return;

  // Group into hourly buckets keyed by the ISO string of the bucket start.
  const buckets = new Map<
    string,
    {
      bucketStart: Date;
      runs: Array<{
        status: "healthy" | "unhealthy" | "degraded";
        latencyMs: number | undefined;
        metadata?: Record<string, unknown>;
      }>;
      runIds: string[];
    }
  >();

  for (const run of oldRuns) {
    const bucketStart = new Date(run.timestamp);
    bucketStart.setMinutes(0, 0, 0);
    const key = bucketStart.toISOString();

    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = { bucketStart, runs: [], runIds: [] };
      buckets.set(key, bucket);
    }
    bucket.runs.push({
      status: run.status,
      latencyMs: run.latencyMs ?? undefined,
      metadata: run.result ?? undefined,
    });
    bucket.runIds.push(run.id);
  }

  for (const bucket of buckets.values()) {
    // Status counters
    const runCount = bucket.runs.length;
    let healthyCount = 0;
    let degradedCount = 0;
    let unhealthyCount = 0;
    for (const r of bucket.runs) {
      if (r.status === "healthy") healthyCount++;
      if (r.status === "degraded") degradedCount++;
      if (r.status === "unhealthy") unhealthyCount++;
    }

    // Latency statistics over the runs that recorded a latency
    const latencies = bucket.runs
      .map((r) => r.latencyMs)
      .filter((l): l is number => l !== undefined);

    let avgLatencyMs: number | undefined;
    let minLatencyMs: number | undefined;
    let maxLatencyMs: number | undefined;
    let p95LatencyMs: number | undefined;

    if (latencies.length > 0) {
      let sum = 0;
      let min = Number.POSITIVE_INFINITY;
      let max = Number.NEGATIVE_INFINITY;
      // Single pass instead of Math.min/max(...spread), which is bounded by
      // the engine's argument-count limit.
      for (const l of latencies) {
        sum += l;
        if (l < min) min = l;
        if (l > max) max = l;
      }
      avgLatencyMs = Math.round(sum / latencies.length);
      minLatencyMs = min;
      maxLatencyMs = max;
      p95LatencyMs = calculatePercentile(latencies, 95);
    }

    // Strategy-specific aggregation of the run results, when available
    let aggregatedResult: Record<string, unknown> | undefined;
    if (strategy) {
      aggregatedResult = strategy.aggregateResult(bucket.runs) as Record<
        string,
        unknown
      >;
    }

    // Upsert the aggregate and remove its raw runs atomically.
    await db.transaction(async (tx) => {
      await tx
        .insert(healthCheckAggregates)
        .values({
          configurationId,
          systemId,
          bucketStart: bucket.bucketStart,
          bucketSize: "hourly",
          runCount,
          healthyCount,
          degradedCount,
          unhealthyCount,
          avgLatencyMs,
          minLatencyMs,
          maxLatencyMs,
          p95LatencyMs,
          aggregatedResult,
        })
        .onConflictDoUpdate({
          target: [
            healthCheckAggregates.configurationId,
            healthCheckAggregates.systemId,
            healthCheckAggregates.bucketStart,
            healthCheckAggregates.bucketSize,
          ],
          set: {
            runCount: sql`${healthCheckAggregates.runCount} + ${runCount}`,
            healthyCount: sql`${healthCheckAggregates.healthyCount} + ${healthyCount}`,
            degradedCount: sql`${healthCheckAggregates.degradedCount} + ${degradedCount}`,
            unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${unhealthyCount}`,
          },
        });

      for (const runId of bucket.runIds) {
        await tx.delete(healthCheckRuns).where(eq(healthCheckRuns.id, runId));
      }
    });
  }
}
260
+
261
/** Inputs for rolling one assignment's hourly aggregates up into daily ones. */
interface RollupParams {
  db: Db;
  systemId: string;
  configurationId: string;
  /** Hourly aggregates older than this many days are rolled up and deleted. */
  hourlyRetentionDays: number;
}

/**
 * Rolls up hourly aggregates older than the retention period into daily buckets.
 *
 * Loads the hourly aggregates of the given system/configuration that start
 * before "now minus `hourlyRetentionDays`" (rounded down to midnight), groups
 * them by day, writes one daily aggregate per day and deletes the hourly rows
 * that went into it.
 *
 * Details worth knowing:
 * - The daily average latency is a run-count-weighted mean over the hourly
 *   rows that HAVE an average. Hours without latency data contribute neither
 *   to the numerator nor to the weight, and a day with no latency data at all
 *   stores no average instead of 0.
 * - p95 and `aggregatedResult` are not carried over; they cannot be combined
 *   from hourly values.
 * - The daily upsert and the hourly deletes of a day share one transaction, so
 *   a failure cannot leave a daily row next to the hourly rows it was built
 *   from.
 * - If a daily row already exists for the day, the counters are added to it
 *   (same merge rule as the hourly upsert in this file) instead of failing on
 *   the key that upsert relies on.
 *
 * NOTE(review): day and cutoff boundaries are computed with local-time Date
 * setters, so they follow the process timezone - confirm that is intended.
 *
 * @param params Database, assignment identifiers and retention days.
 */
async function rollupHourlyAggregates(params: RollupParams) {
  const { db, systemId, configurationId, hourlyRetentionDays } = params;

  const cutoffDate = new Date();
  cutoffDate.setDate(cutoffDate.getDate() - hourlyRetentionDays);
  cutoffDate.setHours(0, 0, 0, 0); // Round down to the day

  const oldHourly = await db
    .select()
    .from(healthCheckAggregates)
    .where(
      and(
        eq(healthCheckAggregates.systemId, systemId),
        eq(healthCheckAggregates.configurationId, configurationId),
        eq(healthCheckAggregates.bucketSize, "hourly"),
        lt(healthCheckAggregates.bucketStart, cutoffDate)
      )
    );

  if (oldHourly.length === 0) return;

  // Group by day, keyed by the ISO string of the day start.
  const dailyBuckets = new Map<
    string,
    {
      bucketStart: Date;
      aggregates: typeof oldHourly;
    }
  >();

  for (const hourly of oldHourly) {
    const dayStart = new Date(hourly.bucketStart);
    dayStart.setHours(0, 0, 0, 0);
    const key = dayStart.toISOString();

    let day = dailyBuckets.get(key);
    if (!day) {
      day = { bucketStart: dayStart, aggregates: [] };
      dailyBuckets.set(key, day);
    }
    day.aggregates.push(hourly);
  }

  for (const bucket of dailyBuckets.values()) {
    let runCount = 0;
    let healthyCount = 0;
    let degradedCount = 0;
    let unhealthyCount = 0;
    let weightedLatencySum = 0;
    let latencyWeight = 0;
    let minLatencyMs: number | undefined;
    let maxLatencyMs: number | undefined;

    for (const a of bucket.aggregates) {
      runCount += a.runCount;
      healthyCount += a.healthyCount;
      degradedCount += a.degradedCount;
      unhealthyCount += a.unhealthyCount;

      // Only hours that actually carry an average take part in the mean.
      if (a.avgLatencyMs !== null) {
        weightedLatencySum += a.avgLatencyMs * a.runCount;
        latencyWeight += a.runCount;
      }
      if (
        a.minLatencyMs !== null &&
        (minLatencyMs === undefined || a.minLatencyMs < minLatencyMs)
      ) {
        minLatencyMs = a.minLatencyMs;
      }
      if (
        a.maxLatencyMs !== null &&
        (maxLatencyMs === undefined || a.maxLatencyMs > maxLatencyMs)
      ) {
        maxLatencyMs = a.maxLatencyMs;
      }
    }

    const avgLatencyMs =
      latencyWeight > 0
        ? Math.round(weightedLatencySum / latencyWeight)
        : undefined;

    // Write the daily aggregate and drop its hourly sources atomically.
    await db.transaction(async (tx) => {
      await tx
        .insert(healthCheckAggregates)
        .values({
          configurationId,
          systemId,
          bucketStart: bucket.bucketStart,
          bucketSize: "daily",
          runCount,
          healthyCount,
          degradedCount,
          unhealthyCount,
          avgLatencyMs,
          minLatencyMs,
          maxLatencyMs,
          p95LatencyMs: undefined, // Cannot accurately combine p95s
          aggregatedResult: undefined, // Cannot combine result across hours
        })
        .onConflictDoUpdate({
          target: [
            healthCheckAggregates.configurationId,
            healthCheckAggregates.systemId,
            healthCheckAggregates.bucketStart,
            healthCheckAggregates.bucketSize,
          ],
          set: {
            runCount: sql`${healthCheckAggregates.runCount} + ${runCount}`,
            healthyCount: sql`${healthCheckAggregates.healthyCount} + ${healthyCount}`,
            degradedCount: sql`${healthCheckAggregates.degradedCount} + ${degradedCount}`,
            unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${unhealthyCount}`,
          },
        });

      for (const hourly of bucket.aggregates) {
        await tx
          .delete(healthCheckAggregates)
          .where(eq(healthCheckAggregates.id, hourly.id));
      }
    });
  }
}
371
+
372
/** Inputs for expiring one assignment's daily aggregates. */
interface DeleteExpiredParams {
  db: Db;
  systemId: string;
  configurationId: string;
  /** Daily aggregates older than this many days are deleted. */
  dailyRetentionDays: number;
}

/**
 * Deletes the daily aggregates of one system/configuration whose bucket start
 * lies before "now minus `dailyRetentionDays`".
 *
 * @param params Database, assignment identifiers and retention days.
 */
async function deleteExpiredAggregates(params: DeleteExpiredParams) {
  const { db, systemId, configurationId, dailyRetentionDays } = params;

  const expiryThreshold = new Date();
  expiryThreshold.setDate(expiryThreshold.getDate() - dailyRetentionDays);

  // Only "daily" rows of this assignment that start before the threshold.
  const expiredDailyRows = and(
    eq(healthCheckAggregates.systemId, systemId),
    eq(healthCheckAggregates.configurationId, configurationId),
    eq(healthCheckAggregates.bucketSize, "daily"),
    lt(healthCheckAggregates.bucketStart, expiryThreshold)
  );

  await db.delete(healthCheckAggregates).where(expiredDailyRows);
}
399
+
400
/**
 * Returns the nearest-rank percentile of `values`.
 *
 * The input is copied before sorting, so the caller's array is not mutated.
 * `percentile` is clamped to the range 0..100: 0 (or less) yields the smallest
 * value and 100 (or more) yields the largest.
 *
 * @param values Samples to rank; must contain at least one element.
 * @param percentile Percentile to look up, in percent.
 * @returns The sample at the requested rank.
 * @throws RangeError If `values` is empty or `percentile` is NaN. The result
 *   would otherwise be `undefined` despite the `number` return type.
 */
function calculatePercentile(values: number[], percentile: number): number {
  if (values.length === 0) {
    throw new RangeError("Cannot calculate a percentile of an empty list");
  }
  if (Number.isNaN(percentile)) {
    throw new RangeError("Percentile must be a number");
  }

  const sorted = [...values].sort((a, b) => a - b);
  const boundedPercentile = Math.min(100, Math.max(0, percentile));
  const rank = Math.ceil((boundedPercentile / 100) * sorted.length) - 1;
  // rank is -1 for percentile 0; keep the index inside the array.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[index];
}
@@ -0,0 +1,81 @@
1
+ import { describe, it, expect, mock } from "bun:test";
2
+ import { createHealthCheckRouter } from "./router";
3
+ import { createMockRpcContext } from "@checkstack/backend-api";
4
+ import { call } from "@orpc/server";
5
+ import { z } from "zod";
6
+
7
describe("HealthCheck Router", () => {
  // Caller identity used by every test: admin role with wildcard permission.
  const mockUser = {
    type: "user" as const,
    id: "test-user",
    permissions: ["*"],
    roles: ["admin"],
  };

  // Builds what `db.select()` returns: `from()` yields a value that can be
  // awaited directly (no rows) or filtered via `where()` (also no rows).
  const createSelectMock = () => {
    const awaitableRows = Object.assign(Promise.resolve([]), {
      where: mock(() => Promise.resolve([])),
    });
    return { from: mock(() => awaitableRows) };
  };

  // Builds what `db.insert()` returns: `values().returning()` resolves empty.
  const createInsertMock = () => ({
    values: mock(() => ({
      returning: mock(() => Promise.resolve([])),
    })),
  });

  // Database stub exposing only the methods the service under test touches.
  const mockDb = {
    select: mock(() => createSelectMock()),
    insert: mock(() => createInsertMock()),
    query: {
      healthCheckConfigurations: {
        findFirst: mock(() => Promise.resolve(null)),
      },
    },
  } as unknown;

  const mockRegistry = {
    register: mock(),
    getStrategy: mock(),
    getStrategies: mock(() => []),
  };

  const router = createHealthCheckRouter(mockDb as never, mockRegistry);

  it("getStrategies returns strategies from registry", async () => {
    // A single registered strategy, exposed through the RPC context.
    const httpStrategy = {
      id: "http",
      displayName: "HTTP",
      description: "Check HTTP",
      config: {
        version: 1,
        schema: z.object({}),
      },
      aggregatedResult: {
        schema: z.object({}),
      },
    };
    const context = createMockRpcContext({
      user: mockUser,
      healthCheckRegistry: {
        getStrategies: mock().mockReturnValue([httpStrategy]),
      } as any,
    });

    const strategies = await call(router.getStrategies, undefined, { context });

    expect(strategies).toHaveLength(1);
    expect(strategies[0].id).toBe("http");
  });

  it("getConfigurations calls service", async () => {
    const context = createMockRpcContext({ user: mockUser });

    const configurations = await call(router.getConfigurations, undefined, {
      context,
    });

    expect(Array.isArray(configurations)).toBe(true);
  });
});
package/src/router.ts ADDED
@@ -0,0 +1,157 @@
1
+ import { implement, ORPCError } from "@orpc/server";
2
+ import {
3
+ autoAuthMiddleware,
4
+ zod,
5
+ type RpcContext,
6
+ type HealthCheckRegistry,
7
+ } from "@checkstack/backend-api";
8
+ import { healthCheckContract } from "@checkstack/healthcheck-common";
9
+ import { HealthCheckService } from "./service";
10
+ import { NodePgDatabase } from "drizzle-orm/node-postgres";
11
+ import * as schema from "./schema";
12
+
13
+ /**
14
+ * Creates the healthcheck router using contract-based implementation.
15
+ *
16
+ * Auth and permissions are automatically enforced via autoAuthMiddleware
17
+ * based on the contract's meta.userType and meta.permissions.
18
+ */
19
+ export const createHealthCheckRouter = (
20
+ database: NodePgDatabase<typeof schema>,
21
+ registry: HealthCheckRegistry
22
+ ) => {
23
+ // Create service instance once - shared across all handlers
24
+ const service = new HealthCheckService(database, registry);
25
+
26
+ // Create contract implementer with context type AND auto auth middleware
27
+ const os = implement(healthCheckContract)
28
+ .$context<RpcContext>()
29
+ .use(autoAuthMiddleware);
30
+
31
+ return os.router({
32
+ getStrategies: os.getStrategies.handler(async ({ context }) => {
33
+ return context.healthCheckRegistry.getStrategies().map((s) => ({
34
+ id: s.id,
35
+ displayName: s.displayName,
36
+ description: s.description,
37
+ configSchema: zod.toJSONSchema(s.config.schema),
38
+ resultSchema: s.result ? zod.toJSONSchema(s.result.schema) : undefined,
39
+ aggregatedResultSchema: zod.toJSONSchema(s.aggregatedResult.schema),
40
+ }));
41
+ }),
42
+
43
+ getConfigurations: os.getConfigurations.handler(async () => {
44
+ return service.getConfigurations();
45
+ }),
46
+
47
+ createConfiguration: os.createConfiguration.handler(async ({ input }) => {
48
+ return service.createConfiguration(input);
49
+ }),
50
+
51
+ updateConfiguration: os.updateConfiguration.handler(async ({ input }) => {
52
+ const config = await service.updateConfiguration(input.id, input.body);
53
+ if (!config) {
54
+ throw new ORPCError("NOT_FOUND", {
55
+ message: "Configuration not found",
56
+ });
57
+ }
58
+ return config;
59
+ }),
60
+
61
+ deleteConfiguration: os.deleteConfiguration.handler(async ({ input }) => {
62
+ await service.deleteConfiguration(input);
63
+ }),
64
+
65
+ getSystemConfigurations: os.getSystemConfigurations.handler(
66
+ async ({ input }) => {
67
+ return service.getSystemConfigurations(input);
68
+ }
69
+ ),
70
+
71
+ getSystemAssociations: os.getSystemAssociations.handler(
72
+ async ({ input }) => {
73
+ return service.getSystemAssociations(input.systemId);
74
+ }
75
+ ),
76
+
77
+ associateSystem: os.associateSystem.handler(async ({ input, context }) => {
78
+ await service.associateSystem({
79
+ systemId: input.systemId,
80
+ configurationId: input.body.configurationId,
81
+ enabled: input.body.enabled,
82
+ stateThresholds: input.body.stateThresholds,
83
+ });
84
+
85
+ // If enabling the health check, schedule it immediately
86
+ if (input.body.enabled) {
87
+ const config = await service.getConfiguration(
88
+ input.body.configurationId
89
+ );
90
+ if (config) {
91
+ const { scheduleHealthCheck } = await import("./queue-executor");
92
+ await scheduleHealthCheck({
93
+ queueManager: context.queueManager,
94
+ payload: {
95
+ configId: config.id,
96
+ systemId: input.systemId,
97
+ },
98
+ intervalSeconds: config.intervalSeconds,
99
+ });
100
+ }
101
+ }
102
+ }),
103
+
104
+ disassociateSystem: os.disassociateSystem.handler(async ({ input }) => {
105
+ await service.disassociateSystem(input.systemId, input.configId);
106
+ }),
107
+
108
+ getRetentionConfig: os.getRetentionConfig.handler(async ({ input }) => {
109
+ return service.getRetentionConfig(input.systemId, input.configurationId);
110
+ }),
111
+
112
+ updateRetentionConfig: os.updateRetentionConfig.handler(
113
+ async ({ input }) => {
114
+ await service.updateRetentionConfig(
115
+ input.systemId,
116
+ input.configurationId,
117
+ input.retentionConfig
118
+ );
119
+ }
120
+ ),
121
+
122
+ getHistory: os.getHistory.handler(async ({ input }) => {
123
+ return service.getHistory(input);
124
+ }),
125
+
126
+ getDetailedHistory: os.getDetailedHistory.handler(async ({ input }) => {
127
+ return service.getDetailedHistory(input);
128
+ }),
129
+
130
+ getAggregatedHistory: os.getAggregatedHistory.handler(async ({ input }) => {
131
+ return service.getAggregatedHistory(input, {
132
+ includeAggregatedResult: false,
133
+ });
134
+ }),
135
+
136
+ getDetailedAggregatedHistory: os.getDetailedAggregatedHistory.handler(
137
+ async ({ input }) => {
138
+ return service.getAggregatedHistory(input, {
139
+ includeAggregatedResult: true,
140
+ });
141
+ }
142
+ ),
143
+ getSystemHealthStatus: os.getSystemHealthStatus.handler(
144
+ async ({ input }) => {
145
+ return service.getSystemHealthStatus(input.systemId);
146
+ }
147
+ ),
148
+
149
+ getSystemHealthOverview: os.getSystemHealthOverview.handler(
150
+ async ({ input }) => {
151
+ return service.getSystemHealthOverview(input.systemId);
152
+ }
153
+ ),
154
+ });
155
+ };
156
+
157
+ export type HealthCheckRouter = ReturnType<typeof createHealthCheckRouter>;