@checkstack/healthcheck-backend 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +181 -0
- package/drizzle/0000_stormy_slayback.sql +33 -0
- package/drizzle/0001_thin_shotgun.sql +1 -0
- package/drizzle/0002_closed_lucky_pierre.sql +19 -0
- package/drizzle/0003_powerful_rage.sql +1 -0
- package/drizzle/0004_short_ezekiel.sql +1 -0
- package/drizzle/0005_glossy_longshot.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +234 -0
- package/drizzle/meta/0001_snapshot.json +240 -0
- package/drizzle/meta/0002_snapshot.json +361 -0
- package/drizzle/meta/0003_snapshot.json +367 -0
- package/drizzle/meta/0004_snapshot.json +401 -0
- package/drizzle/meta/0005_snapshot.json +401 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +37 -0
- package/src/aggregation.test.ts +373 -0
- package/src/hooks.test.ts +16 -0
- package/src/hooks.ts +35 -0
- package/src/index.ts +195 -0
- package/src/queue-executor.test.ts +229 -0
- package/src/queue-executor.ts +569 -0
- package/src/retention-job.ts +404 -0
- package/src/router.test.ts +81 -0
- package/src/router.ts +157 -0
- package/src/schema.ts +153 -0
- package/src/service.ts +718 -0
- package/src/state-evaluator.test.ts +237 -0
- package/src/state-evaluator.ts +105 -0
- package/src/state-thresholds-migrations.ts +15 -0
- package/tsconfig.json +6 -0
package/src/service.ts
ADDED
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
import {
|
|
2
|
+
HealthCheckConfiguration,
|
|
3
|
+
CreateHealthCheckConfiguration,
|
|
4
|
+
UpdateHealthCheckConfiguration,
|
|
5
|
+
StateThresholds,
|
|
6
|
+
HealthCheckStatus,
|
|
7
|
+
RetentionConfig,
|
|
8
|
+
} from "@checkstack/healthcheck-common";
|
|
9
|
+
import {
|
|
10
|
+
healthCheckConfigurations,
|
|
11
|
+
systemHealthChecks,
|
|
12
|
+
healthCheckRuns,
|
|
13
|
+
VersionedStateThresholds,
|
|
14
|
+
} from "./schema";
|
|
15
|
+
import * as schema from "./schema";
|
|
16
|
+
import { eq, and, InferSelectModel, desc, gte, lte } from "drizzle-orm";
|
|
17
|
+
import { NodePgDatabase } from "drizzle-orm/node-postgres";
|
|
18
|
+
import { ORPCError } from "@orpc/server";
|
|
19
|
+
import { evaluateHealthStatus } from "./state-evaluator";
|
|
20
|
+
import { stateThresholds } from "./state-thresholds-migrations";
|
|
21
|
+
import type { HealthCheckRegistry } from "@checkstack/backend-api";
|
|
22
|
+
|
|
23
|
+
/** Drizzle node-postgres database handle typed against this package's schema module. */
type Db = NodePgDatabase<typeof schema>;
|
|
25
|
+
|
|
26
|
+
/**
 * Evaluated status of a single health check configuration attached to a system.
 */
interface SystemCheckStatus {
  /** ID of the health check configuration this entry describes. */
  configurationId: string;
  /** Name of that configuration. */
  configurationName: string;
  /** Status produced by the evaluator for this configuration. */
  status: HealthCheckStatus;
  /** How many runs were handed to the evaluator. */
  runsConsidered: number;
  /** Timestamp of the newest run considered; absent when there were no runs. */
  lastRunAt?: Date;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Aggregate health status of a system together with the per-check breakdown.
 */
interface SystemHealthStatusResponse {
  /** Aggregate status for the whole system. */
  status: HealthCheckStatus;
  /** Moment at which the response was assembled. */
  evaluatedAt: Date;
  /** One entry per health check configuration that was evaluated. */
  checkStatuses: SystemCheckStatus[];
}
|
|
39
|
+
|
|
40
|
+
export class HealthCheckService {
|
|
41
|
+
/**
 * @param db Drizzle database handle used for every query in this service.
 * @param registry Optional strategy registry; it is only consulted by
 *   getAggregatedHistory when an aggregated result is requested.
 */
constructor(
  private db: Db,
  private registry?: HealthCheckRegistry
) {}
|
|
42
|
+
|
|
43
|
+
/**
 * Insert a new health check configuration and return it in API shape.
 *
 * Every configuration created here is stored with `isTemplate: false`.
 *
 * @param data Name, strategy, strategy config and interval for the new check.
 * @returns The freshly inserted configuration.
 */
async createConfiguration(
  data: CreateHealthCheckConfiguration
): Promise<HealthCheckConfiguration> {
  const { name, strategyId, config, intervalSeconds } = data;

  const inserted = await this.db
    .insert(healthCheckConfigurations)
    .values({
      name,
      strategyId,
      config,
      intervalSeconds,
      isTemplate: false, // Defaulting for now
    })
    .returning();

  return this.mapConfig(inserted[0]);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Look up a single health check configuration by ID.
 *
 * @param id Configuration ID.
 * @returns The configuration, or `undefined` when no row matches.
 */
async getConfiguration(
  id: string
): Promise<HealthCheckConfiguration | undefined> {
  const matches = await this.db
    .select()
    .from(healthCheckConfigurations)
    .where(eq(healthCheckConfigurations.id, id));

  const row = matches[0];
  if (!row) {
    return undefined;
  }
  return this.mapConfig(row);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Apply a partial update to a configuration and stamp `updatedAt`.
 *
 * @param id Configuration ID.
 * @param data Fields to overwrite; they are spread as-is into the update.
 * @returns The updated configuration, or `undefined` when no row matches.
 */
async updateConfiguration(
  id: string,
  data: UpdateHealthCheckConfiguration
): Promise<HealthCheckConfiguration | undefined> {
  const updated = await this.db
    .update(healthCheckConfigurations)
    .set({ ...data, updatedAt: new Date() })
    .where(eq(healthCheckConfigurations.id, id))
    .returning();

  const row = updated[0];
  if (!row) {
    return undefined;
  }
  return this.mapConfig(row);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Delete the configuration with the given ID.
 *
 * @param id Configuration ID.
 */
async deleteConfiguration(id: string): Promise<void> {
  const matchesId = eq(healthCheckConfigurations.id, id);
  await this.db.delete(healthCheckConfigurations).where(matchesId);
}
|
|
89
|
+
|
|
90
|
+
/**
 * List every stored health check configuration in API shape.
 */
async getConfigurations(): Promise<HealthCheckConfiguration[]> {
  const rows = await this.db.select().from(healthCheckConfigurations);

  const mapped: HealthCheckConfiguration[] = [];
  for (const row of rows) {
    mapped.push(this.mapConfig(row));
  }
  return mapped;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Attach a health check configuration to a system, or update the existing
 * attachment for the same (systemId, configurationId) pair.
 *
 * @param props.systemId System to attach the check to.
 * @param props.configurationId Configuration to attach.
 * @param props.enabled Whether the assignment is enabled; defaults to `true`.
 * @param props.stateThresholds Optional thresholds; when given they are wrapped
 *   in a versioned envelope before being stored, otherwise `undefined` is
 *   passed for the column on both insert and conflict-update.
 */
async associateSystem(props: {
  systemId: string;
  configurationId: string;
  enabled?: boolean;
  stateThresholds?: StateThresholds;
}) {
  const { systemId, configurationId } = props;
  const enabled = props.enabled === undefined ? true : props.enabled;
  const rawThresholds = props.stateThresholds;

  // Wrap thresholds in versioned config if provided
  let versionedThresholds: VersionedStateThresholds | undefined;
  if (rawThresholds) {
    versionedThresholds = stateThresholds.create(rawThresholds);
  }

  await this.db
    .insert(systemHealthChecks)
    .values({
      systemId,
      configurationId,
      enabled,
      stateThresholds: versionedThresholds,
    })
    .onConflictDoUpdate({
      target: [
        systemHealthChecks.systemId,
        systemHealthChecks.configurationId,
      ],
      set: {
        enabled,
        stateThresholds: versionedThresholds,
        updatedAt: new Date(),
      },
    });
}
|
|
132
|
+
|
|
133
|
+
/**
 * Remove the assignment of one configuration from one system.
 *
 * @param systemId System the check is attached to.
 * @param configurationId Configuration to detach.
 */
async disassociateSystem(systemId: string, configurationId: string) {
  const isThisAssignment = and(
    eq(systemHealthChecks.systemId, systemId),
    eq(systemHealthChecks.configurationId, configurationId)
  );
  await this.db.delete(systemHealthChecks).where(isThisAssignment);
}
|
|
143
|
+
|
|
144
|
+
/**
 * Read the retention configuration stored on a health check assignment.
 *
 * @param systemId System the check is attached to.
 * @param configurationId Attached configuration.
 * @returns `{ retentionConfig }`, where the value is `null` when the
 *   assignment does not exist or has no retention configuration stored.
 */
async getRetentionConfig(
  systemId: string,
  configurationId: string
): Promise<{ retentionConfig: RetentionConfig | null }> {
  const [assignment] = await this.db
    .select({ retentionConfig: systemHealthChecks.retentionConfig })
    .from(systemHealthChecks)
    .where(
      and(
        eq(systemHealthChecks.systemId, systemId),
        eq(systemHealthChecks.configurationId, configurationId)
      )
    );

  // eslint-disable-next-line unicorn/no-null -- RPC contract uses nullable()
  return { retentionConfig: assignment?.retentionConfig ?? null };
}
|
|
165
|
+
|
|
166
|
+
/**
 * Store (or clear, with `null`) the retention configuration of an assignment.
 *
 * A non-null configuration must satisfy raw < hourly < daily retention days.
 * If no assignment matches the pair, the update affects no rows and the
 * method still resolves.
 *
 * @param systemId System the check is attached to.
 * @param configurationId Attached configuration.
 * @param retentionConfig New retention settings, or `null` to clear them.
 * @throws ORPCError `BAD_REQUEST` when the retention hierarchy is violated.
 */
async updateRetentionConfig(
  systemId: string,
  configurationId: string,
  retentionConfig: RetentionConfig | null
): Promise<void> {
  if (retentionConfig) {
    const {
      rawRetentionDays: rawDays,
      hourlyRetentionDays: hourlyDays,
      dailyRetentionDays: dailyDays,
    } = retentionConfig;

    // Each tier must outlive the finer-grained tier before it.
    if (rawDays >= hourlyDays) {
      throw new ORPCError("BAD_REQUEST", {
        message: "Raw retention must be less than hourly retention",
      });
    }
    if (hourlyDays >= dailyDays) {
      throw new ORPCError("BAD_REQUEST", {
        message: "Hourly retention must be less than daily retention",
      });
    }
  }

  const isThisAssignment = and(
    eq(systemHealthChecks.systemId, systemId),
    eq(systemHealthChecks.configurationId, configurationId)
  );
  await this.db
    .update(systemHealthChecks)
    .set({ retentionConfig, updatedAt: new Date() })
    .where(isThisAssignment);
}
|
|
203
|
+
|
|
204
|
+
/**
 * Delete every health check assignment that belongs to a system.
 * Called when a system is deleted from the catalog.
 *
 * @param systemId System whose assignments are removed.
 */
async removeAllSystemAssociations(systemId: string) {
  const belongsToSystem = eq(systemHealthChecks.systemId, systemId);
  await this.db.delete(systemHealthChecks).where(belongsToSystem);
}
|
|
213
|
+
|
|
214
|
+
/**
 * List the configurations attached to a system (enabled or not).
 *
 * @param systemId System to look up.
 * @returns The attached configurations in API shape.
 */
async getSystemConfigurations(
  systemId: string
): Promise<HealthCheckConfiguration[]> {
  const joined = await this.db
    .select({ config: healthCheckConfigurations })
    .from(systemHealthChecks)
    .innerJoin(
      healthCheckConfigurations,
      eq(systemHealthChecks.configurationId, healthCheckConfigurations.id)
    )
    .where(eq(systemHealthChecks.systemId, systemId));

  return joined.map(({ config }) => this.mapConfig(config));
}
|
|
230
|
+
|
|
231
|
+
/**
 * List a system's assignments together with their parsed state thresholds.
 *
 * @param systemId System to look up.
 * @returns One entry per assignment; `stateThresholds` is `undefined` when
 *   nothing is stored for that assignment.
 */
async getSystemAssociations(systemId: string) {
  const rows = await this.db
    .select({
      configurationId: systemHealthChecks.configurationId,
      configName: healthCheckConfigurations.name,
      enabled: systemHealthChecks.enabled,
      stateThresholds: systemHealthChecks.stateThresholds,
    })
    .from(systemHealthChecks)
    .innerJoin(
      healthCheckConfigurations,
      eq(systemHealthChecks.configurationId, healthCheckConfigurations.id)
    )
    .where(eq(systemHealthChecks.systemId, systemId));

  // Stored thresholds are versioned; parse each one (one at a time) into the
  // plain StateThresholds shape before returning it.
  const results = [];
  for (const {
    configurationId,
    configName,
    enabled,
    stateThresholds: stored,
  } of rows) {
    const thresholds: StateThresholds | undefined = stored
      ? await stateThresholds.parse(stored)
      : undefined;

    results.push({
      configurationId,
      configurationName: configName,
      enabled,
      stateThresholds: thresholds,
    });
  }
  return results;
}
|
|
265
|
+
|
|
266
|
+
/**
 * Evaluate the current health status of a system.
 *
 * Only enabled assignments are considered. For each one, up to the 100 newest
 * runs are loaded and handed to the evaluator together with the assignment's
 * thresholds. The system status is the worst individual status
 * (unhealthy > degraded > healthy); a system without enabled assignments is
 * reported as healthy.
 *
 * @param systemId System to evaluate.
 */
async getSystemHealthStatus(
  systemId: string
): Promise<SystemHealthStatusResponse> {
  const enabledAssociations = await this.db
    .select({
      configurationId: systemHealthChecks.configurationId,
      stateThresholds: systemHealthChecks.stateThresholds,
      configName: healthCheckConfigurations.name,
      enabled: systemHealthChecks.enabled,
    })
    .from(systemHealthChecks)
    .innerJoin(
      healthCheckConfigurations,
      eq(systemHealthChecks.configurationId, healthCheckConfigurations.id)
    )
    .where(
      and(
        eq(systemHealthChecks.systemId, systemId),
        eq(systemHealthChecks.enabled, true)
      )
    );

  // No health checks configured - default healthy
  if (enabledAssociations.length === 0) {
    return { status: "healthy", evaluatedAt: new Date(), checkStatuses: [] };
  }

  const maxWindowSize = 100; // Max configurable window size
  const checkStatuses: SystemCheckStatus[] = [];

  for (const {
    configurationId,
    configName,
    stateThresholds: stored,
  } of enabledAssociations) {
    // Newest runs first, so index 0 is the latest run.
    const runs = await this.db
      .select({
        status: healthCheckRuns.status,
        timestamp: healthCheckRuns.timestamp,
      })
      .from(healthCheckRuns)
      .where(
        and(
          eq(healthCheckRuns.systemId, systemId),
          eq(healthCheckRuns.configurationId, configurationId)
        )
      )
      .orderBy(desc(healthCheckRuns.timestamp))
      .limit(maxWindowSize);

    // Extract and migrate thresholds from versioned config
    const thresholds: StateThresholds | undefined = stored
      ? await stateThresholds.parse(stored)
      : undefined;

    checkStatuses.push({
      configurationId,
      configurationName: configName,
      status: evaluateHealthStatus({ runs, thresholds }),
      runsConsidered: runs.length,
      lastRunAt: runs[0]?.timestamp,
    });
  }

  // Worst status wins.
  let aggregateStatus: HealthCheckStatus = "healthy";
  if (checkStatuses.some((check) => check.status === "unhealthy")) {
    aggregateStatus = "unhealthy";
  } else if (checkStatuses.some((check) => check.status === "degraded")) {
    aggregateStatus = "degraded";
  }

  return {
    status: aggregateStatus,
    evaluatedAt: new Date(),
    checkStatuses,
  };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Build a health overview for a system.
 *
 * Covers every assignment of the system (enabled or not). Each entry carries
 * the configuration details, the status evaluated from its 25 newest runs,
 * the parsed thresholds, and those runs themselves (newest first) for
 * sparkline rendering.
 *
 * @param systemId System to describe.
 * @returns `{ systemId, checks }`.
 */
async getSystemHealthOverview(systemId: string) {
  const associations = await this.db
    .select({
      configurationId: systemHealthChecks.configurationId,
      configName: healthCheckConfigurations.name,
      strategyId: healthCheckConfigurations.strategyId,
      intervalSeconds: healthCheckConfigurations.intervalSeconds,
      enabled: systemHealthChecks.enabled,
      stateThresholds: systemHealthChecks.stateThresholds,
    })
    .from(systemHealthChecks)
    .innerJoin(
      healthCheckConfigurations,
      eq(systemHealthChecks.configurationId, healthCheckConfigurations.id)
    )
    .where(eq(systemHealthChecks.systemId, systemId));

  const sparklineLimit = 25;
  const checks = [];

  for (const assoc of associations) {
    const { configurationId, stateThresholds: stored } = assoc;

    const runs = await this.db
      .select({
        id: healthCheckRuns.id,
        status: healthCheckRuns.status,
        timestamp: healthCheckRuns.timestamp,
      })
      .from(healthCheckRuns)
      .where(
        and(
          eq(healthCheckRuns.systemId, systemId),
          eq(healthCheckRuns.configurationId, configurationId)
        )
      )
      .orderBy(desc(healthCheckRuns.timestamp))
      .limit(sparklineLimit);

    // Migrate and extract thresholds
    const thresholds: StateThresholds | undefined = stored
      ? await stateThresholds.parse(stored)
      : undefined;

    // Evaluate current status from the same runs shown in the sparkline.
    const currentStatus = evaluateHealthStatus({
      runs: runs as Array<{ status: HealthCheckStatus; timestamp: Date }>,
      thresholds,
    });

    checks.push({
      configurationId,
      configurationName: assoc.configName,
      strategyId: assoc.strategyId,
      intervalSeconds: assoc.intervalSeconds,
      enabled: assoc.enabled,
      status: currentStatus,
      stateThresholds: thresholds,
      recentRuns: runs.map(({ id, status, timestamp }) => ({
        id,
        status,
        timestamp,
      })),
    });
  }

  return { systemId, checks };
}
|
|
432
|
+
|
|
433
|
+
/**
 * Get one page of health check run history without the `result` payload
 * (public view; latency is included).
 *
 * Every filter is optional and only applied when truthy. Runs are returned
 * newest first.
 *
 * @param props.limit Page size; defaults to 10.
 * @param props.offset Number of runs to skip; defaults to 0.
 * @returns `{ runs, total }`, where `total` counts all runs matching the
 *   filters, not just the returned page.
 */
async getHistory(props: {
  systemId?: string;
  configurationId?: string;
  startDate?: Date;
  endDate?: Date;
  limit?: number;
  offset?: number;
}) {
  const { limit: pageSize = 10, offset: skip = 0 } = props;

  const filters = [];
  if (props.systemId) {
    filters.push(eq(healthCheckRuns.systemId, props.systemId));
  }
  if (props.configurationId) {
    filters.push(eq(healthCheckRuns.configurationId, props.configurationId));
  }
  if (props.startDate) {
    filters.push(gte(healthCheckRuns.timestamp, props.startDate));
  }
  if (props.endDate) {
    filters.push(lte(healthCheckRuns.timestamp, props.endDate));
  }

  // No filters means no WHERE clause at all.
  const whereClause = filters.length > 0 ? and(...filters) : undefined;

  // Get total count using drizzle $count
  const total = await this.db.$count(healthCheckRuns, whereClause);

  let query = this.db.select().from(healthCheckRuns);
  if (whereClause) {
    // @ts-expect-error drizzle-orm type mismatch
    query = query.where(whereClause);
  }
  const page = await query
    .orderBy(desc(healthCheckRuns.timestamp))
    .limit(pageSize)
    .offset(skip);

  // Return without result field for public access (latencyMs is public data)
  const runs = page.map((run) => ({
    id: run.id,
    configurationId: run.configurationId,
    systemId: run.systemId,
    status: run.status,
    timestamp: run.timestamp,
    latencyMs: run.latencyMs ?? undefined,
  }));

  return { runs, total };
}
|
|
490
|
+
|
|
491
|
+
/**
 * Get one page of health check run history including the full `result`
 * payload. Restricted to users with manage permission (enforced by callers,
 * not here).
 *
 * Every filter is optional and only applied when truthy. Runs are returned
 * newest first; a missing `result` is reported as `{}`.
 *
 * @param props.limit Page size; defaults to 10.
 * @param props.offset Number of runs to skip; defaults to 0.
 * @returns `{ runs, total }`, where `total` counts all runs matching the
 *   filters, not just the returned page.
 */
async getDetailedHistory(props: {
  systemId?: string;
  configurationId?: string;
  startDate?: Date;
  endDate?: Date;
  limit?: number;
  offset?: number;
}) {
  const { limit: pageSize = 10, offset: skip = 0 } = props;

  const filters = [];
  if (props.systemId) {
    filters.push(eq(healthCheckRuns.systemId, props.systemId));
  }
  if (props.configurationId) {
    filters.push(eq(healthCheckRuns.configurationId, props.configurationId));
  }
  if (props.startDate) {
    filters.push(gte(healthCheckRuns.timestamp, props.startDate));
  }
  if (props.endDate) {
    filters.push(lte(healthCheckRuns.timestamp, props.endDate));
  }

  const whereClause = filters.length > 0 ? and(...filters) : undefined;
  const total = await this.db.$count(healthCheckRuns, whereClause);

  let query = this.db.select().from(healthCheckRuns);
  if (whereClause) {
    // @ts-expect-error drizzle-orm type mismatch
    query = query.where(whereClause);
  }
  const page = await query
    .orderBy(desc(healthCheckRuns.timestamp))
    .limit(pageSize)
    .offset(skip);

  // Return with full result data for manage permission
  const runs = page.map((run) => ({
    id: run.id,
    configurationId: run.configurationId,
    systemId: run.systemId,
    status: run.status,
    result: run.result ?? {},
    timestamp: run.timestamp,
    latencyMs: run.latencyMs ?? undefined,
  }));

  return { runs, total };
}
|
|
546
|
+
|
|
547
|
+
/**
 * Get aggregated health check history with bucketed metrics.
 * Currently aggregates raw data on-the-fly. Will merge with stored aggregates
 * once the retention job populates historical data.
 *
 * Runs in [startDate, endDate] (inclusive) are grouped into hourly or daily
 * buckets and summarised per bucket. Buckets are returned in ascending time
 * order; periods without any run produce no bucket.
 *
 * @param props.bucketSize `"hourly"`, `"daily"`, or `"auto"` (daily when the
 *   range spans more than 7 days, hourly otherwise).
 * @param options.includeAggregatedResult When true and a strategy can be
 *   resolved for the configuration, each bucket also carries
 *   `aggregatedResult` produced by the strategy's `aggregateResult`.
 * @returns `{ buckets }`. Latency metrics are `undefined` for a bucket in
 *   which no run recorded a latency.
 */
async getAggregatedHistory(
  props: {
    systemId: string;
    configurationId: string;
    startDate: Date;
    endDate: Date;
    bucketSize: "hourly" | "daily" | "auto";
  },
  options: { includeAggregatedResult: boolean }
) {
  const { systemId, configurationId, startDate, endDate } = props;

  // Resolve "auto" up front into a const so the narrowed type survives
  // inside the closures below.
  const msPerDay = 1000 * 60 * 60 * 24;
  const bucketSize: "hourly" | "daily" =
    props.bucketSize === "auto"
      ? (endDate.getTime() - startDate.getTime()) / msPerDay > 7
        ? "daily"
        : "hourly"
      : props.bucketSize;

  // Get the configuration to find the strategy
  const config = await this.db.query.healthCheckConfigurations.findFirst({
    where: eq(healthCheckConfigurations.id, configurationId),
  });

  // Look up strategy for aggregateResult function (only if needed)
  const strategy =
    options.includeAggregatedResult && config && this.registry
      ? this.registry.getStrategy(config.strategyId)
      : undefined;

  // Query raw runs within the date range (including result for metadata)
  const runs = await this.db
    .select()
    .from(healthCheckRuns)
    .where(
      and(
        eq(healthCheckRuns.systemId, systemId),
        eq(healthCheckRuns.configurationId, configurationId),
        gte(healthCheckRuns.timestamp, startDate),
        lte(healthCheckRuns.timestamp, endDate)
      )
    )
    .orderBy(healthCheckRuns.timestamp);

  // Group runs into buckets (with full result for metadata aggregation).
  // Map preserves insertion order, so buckets stay in ascending time order.
  const bucketMap = new Map<
    string,
    {
      bucketStart: Date;
      runs: Array<{
        status: "healthy" | "unhealthy" | "degraded";
        latencyMs: number | undefined;
        metadata?: Record<string, unknown>;
      }>;
    }
  >();

  for (const run of runs) {
    const bucketStart = this.getBucketStart(run.timestamp, bucketSize);
    const key = bucketStart.toISOString();

    let bucket = bucketMap.get(key);
    if (!bucket) {
      bucket = { bucketStart, runs: [] };
      bucketMap.set(key, bucket);
    }
    bucket.runs.push({
      status: run.status,
      latencyMs: run.latencyMs ?? undefined,
      metadata: run.result ?? undefined,
    });
  }

  // Calculate metrics for each bucket
  const buckets = [...bucketMap.values()].map((bucket) => {
    const runCount = bucket.runs.length;

    let healthyCount = 0;
    let degradedCount = 0;
    let unhealthyCount = 0;
    const latencies: number[] = [];

    for (const r of bucket.runs) {
      if (r.status === "healthy") {
        healthyCount++;
      } else if (r.status === "degraded") {
        degradedCount++;
      } else if (r.status === "unhealthy") {
        unhealthyCount++;
      }
      // Runs without a recorded latency carry `undefined` here; they must be
      // left out, otherwise avg/min/max become NaN and p95 can be undefined.
      if (typeof r.latencyMs === "number") {
        latencies.push(r.latencyMs);
      }
    }

    const successRate = runCount > 0 ? healthyCount / runCount : 0;

    const hasLatency = latencies.length > 0;
    const avgLatencyMs = hasLatency
      ? Math.round(latencies.reduce((a, b) => a + b, 0) / latencies.length)
      : undefined;
    const minLatencyMs = hasLatency ? Math.min(...latencies) : undefined;
    const maxLatencyMs = hasLatency ? Math.max(...latencies) : undefined;
    const p95LatencyMs = hasLatency
      ? this.calculatePercentile(latencies, 95)
      : undefined;

    // Build base bucket (always included)
    const baseBucket = {
      bucketStart: bucket.bucketStart,
      bucketSize,
      runCount,
      healthyCount,
      degradedCount,
      unhealthyCount,
      successRate,
      avgLatencyMs,
      minLatencyMs,
      maxLatencyMs,
      p95LatencyMs,
    };

    // Only include aggregatedResult if requested and strategy is available
    if (options.includeAggregatedResult && strategy) {
      return {
        ...baseBucket,
        aggregatedResult: strategy.aggregateResult(bucket.runs) as Record<
          string,
          unknown
        >,
      };
    }

    return baseBucket;
  });

  return { buckets };
}
|
|
685
|
+
|
|
686
|
+
/**
 * Truncate a timestamp to the start of its bucket.
 *
 * The input is copied, never mutated. Truncation uses the process's local
 * time zone (`setHours` / `setMinutes`), so bucket boundaries depend on the
 * server's TZ setting.
 *
 * @param timestamp Moment to truncate.
 * @param bucketSize `"daily"` truncates to local midnight; anything else
 *   truncates to the top of the hour.
 * @returns A new Date at the start of the bucket.
 */
private getBucketStart(
  timestamp: Date,
  bucketSize: "hourly" | "daily"
): Date {
  const start = new Date(timestamp);
  switch (bucketSize) {
    case "daily":
      start.setHours(0, 0, 0, 0);
      break;
    default:
      start.setMinutes(0, 0, 0);
  }
  return start;
}
|
|
698
|
+
|
|
699
|
+
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input array is not modified. For an empty array the indexed element
 * does not exist, so the result is `undefined` at runtime despite the
 * declared return type; callers in this class only pass non-empty arrays.
 *
 * @param values Samples in any order.
 * @param percentile Percentile to pick, e.g. 95.
 */
private calculatePercentile(values: number[], percentile: number): number {
  const ascending = [...values].sort((lo, hi) => lo - hi);
  const rank = Math.ceil((percentile / 100) * ascending.length);
  // rank is 1-based; clamp so a percentile of 0 still selects the first item.
  return ascending[Math.max(0, rank - 1)];
}
|
|
704
|
+
|
|
705
|
+
/**
 * Convert a configuration table row into the API-facing shape.
 *
 * Copies only the fields listed below; any other column on the row (for
 * example `isTemplate`) is left out.
 */
private mapConfig(
  row: InferSelectModel<typeof healthCheckConfigurations>
): HealthCheckConfiguration {
  const {
    id,
    name,
    strategyId,
    config,
    intervalSeconds,
    createdAt,
    updatedAt,
  } = row;

  return {
    id,
    name,
    strategyId,
    config,
    intervalSeconds,
    createdAt,
    updatedAt,
  };
}
|
|
718
|
+
}
|