@checkstack/healthcheck-backend 1.1.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/service.ts CHANGED
@@ -6,7 +6,18 @@ import {
6
6
  HealthCheckStatus,
7
7
  RetentionConfig,
8
8
  type HealthCheckRunResult,
9
+ type NotificationPolicy,
10
+ NotificationPolicySchema,
11
+ DEFAULT_NOTIFICATION_POLICY,
9
12
  } from "@checkstack/healthcheck-common";
13
+ import type { ConfigService } from "@checkstack/backend-api";
14
+ import type { InferClient } from "@checkstack/common";
15
+ import type { CatalogApi } from "@checkstack/catalog-common";
16
+ import {
17
+ notificationDefaultsConfigV1,
18
+ NOTIFICATION_DEFAULTS_CONFIG_ID,
19
+ NOTIFICATION_DEFAULTS_CONFIG_VERSION,
20
+ } from "./notification-defaults-config";
10
21
  import {
11
22
  healthCheckConfigurations,
12
23
  systemHealthChecks,
@@ -15,7 +26,16 @@ import {
15
26
  VersionedStateThresholds,
16
27
  } from "./schema";
17
28
  import * as schema from "./schema";
18
- import { eq, and, InferSelectModel, desc, gte, lte, isNull } from "drizzle-orm";
29
+ import {
30
+ eq,
31
+ and,
32
+ InferSelectModel,
33
+ desc,
34
+ gte,
35
+ lte,
36
+ isNull,
37
+ inArray,
38
+ } from "drizzle-orm";
19
39
  import { ORPCError } from "@orpc/server";
20
40
  import { evaluateHealthStatus } from "./state-evaluator";
21
41
  import { stateThresholds } from "./state-thresholds-migrations";
@@ -38,6 +58,10 @@ import {
38
58
  // Drizzle type helper - uses SafeDatabase to prevent relational query API usage
39
59
  type Db = SafeDatabase<typeof schema>;
40
60
 
61
+ // Catalog client type used to resolve human-readable system names for
62
+ // satellite assignment run-context. Optional on the service.
63
+ type CatalogClient = InferClient<typeof CatalogApi>;
64
+
41
65
  interface SystemCheckStatus {
42
66
  configurationId: string;
43
67
  configurationName: string;
@@ -57,8 +81,62 @@ export class HealthCheckService {
57
81
  private db: Db,
58
82
  private registry: HealthCheckRegistry,
59
83
  private collectorRegistry: CollectorRegistry,
84
+ /**
85
+ * Optional — only required by code paths that resolve platform
86
+ * defaults (notification policy fallback). When absent, callers
87
+ * fall back to the compile-time `DEFAULT_NOTIFICATION_POLICY`.
88
+ * Kept optional so existing GitOps-only / test constructions don't
89
+ * have to plumb it through.
90
+ */
91
+ private configService?: ConfigService,
92
+ /**
93
+ * Optional — used to resolve human-readable system names when building
94
+ * satellite assignment run-context. When absent (e.g. GitOps-only /
95
+ * test constructions), `systemName` falls back to the `systemId`.
96
+ */
97
+ private catalogClient?: CatalogClient,
60
98
  ) {}
61
99
 
/**
 * Look up the platform-wide notification policy defaults.
 *
 * When the service was constructed without a `configService`, the
 * compile-time `DEFAULT_NOTIFICATION_POLICY` is returned straight away.
 * Otherwise the value stored under `NOTIFICATION_DEFAULTS_CONFIG_ID` is
 * read, and the compile-time default is substituted whenever that read
 * yields `null` or `undefined`.
 *
 * NOTE(review): any defaulting of missing fields presumably happens
 * inside `configService.get` via `notificationDefaultsConfigV1` —
 * confirm against the ConfigService implementation.
 *
 * @returns The stored platform defaults, or the compile-time defaults.
 */
async getPlatformNotificationDefaults(): Promise<NotificationPolicy> {
  const { configService } = this;
  if (configService) {
    const persisted = await configService.get(
      NOTIFICATION_DEFAULTS_CONFIG_ID,
      notificationDefaultsConfigV1,
      NOTIFICATION_DEFAULTS_CONFIG_VERSION,
    );
    return persisted ?? DEFAULT_NOTIFICATION_POLICY;
  }
  // No config service wired in: only the compile-time defaults exist.
  return DEFAULT_NOTIFICATION_POLICY;
}
117
+
/**
 * Store a new set of platform-wide notification policy defaults under
 * `NOTIFICATION_DEFAULTS_CONFIG_ID`.
 *
 * NOTE(review): the original authors state that in-flight
 * auto-incidents are unaffected because their cooldown is snapshotted
 * per-row at open time; that logic is not visible here — confirm.
 *
 * @param policy The policy to persist as the platform default.
 * @throws Error when the service was constructed without a
 *   `configService`, since there is nowhere to persist the value.
 */
async setPlatformNotificationDefaults(
  policy: NotificationPolicy,
): Promise<void> {
  const { configService } = this;
  if (configService) {
    await configService.set(
      NOTIFICATION_DEFAULTS_CONFIG_ID,
      notificationDefaultsConfigV1,
      NOTIFICATION_DEFAULTS_CONFIG_VERSION,
      policy,
    );
    return;
  }
  throw new Error(
    "ConfigService not configured; cannot persist platform notification defaults",
  );
}
139
+
62
140
  async createConfiguration(
63
141
  data: CreateHealthCheckConfiguration,
64
142
  ): Promise<HealthCheckConfiguration> {
@@ -133,6 +211,7 @@ export class HealthCheckService {
133
211
  stateThresholds?: StateThresholds;
134
212
  satelliteIds?: string[];
135
213
  includeLocal?: boolean;
214
+ notificationPolicy?: NotificationPolicy;
136
215
  }) {
137
216
  const {
138
217
  systemId,
@@ -141,6 +220,7 @@ export class HealthCheckService {
141
220
  stateThresholds: stateThresholds_,
142
221
  satelliteIds,
143
222
  includeLocal = true,
223
+ notificationPolicy,
144
224
  } = props;
145
225
 
146
226
  // Wrap thresholds in versioned config if provided
@@ -156,6 +236,7 @@ export class HealthCheckService {
156
236
  stateThresholds: versionedThresholds,
157
237
  satelliteIds: satelliteIds ?? undefined,
158
238
  includeLocal,
239
+ notificationPolicy: notificationPolicy ?? undefined,
159
240
  })
160
241
  .onConflictDoUpdate({
161
242
  target: [
@@ -167,11 +248,41 @@ export class HealthCheckService {
167
248
  stateThresholds: versionedThresholds,
168
249
  satelliteIds: satelliteIds ?? undefined,
169
250
  includeLocal,
251
+ notificationPolicy: notificationPolicy ?? undefined,
170
252
  updatedAt: new Date(),
171
253
  },
172
254
  });
173
255
  }
174
256
 
/**
 * Toggle only the `enabled` column (and bump `updatedAt`) on the
 * `systemHealthChecks` row identified by the (system, configuration)
 * pair. No other column is written by this statement.
 *
 * The original authors added this so the `enable_assignment` /
 * `disable_assignment` automation actions do not need to go through
 * `associateSystem` with a sparse partial.
 *
 * @param systemId System side of the assignment.
 * @param configurationId Health check configuration side of the assignment.
 * @param enabled New value for the `enabled` flag.
 * @returns `true` if at least one row was updated, `false` if no row
 *   matched the pair.
 */
async setAssignmentEnabled(
  systemId: string,
  configurationId: string,
  enabled: boolean,
): Promise<boolean> {
  const assignmentMatch = and(
    eq(systemHealthChecks.systemId, systemId),
    eq(systemHealthChecks.configurationId, configurationId),
  );

  // `returning` hands back one entry per updated row, which is how we
  // tell "updated" apart from "no such assignment".
  const touchedRows = await this.db
    .update(systemHealthChecks)
    .set({ enabled, updatedAt: new Date() })
    .where(assignmentMatch)
    .returning({ systemId: systemHealthChecks.systemId });

  return touchedRows.length !== 0;
}
285
+
175
286
  async disassociateSystem(systemId: string, configurationId: string) {
176
287
  await this.db
177
288
  .delete(systemHealthChecks)
@@ -282,6 +393,7 @@ export class HealthCheckService {
282
393
  stateThresholds: systemHealthChecks.stateThresholds,
283
394
  satelliteIds: systemHealthChecks.satelliteIds,
284
395
  includeLocal: systemHealthChecks.includeLocal,
396
+ notificationPolicy: systemHealthChecks.notificationPolicy,
285
397
  })
286
398
  .from(systemHealthChecks)
287
399
  .innerJoin(
@@ -304,11 +416,55 @@ export class HealthCheckService {
304
416
  stateThresholds: thresholds,
305
417
  satelliteIds: row.satelliteIds ?? undefined,
306
418
  includeLocal: row.includeLocal,
419
+ notificationPolicy: row.notificationPolicy ?? undefined,
307
420
  });
308
421
  }
309
422
  return results;
310
423
  }
311
424
 
/**
 * Work out the effective notification policy for one
 * (system, configuration) assignment.
 *
 * Resolution order:
 *
 *  1. The assignment row's own `notificationPolicy`, when the row exists
 *     and the column is not `null`. The stored value is run through
 *     `NotificationPolicySchema.parse` before being returned.
 *  2. Otherwise whatever `getPlatformNotificationDefaults()` yields
 *     (stored platform defaults, then the compile-time default).
 *
 * A row is therefore either fully overridden or fully inherited; there
 * is no per-field merge between the override and the platform defaults.
 *
 * @returns The resolved policy for the assignment.
 */
async getAssignmentNotificationPolicy({
  systemId,
  configurationId,
}: {
  systemId: string;
  configurationId: string;
}): Promise<NotificationPolicy> {
  const rows = await this.db
    .select({
      notificationPolicy: systemHealthChecks.notificationPolicy,
    })
    .from(systemHealthChecks)
    .where(
      and(
        eq(systemHealthChecks.systemId, systemId),
        eq(systemHealthChecks.configurationId, configurationId),
      ),
    )
    .limit(1);
  const [assignment] = rows;

  // An explicit per-assignment override wins.
  if (assignment && assignment.notificationPolicy !== null) {
    return NotificationPolicySchema.parse(assignment.notificationPolicy);
  }

  // Missing row or null column: inherit the platform defaults.
  return this.getPlatformNotificationDefaults();
}
467
+
312
468
  /**
313
469
  * Get the evaluated health status for a system based on configured thresholds.
314
470
  * Aggregates status from all health check configurations for this system.
@@ -489,6 +645,7 @@ export class HealthCheckService {
489
645
  startDate?: Date;
490
646
  endDate?: Date;
491
647
  sourceFilter?: string;
648
+ statusFilter?: HealthCheckStatus[];
492
649
  limit?: number;
493
650
  offset?: number;
494
651
  sortOrder: "asc" | "desc";
@@ -499,6 +656,7 @@ export class HealthCheckService {
499
656
  startDate,
500
657
  endDate,
501
658
  sourceFilter,
659
+ statusFilter,
502
660
  limit = 10,
503
661
  offset = 0,
504
662
  sortOrder,
@@ -518,6 +676,11 @@ export class HealthCheckService {
518
676
  conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
519
677
  }
520
678
 
679
+ // Status filtering (e.g. only failing runs)
680
+ if (statusFilter && statusFilter.length > 0) {
681
+ conditions.push(inArray(healthCheckRuns.status, statusFilter));
682
+ }
683
+
521
684
  // Build where clause
522
685
  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
523
686
 
@@ -563,6 +726,7 @@ export class HealthCheckService {
563
726
  startDate?: Date;
564
727
  endDate?: Date;
565
728
  sourceFilter?: string;
729
+ statusFilter?: HealthCheckStatus[];
566
730
  limit?: number;
567
731
  offset?: number;
568
732
  sortOrder: "asc" | "desc";
@@ -573,6 +737,7 @@ export class HealthCheckService {
573
737
  startDate,
574
738
  endDate,
575
739
  sourceFilter,
740
+ statusFilter,
576
741
  limit = 10,
577
742
  offset = 0,
578
743
  sortOrder,
@@ -592,6 +757,11 @@ export class HealthCheckService {
592
757
  conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
593
758
  }
594
759
 
760
+ // Status filtering (e.g. only failing runs)
761
+ if (statusFilter && statusFilter.length > 0) {
762
+ conditions.push(inArray(healthCheckRuns.status, statusFilter));
763
+ }
764
+
595
765
  const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
596
766
  const total = await this.db.$count(healthCheckRuns, whereClause);
597
767
 
@@ -1068,6 +1238,27 @@ export class HealthCheckService {
1068
1238
 
1069
1239
  if (matchingAssociations.length === 0) return [];
1070
1240
 
// Look up each distinct systemId's display name at most once per call.
// The name defaults to the systemId itself and is only replaced when a
// catalog client is available and `getSystem` returns a system; a
// failed lookup is ignored so the systemId fallback is kept.
const systemNameCache = new Map<string, string>();
const resolveSystemName = async (systemId: string): Promise<string> => {
  const known = systemNameCache.get(systemId);
  if (known !== undefined) {
    return known;
  }

  let resolved = systemId;
  const catalog = this.catalogClient;
  if (catalog) {
    try {
      const system = await catalog.getSystem({ systemId });
      if (system) {
        resolved = system.name;
      }
    } catch {
      // Best effort: keep the systemId when the catalog lookup throws.
    }
  }

  // Cache fallbacks too, so a failing lookup is not retried per row.
  systemNameCache.set(systemId, resolved);
  return resolved;
};
1261
+
1071
1262
  // Get configurations for each matching association
1072
1263
  const assignments = [];
1073
1264
  for (const assoc of matchingAssociations) {
@@ -1085,6 +1276,9 @@ export class HealthCheckService {
1085
1276
  config: config.config,
1086
1277
  collectors: config.collectors ?? undefined,
1087
1278
  intervalSeconds: config.intervalSeconds,
1279
+ // Curated run-context metadata exposed to satellite collectors.
1280
+ configName: config.name,
1281
+ systemName: await resolveSystemName(assoc.systemId),
1088
1282
  });
1089
1283
  }
1090
1284
 
package/tsconfig.json CHANGED
@@ -4,6 +4,9 @@
4
4
  "src"
5
5
  ],
6
6
  "references": [
7
+ {
8
+ "path": "../automation-backend"
9
+ },
7
10
  {
8
11
  "path": "../backend-api"
9
12
  },
@@ -38,10 +41,10 @@
38
41
  "path": "../healthcheck-common"
39
42
  },
40
43
  {
41
- "path": "../incident-common"
44
+ "path": "../incident-backend"
42
45
  },
43
46
  {
44
- "path": "../integration-backend"
47
+ "path": "../incident-common"
45
48
  },
46
49
  {
47
50
  "path": "../maintenance-common"