@checkstack/healthcheck-backend 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +99 -0
- package/drizzle/0012_fair_boomer.sql +1 -0
- package/drizzle/0013_clean_fabian_cortez.sql +20 -0
- package/drizzle/0014_chilly_ultragirl.sql +2 -0
- package/drizzle/meta/0012_snapshot.json +447 -0
- package/drizzle/meta/0013_snapshot.json +615 -0
- package/drizzle/meta/0014_snapshot.json +648 -0
- package/drizzle/meta/_journal.json +21 -0
- package/package.json +21 -20
- package/src/auto-incident-close-job.ts +164 -0
- package/src/auto-incident.test.ts +196 -0
- package/src/auto-incident.ts +332 -0
- package/src/healthcheck-gitops-kinds.test.ts +93 -0
- package/src/healthcheck-gitops-kinds.ts +34 -0
- package/src/index.ts +43 -0
- package/src/notification-defaults-config.ts +10 -0
- package/src/notification-policy.test.ts +104 -0
- package/src/notification-policy.ts +56 -0
- package/src/queue-executor.ts +304 -15
- package/src/router.test.ts +7 -0
- package/src/router.ts +21 -2
- package/src/schema.ts +76 -0
- package/src/service-notification-policy.test.ts +174 -0
- package/src/service.ts +130 -1
- package/tsconfig.json +3 -0
package/src/schema.ts
CHANGED
|
@@ -9,10 +9,12 @@ import {
|
|
|
9
9
|
timestamp,
|
|
10
10
|
primaryKey,
|
|
11
11
|
unique,
|
|
12
|
+
index,
|
|
12
13
|
} from "drizzle-orm/pg-core";
|
|
13
14
|
import type {
|
|
14
15
|
StateThresholds,
|
|
15
16
|
CollectorConfigEntry,
|
|
17
|
+
NotificationPolicy,
|
|
16
18
|
} from "@checkstack/healthcheck-common";
|
|
17
19
|
import type { VersionedRecord } from "@checkstack/backend-api";
|
|
18
20
|
|
|
@@ -100,6 +102,12 @@ export const systemHealthChecks = pgTable(
|
|
|
100
102
|
* Defaults to true. Only relevant when satelliteIds is set.
|
|
101
103
|
*/
|
|
102
104
|
includeLocal: boolean("include_local").default(true).notNull(),
|
|
105
|
+
/**
|
|
106
|
+
* Per-association notification policy. Null falls back to platform
|
|
107
|
+
* defaults (no suppression).
|
|
108
|
+
*/
|
|
109
|
+
notificationPolicy:
|
|
110
|
+
jsonb("notification_policy").$type<NotificationPolicy>(),
|
|
103
111
|
createdAt: timestamp("created_at").defaultNow().notNull(),
|
|
104
112
|
updatedAt: timestamp("updated_at").defaultNow().notNull(),
|
|
105
113
|
},
|
|
@@ -108,6 +116,74 @@ export const systemHealthChecks = pgTable(
|
|
|
108
116
|
}),
|
|
109
117
|
);
|
|
110
118
|
|
|
119
|
+
/**
 * One row per recorded unhealthy transition of a (configuration, system)
 * pair, i.e. each time the check's *evaluated* state moves from
 * non-unhealthy to unhealthy. The rows feed the per-check incident
 * threshold ("N transitions in M minutes").
 *
 * Rows are removed automatically when the owning configuration is
 * deleted (cascade). They are also expected to be pruned by the
 * retention job alongside raw runs (that job is not part of this file).
 */
export const healthCheckUnhealthyTransitions = pgTable(
  "health_check_unhealthy_transitions",
  {
    id: uuid("id").primaryKey().defaultRandom(),
    configurationId: uuid("configuration_id")
      .notNull()
      .references(() => healthCheckConfigurations.id, { onDelete: "cascade" }),
    systemId: text("system_id").notNull(),
    // Defaults to the insert time when the writer does not supply one.
    transitionedAt: timestamp("transitioned_at").defaultNow().notNull(),
  },
  (table) => ({
    // Composite index backing the threshold count query:
    // WHERE configuration_id = ? AND system_id = ? AND transitioned_at > ?
    lookupIdx: index("health_check_unhealthy_transitions_lookup_idx").on(
      table.configurationId,
      table.systemId,
      table.transitionedAt,
    ),
  }),
);
|
|
143
|
+
|
|
144
|
+
/**
 * Links every auto-opened incident to the system and health check
 * configuration that triggered it.
 *
 * `closedAt` is null for as long as the incident is active. According to
 * the design notes, the auto-close worker fills it in once the linked
 * system has stayed healthy for the cooldown period.
 *
 * `incidentId` deliberately has no foreign key: the incident table
 * belongs to another plugin's schema, so it is treated as a soft
 * reference. The auto-close worker is expected to tolerate incidents
 * that have since been deleted.
 */
export const healthCheckAutoIncidents = pgTable(
  "health_check_auto_incidents",
  {
    id: uuid("id").primaryKey().defaultRandom(),
    // Soft reference to the incident plugin's table (no FK, see above).
    incidentId: uuid("incident_id").notNull(),
    systemId: text("system_id").notNull(),
    configurationId: uuid("configuration_id")
      .notNull()
      .references(() => healthCheckConfigurations.id, { onDelete: "cascade" }),
    openedAt: timestamp("opened_at").defaultNow().notNull(),
    // Nullable: null means the incident is still open.
    closedAt: timestamp("closed_at"),
    /**
     * Snapshot of the auto-close cooldown taken at open time. Keeping it
     * on the row means a later policy change cannot retroactively alter
     * how an in-flight incident is closed. `null` means "never
     * auto-close": the worker skips the incident and an operator has to
     * resolve it manually.
     */
    cooldownMinutes: integer("cooldown_minutes"),
  },
  (table) => ({
    // Backs the "does this system have an active auto-incident?" check.
    activeBySystemIdx: index(
      "health_check_auto_incidents_active_by_system_idx",
    ).on(table.systemId, table.closedAt),
    // Backs the "most recent close for this assignment" lookup used by
    // the require-recovery-before-reopen check.
    lastCloseByAssignmentIdx: index(
      "health_check_auto_incidents_last_close_idx",
    ).on(table.configurationId, table.systemId, table.closedAt),
  }),
);
|
|
186
|
+
|
|
111
187
|
export const healthCheckRuns = pgTable("health_check_runs", {
|
|
112
188
|
id: uuid("id").primaryKey().defaultRandom(),
|
|
113
189
|
configurationId: uuid("configuration_id")
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import { describe, it, expect, mock } from "bun:test";
|
|
2
|
+
import { HealthCheckService } from "./service";
|
|
3
|
+
import { createMockDb } from "@checkstack/test-utils-backend";
|
|
4
|
+
import {
|
|
5
|
+
DEFAULT_NOTIFICATION_POLICY,
|
|
6
|
+
type NotificationPolicy,
|
|
7
|
+
} from "@checkstack/healthcheck-common";
|
|
8
|
+
|
|
9
|
+
/**
 * Creates a `HealthCheckService` backed by a stubbed database that only
 * supports the `select().from().where().limit()` chain used by
 * `getAssignmentNotificationPolicy`. Whatever is passed as `rows` is
 * returned unchanged by the final `.limit()` call.
 *
 * @param rows - Rows the stubbed query resolves to.
 * @param platformDefault - When given, a stub ConfigService is injected
 *   whose `get` resolves to this policy; when omitted, the service is
 *   built without a ConfigService at all.
 */
function buildServiceWithRows(
  rows: unknown[],
  platformDefault?: NotificationPolicy,
): HealthCheckService {
  const db = createMockDb();

  // Build the query chain from the innermost call outwards.
  const limit = mock(async () => rows);
  const where = mock(() => ({ limit }));
  const from = mock(() => ({ where }));
  const select = mock(() => ({ from }));
  (db as { select: unknown }).select = select;

  // Stays undefined unless a platform default was supplied, mirroring a
  // service constructed without a ConfigService.
  let configService: undefined;
  if (platformDefault !== undefined) {
    configService = {
      get: mock(async () => platformDefault),
      set: mock(async () => {}),
    } as never;
  }

  return new HealthCheckService(
    db as never,
    {} as never,
    {} as never,
    configService,
  );
}
|
|
41
|
+
|
|
42
|
+
describe("HealthCheckService.getAssignmentNotificationPolicy", () => {
  // Every case resolves the policy for the same (system, configuration)
  // pair; only the stubbed rows and platform defaults vary.
  const resolvePolicy = (service: HealthCheckService) =>
    service.getAssignmentNotificationPolicy({
      systemId: "sys-1",
      configurationId: "cfg-1",
    });

  it("falls back to compile-time defaults when no association and no platform defaults", async () => {
    const resolved = await resolvePolicy(buildServiceWithRows([]));

    expect(resolved).toEqual(DEFAULT_NOTIFICATION_POLICY);
  });

  it("falls back to platform defaults when association exists but notificationPolicy is null", async () => {
    const customPlatformDefault: NotificationPolicy = {
      ...DEFAULT_NOTIFICATION_POLICY,
      autoCloseAfterMinutes: 120,
      sustainedUnhealthyTrigger: { enabled: true, durationMinutes: 15 },
    };

    const resolved = await resolvePolicy(
      buildServiceWithRows(
        [{ notificationPolicy: null }],
        customPlatformDefault,
      ),
    );

    expect(resolved.autoCloseAfterMinutes).toBe(120);
    expect(resolved.sustainedUnhealthyTrigger.durationMinutes).toBe(15);
  });

  it("falls back to platform defaults when no association exists", async () => {
    const customPlatformDefault: NotificationPolicy = {
      ...DEFAULT_NOTIFICATION_POLICY,
      flappingTrigger: { enabled: true, transitions: 10, windowMinutes: 30 },
    };

    const resolved = await resolvePolicy(
      buildServiceWithRows([], customPlatformDefault),
    );

    expect(resolved.flappingTrigger).toEqual({
      enabled: true,
      transitions: 10,
      windowMinutes: 30,
    });
  });

  it("prefers per-assignment override over platform defaults", async () => {
    const platformDefault: NotificationPolicy = {
      ...DEFAULT_NOTIFICATION_POLICY,
      autoOpenIncidentOnUnhealthy: false,
    };
    const assignmentOverride = {
      suppressDeEscalations: true,
      // Deliberately the opposite of the platform default above.
      autoOpenIncidentOnUnhealthy: true,
      useNotificationSuppression: true,
      skipDuringMaintenance: true,
      sustainedUnhealthyTrigger: { enabled: true, durationMinutes: 30 },
      flappingTrigger: { enabled: true, transitions: 3, windowMinutes: 60 },
      autoCloseAfterMinutes: 30,
    };

    const resolved = await resolvePolicy(
      buildServiceWithRows(
        [{ notificationPolicy: assignmentOverride }],
        platformDefault,
      ),
    );

    expect(resolved.autoOpenIncidentOnUnhealthy).toBe(true);
    expect(resolved.suppressDeEscalations).toBe(true);
  });

  it("fills in defaults for partial stored policies", async () => {
    // Rows written by the first migration may carry nothing but
    // `suppressDeEscalations`; every other field has to be defaulted in.
    const storedRows = [
      { notificationPolicy: { suppressDeEscalations: true } },
    ];

    const resolved = await resolvePolicy(buildServiceWithRows(storedRows));

    expect(resolved.suppressDeEscalations).toBe(true);
    expect(resolved.autoOpenIncidentOnUnhealthy).toBe(true);
    expect(resolved.useNotificationSuppression).toBe(true);
    expect(resolved.skipDuringMaintenance).toBe(true);
    expect(resolved.sustainedUnhealthyTrigger).toEqual({
      enabled: true,
      durationMinutes: 30,
    });
    expect(resolved.flappingTrigger).toEqual({
      enabled: true,
      transitions: 3,
      windowMinutes: 60,
    });
    expect(resolved.autoCloseAfterMinutes).toBe(30);
  });

  it("returns explicit values exactly when fully specified", async () => {
    const storedRows = [
      {
        notificationPolicy: {
          suppressDeEscalations: false,
          autoOpenIncidentOnUnhealthy: false,
          useNotificationSuppression: false,
          skipDuringMaintenance: false,
          sustainedUnhealthyTrigger: { enabled: false, durationMinutes: 15 },
          flappingTrigger: {
            enabled: true,
            transitions: 5,
            windowMinutes: 30,
          },
          autoCloseAfterMinutes: null,
        },
      },
    ];

    const resolved = await resolvePolicy(buildServiceWithRows(storedRows));

    expect(resolved.autoOpenIncidentOnUnhealthy).toBe(false);
    expect(resolved.skipDuringMaintenance).toBe(false);
    expect(resolved.sustainedUnhealthyTrigger).toEqual({
      enabled: false,
      durationMinutes: 15,
    });
    expect(resolved.flappingTrigger).toEqual({
      enabled: true,
      transitions: 5,
      windowMinutes: 30,
    });
    expect(resolved.autoCloseAfterMinutes).toBeNull();
  });
});
|
package/src/service.ts
CHANGED
|
@@ -6,7 +6,16 @@ import {
|
|
|
6
6
|
HealthCheckStatus,
|
|
7
7
|
RetentionConfig,
|
|
8
8
|
type HealthCheckRunResult,
|
|
9
|
+
type NotificationPolicy,
|
|
10
|
+
NotificationPolicySchema,
|
|
11
|
+
DEFAULT_NOTIFICATION_POLICY,
|
|
9
12
|
} from "@checkstack/healthcheck-common";
|
|
13
|
+
import type { ConfigService } from "@checkstack/backend-api";
|
|
14
|
+
import {
|
|
15
|
+
notificationDefaultsConfigV1,
|
|
16
|
+
NOTIFICATION_DEFAULTS_CONFIG_ID,
|
|
17
|
+
NOTIFICATION_DEFAULTS_CONFIG_VERSION,
|
|
18
|
+
} from "./notification-defaults-config";
|
|
10
19
|
import {
|
|
11
20
|
healthCheckConfigurations,
|
|
12
21
|
systemHealthChecks,
|
|
@@ -15,7 +24,16 @@ import {
|
|
|
15
24
|
VersionedStateThresholds,
|
|
16
25
|
} from "./schema";
|
|
17
26
|
import * as schema from "./schema";
|
|
18
|
-
import {
|
|
27
|
+
import {
|
|
28
|
+
eq,
|
|
29
|
+
and,
|
|
30
|
+
InferSelectModel,
|
|
31
|
+
desc,
|
|
32
|
+
gte,
|
|
33
|
+
lte,
|
|
34
|
+
isNull,
|
|
35
|
+
inArray,
|
|
36
|
+
} from "drizzle-orm";
|
|
19
37
|
import { ORPCError } from "@orpc/server";
|
|
20
38
|
import { evaluateHealthStatus } from "./state-evaluator";
|
|
21
39
|
import { stateThresholds } from "./state-thresholds-migrations";
|
|
@@ -57,8 +75,56 @@ export class HealthCheckService {
|
|
|
57
75
|
private db: Db,
|
|
58
76
|
private registry: HealthCheckRegistry,
|
|
59
77
|
private collectorRegistry: CollectorRegistry,
|
|
78
|
+
/**
|
|
79
|
+
* Optional — only required by code paths that resolve platform
|
|
80
|
+
* defaults (notification policy fallback). When absent, callers
|
|
81
|
+
* fall back to the compile-time `DEFAULT_NOTIFICATION_POLICY`.
|
|
82
|
+
* Kept optional so existing GitOps-only / test constructions don't
|
|
83
|
+
* have to plumb it through.
|
|
84
|
+
*/
|
|
85
|
+
private configService?: ConfigService,
|
|
60
86
|
) {}
|
|
61
87
|
|
|
88
|
+
/**
 * Returns the platform-wide notification policy defaults.
 *
 * Falls back to the compile-time `DEFAULT_NOTIFICATION_POLICY` when the
 * service was constructed without a `configService`, or when the config
 * lookup yields null/undefined (nothing persisted yet).
 *
 * NOTE(review): the stored value is returned as-is here; any defaulting
 * of missing fields presumably happens inside `ConfigService.get` via
 * `notificationDefaultsConfigV1` — confirm.
 *
 * @returns The persisted defaults, or the compile-time defaults.
 */
async getPlatformNotificationDefaults(): Promise<NotificationPolicy> {
  const configService = this.configService;
  if (!configService) {
    return DEFAULT_NOTIFICATION_POLICY;
  }

  const persisted = await configService.get(
    NOTIFICATION_DEFAULTS_CONFIG_ID,
    notificationDefaultsConfigV1,
    NOTIFICATION_DEFAULTS_CONFIG_VERSION,
  );
  if (persisted === null || persisted === undefined) {
    return DEFAULT_NOTIFICATION_POLICY;
  }
  return persisted;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Stores the platform-wide notification policy defaults through the
 * `ConfigService`.
 *
 * Assignments whose `notificationPolicy` is null resolve to the platform
 * defaults, so they pick up the new values on their next lookup.
 * Auto-incidents that are already open keep the cooldown that was
 * snapshotted on their own row when they were opened.
 *
 * @param policy - The complete policy to persist as the new defaults.
 * @throws Error when the service was constructed without a
 *   `configService`.
 */
async setPlatformNotificationDefaults(
  policy: NotificationPolicy,
): Promise<void> {
  const configService = this.configService;
  if (!configService) {
    throw new Error(
      "ConfigService not configured; cannot persist platform notification defaults",
    );
  }

  await configService.set(
    NOTIFICATION_DEFAULTS_CONFIG_ID,
    notificationDefaultsConfigV1,
    NOTIFICATION_DEFAULTS_CONFIG_VERSION,
    policy,
  );
}
|
|
127
|
+
|
|
62
128
|
async createConfiguration(
|
|
63
129
|
data: CreateHealthCheckConfiguration,
|
|
64
130
|
): Promise<HealthCheckConfiguration> {
|
|
@@ -133,6 +199,7 @@ export class HealthCheckService {
|
|
|
133
199
|
stateThresholds?: StateThresholds;
|
|
134
200
|
satelliteIds?: string[];
|
|
135
201
|
includeLocal?: boolean;
|
|
202
|
+
notificationPolicy?: NotificationPolicy;
|
|
136
203
|
}) {
|
|
137
204
|
const {
|
|
138
205
|
systemId,
|
|
@@ -141,6 +208,7 @@ export class HealthCheckService {
|
|
|
141
208
|
stateThresholds: stateThresholds_,
|
|
142
209
|
satelliteIds,
|
|
143
210
|
includeLocal = true,
|
|
211
|
+
notificationPolicy,
|
|
144
212
|
} = props;
|
|
145
213
|
|
|
146
214
|
// Wrap thresholds in versioned config if provided
|
|
@@ -156,6 +224,7 @@ export class HealthCheckService {
|
|
|
156
224
|
stateThresholds: versionedThresholds,
|
|
157
225
|
satelliteIds: satelliteIds ?? undefined,
|
|
158
226
|
includeLocal,
|
|
227
|
+
notificationPolicy: notificationPolicy ?? undefined,
|
|
159
228
|
})
|
|
160
229
|
.onConflictDoUpdate({
|
|
161
230
|
target: [
|
|
@@ -167,6 +236,7 @@ export class HealthCheckService {
|
|
|
167
236
|
stateThresholds: versionedThresholds,
|
|
168
237
|
satelliteIds: satelliteIds ?? undefined,
|
|
169
238
|
includeLocal,
|
|
239
|
+
notificationPolicy: notificationPolicy ?? undefined,
|
|
170
240
|
updatedAt: new Date(),
|
|
171
241
|
},
|
|
172
242
|
});
|
|
@@ -282,6 +352,7 @@ export class HealthCheckService {
|
|
|
282
352
|
stateThresholds: systemHealthChecks.stateThresholds,
|
|
283
353
|
satelliteIds: systemHealthChecks.satelliteIds,
|
|
284
354
|
includeLocal: systemHealthChecks.includeLocal,
|
|
355
|
+
notificationPolicy: systemHealthChecks.notificationPolicy,
|
|
285
356
|
})
|
|
286
357
|
.from(systemHealthChecks)
|
|
287
358
|
.innerJoin(
|
|
@@ -304,11 +375,55 @@ export class HealthCheckService {
|
|
|
304
375
|
stateThresholds: thresholds,
|
|
305
376
|
satelliteIds: row.satelliteIds ?? undefined,
|
|
306
377
|
includeLocal: row.includeLocal,
|
|
378
|
+
notificationPolicy: row.notificationPolicy ?? undefined,
|
|
307
379
|
});
|
|
308
380
|
}
|
|
309
381
|
return results;
|
|
310
382
|
}
|
|
311
383
|
|
|
384
|
+
/**
 * Resolves the effective notification policy for one
 * (system, configuration) association.
 *
 * Resolution order:
 *
 * 1. The per-assignment override stored in
 *    `systemHealthChecks.notificationPolicy`, when the row exists and the
 *    column is non-null. The stored value is run through
 *    `NotificationPolicySchema.parse`, which supplies schema defaults
 *    for missing keys.
 * 2. The platform-wide defaults from `getPlatformNotificationDefaults`
 *    (which itself falls back to `DEFAULT_NOTIFICATION_POLICY`).
 *
 * Overrides are all-or-nothing: a row either carries its own policy or
 * inherits the platform policy entirely. Setting the column back to
 * null reverts the assignment to the platform defaults.
 *
 * @param systemId - System side of the association.
 * @param configurationId - Health check configuration side of the
 *   association.
 * @returns The resolved policy.
 * @throws Whatever `NotificationPolicySchema.parse` throws when the
 *   stored override does not match the schema.
 */
async getAssignmentNotificationPolicy({
  systemId,
  configurationId,
}: {
  systemId: string;
  configurationId: string;
}): Promise<NotificationPolicy> {
  const matchesAssignment = and(
    eq(systemHealthChecks.systemId, systemId),
    eq(systemHealthChecks.configurationId, configurationId),
  );

  const matches = await this.db
    .select({ notificationPolicy: systemHealthChecks.notificationPolicy })
    .from(systemHealthChecks)
    .where(matchesAssignment)
    .limit(1);
  const assignment = matches[0];

  // An explicit override wins. Otherwise (no assignment row, or a row
  // whose policy is null) the assignment inherits the platform defaults.
  if (assignment && assignment.notificationPolicy !== null) {
    return NotificationPolicySchema.parse(assignment.notificationPolicy);
  }
  return this.getPlatformNotificationDefaults();
}
|
|
426
|
+
|
|
312
427
|
/**
|
|
313
428
|
* Get the evaluated health status for a system based on configured thresholds.
|
|
314
429
|
* Aggregates status from all health check configurations for this system.
|
|
@@ -489,6 +604,7 @@ export class HealthCheckService {
|
|
|
489
604
|
startDate?: Date;
|
|
490
605
|
endDate?: Date;
|
|
491
606
|
sourceFilter?: string;
|
|
607
|
+
statusFilter?: HealthCheckStatus[];
|
|
492
608
|
limit?: number;
|
|
493
609
|
offset?: number;
|
|
494
610
|
sortOrder: "asc" | "desc";
|
|
@@ -499,6 +615,7 @@ export class HealthCheckService {
|
|
|
499
615
|
startDate,
|
|
500
616
|
endDate,
|
|
501
617
|
sourceFilter,
|
|
618
|
+
statusFilter,
|
|
502
619
|
limit = 10,
|
|
503
620
|
offset = 0,
|
|
504
621
|
sortOrder,
|
|
@@ -518,6 +635,11 @@ export class HealthCheckService {
|
|
|
518
635
|
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
519
636
|
}
|
|
520
637
|
|
|
638
|
+
// Status filtering (e.g. only failing runs)
|
|
639
|
+
if (statusFilter && statusFilter.length > 0) {
|
|
640
|
+
conditions.push(inArray(healthCheckRuns.status, statusFilter));
|
|
641
|
+
}
|
|
642
|
+
|
|
521
643
|
// Build where clause
|
|
522
644
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
523
645
|
|
|
@@ -563,6 +685,7 @@ export class HealthCheckService {
|
|
|
563
685
|
startDate?: Date;
|
|
564
686
|
endDate?: Date;
|
|
565
687
|
sourceFilter?: string;
|
|
688
|
+
statusFilter?: HealthCheckStatus[];
|
|
566
689
|
limit?: number;
|
|
567
690
|
offset?: number;
|
|
568
691
|
sortOrder: "asc" | "desc";
|
|
@@ -573,6 +696,7 @@ export class HealthCheckService {
|
|
|
573
696
|
startDate,
|
|
574
697
|
endDate,
|
|
575
698
|
sourceFilter,
|
|
699
|
+
statusFilter,
|
|
576
700
|
limit = 10,
|
|
577
701
|
offset = 0,
|
|
578
702
|
sortOrder,
|
|
@@ -592,6 +716,11 @@ export class HealthCheckService {
|
|
|
592
716
|
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
593
717
|
}
|
|
594
718
|
|
|
719
|
+
// Status filtering (e.g. only failing runs)
|
|
720
|
+
if (statusFilter && statusFilter.length > 0) {
|
|
721
|
+
conditions.push(inArray(healthCheckRuns.status, statusFilter));
|
|
722
|
+
}
|
|
723
|
+
|
|
595
724
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
596
725
|
const total = await this.db.$count(healthCheckRuns, whereClause);
|
|
597
726
|
|