@checkstack/anomaly-backend 0.2.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +168 -0
- package/drizzle/0004_gray_trauma.sql +10 -0
- package/drizzle/meta/0004_snapshot.json +401 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +14 -10
- package/src/detector.test.ts +44 -8
- package/src/detector.ts +7 -0
- package/src/drift-evaluator.test.ts +28 -3
- package/src/drift-evaluator.ts +7 -0
- package/src/jobs/baseline-analyzer.ts +4 -0
- package/src/notification-mute.test.ts +155 -0
- package/src/notification.ts +47 -17
- package/src/plugin.ts +36 -5
- package/src/router.ts +41 -3
- package/src/schema.ts +26 -0
- package/src/service.ts +103 -1
package/src/notification.ts
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
|
-
import type { Logger } from "@checkstack/backend-api";
|
|
1
|
+
import type { Logger, SafeDatabase } from "@checkstack/backend-api";
|
|
2
2
|
import type { CatalogApi } from "@checkstack/catalog-common";
|
|
3
|
-
import { catalogRoutes } from "@checkstack/catalog-common";
|
|
3
|
+
import { catalogRoutes, createSystemSubject } from "@checkstack/catalog-common";
|
|
4
4
|
import type { InferClient } from "@checkstack/common";
|
|
5
5
|
import { resolveRoute } from "@checkstack/common";
|
|
6
|
+
import {
|
|
7
|
+
anomalyCollapseKey,
|
|
8
|
+
anomalySystemSubscription,
|
|
9
|
+
} from "@checkstack/anomaly-common";
|
|
10
|
+
import type { NotificationApi } from "@checkstack/notification-common";
|
|
11
|
+
import { AnomalyService } from "./service";
|
|
12
|
+
import * as schema from "./schema";
|
|
6
13
|
|
|
7
14
|
export type AnomalyNotificationAction =
|
|
8
15
|
| "confirmed"
|
|
@@ -17,16 +24,19 @@ export interface DispatchAnomalyNotificationInput {
|
|
|
17
24
|
observedValue: string | boolean | number;
|
|
18
25
|
baselineMean: number;
|
|
19
26
|
catalogClient: InferClient<typeof CatalogApi>;
|
|
27
|
+
notificationClient: InferClient<typeof NotificationApi>;
|
|
28
|
+
db: SafeDatabase<typeof schema>;
|
|
20
29
|
logger: Logger;
|
|
21
30
|
/** Drift-specific: projected change over the baseline window. */
|
|
22
31
|
projectedChange?: number;
|
|
23
32
|
}
|
|
24
33
|
|
|
25
34
|
/**
|
|
26
|
-
* Dispatches anomaly
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
35
|
+
* Dispatches anomaly notifications via the platform spec contract.
|
|
36
|
+
* Notification-backend resolves the spec → groupId convention, walks
|
|
37
|
+
* parent catalog groups via stored target edges, and unions
|
|
38
|
+
* subscribers. Anomaly only contributes the recipient-exclusion list
|
|
39
|
+
* (per-field / per-system mutes) before delivery.
|
|
30
40
|
*/
|
|
31
41
|
export async function dispatchAnomalyNotification({
|
|
32
42
|
action,
|
|
@@ -35,6 +45,8 @@ export async function dispatchAnomalyNotification({
|
|
|
35
45
|
observedValue,
|
|
36
46
|
baselineMean,
|
|
37
47
|
catalogClient,
|
|
48
|
+
notificationClient,
|
|
49
|
+
db,
|
|
38
50
|
logger,
|
|
39
51
|
projectedChange,
|
|
40
52
|
}: DispatchAnomalyNotificationInput): Promise<void> {
|
|
@@ -66,13 +78,33 @@ export async function dispatchAnomalyNotification({
|
|
|
66
78
|
|
|
67
79
|
const importance = getImportance(action);
|
|
68
80
|
|
|
69
|
-
|
|
81
|
+
// The dispatcher resolves the recipient set, so at this point we
|
|
82
|
+
// do not know who *would* be reached. Instead of filtering
|
|
83
|
+
// candidates here, ask the AnomalyService for everyone muted on this
|
|
84
|
+
// (system, fieldPath) regardless of subscription state; backend
|
|
85
|
+
// does the intersection during dispatch (subscribers ∩ excluded).
|
|
86
|
+
const service = new AnomalyService(db);
|
|
87
|
+
const mutedUserIds = await service.getMutedUserIds({
|
|
70
88
|
systemId,
|
|
89
|
+
fieldPath,
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
await notificationClient.notifyForSubscription({
|
|
93
|
+
specId: anomalySystemSubscription.specId,
|
|
94
|
+
resourceKeys: [systemId],
|
|
95
|
+
excludeUserIds: [...mutedUserIds],
|
|
71
96
|
title,
|
|
72
97
|
body: message,
|
|
73
98
|
importance,
|
|
74
99
|
action: { label: "View System", url: actionUrl },
|
|
75
|
-
|
|
100
|
+
collapseKey: anomalyCollapseKey(systemId, fieldPath),
|
|
101
|
+
subjects: [
|
|
102
|
+
createSystemSubject({
|
|
103
|
+
id: systemId,
|
|
104
|
+
name: systemName,
|
|
105
|
+
url: actionUrl,
|
|
106
|
+
}),
|
|
107
|
+
],
|
|
76
108
|
});
|
|
77
109
|
} catch (error) {
|
|
78
110
|
logger.warn(
|
|
@@ -111,9 +143,10 @@ function buildNotificationCopy({
|
|
|
111
143
|
};
|
|
112
144
|
}
|
|
113
145
|
case "drift_confirmed": {
|
|
114
|
-
const projectionFragment =
|
|
115
|
-
|
|
116
|
-
|
|
146
|
+
const projectionFragment =
|
|
147
|
+
driftStr === ""
|
|
148
|
+
? ""
|
|
149
|
+
: ` Projected change over the baseline window: ${driftStr}.`;
|
|
117
150
|
return {
|
|
118
151
|
title: `Trend Drift Detected: ${systemName}`,
|
|
119
152
|
message: `**${fieldPath}** is drifting. Current mean: ${obsStr}, Baseline: ${baseStr}.${projectionFragment}`,
|
|
@@ -128,12 +161,9 @@ function buildNotificationCopy({
|
|
|
128
161
|
}
|
|
129
162
|
}
|
|
130
163
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
* - "Bad News" states (confirmed, drift_confirmed) are warnings.
|
|
135
|
-
*/
|
|
136
|
-
export function getImportance(action: AnomalyNotificationAction): "info" | "warning" {
|
|
164
|
+
export function getImportance(
|
|
165
|
+
action: AnomalyNotificationAction,
|
|
166
|
+
): "info" | "warning" {
|
|
137
167
|
if (action === "recovered" || action === "drift_recovered") return "info";
|
|
138
168
|
return "warning";
|
|
139
169
|
}
|
package/src/plugin.ts
CHANGED
|
@@ -4,10 +4,17 @@ import { setupBaselineAnalyzerJob } from "./jobs/baseline-analyzer";
|
|
|
4
4
|
import { processCheckCompleted } from "./detector";
|
|
5
5
|
import * as schema from "./schema";
|
|
6
6
|
import { CatalogApi } from "@checkstack/catalog-common";
|
|
7
|
+
import { NotificationApi } from "@checkstack/notification-common";
|
|
7
8
|
import { AnomalyService } from "./service";
|
|
8
9
|
import { createRouter } from "./router";
|
|
9
10
|
import { createAnomalyRouterCache, type AnomalyRouterCache } from "./router-cache";
|
|
10
|
-
import {
|
|
11
|
+
import {
|
|
12
|
+
anomalyContract,
|
|
13
|
+
anomalyAccessRules,
|
|
14
|
+
anomalySystemSubscription,
|
|
15
|
+
anomalyGroupSubscription,
|
|
16
|
+
} from "@checkstack/anomaly-common";
|
|
17
|
+
import { specToRegistration } from "@checkstack/notification-common";
|
|
11
18
|
import { HealthCheckApi } from "@checkstack/healthcheck-common";
|
|
12
19
|
|
|
13
20
|
import { definePluginMetadata } from "@checkstack/common";
|
|
@@ -18,10 +25,16 @@ export const plugin = createBackendPlugin({
|
|
|
18
25
|
}),
|
|
19
26
|
register(env) {
|
|
20
27
|
env.registerAccessRules(anomalyAccessRules);
|
|
28
|
+
// Declared subscription specs feed the plugin loader's
|
|
29
|
+
// dependency sorter — each spec's target.ownerPlugin becomes an
|
|
30
|
+
// implicit init-order dep, so anomaly waits for catalog (the
|
|
31
|
+
// owner of catalogSystemTarget / catalogGroupTarget) before its
|
|
32
|
+
// own init + afterPluginsReady runs.
|
|
33
|
+
env.registerSubscriptionSpecs([
|
|
34
|
+
anomalySystemSubscription,
|
|
35
|
+
anomalyGroupSubscription,
|
|
36
|
+
]);
|
|
21
37
|
|
|
22
|
-
// Shared between init (router) and afterPluginsReady (detector hook),
|
|
23
|
-
// so the detector can drop the router cache before broadcasting state
|
|
24
|
-
// change signals.
|
|
25
38
|
let routerCache: AnomalyRouterCache | undefined;
|
|
26
39
|
|
|
27
40
|
env.registerInit({
|
|
@@ -30,7 +43,7 @@ export const plugin = createBackendPlugin({
|
|
|
30
43
|
db: coreServices.database,
|
|
31
44
|
logger: coreServices.logger,
|
|
32
45
|
queueManager: coreServices.queueManager,
|
|
33
|
-
cacheManager: coreServices.cacheManager,
|
|
46
|
+
cacheManager: coreServices.cacheManager,
|
|
34
47
|
rpcClient: coreServices.rpcClient,
|
|
35
48
|
rpc: coreServices.rpc,
|
|
36
49
|
signalService: coreServices.signalService,
|
|
@@ -43,6 +56,7 @@ export const plugin = createBackendPlugin({
|
|
|
43
56
|
const typedDb = db as SafeDatabase<typeof schema>;
|
|
44
57
|
const healthCheckClient = rpcClient.forPlugin(HealthCheckApi);
|
|
45
58
|
const catalogClient = rpcClient.forPlugin(CatalogApi);
|
|
59
|
+
const notificationClient = rpcClient.forPlugin(NotificationApi);
|
|
46
60
|
|
|
47
61
|
await setupBaselineAnalyzerJob({
|
|
48
62
|
db: typedDb,
|
|
@@ -52,6 +66,7 @@ export const plugin = createBackendPlugin({
|
|
|
52
66
|
healthCheckClient,
|
|
53
67
|
signalService,
|
|
54
68
|
catalogClient,
|
|
69
|
+
notificationClient,
|
|
55
70
|
collectorRegistry,
|
|
56
71
|
});
|
|
57
72
|
|
|
@@ -66,6 +81,21 @@ export const plugin = createBackendPlugin({
|
|
|
66
81
|
const cache = cacheManager.getProvider();
|
|
67
82
|
const typedDb = db as SafeDatabase<typeof schema>;
|
|
68
83
|
const catalogClient = rpcClient.forPlugin(CatalogApi);
|
|
84
|
+
const notificationClient = rpcClient.forPlugin(NotificationApi);
|
|
85
|
+
|
|
86
|
+
// Register subscription specs against the platform. notification-
|
|
87
|
+
// backend takes care of provisioning per-resource groups by joining
|
|
88
|
+
// the spec's target type onto the resource registry catalog already
|
|
89
|
+
// pushes — anomaly never needs to know about per-system or
|
|
90
|
+
// per-group lifecycle.
|
|
91
|
+
await Promise.all([
|
|
92
|
+
notificationClient.registerSubscriptionSpec(
|
|
93
|
+
specToRegistration(anomalySystemSubscription),
|
|
94
|
+
),
|
|
95
|
+
notificationClient.registerSubscriptionSpec(
|
|
96
|
+
specToRegistration(anomalyGroupSubscription),
|
|
97
|
+
),
|
|
98
|
+
]);
|
|
69
99
|
|
|
70
100
|
onHook(healthCheckHooks.checkCompleted, async (payload) => {
|
|
71
101
|
await processCheckCompleted({
|
|
@@ -75,6 +105,7 @@ export const plugin = createBackendPlugin({
|
|
|
75
105
|
routerCache,
|
|
76
106
|
logger,
|
|
77
107
|
catalogClient,
|
|
108
|
+
notificationClient,
|
|
78
109
|
signalService,
|
|
79
110
|
collectorRegistry,
|
|
80
111
|
});
|
package/src/router.ts
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import { implement } from "@orpc/server";
|
|
2
2
|
import { anomalyContract } from "@checkstack/anomaly-common";
|
|
3
3
|
import type { AnomalyService } from "./service";
|
|
4
|
-
import
|
|
5
|
-
|
|
4
|
+
import {
|
|
5
|
+
autoAuthMiddleware,
|
|
6
|
+
type Logger,
|
|
7
|
+
type RealUser,
|
|
8
|
+
type RpcContext,
|
|
9
|
+
type VersionedRecord,
|
|
10
|
+
} from "@checkstack/backend-api";
|
|
6
11
|
import type { AnomalySettings } from "@checkstack/anomaly-common";
|
|
7
12
|
import type { AnomalyRouterCache } from "./router-cache";
|
|
8
13
|
|
|
@@ -11,7 +16,9 @@ export function createRouter(
|
|
|
11
16
|
logger: Logger,
|
|
12
17
|
cache: AnomalyRouterCache,
|
|
13
18
|
) {
|
|
14
|
-
const os = implement(anomalyContract)
|
|
19
|
+
const os = implement(anomalyContract)
|
|
20
|
+
.$context<RpcContext>()
|
|
21
|
+
.use(autoAuthMiddleware);
|
|
15
22
|
|
|
16
23
|
return os.router({
|
|
17
24
|
getAnomalies: os.getAnomalies.handler(
|
|
@@ -70,5 +77,36 @@ export function createRouter(
|
|
|
70
77
|
return result as VersionedRecord<Partial<AnomalySettings>>;
|
|
71
78
|
}
|
|
72
79
|
),
|
|
80
|
+
|
|
81
|
+
listAnomalyNotificationMutes: os.listAnomalyNotificationMutes.handler(
|
|
82
|
+
async ({ input, context }) => {
|
|
83
|
+
const userId = (context.user as RealUser).id;
|
|
84
|
+
return service.listMutes({ userId, systemId: input.systemId });
|
|
85
|
+
},
|
|
86
|
+
),
|
|
87
|
+
|
|
88
|
+
muteAnomalyNotification: os.muteAnomalyNotification.handler(
|
|
89
|
+
async ({ input, context }) => {
|
|
90
|
+
const userId = (context.user as RealUser).id;
|
|
91
|
+
await service.addMute({
|
|
92
|
+
userId,
|
|
93
|
+
systemId: input.systemId,
|
|
94
|
+
fieldPath: input.fieldPath,
|
|
95
|
+
});
|
|
96
|
+
return { success: true };
|
|
97
|
+
},
|
|
98
|
+
),
|
|
99
|
+
|
|
100
|
+
unmuteAnomalyNotification: os.unmuteAnomalyNotification.handler(
|
|
101
|
+
async ({ input, context }) => {
|
|
102
|
+
const userId = (context.user as RealUser).id;
|
|
103
|
+
await service.removeMute({
|
|
104
|
+
userId,
|
|
105
|
+
systemId: input.systemId,
|
|
106
|
+
fieldPath: input.fieldPath,
|
|
107
|
+
});
|
|
108
|
+
return { success: true };
|
|
109
|
+
},
|
|
110
|
+
),
|
|
73
111
|
});
|
|
74
112
|
}
|
package/src/schema.ts
CHANGED
|
@@ -8,6 +8,8 @@ import {
|
|
|
8
8
|
timestamp,
|
|
9
9
|
doublePrecision,
|
|
10
10
|
unique,
|
|
11
|
+
index,
|
|
12
|
+
primaryKey,
|
|
11
13
|
} from "drizzle-orm/pg-core";
|
|
12
14
|
|
|
13
15
|
export const anomalyStateEnum = pgEnum("anomaly_state", [
|
|
@@ -85,3 +87,27 @@ export const anomalyAssignments = pgTable("anomaly_assignments", {
|
|
|
85
87
|
}, (t) => ({
|
|
86
88
|
pk: unique("anomaly_assignments_pk").on(t.systemId, t.configurationId),
|
|
87
89
|
}));
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Per-user mute records for anomaly notifications. A row's existence means
|
|
93
|
+
* the user has muted notifications for that (system, fieldPath) pair.
|
|
94
|
+
*
|
|
95
|
+
* Empty fieldPath ("") represents a system-wide mute — anomaly notifications
|
|
96
|
+
* for the entire system are suppressed for that user. We collapse the two
|
|
97
|
+
* granularities into one table so dispatch can answer "is this user muted?"
|
|
98
|
+
* with a single index lookup instead of two queries.
|
|
99
|
+
*/
|
|
100
|
+
export const anomalyNotificationMutes = pgTable(
|
|
101
|
+
"anomaly_notification_mutes",
|
|
102
|
+
{
|
|
103
|
+
userId: text("user_id").notNull(),
|
|
104
|
+
systemId: text("system_id").notNull(),
|
|
105
|
+
fieldPath: text("field_path").notNull(),
|
|
106
|
+
mutedAt: timestamp("muted_at").defaultNow().notNull(),
|
|
107
|
+
},
|
|
108
|
+
(t) => ({
|
|
109
|
+
pk: primaryKey({ columns: [t.userId, t.systemId, t.fieldPath] }),
|
|
110
|
+
userIdx: index("anomaly_notification_mutes_user_idx").on(t.userId),
|
|
111
|
+
systemIdx: index("anomaly_notification_mutes_system_idx").on(t.systemId),
|
|
112
|
+
}),
|
|
113
|
+
);
|
package/src/service.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { eq, and, desc } from "drizzle-orm";
|
|
1
|
+
import { eq, and, desc, inArray } from "drizzle-orm";
|
|
2
2
|
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
3
3
|
import * as schema from "./schema";
|
|
4
4
|
import { anomalySettingsConfig } from "./config";
|
|
@@ -160,4 +160,106 @@ export class AnomalyService {
|
|
|
160
160
|
|
|
161
161
|
return result.config;
|
|
162
162
|
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* List anomaly-notification mutes for a user. Optionally narrow to one
|
|
166
|
+
* system. Returns the same shape as the DTO (mutedAt is ISO-formatted).
|
|
167
|
+
*/
|
|
168
|
+
async listMutes({
|
|
169
|
+
userId,
|
|
170
|
+
systemId,
|
|
171
|
+
}: {
|
|
172
|
+
userId: string;
|
|
173
|
+
systemId?: string;
|
|
174
|
+
}) {
|
|
175
|
+
const conditions = [eq(schema.anomalyNotificationMutes.userId, userId)];
|
|
176
|
+
if (systemId !== undefined) {
|
|
177
|
+
conditions.push(eq(schema.anomalyNotificationMutes.systemId, systemId));
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
const rows = await this.db
|
|
181
|
+
.select()
|
|
182
|
+
.from(schema.anomalyNotificationMutes)
|
|
183
|
+
.where(and(...conditions));
|
|
184
|
+
|
|
185
|
+
return rows.map((r) => ({
|
|
186
|
+
systemId: r.systemId,
|
|
187
|
+
fieldPath: r.fieldPath,
|
|
188
|
+
mutedAt: r.mutedAt.toISOString(),
|
|
189
|
+
}));
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
async addMute({
|
|
193
|
+
userId,
|
|
194
|
+
systemId,
|
|
195
|
+
fieldPath,
|
|
196
|
+
}: {
|
|
197
|
+
userId: string;
|
|
198
|
+
systemId: string;
|
|
199
|
+
fieldPath: string;
|
|
200
|
+
}) {
|
|
201
|
+
await this.db
|
|
202
|
+
.insert(schema.anomalyNotificationMutes)
|
|
203
|
+
.values({ userId, systemId, fieldPath })
|
|
204
|
+
.onConflictDoNothing();
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
async removeMute({
|
|
208
|
+
userId,
|
|
209
|
+
systemId,
|
|
210
|
+
fieldPath,
|
|
211
|
+
}: {
|
|
212
|
+
userId: string;
|
|
213
|
+
systemId: string;
|
|
214
|
+
fieldPath: string;
|
|
215
|
+
}) {
|
|
216
|
+
await this.db
|
|
217
|
+
.delete(schema.anomalyNotificationMutes)
|
|
218
|
+
.where(
|
|
219
|
+
and(
|
|
220
|
+
eq(schema.anomalyNotificationMutes.userId, userId),
|
|
221
|
+
eq(schema.anomalyNotificationMutes.systemId, systemId),
|
|
222
|
+
eq(schema.anomalyNotificationMutes.fieldPath, fieldPath),
|
|
223
|
+
),
|
|
224
|
+
);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* For a given (system, fieldPath), return the set of userIds that have
|
|
229
|
+
* muted notifications. A row with empty fieldPath ("") for the system
|
|
230
|
+
* counts as a mute regardless of which field triggered the dispatch.
|
|
231
|
+
* Used by the notification dispatcher to populate `excludeUserIds`.
|
|
232
|
+
*
|
|
233
|
+
* `candidateUserIds` is optional — when omitted, returns every user
|
|
234
|
+
* that currently mutes this (system, field). The notification backend
|
|
235
|
+
* intersects against actual subscribers anyway, so a broader exclude
|
|
236
|
+
* set is harmless.
|
|
237
|
+
*/
|
|
238
|
+
async getMutedUserIds({
|
|
239
|
+
systemId,
|
|
240
|
+
fieldPath,
|
|
241
|
+
candidateUserIds,
|
|
242
|
+
}: {
|
|
243
|
+
systemId: string;
|
|
244
|
+
fieldPath: string;
|
|
245
|
+
candidateUserIds?: string[];
|
|
246
|
+
}): Promise<Set<string>> {
|
|
247
|
+
const conditions = [
|
|
248
|
+
eq(schema.anomalyNotificationMutes.systemId, systemId),
|
|
249
|
+
inArray(schema.anomalyNotificationMutes.fieldPath, [fieldPath, ""]),
|
|
250
|
+
];
|
|
251
|
+
if (candidateUserIds !== undefined) {
|
|
252
|
+
if (candidateUserIds.length === 0) return new Set();
|
|
253
|
+
conditions.push(
|
|
254
|
+
inArray(schema.anomalyNotificationMutes.userId, candidateUserIds),
|
|
255
|
+
);
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
const rows = await this.db
|
|
259
|
+
.select({ userId: schema.anomalyNotificationMutes.userId })
|
|
260
|
+
.from(schema.anomalyNotificationMutes)
|
|
261
|
+
.where(and(...conditions));
|
|
262
|
+
|
|
263
|
+
return new Set(rows.map((r) => r.userId));
|
|
264
|
+
}
|
|
163
265
|
}
|