@checkstack/dependency-backend 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,208 @@
1
+ import * as schema from "./schema";
2
+ import type { SafeDatabase } from "@checkstack/backend-api";
3
+ import {
4
+ dependencyAccessRules,
5
+ pluginMetadata,
6
+ dependencyContract,
7
+ } from "@checkstack/dependency-common";
8
+ import { createBackendPlugin, coreServices } from "@checkstack/backend-api";
9
+ import { DependencyService } from "./services/dependency-service";
10
+ import { WarningEvaluationService } from "./services/warning-evaluation-service";
11
+ import type { SystemStatus } from "./services/warning-evaluation-service";
12
+ import { createRouter } from "./router";
13
+ import { CatalogApi } from "@checkstack/catalog-common";
14
+ import { HealthCheckApi } from "@checkstack/healthcheck-common";
15
+ import { MaintenanceApi } from "@checkstack/maintenance-common";
16
+ import { IncidentApi } from "@checkstack/incident-common";
17
+ import { catalogHooks } from "@checkstack/catalog-backend";
18
+ import { healthCheckHooks } from "@checkstack/healthcheck-backend";
19
+ import { evaluateAndNotifyDownstream } from "./notifications";
20
+
21
+ // =============================================================================
22
+ // Plugin Definition
23
+ // =============================================================================
24
+
25
/**
 * Dependency backend plugin.
 *
 * Registration does three things:
 *  - registers the dependency access rules,
 *  - `init`: builds the dependency services and registers the RPC router
 *    under `dependencyContract`,
 *  - `afterPluginsReady`: subscribes to catalog and health-check hooks so that
 *    dependency records are cleaned up when a system is deleted and downstream
 *    systems are re-evaluated when an upstream's health changes.
 */
export default createBackendPlugin({
  metadata: pluginMetadata,
  register(env) {
    env.registerAccessRules(dependencyAccessRules);

    env.registerInit({
      schema,
      deps: {
        logger: coreServices.logger,
        rpc: coreServices.rpc,
        rpcClient: coreServices.rpcClient,
        signalService: coreServices.signalService,
      },
      init: async ({ logger, database, rpc, rpcClient, signalService }) => {
        logger.debug("🔧 Initializing Dependency Backend...");

        const catalogClient = rpcClient.forPlugin(CatalogApi);
        const healthCheckClient = rpcClient.forPlugin(HealthCheckApi);

        const service = new DependencyService(
          database as SafeDatabase<typeof schema>,
        );
        const warningService = new WarningEvaluationService();

        const router = createRouter({
          service,
          warningService,
          signalService,
          catalogClient,
          healthCheckClient,
          logger,
        });
        rpc.registerRouter(router, dependencyContract);

        logger.debug("✅ Dependency Backend initialized.");
      },
      afterPluginsReady: async ({
        database,
        rpcClient,
        logger,
        onHook,
        signalService,
      }) => {
        const typedDb = database as SafeDatabase<typeof schema>;
        const service = new DependencyService(typedDb);
        const warningService = new WarningEvaluationService();

        const catalogClient = rpcClient.forPlugin(CatalogApi);
        const healthCheckClient = rpcClient.forPlugin(HealthCheckApi);
        const maintenanceClient = rpcClient.forPlugin(MaintenanceApi);
        const incidentClient = rpcClient.forPlugin(IncidentApi);

        /** Status entry used when no health information is available. */
        const operationalStatus = (
          systemId: string,
          systemName: string,
        ): SystemStatus => ({
          systemId,
          systemName,
          status: "operational",
        });

        /**
         * Build system statuses for warning evaluation.
         * This mirrors the fetchSystemStatuses function in the router.
         *
         * Systems that are not present in the catalog are omitted from the
         * result. If the bulk health lookup (or processing of its result)
         * fails, the failure is logged and every known system is reported as
         * "operational".
         *
         * @param systemIds - IDs of the systems to build statuses for.
         * @returns Map from system ID to its status entry.
         */
        async function fetchSystemStatuses(
          systemIds: string[],
        ): Promise<Map<string, SystemStatus>> {
          const statuses = new Map<string, SystemStatus>();
          const { systems } = await catalogClient.getSystems();
          const systemMap = new Map(systems.map((s) => [s.id, s]));

          try {
            const { statuses: healthStatuses } =
              await healthCheckClient.getBulkSystemHealthStatus({ systemIds });

            for (const systemId of systemIds) {
              const system = systemMap.get(systemId);
              if (!system) continue;

              const healthStatus = healthStatuses[systemId];
              if (!healthStatus) {
                statuses.set(systemId, operationalStatus(systemId, system.name));
                continue;
              }

              // Translate health-check vocabulary into dependency vocabulary:
              // "unhealthy" → "down", "degraded" → "degraded", else operational.
              let overallStatus: "operational" | "degraded" | "down" =
                "operational";
              if (healthStatus.status === "unhealthy") {
                overallStatus = "down";
              } else if (healthStatus.status === "degraded") {
                overallStatus = "degraded";
              }

              statuses.set(systemId, {
                systemId,
                systemName: system.name,
                status: overallStatus,
                healthCheckStatuses: healthStatus.checkStatuses.map((cs) => ({
                  healthCheckId: cs.configurationId,
                  status: cs.status,
                })),
              });
            }
          } catch (error) {
            // Best-effort fallback, but make the failure visible: without this
            // log an outage of the health-check lookup is indistinguishable
            // from "everything is operational".
            logger.warn(
              "Failed to fetch bulk system health statuses; treating systems as operational:",
              error,
            );
            for (const systemId of systemIds) {
              const system = systemMap.get(systemId);
              if (!system) continue;
              statuses.set(systemId, operationalStatus(systemId, system.name));
            }
          }

          return statuses;
        }

        /** Re-evaluate (and notify) everything downstream of the given system. */
        const evaluateDownstream = (changedSystemId: string): Promise<void> =>
          evaluateAndNotifyDownstream({
            changedSystemId,
            db: typedDb,
            dependencyService: service,
            warningService,
            fetchSystemStatuses,
            catalogClient,
            maintenanceClient,
            incidentClient,
            signalService,
            logger,
          });

        // Subscribe to catalog system deletion to clean up dependencies
        onHook(
          catalogHooks.systemDeleted,
          async (payload) => {
            logger.debug(
              `Cleaning up dependencies for deleted system: ${payload.systemId}`,
            );
            await service.removeSystemDependencies(payload.systemId);
          },
          { mode: "work-queue", workerGroup: "dependency-system-cleanup" },
        );

        // Subscribe to health check state changes to notify downstream dependents
        onHook(
          healthCheckHooks.systemDegraded,
          async (payload) => {
            logger.debug(
              `Upstream ${payload.systemId} degraded (${payload.previousStatus} → ${payload.newStatus}), evaluating downstream dependencies`,
            );
            await evaluateDownstream(payload.systemId);
          },
          {
            mode: "work-queue",
            workerGroup: "dependency-notification-evaluator",
          },
        );

        onHook(
          healthCheckHooks.systemHealthy,
          async (payload) => {
            logger.debug(
              `Upstream ${payload.systemId} recovered, evaluating downstream dependencies`,
            );
            await evaluateDownstream(payload.systemId);
          },
          {
            mode: "work-queue",
            workerGroup: "dependency-notification-recovery",
          },
        );

        logger.debug("✅ Dependency Backend afterPluginsReady complete.");
      },
    });
  },
});
206
+
207
+ // Re-export hooks for other plugins to use
208
+ export { dependencyHooks } from "./hooks";
@@ -0,0 +1,339 @@
1
+ import type { Logger } from "@checkstack/backend-api";
2
+ import type { InferClient } from "@checkstack/common";
3
+ import { resolveRoute } from "@checkstack/common";
4
+ import type { CatalogApi } from "@checkstack/catalog-common";
5
+ import { catalogRoutes } from "@checkstack/catalog-common";
6
+ import type { MaintenanceApi } from "@checkstack/maintenance-common";
7
+ import type { IncidentApi } from "@checkstack/incident-common";
8
+ import type { DerivedState } from "@checkstack/dependency-common";
9
+ import { DEPENDENCY_WARNINGS_CHANGED } from "@checkstack/dependency-common";
10
+ import type { DependencyService } from "./services/dependency-service";
11
+ import type {
12
+ WarningEvaluationService,
13
+ SystemStatus,
14
+ } from "./services/warning-evaluation-service";
15
+ import type { SafeDatabase } from "@checkstack/backend-api";
16
+ import type { SignalService } from "@checkstack/signal-common";
17
+ import * as schema from "./schema";
18
+ import { dependencyDerivedStates } from "./schema";
19
+ import { eq } from "drizzle-orm";
20
+
21
+ type Db = SafeDatabase<typeof schema>;
22
+
23
/**
 * Translate a dependency-derived state into the importance level used for
 * subscriber notifications.
 *
 * @param derivedState - Derived state of the downstream system.
 * @returns "info" for "info", "warning" for "degraded", "critical" for "down".
 */
function derivedStateToImportance(
  derivedState: DerivedState,
): "info" | "warning" | "critical" {
  const importanceByState: Record<
    DerivedState,
    "info" | "warning" | "critical"
  > = {
    info: "info",
    degraded: "warning",
    down: "critical",
  };
  return importanceByState[derivedState];
}
41
+
42
/**
 * Produce the notification title for a dependency-driven state change.
 *
 * A recovery always yields the "resolved" title regardless of `derivedState`.
 * Otherwise the title is chosen by the derived state, with a generic title
 * when the state is absent or unrecognized.
 *
 * @param derivedState - Current derived state of the downstream system, if any.
 * @param isRecovery - Whether the change is a recovery.
 * @returns The title text.
 */
export function buildNotificationTitle({
  derivedState,
  isRecovery,
}: {
  derivedState?: DerivedState;
  isRecovery: boolean;
}): string {
  if (isRecovery) {
    return "Dependency impact resolved";
  }
  if (derivedState === "info") {
    return "Upstream dependency issue (informational)";
  }
  if (derivedState === "degraded") {
    return "Availability impacted by upstream dependency";
  }
  if (derivedState === "down") {
    return "Availability critically impacted by upstream dependency";
  }
  return "Dependency impact changed";
}
71
+
72
/**
 * Generate notification body for a dependency-driven state change.
 *
 * A recovery always yields the fixed "recovered" message. Otherwise the
 * message is chosen by the derived state and names the affected upstreams in
 * parentheses. When `upstreamNames` is empty the parenthetical is omitted
 * entirely, so the message never contains a dangling "()".
 *
 * @param upstreamNames - Display names of the affected upstream systems.
 * @param derivedState - Current derived state of the downstream system, if any.
 * @param isRecovery - Whether the change is a recovery.
 * @returns The body text.
 */
export function buildNotificationBody({
  upstreamNames,
  derivedState,
  isRecovery,
}: {
  upstreamNames: string[];
  derivedState?: DerivedState;
  isRecovery: boolean;
}): string {
  if (isRecovery) {
    return "All upstream dependencies have recovered. This system is no longer affected by dependency failures.";
  }

  // Includes its own leading space so an empty list leaves no stray gap.
  const upstreamSuffix =
    upstreamNames.length > 0 ? ` (${upstreamNames.join(", ")})` : "";

  switch (derivedState) {
    case "info": {
      return `An upstream dependency${upstreamSuffix} is experiencing issues. This is informational — no direct impact expected.`;
    }
    case "degraded": {
      return `An upstream dependency${upstreamSuffix} is experiencing issues. This system's availability may be degraded.`;
    }
    case "down": {
      return `A critical upstream dependency${upstreamSuffix} is down. This system is expected to be unavailable.`;
    }
    default: {
      return `Upstream dependency status has changed${upstreamSuffix}.`;
    }
  }
}
105
+
106
/**
 * Evaluate downstream systems for dependency-driven state changes
 * and notify subscribers when the derived state transitions.
 *
 * This is the Sidecar Notification Orchestration function.
 * It runs when an upstream system's health status changes.
 *
 * Steps:
 *  1. Collect the systems that depend on `changedSystemId` directly, then
 *     follow dependencies flagged `transitive` to collect further dependents.
 *  2. Fetch statuses for every system that appears in any dependency.
 *  3. Evaluate current warnings for the collected downstream systems.
 *  4. Load the previously persisted derived states.
 *  5. Check maintenance, then incident, suppression on the changed upstream.
 *  6. For every downstream system whose derived state differs from the stored
 *     one: persist the new state, then (unless suppressed) notify subscribers.
 *  7. Broadcast `DEPENDENCY_WARNINGS_CHANGED` with the systems that changed.
 *
 * The function never rejects: any unexpected error is logged and swallowed so
 * the calling hook handler does not crash.
 *
 * @param changedSystemId - ID of the upstream system whose health changed.
 * @param db - Database handle used to read/write derived states.
 * @param dependencyService - Source of all dependency records.
 * @param warningService - Evaluates warnings from dependencies and statuses.
 * @param fetchSystemStatuses - Resolves status entries for a list of system IDs.
 * @param catalogClient - Used to notify subscribers of a system.
 * @param maintenanceClient - Used for the maintenance suppression check.
 * @param incidentClient - Used for the incident suppression check.
 * @param signalService - Used to broadcast the "warnings changed" signal.
 * @param logger - Logger for diagnostics.
 */
export async function evaluateAndNotifyDownstream({
  changedSystemId,
  db,
  dependencyService,
  warningService,
  fetchSystemStatuses,
  catalogClient,
  maintenanceClient,
  incidentClient,
  signalService,
  logger,
}: {
  changedSystemId: string;
  db: Db;
  dependencyService: DependencyService;
  warningService: WarningEvaluationService;
  fetchSystemStatuses: (
    systemIds: string[],
  ) => Promise<Map<string, SystemStatus>>;
  catalogClient: InferClient<typeof CatalogApi>;
  maintenanceClient: InferClient<typeof MaintenanceApi>;
  incidentClient: InferClient<typeof IncidentApi>;
  signalService: SignalService;
  logger: Logger;
}): Promise<void> {
  try {
    // 1. Find all downstream systems that depend on the changed system
    const allDeps = await dependencyService.getAllDependencies();
    const downstreamSystemIds = new Set<string>();

    for (const dep of allDeps) {
      if (dep.targetSystemId === changedSystemId) {
        downstreamSystemIds.add(dep.sourceSystemId);
      }
    }

    // Also check transitive downstream systems (systems that depend on
    // systems that depend on the changed system, etc.). Only dependencies
    // flagged `transitive` are followed beyond the first hop; `visited`
    // guards against cycles.
    const visited = new Set<string>();
    const pending = [...downstreamSystemIds];
    for (
      let systemId = pending.pop();
      systemId !== undefined;
      systemId = pending.pop()
    ) {
      if (visited.has(systemId)) continue;
      visited.add(systemId);
      downstreamSystemIds.add(systemId);

      for (const dep of allDeps) {
        if (
          dep.targetSystemId === systemId &&
          dep.transitive &&
          !visited.has(dep.sourceSystemId)
        ) {
          pending.push(dep.sourceSystemId);
        }
      }
    }

    if (downstreamSystemIds.size === 0) {
      return;
    }

    const downstreamIds = [...downstreamSystemIds];

    // 2. Fetch all system IDs needed for evaluation
    const allSystemIds = new Set<string>(downstreamIds);
    for (const dep of allDeps) {
      allSystemIds.add(dep.sourceSystemId);
      allSystemIds.add(dep.targetSystemId);
    }

    const statuses = await fetchSystemStatuses([...allSystemIds]);

    // 3. Evaluate current warnings for all downstream systems
    const warningMap = warningService.evaluateWarnings({
      systemIds: downstreamIds,
      allDependencies: allDeps,
      systemStatuses: statuses,
    });

    // 4. Load previous derived states from DB, keeping only relevant systems.
    // A read failure falls back to "no previous state" as before, but is now
    // logged: with an empty baseline every current warning looks like a new
    // transition, which would otherwise be impossible to diagnose.
    const previousStateMap = new Map<string, DerivedState>();
    try {
      const previousStates = await db.select().from(dependencyDerivedStates);
      for (const row of previousStates) {
        if (downstreamSystemIds.has(row.systemId)) {
          previousStateMap.set(row.systemId, row.derivedState as DerivedState);
        }
      }
    } catch (error) {
      logger.warn(
        `Failed to load previous dependency derived states for upstream ${changedSystemId}; assuming none:`,
        error,
      );
    }

    // 5. Check maintenance suppression on the upstream system that changed
    let upstreamSuppressed = false;
    try {
      const { suppressed } =
        await maintenanceClient.hasActiveMaintenanceWithSuppression({
          systemId: changedSystemId,
        });
      upstreamSuppressed = suppressed;
    } catch (error) {
      logger.warn(
        `Failed to check maintenance suppression for upstream ${changedSystemId}:`,
        error,
      );
    }

    // Also check incident suppression on the upstream
    if (!upstreamSuppressed) {
      try {
        const { suppressed } =
          await incidentClient.hasActiveIncidentWithSuppression({
            systemId: changedSystemId,
          });
        upstreamSuppressed = suppressed;
      } catch (error) {
        logger.warn(
          `Failed to check incident suppression for upstream ${changedSystemId}:`,
          error,
        );
      }
    }

    // 6. Compare and notify for each downstream system
    const changedSystemIds: string[] = [];
    for (const systemId of downstreamIds) {
      const currentWarning = warningMap.get(systemId);
      const currentState = currentWarning?.derivedState;
      const previousState = previousStateMap.get(systemId);

      // No change — skip
      if (currentState === previousState) {
        continue;
      }

      // State changed — update DB first. A failure here is isolated to this
      // system: it is logged and the system is skipped (no notification, not
      // reported as changed) so the remaining systems are still processed and
      // the broadcast below still covers the changes that were persisted.
      try {
        if (currentState) {
          const now = new Date();
          await db
            .insert(dependencyDerivedStates)
            .values({
              systemId,
              derivedState: currentState,
              updatedAt: now,
            })
            .onConflictDoUpdate({
              target: dependencyDerivedStates.systemId,
              set: {
                derivedState: currentState,
                updatedAt: now,
              },
            });
        } else {
          await db
            .delete(dependencyDerivedStates)
            .where(eq(dependencyDerivedStates.systemId, systemId));
        }
      } catch (error) {
        logger.error(
          `Failed to persist dependency derived state for ${systemId}; skipping notification:`,
          error,
        );
        continue;
      }

      changedSystemIds.push(systemId);

      // Skip notifications if upstream is suppressed
      if (upstreamSuppressed) {
        logger.debug(
          `Skipping dependency notification for ${systemId}: upstream ${changedSystemId} has suppression enabled`,
        );
        continue;
      }

      // Build notification
      const isRecovery = !currentState && !!previousState;
      const upstreamNames =
        currentWarning?.affectedUpstreams.map(
          (u) => u.systemName ?? u.systemId,
        ) ?? [];

      const title = buildNotificationTitle({
        derivedState: currentState,
        isRecovery,
      });
      const body = buildNotificationBody({
        upstreamNames,
        derivedState: currentState,
        isRecovery,
      });
      // A missing current state means the impact was lifted, which is
      // reported at "info" importance.
      const importance = currentState
        ? derivedStateToImportance(currentState)
        : ("info" as const);

      const systemDetailPath = resolveRoute(catalogRoutes.routes.systemDetail, {
        systemId,
      });

      try {
        await catalogClient.notifySystemSubscribers({
          systemId,
          title,
          body,
          importance,
          action: { label: "View System", url: systemDetailPath },
          includeGroupSubscribers: true,
        });
        logger.debug(
          `Dependency notification sent: ${systemId} ${previousState ?? "none"} → ${currentState ?? "none"}`,
        );
      } catch (error) {
        // Notifications are best-effort
        logger.warn(
          `Failed to send dependency notification for ${systemId}:`,
          error,
        );
      }
    }

    // 7. Broadcast signal so frontends can react
    if (changedSystemIds.length > 0) {
      await signalService.broadcast(DEPENDENCY_WARNINGS_CHANGED, {
        affectedSystemIds: changedSystemIds,
      });
    }
  } catch (error) {
    // Don't crash the hook handler
    logger.error(
      `Failed to evaluate dependency notifications for upstream ${changedSystemId}:`,
      error,
    );
  }
}