@checkstack/healthcheck-backend 0.12.1 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/drizzle/0010_colorful_shinobi_shaw.sql +8 -0
- package/drizzle/meta/0010_snapshot.json +469 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +14 -13
- package/src/hooks.ts +10 -0
- package/src/index.ts +18 -4
- package/src/queue-executor.ts +26 -1
- package/src/realtime-aggregation.ts +12 -0
- package/src/router.test.ts +6 -5
- package/src/router.ts +44 -5
- package/src/schema.ts +31 -1
- package/src/service.ts +215 -27
package/src/queue-executor.ts
CHANGED
|
@@ -259,6 +259,8 @@ async function executeHealthCheckJob(props: {
|
|
|
259
259
|
interval: healthCheckConfigurations.intervalSeconds,
|
|
260
260
|
enabled: systemHealthChecks.enabled,
|
|
261
261
|
paused: healthCheckConfigurations.paused,
|
|
262
|
+
includeLocal: systemHealthChecks.includeLocal,
|
|
263
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
262
264
|
})
|
|
263
265
|
.from(systemHealthChecks)
|
|
264
266
|
.innerJoin(
|
|
@@ -289,6 +291,19 @@ async function executeHealthCheckJob(props: {
|
|
|
289
291
|
return;
|
|
290
292
|
}
|
|
291
293
|
|
|
294
|
+
// If includeLocal is false and satellites are assigned, skip local execution
|
|
295
|
+
// (satellites handle execution, local core doesn't run this check)
|
|
296
|
+
if (
|
|
297
|
+
!configRow.includeLocal &&
|
|
298
|
+
configRow.satelliteIds &&
|
|
299
|
+
configRow.satelliteIds.length > 0
|
|
300
|
+
) {
|
|
301
|
+
logger.debug(
|
|
302
|
+
`Health check ${configId} for system ${systemId} is satellite-only, skipping local execution`,
|
|
303
|
+
);
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
|
|
292
307
|
// Fetch system name for signal payload
|
|
293
308
|
let systemName = systemId;
|
|
294
309
|
try {
|
|
@@ -395,6 +410,7 @@ async function executeHealthCheckJob(props: {
|
|
|
395
410
|
result: {
|
|
396
411
|
_collectorId: collectorEntry.collectorId,
|
|
397
412
|
_assertionFailed: assertionFailed,
|
|
413
|
+
_collectorError: collectorError,
|
|
398
414
|
...strippedResult,
|
|
399
415
|
},
|
|
400
416
|
};
|
|
@@ -410,7 +426,7 @@ async function executeHealthCheckJob(props: {
|
|
|
410
426
|
result: {
|
|
411
427
|
_collectorId: collectorEntry.collectorId,
|
|
412
428
|
_assertionFailed: undefined,
|
|
413
|
-
|
|
429
|
+
_collectorError: errorStr,
|
|
414
430
|
},
|
|
415
431
|
};
|
|
416
432
|
}
|
|
@@ -486,6 +502,8 @@ async function executeHealthCheckJob(props: {
|
|
|
486
502
|
status: result.status,
|
|
487
503
|
latencyMs: result.latencyMs,
|
|
488
504
|
result: { ...result } as Record<string, unknown>,
|
|
505
|
+
sourceId: undefined,
|
|
506
|
+
sourceLabel: "Local",
|
|
489
507
|
});
|
|
490
508
|
|
|
491
509
|
await incrementHourlyAggregate({
|
|
@@ -497,6 +515,7 @@ async function executeHealthCheckJob(props: {
|
|
|
497
515
|
runTimestamp: new Date(),
|
|
498
516
|
result: { ...result } as Record<string, unknown>,
|
|
499
517
|
collectorRegistry,
|
|
518
|
+
sourceLabel: "Local",
|
|
500
519
|
});
|
|
501
520
|
|
|
502
521
|
logger.debug(
|
|
@@ -560,6 +579,8 @@ async function executeHealthCheckJob(props: {
|
|
|
560
579
|
status: result.status,
|
|
561
580
|
latencyMs: result.latencyMs,
|
|
562
581
|
result: { ...result } as Record<string, unknown>,
|
|
582
|
+
sourceId: undefined,
|
|
583
|
+
sourceLabel: "Local",
|
|
563
584
|
});
|
|
564
585
|
|
|
565
586
|
// Trigger incremental hourly aggregation
|
|
@@ -572,6 +593,7 @@ async function executeHealthCheckJob(props: {
|
|
|
572
593
|
runTimestamp: new Date(),
|
|
573
594
|
result: { ...result } as Record<string, unknown>,
|
|
574
595
|
collectorRegistry,
|
|
596
|
+
sourceLabel: "Local",
|
|
575
597
|
});
|
|
576
598
|
|
|
577
599
|
logger.debug(
|
|
@@ -660,6 +682,8 @@ async function executeHealthCheckJob(props: {
|
|
|
660
682
|
systemId,
|
|
661
683
|
status: "unhealthy",
|
|
662
684
|
result: { error: String(error) } as Record<string, unknown>,
|
|
685
|
+
sourceId: undefined,
|
|
686
|
+
sourceLabel: "Local",
|
|
663
687
|
});
|
|
664
688
|
|
|
665
689
|
// Trigger incremental hourly aggregation
|
|
@@ -672,6 +696,7 @@ async function executeHealthCheckJob(props: {
|
|
|
672
696
|
runTimestamp: new Date(),
|
|
673
697
|
// No collector data for error cases
|
|
674
698
|
collectorRegistry,
|
|
699
|
+
sourceLabel: "Local",
|
|
675
700
|
});
|
|
676
701
|
|
|
677
702
|
// Try to fetch names for the enriched signal (best-effort)
|
|
@@ -67,6 +67,10 @@ interface IncrementHourlyAggregateParams {
|
|
|
67
67
|
result?: Record<string, unknown>;
|
|
68
68
|
/** Collector registry for aggregating collector data via mergeResult */
|
|
69
69
|
collectorRegistry?: CollectorRegistry;
|
|
70
|
+
/** Source identifier: undefined = local core, string = satellite ID */
|
|
71
|
+
sourceId?: string;
|
|
72
|
+
/** Human-readable source label for display */
|
|
73
|
+
sourceLabel?: string;
|
|
70
74
|
}
|
|
71
75
|
|
|
72
76
|
/**
|
|
@@ -88,6 +92,8 @@ export async function incrementHourlyAggregate(
|
|
|
88
92
|
runTimestamp,
|
|
89
93
|
result,
|
|
90
94
|
collectorRegistry,
|
|
95
|
+
sourceId,
|
|
96
|
+
sourceLabel,
|
|
91
97
|
} = params;
|
|
92
98
|
|
|
93
99
|
const bucketStart = getHourBucketStart(runTimestamp);
|
|
@@ -107,6 +113,9 @@ export async function incrementHourlyAggregate(
|
|
|
107
113
|
eq(healthCheckAggregates.configurationId, configurationId),
|
|
108
114
|
eq(healthCheckAggregates.bucketStart, bucketStart),
|
|
109
115
|
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
116
|
+
sourceId
|
|
117
|
+
? eq(healthCheckAggregates.sourceId, sourceId)
|
|
118
|
+
: sql`${healthCheckAggregates.sourceId} IS NULL`,
|
|
110
119
|
),
|
|
111
120
|
)
|
|
112
121
|
.limit(1);
|
|
@@ -181,6 +190,8 @@ export async function incrementHourlyAggregate(
|
|
|
181
190
|
p95LatencyMs: latencyUpdate?.p95,
|
|
182
191
|
tdigestState: latencyUpdate?.tdigestState,
|
|
183
192
|
aggregatedResult,
|
|
193
|
+
sourceId: sourceId ?? undefined,
|
|
194
|
+
sourceLabel: sourceLabel ?? undefined,
|
|
184
195
|
})
|
|
185
196
|
.onConflictDoUpdate({
|
|
186
197
|
target: [
|
|
@@ -188,6 +199,7 @@ export async function incrementHourlyAggregate(
|
|
|
188
199
|
healthCheckAggregates.systemId,
|
|
189
200
|
healthCheckAggregates.bucketStart,
|
|
190
201
|
healthCheckAggregates.bucketSize,
|
|
202
|
+
healthCheckAggregates.sourceId,
|
|
191
203
|
],
|
|
192
204
|
set: {
|
|
193
205
|
runCount: sql`${healthCheckAggregates.runCount} + 1`,
|
package/src/router.test.ts
CHANGED
|
@@ -50,11 +50,12 @@ describe("HealthCheck Router", () => {
|
|
|
50
50
|
getCollectorsForPlugin: mock(() => []),
|
|
51
51
|
};
|
|
52
52
|
|
|
53
|
-
const router = createHealthCheckRouter(
|
|
54
|
-
mockDb as never,
|
|
55
|
-
mockRegistry,
|
|
56
|
-
mockCollectorRegistry as never,
|
|
57
|
-
|
|
53
|
+
const router = createHealthCheckRouter({
|
|
54
|
+
database: mockDb as never,
|
|
55
|
+
registry: mockRegistry,
|
|
56
|
+
collectorRegistry: mockCollectorRegistry as never,
|
|
57
|
+
getEmitHook: () => undefined,
|
|
58
|
+
});
|
|
58
59
|
|
|
59
60
|
it("getStrategies returns strategies from registry", async () => {
|
|
60
61
|
const context = createMockRpcContext({
|
package/src/router.ts
CHANGED
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
import { healthCheckContract } from "@checkstack/healthcheck-common";
|
|
11
11
|
import type { StrategyCategory } from "@checkstack/healthcheck-common";
|
|
12
12
|
import { HealthCheckService } from "./service";
|
|
13
|
+
import { healthCheckHooks } from "./hooks";
|
|
13
14
|
import * as schema from "./schema";
|
|
14
15
|
import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
15
16
|
|
|
@@ -19,11 +20,13 @@ import { toJsonSchemaWithChartMeta } from "./schema-utils";
|
|
|
19
20
|
* Auth and access rules are automatically enforced via autoAuthMiddleware
|
|
20
21
|
* based on the contract's meta.userType and meta.access.
|
|
21
22
|
*/
|
|
22
|
-
export const createHealthCheckRouter = (
|
|
23
|
-
database: SafeDatabase<typeof schema
|
|
24
|
-
registry: HealthCheckRegistry
|
|
25
|
-
collectorRegistry: CollectorRegistry
|
|
26
|
-
) => {
|
|
23
|
+
export const createHealthCheckRouter = (opts: {
|
|
24
|
+
database: SafeDatabase<typeof schema>;
|
|
25
|
+
registry: HealthCheckRegistry;
|
|
26
|
+
collectorRegistry: CollectorRegistry;
|
|
27
|
+
getEmitHook: () => ((hook: { id: string }, payload: Record<string, unknown>) => Promise<void>) | undefined;
|
|
28
|
+
}) => {
|
|
29
|
+
const { database, registry, collectorRegistry, getEmitHook } = opts;
|
|
27
30
|
// Create service instance once - shared across all handlers
|
|
28
31
|
const service = new HealthCheckService(database, registry, collectorRegistry);
|
|
29
32
|
|
|
@@ -137,6 +140,8 @@ export const createHealthCheckRouter = (
|
|
|
137
140
|
configurationId: input.body.configurationId,
|
|
138
141
|
enabled: input.body.enabled,
|
|
139
142
|
stateThresholds: input.body.stateThresholds,
|
|
143
|
+
satelliteIds: input.body.satelliteIds,
|
|
144
|
+
includeLocal: input.body.includeLocal,
|
|
140
145
|
});
|
|
141
146
|
|
|
142
147
|
// If enabling the health check, schedule it immediately
|
|
@@ -156,10 +161,28 @@ export const createHealthCheckRouter = (
|
|
|
156
161
|
});
|
|
157
162
|
}
|
|
158
163
|
}
|
|
164
|
+
|
|
165
|
+
// Notify subscribers (e.g., satellite-backend) that assignments changed
|
|
166
|
+
const emitHook = getEmitHook();
|
|
167
|
+
if (emitHook) {
|
|
168
|
+
await emitHook(healthCheckHooks.assignmentChanged, {
|
|
169
|
+
systemId: input.systemId,
|
|
170
|
+
configurationId: input.body.configurationId,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
159
173
|
}),
|
|
160
174
|
|
|
161
175
|
disassociateSystem: os.disassociateSystem.handler(async ({ input }) => {
|
|
162
176
|
await service.disassociateSystem(input.systemId, input.configId);
|
|
177
|
+
|
|
178
|
+
// Notify subscribers that assignments changed
|
|
179
|
+
const emitHook = getEmitHook();
|
|
180
|
+
if (emitHook) {
|
|
181
|
+
await emitHook(healthCheckHooks.assignmentChanged, {
|
|
182
|
+
systemId: input.systemId,
|
|
183
|
+
configurationId: input.configId,
|
|
184
|
+
});
|
|
185
|
+
}
|
|
163
186
|
}),
|
|
164
187
|
|
|
165
188
|
getRetentionConfig: os.getRetentionConfig.handler(async ({ input }) => {
|
|
@@ -230,6 +253,22 @@ export const createHealthCheckRouter = (
|
|
|
230
253
|
return service.getSystemHealthOverview(input.systemId);
|
|
231
254
|
},
|
|
232
255
|
),
|
|
256
|
+
|
|
257
|
+
// ========================================================================
|
|
258
|
+
// SERVICE INTERFACE (S2S — satellite-backend)
|
|
259
|
+
// ========================================================================
|
|
260
|
+
|
|
261
|
+
getAssignmentsForSatellite: os.getAssignmentsForSatellite.handler(
|
|
262
|
+
async ({ input }) => {
|
|
263
|
+
return service.getAssignmentsForSatellite(input.satelliteId);
|
|
264
|
+
},
|
|
265
|
+
),
|
|
266
|
+
|
|
267
|
+
ingestSatelliteResult: os.ingestSatelliteResult.handler(
|
|
268
|
+
async ({ input }) => {
|
|
269
|
+
await service.ingestSatelliteResult(input);
|
|
270
|
+
},
|
|
271
|
+
),
|
|
233
272
|
});
|
|
234
273
|
};
|
|
235
274
|
|
package/src/schema.ts
CHANGED
|
@@ -90,6 +90,16 @@ export const systemHealthChecks = pgTable(
|
|
|
90
90
|
* Null means use default retention settings.
|
|
91
91
|
*/
|
|
92
92
|
retentionConfig: jsonb("retention_config").$type<RetentionConfig>(),
|
|
93
|
+
/**
|
|
94
|
+
* IDs of satellites assigned to execute this health check.
|
|
95
|
+
* When set, the check runs on these satellite nodes in addition to (or instead of) the core.
|
|
96
|
+
*/
|
|
97
|
+
satelliteIds: jsonb("satellite_ids").$type<string[]>(),
|
|
98
|
+
/**
|
|
99
|
+
* Whether to also run this check locally on the core instance.
|
|
100
|
+
* Defaults to true. Only relevant when satelliteIds is set.
|
|
101
|
+
*/
|
|
102
|
+
includeLocal: boolean("include_local").default(true).notNull(),
|
|
93
103
|
createdAt: timestamp("created_at").defaultNow().notNull(),
|
|
94
104
|
updatedAt: timestamp("updated_at").defaultNow().notNull(),
|
|
95
105
|
},
|
|
@@ -108,6 +118,16 @@ export const healthCheckRuns = pgTable("health_check_runs", {
|
|
|
108
118
|
/** Execution duration in milliseconds */
|
|
109
119
|
latencyMs: integer("latency_ms"),
|
|
110
120
|
result: jsonb("result").$type<Record<string, unknown>>(),
|
|
121
|
+
/**
|
|
122
|
+
* Source identifier for result attribution.
|
|
123
|
+
* null = local core execution, UUID = satellite ID.
|
|
124
|
+
*/
|
|
125
|
+
sourceId: text("source_id"),
|
|
126
|
+
/**
|
|
127
|
+
* Human-readable source label for UI display.
|
|
128
|
+
* e.g. "Local" or "EU West (eu-west-1)".
|
|
129
|
+
*/
|
|
130
|
+
sourceLabel: text("source_label"),
|
|
111
131
|
timestamp: timestamp("timestamp").defaultNow().notNull(),
|
|
112
132
|
});
|
|
113
133
|
|
|
@@ -151,14 +171,24 @@ export const healthCheckAggregates = pgTable(
|
|
|
151
171
|
jsonb("aggregated_result").$type<Record<string, unknown>>(),
|
|
152
172
|
/** Serialized t-digest state for incremental p95 calculation */
|
|
153
173
|
tdigestState: jsonb("tdigest_state").$type<number[]>(),
|
|
174
|
+
/**
|
|
175
|
+
* Source identifier for per-region aggregation.
|
|
176
|
+
* null = local core execution, UUID = satellite ID.
|
|
177
|
+
*/
|
|
178
|
+
sourceId: text("source_id"),
|
|
179
|
+
/**
|
|
180
|
+
* Human-readable source label for UI display.
|
|
181
|
+
*/
|
|
182
|
+
sourceLabel: text("source_label"),
|
|
154
183
|
},
|
|
155
184
|
(t) => ({
|
|
156
|
-
// Unique constraint for
|
|
185
|
+
// Unique constraint includes sourceId for per-region aggregation
|
|
157
186
|
bucketUnique: uniqueIndex("health_check_aggregates_bucket_unique").on(
|
|
158
187
|
t.configurationId,
|
|
159
188
|
t.systemId,
|
|
160
189
|
t.bucketStart,
|
|
161
190
|
t.bucketSize,
|
|
191
|
+
t.sourceId,
|
|
162
192
|
),
|
|
163
193
|
}),
|
|
164
194
|
);
|
package/src/service.ts
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
StateThresholds,
|
|
6
6
|
HealthCheckStatus,
|
|
7
7
|
RetentionConfig,
|
|
8
|
+
type HealthCheckRunResult,
|
|
8
9
|
} from "@checkstack/healthcheck-common";
|
|
9
10
|
import {
|
|
10
11
|
healthCheckConfigurations,
|
|
@@ -14,10 +15,11 @@ import {
|
|
|
14
15
|
VersionedStateThresholds,
|
|
15
16
|
} from "./schema";
|
|
16
17
|
import * as schema from "./schema";
|
|
17
|
-
import { eq, and, InferSelectModel, desc, gte, lte } from "drizzle-orm";
|
|
18
|
+
import { eq, and, InferSelectModel, desc, gte, lte, isNull } from "drizzle-orm";
|
|
18
19
|
import { ORPCError } from "@orpc/server";
|
|
19
20
|
import { evaluateHealthStatus } from "./state-evaluator";
|
|
20
21
|
import { stateThresholds } from "./state-thresholds-migrations";
|
|
22
|
+
import { incrementHourlyAggregate } from "./realtime-aggregation";
|
|
21
23
|
import type {
|
|
22
24
|
HealthCheckRegistry,
|
|
23
25
|
SafeDatabase,
|
|
@@ -129,12 +131,16 @@ export class HealthCheckService {
|
|
|
129
131
|
configurationId: string;
|
|
130
132
|
enabled?: boolean;
|
|
131
133
|
stateThresholds?: StateThresholds;
|
|
134
|
+
satelliteIds?: string[];
|
|
135
|
+
includeLocal?: boolean;
|
|
132
136
|
}) {
|
|
133
137
|
const {
|
|
134
138
|
systemId,
|
|
135
139
|
configurationId,
|
|
136
140
|
enabled = true,
|
|
137
141
|
stateThresholds: stateThresholds_,
|
|
142
|
+
satelliteIds,
|
|
143
|
+
includeLocal = true,
|
|
138
144
|
} = props;
|
|
139
145
|
|
|
140
146
|
// Wrap thresholds in versioned config if provided
|
|
@@ -148,6 +154,8 @@ export class HealthCheckService {
|
|
|
148
154
|
configurationId,
|
|
149
155
|
enabled,
|
|
150
156
|
stateThresholds: versionedThresholds,
|
|
157
|
+
satelliteIds: satelliteIds ?? undefined,
|
|
158
|
+
includeLocal,
|
|
151
159
|
})
|
|
152
160
|
.onConflictDoUpdate({
|
|
153
161
|
target: [
|
|
@@ -157,6 +165,8 @@ export class HealthCheckService {
|
|
|
157
165
|
set: {
|
|
158
166
|
enabled,
|
|
159
167
|
stateThresholds: versionedThresholds,
|
|
168
|
+
satelliteIds: satelliteIds ?? undefined,
|
|
169
|
+
includeLocal,
|
|
160
170
|
updatedAt: new Date(),
|
|
161
171
|
},
|
|
162
172
|
});
|
|
@@ -270,6 +280,8 @@ export class HealthCheckService {
|
|
|
270
280
|
configName: healthCheckConfigurations.name,
|
|
271
281
|
enabled: systemHealthChecks.enabled,
|
|
272
282
|
stateThresholds: systemHealthChecks.stateThresholds,
|
|
283
|
+
satelliteIds: systemHealthChecks.satelliteIds,
|
|
284
|
+
includeLocal: systemHealthChecks.includeLocal,
|
|
273
285
|
})
|
|
274
286
|
.from(systemHealthChecks)
|
|
275
287
|
.innerJoin(
|
|
@@ -290,6 +302,8 @@ export class HealthCheckService {
|
|
|
290
302
|
configurationName: row.configName,
|
|
291
303
|
enabled: row.enabled,
|
|
292
304
|
stateThresholds: thresholds,
|
|
305
|
+
satelliteIds: row.satelliteIds ?? undefined,
|
|
306
|
+
includeLocal: row.includeLocal,
|
|
293
307
|
});
|
|
294
308
|
}
|
|
295
309
|
return results;
|
|
@@ -474,6 +488,7 @@ export class HealthCheckService {
|
|
|
474
488
|
configurationId?: string;
|
|
475
489
|
startDate?: Date;
|
|
476
490
|
endDate?: Date;
|
|
491
|
+
sourceFilter?: string;
|
|
477
492
|
limit?: number;
|
|
478
493
|
offset?: number;
|
|
479
494
|
sortOrder: "asc" | "desc";
|
|
@@ -483,6 +498,7 @@ export class HealthCheckService {
|
|
|
483
498
|
configurationId,
|
|
484
499
|
startDate,
|
|
485
500
|
endDate,
|
|
501
|
+
sourceFilter,
|
|
486
502
|
limit = 10,
|
|
487
503
|
offset = 0,
|
|
488
504
|
sortOrder,
|
|
@@ -495,6 +511,13 @@ export class HealthCheckService {
|
|
|
495
511
|
if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
|
|
496
512
|
if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
|
|
497
513
|
|
|
514
|
+
// Source filtering: "local" = no sourceId, UUID = specific satellite
|
|
515
|
+
if (sourceFilter === "local") {
|
|
516
|
+
conditions.push(isNull(healthCheckRuns.sourceId));
|
|
517
|
+
} else if (sourceFilter) {
|
|
518
|
+
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
519
|
+
}
|
|
520
|
+
|
|
498
521
|
// Build where clause
|
|
499
522
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
500
523
|
|
|
@@ -522,6 +545,8 @@ export class HealthCheckService {
|
|
|
522
545
|
status: run.status,
|
|
523
546
|
timestamp: run.timestamp,
|
|
524
547
|
latencyMs: run.latencyMs ?? undefined,
|
|
548
|
+
sourceId: run.sourceId ?? undefined,
|
|
549
|
+
sourceLabel: run.sourceLabel ?? undefined,
|
|
525
550
|
})),
|
|
526
551
|
total,
|
|
527
552
|
};
|
|
@@ -537,6 +562,7 @@ export class HealthCheckService {
|
|
|
537
562
|
configurationId?: string;
|
|
538
563
|
startDate?: Date;
|
|
539
564
|
endDate?: Date;
|
|
565
|
+
sourceFilter?: string;
|
|
540
566
|
limit?: number;
|
|
541
567
|
offset?: number;
|
|
542
568
|
sortOrder: "asc" | "desc";
|
|
@@ -546,6 +572,7 @@ export class HealthCheckService {
|
|
|
546
572
|
configurationId,
|
|
547
573
|
startDate,
|
|
548
574
|
endDate,
|
|
575
|
+
sourceFilter,
|
|
549
576
|
limit = 10,
|
|
550
577
|
offset = 0,
|
|
551
578
|
sortOrder,
|
|
@@ -558,6 +585,13 @@ export class HealthCheckService {
|
|
|
558
585
|
if (startDate) conditions.push(gte(healthCheckRuns.timestamp, startDate));
|
|
559
586
|
if (endDate) conditions.push(lte(healthCheckRuns.timestamp, endDate));
|
|
560
587
|
|
|
588
|
+
// Source filtering: "local" = no sourceId, UUID = specific satellite
|
|
589
|
+
if (sourceFilter === "local") {
|
|
590
|
+
conditions.push(isNull(healthCheckRuns.sourceId));
|
|
591
|
+
} else if (sourceFilter) {
|
|
592
|
+
conditions.push(eq(healthCheckRuns.sourceId, sourceFilter));
|
|
593
|
+
}
|
|
594
|
+
|
|
561
595
|
const whereClause = conditions.length > 0 ? and(...conditions) : undefined;
|
|
562
596
|
const total = await this.db.$count(healthCheckRuns, whereClause);
|
|
563
597
|
|
|
@@ -582,6 +616,8 @@ export class HealthCheckService {
|
|
|
582
616
|
result: run.result ?? {},
|
|
583
617
|
timestamp: run.timestamp,
|
|
584
618
|
latencyMs: run.latencyMs ?? undefined,
|
|
619
|
+
sourceId: run.sourceId ?? undefined,
|
|
620
|
+
sourceLabel: run.sourceLabel ?? undefined,
|
|
585
621
|
})),
|
|
586
622
|
total,
|
|
587
623
|
};
|
|
@@ -610,6 +646,8 @@ export class HealthCheckService {
|
|
|
610
646
|
result: r.result ?? {},
|
|
611
647
|
timestamp: r.timestamp,
|
|
612
648
|
latencyMs: r.latencyMs ?? undefined,
|
|
649
|
+
sourceId: r.sourceId ?? undefined,
|
|
650
|
+
sourceLabel: r.sourceLabel ?? undefined,
|
|
613
651
|
};
|
|
614
652
|
}
|
|
615
653
|
|
|
@@ -624,6 +662,7 @@ export class HealthCheckService {
|
|
|
624
662
|
configurationId: string;
|
|
625
663
|
startDate: Date;
|
|
626
664
|
endDate: Date;
|
|
665
|
+
sourceFilter?: string;
|
|
627
666
|
targetPoints?: number;
|
|
628
667
|
},
|
|
629
668
|
options: { includeAggregatedResult: boolean },
|
|
@@ -633,6 +672,7 @@ export class HealthCheckService {
|
|
|
633
672
|
configurationId,
|
|
634
673
|
startDate,
|
|
635
674
|
endDate,
|
|
675
|
+
sourceFilter,
|
|
636
676
|
targetPoints = 500,
|
|
637
677
|
} = props;
|
|
638
678
|
|
|
@@ -655,48 +695,66 @@ export class HealthCheckService {
|
|
|
655
695
|
? this.registry.getStrategy(config.strategyId)
|
|
656
696
|
: undefined;
|
|
657
697
|
|
|
698
|
+
// Build source condition for raw runs
|
|
699
|
+
const rawConditions = [
|
|
700
|
+
eq(healthCheckRuns.systemId, systemId),
|
|
701
|
+
eq(healthCheckRuns.configurationId, configurationId),
|
|
702
|
+
gte(healthCheckRuns.timestamp, startDate),
|
|
703
|
+
lte(healthCheckRuns.timestamp, endDate),
|
|
704
|
+
...(sourceFilter === "local"
|
|
705
|
+
? [isNull(healthCheckRuns.sourceId)]
|
|
706
|
+
: sourceFilter
|
|
707
|
+
? [eq(healthCheckRuns.sourceId, sourceFilter)]
|
|
708
|
+
: []),
|
|
709
|
+
];
|
|
710
|
+
|
|
711
|
+
// Build source condition for hourly aggregates
|
|
712
|
+
const hourlyConditions = [
|
|
713
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
714
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
715
|
+
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
716
|
+
gte(healthCheckAggregates.bucketStart, startDate),
|
|
717
|
+
lte(healthCheckAggregates.bucketStart, endDate),
|
|
718
|
+
...(sourceFilter === "local"
|
|
719
|
+
? [isNull(healthCheckAggregates.sourceId)]
|
|
720
|
+
: sourceFilter
|
|
721
|
+
? [eq(healthCheckAggregates.sourceId, sourceFilter)]
|
|
722
|
+
: []),
|
|
723
|
+
];
|
|
724
|
+
|
|
725
|
+
// Build source condition for daily aggregates
|
|
726
|
+
const dailyConditions = [
|
|
727
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
728
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
729
|
+
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
730
|
+
gte(healthCheckAggregates.bucketStart, startDate),
|
|
731
|
+
lte(healthCheckAggregates.bucketStart, endDate),
|
|
732
|
+
...(sourceFilter === "local"
|
|
733
|
+
? [isNull(healthCheckAggregates.sourceId)]
|
|
734
|
+
: sourceFilter
|
|
735
|
+
? [eq(healthCheckAggregates.sourceId, sourceFilter)]
|
|
736
|
+
: []),
|
|
737
|
+
];
|
|
738
|
+
|
|
658
739
|
// Query all three tiers in parallel
|
|
659
740
|
const [rawRuns, hourlyAggregates, dailyAggregates] = await Promise.all([
|
|
660
741
|
// Raw runs
|
|
661
742
|
this.db
|
|
662
743
|
.select()
|
|
663
744
|
.from(healthCheckRuns)
|
|
664
|
-
.where(
|
|
665
|
-
and(
|
|
666
|
-
eq(healthCheckRuns.systemId, systemId),
|
|
667
|
-
eq(healthCheckRuns.configurationId, configurationId),
|
|
668
|
-
gte(healthCheckRuns.timestamp, startDate),
|
|
669
|
-
lte(healthCheckRuns.timestamp, endDate),
|
|
670
|
-
),
|
|
671
|
-
)
|
|
745
|
+
.where(and(...rawConditions))
|
|
672
746
|
.orderBy(healthCheckRuns.timestamp),
|
|
673
747
|
// Hourly aggregates
|
|
674
748
|
this.db
|
|
675
749
|
.select()
|
|
676
750
|
.from(healthCheckAggregates)
|
|
677
|
-
.where(
|
|
678
|
-
and(
|
|
679
|
-
eq(healthCheckAggregates.systemId, systemId),
|
|
680
|
-
eq(healthCheckAggregates.configurationId, configurationId),
|
|
681
|
-
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
682
|
-
gte(healthCheckAggregates.bucketStart, startDate),
|
|
683
|
-
lte(healthCheckAggregates.bucketStart, endDate),
|
|
684
|
-
),
|
|
685
|
-
)
|
|
751
|
+
.where(and(...hourlyConditions))
|
|
686
752
|
.orderBy(healthCheckAggregates.bucketStart),
|
|
687
753
|
// Daily aggregates
|
|
688
754
|
this.db
|
|
689
755
|
.select()
|
|
690
756
|
.from(healthCheckAggregates)
|
|
691
|
-
.where(
|
|
692
|
-
and(
|
|
693
|
-
eq(healthCheckAggregates.systemId, systemId),
|
|
694
|
-
eq(healthCheckAggregates.configurationId, configurationId),
|
|
695
|
-
eq(healthCheckAggregates.bucketSize, "daily"),
|
|
696
|
-
gte(healthCheckAggregates.bucketStart, startDate),
|
|
697
|
-
lte(healthCheckAggregates.bucketStart, endDate),
|
|
698
|
-
),
|
|
699
|
-
)
|
|
757
|
+
.where(and(...dailyConditions))
|
|
700
758
|
.orderBy(healthCheckAggregates.bucketStart),
|
|
701
759
|
]);
|
|
702
760
|
|
|
@@ -953,4 +1011,134 @@ export class HealthCheckService {
|
|
|
953
1011
|
updatedAt: row.updatedAt,
|
|
954
1012
|
};
|
|
955
1013
|
}
|
|
1014
|
+
|
|
1015
|
+
/**
 * Remove a satellite ID from every systemHealthChecks.satelliteIds array.
 * Called when a satellite is deleted via the satellite.removed hook.
 *
 * Associations that do not reference the satellite are left untouched.
 * When the removed satellite was the last entry, the column is reset to
 * NULL so the association no longer counts as having satellites assigned.
 *
 * @param satelliteId - ID of the satellite being removed
 */
async scrubSatelliteFromAssociations(satelliteId: string): Promise<void> {
  // satelliteIds is stored as a JSON column, so rows are loaded and filtered in memory
  const associations = await this.db
    .select({
      systemId: systemHealthChecks.systemId,
      configurationId: systemHealthChecks.configurationId,
      satelliteIds: systemHealthChecks.satelliteIds,
    })
    .from(systemHealthChecks);

  for (const assoc of associations) {
    if (!assoc.satelliteIds?.includes(satelliteId)) continue;

    const remaining = assoc.satelliteIds.filter((id) => id !== satelliteId);
    await this.db
      .update(systemHealthChecks)
      .set({
        // Must be null rather than undefined: undefined keys are skipped by
        // the query builder, which would leave the stale satellite ID in place.
        satelliteIds: remaining.length > 0 ? remaining : null,
        updatedAt: new Date(),
      })
      .where(
        and(
          eq(systemHealthChecks.systemId, assoc.systemId),
          eq(systemHealthChecks.configurationId, assoc.configurationId),
        ),
      );
  }
}
|
|
1048
|
+
|
|
1049
|
+
/**
 * List the health checks a given satellite is expected to execute.
 *
 * Only enabled associations whose satelliteIds contain the satellite are
 * considered, and associations whose configuration is missing or paused
 * are dropped. Each entry carries the configuration payload the satellite
 * needs to run the check.
 *
 * @param satelliteId - ID of the satellite requesting its assignments
 * @returns One assignment per matching system/configuration pair
 */
async getAssignmentsForSatellite(satelliteId: string) {
  // Load every association; the satellite filter is applied in memory below
  const allAssociations = await this.db
    .select({
      systemId: systemHealthChecks.systemId,
      configurationId: systemHealthChecks.configurationId,
      satelliteIds: systemHealthChecks.satelliteIds,
      enabled: systemHealthChecks.enabled,
    })
    .from(systemHealthChecks);

  const assigned = allAssociations.filter(
    (row) => row.enabled && row.satelliteIds?.includes(satelliteId),
  );

  if (assigned.length === 0) return [];

  const assignments = [];
  for (const { systemId, configurationId } of assigned) {
    // Resolve the configuration backing this association
    const configRows = await this.db
      .select()
      .from(healthCheckConfigurations)
      .where(eq(healthCheckConfigurations.id, configurationId));
    const config = configRows[0];

    // Skip associations whose configuration is gone or paused
    if (!config || config.paused) continue;

    assignments.push({
      configId: config.id,
      systemId,
      strategyId: config.strategyId,
      config: config.config,
      collectors: config.collectors ?? undefined,
      intervalSeconds: config.intervalSeconds,
    });
  }

  return assignments;
}
|
|
1093
|
+
|
|
1094
|
+
/**
 * Ingest a health check result reported by a satellite.
 *
 * Stores the run with source attribution (sourceId + sourceLabel) and then
 * triggers incremental hourly aggregation for the same source, so the
 * satellite's results are aggregated separately from local-core results.
 *
 * The satellite-reported execution time is used both for the stored run's
 * timestamp and for selecting the hourly bucket, keeping the two consistent
 * even when a result is delivered some time after it was produced.
 *
 * @param props.configId - Health check configuration the result belongs to
 * @param props.systemId - System the check was executed against
 * @param props.status - Resulting health status
 * @param props.latencyMs - Execution duration in milliseconds, if measured
 * @param props.result - Strategy/collector result payload, if any
 * @param props.executedAt - Execution time as a date string parseable by `Date`
 * @param props.sourceId - ID of the reporting satellite
 * @param props.sourceLabel - Human-readable label of the reporting satellite
 */
async ingestSatelliteResult(props: {
  configId: string;
  systemId: string;
  status: HealthCheckStatus;
  latencyMs?: number;
  result?: HealthCheckRunResult;
  executedAt: string;
  sourceId: string;
  sourceLabel: string;
}) {
  const {
    configId,
    systemId,
    status,
    latencyMs,
    result,
    executedAt,
    sourceId,
    sourceLabel,
  } = props;

  const resultRecord = result ? ({ ...result } as Record<string, unknown>) : {};
  // Parse once so the run row and the aggregate bucket share the same instant
  const executedTimestamp = new Date(executedAt);

  await this.db.insert(healthCheckRuns).values({
    configurationId: configId,
    systemId,
    status,
    latencyMs,
    result: resultRecord,
    sourceId,
    sourceLabel,
    // Record when the check actually ran instead of the column default
    timestamp: executedTimestamp,
  });

  // Trigger incremental hourly aggregation — same as local executor
  await incrementHourlyAggregate({
    db: this.db,
    systemId,
    configurationId: configId,
    status,
    latencyMs,
    runTimestamp: executedTimestamp,
    result: resultRecord,
    collectorRegistry: this.collectorRegistry,
    // sourceId selects the per-source bucket; omitting it targets the
    // local-core bucket (source_id IS NULL)
    sourceId,
    sourceLabel,
  });
}
|
|
956
1144
|
}
|