@checkstack/healthcheck-backend 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +223 -0
- package/drizzle/0018_abnormal_preak.sql +10 -0
- package/drizzle/meta/0018_snapshot.json +600 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +26 -21
- package/src/ai/assertion-validation.test.ts +117 -0
- package/src/ai/assertion-validation.ts +147 -0
- package/src/ai/healthcheck-capabilities.test.ts +158 -0
- package/src/ai/healthcheck-capabilities.ts +217 -0
- package/src/ai/healthcheck-delete.test.ts +81 -0
- package/src/ai/healthcheck-delete.ts +81 -0
- package/src/ai/healthcheck-projection.test.ts +36 -0
- package/src/ai/healthcheck-propose.test.ts +268 -0
- package/src/ai/healthcheck-propose.ts +290 -0
- package/src/ai/healthcheck-script-tools.test.ts +93 -0
- package/src/ai/healthcheck-script-tools.ts +179 -0
- package/src/ai/healthcheck-update.test.ts +123 -0
- package/src/ai/healthcheck-update.ts +123 -0
- package/src/ai/notify-subscribers.test.ts +109 -0
- package/src/ai/notify-subscribers.ts +176 -0
- package/src/ai/register-ai-tools.test.ts +41 -0
- package/src/ai/register-ai-tools.ts +53 -0
- package/src/ai/shell-env-table.test.ts +47 -0
- package/src/automations.test.ts +2 -1
- package/src/automations.ts +9 -1
- package/src/collector-script-test.test.ts +53 -1
- package/src/collector-script-test.ts +59 -7
- package/src/effective-environments.test.ts +93 -0
- package/src/effective-environments.ts +64 -0
- package/src/health-entity-id.ts +57 -0
- package/src/health-entity.test.ts +384 -6
- package/src/health-entity.ts +93 -35
- package/src/health-state.ts +41 -4
- package/src/healthcheck-gitops-kinds.test.ts +95 -0
- package/src/healthcheck-gitops-kinds.ts +56 -13
- package/src/index.ts +30 -0
- package/src/migration-chain-contract.test.ts +57 -0
- package/src/queue-executor.test.ts +801 -0
- package/src/queue-executor.ts +336 -52
- package/src/realtime-aggregation.test.ts +30 -0
- package/src/realtime-aggregation.ts +16 -0
- package/src/retention-job.ts +167 -93
- package/src/retention-rollup.test.ts +118 -0
- package/src/router.test.ts +120 -1
- package/src/router.ts +20 -0
- package/src/schema.ts +44 -6
- package/src/service.ts +199 -43
- package/src/state-transitions.test.ts +104 -0
- package/src/state-transitions.ts +39 -1
- package/src/validate-configuration.test.ts +205 -0
- package/src/validate-configuration.ts +159 -0
- package/tsconfig.json +9 -0
package/src/retention-job.ts
CHANGED
|
@@ -206,64 +206,103 @@ interface RollupParams {
|
|
|
206
206
|
}
|
|
207
207
|
|
|
208
208
|
/**
|
|
209
|
-
*
|
|
209
|
+
* The ON CONFLICT target for the daily-aggregate upsert. It MUST list exactly
|
|
210
|
+
* the columns of the `health_check_aggregates_bucket_unique` constraint
|
|
211
|
+
* (configurationId, systemId, environmentId, bucketStart, bucketSize, sourceId)
|
|
212
|
+
* - Postgres rejects an ON CONFLICT whose target does not match a real unique
|
|
213
|
+
* constraint with SQLSTATE 42P10. `retention-rollup.test.ts` asserts this stays
|
|
214
|
+
* in lock-step with the schema so the rollup can never throw 42P10 again.
|
|
210
215
|
*/
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
227
|
-
lt(healthCheckAggregates.bucketStart, cutoffDate),
|
|
228
|
-
),
|
|
229
|
-
);
|
|
230
|
-
|
|
231
|
-
if (oldHourly.length === 0) return;
|
|
216
|
+
export const DAILY_AGGREGATE_CONFLICT_TARGET = [
|
|
217
|
+
healthCheckAggregates.configurationId,
|
|
218
|
+
healthCheckAggregates.systemId,
|
|
219
|
+
healthCheckAggregates.environmentId,
|
|
220
|
+
healthCheckAggregates.bucketStart,
|
|
221
|
+
healthCheckAggregates.bucketSize,
|
|
222
|
+
healthCheckAggregates.sourceId,
|
|
223
|
+
] as const;
|
|
224
|
+
|
|
225
|
+
/** Truncate a timestamp to the start of its (local) day. */
|
|
226
|
+
function dayStartOf(date: Date): Date {
|
|
227
|
+
const day = new Date(date);
|
|
228
|
+
day.setHours(0, 0, 0, 0);
|
|
229
|
+
return day;
|
|
230
|
+
}
|
|
232
231
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
232
|
+
/** The subset of an hourly aggregate row the rollup math needs. */
|
|
233
|
+
export interface HourlyAggregateRow {
|
|
234
|
+
bucketStart: Date;
|
|
235
|
+
environmentId: string | null;
|
|
236
|
+
sourceId: string | null;
|
|
237
|
+
sourceLabel: string | null;
|
|
238
|
+
runCount: number;
|
|
239
|
+
healthyCount: number;
|
|
240
|
+
degradedCount: number;
|
|
241
|
+
unhealthyCount: number;
|
|
242
|
+
latencySumMs: number | null;
|
|
243
|
+
avgLatencyMs: number | null;
|
|
244
|
+
minLatencyMs: number | null;
|
|
245
|
+
maxLatencyMs: number | null;
|
|
246
|
+
p95LatencyMs: number | null;
|
|
247
|
+
}
|
|
241
248
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
249
|
+
/** A computed daily aggregate ready to upsert. */
|
|
250
|
+
export interface DailyAggregateValues {
|
|
251
|
+
bucketStart: Date;
|
|
252
|
+
environmentId: string | null;
|
|
253
|
+
sourceId: string | null;
|
|
254
|
+
sourceLabel: string | null;
|
|
255
|
+
runCount: number;
|
|
256
|
+
healthyCount: number;
|
|
257
|
+
degradedCount: number;
|
|
258
|
+
unhealthyCount: number;
|
|
259
|
+
latencySumMs: number | undefined;
|
|
260
|
+
avgLatencyMs: number | undefined;
|
|
261
|
+
minLatencyMs: number | undefined;
|
|
262
|
+
maxLatencyMs: number | undefined;
|
|
263
|
+
p95LatencyMs: number | undefined;
|
|
264
|
+
}
|
|
246
265
|
|
|
247
|
-
|
|
248
|
-
|
|
266
|
+
/**
|
|
267
|
+
* Fold hourly aggregates into daily ones. CRITICAL: rows are grouped by
|
|
268
|
+
* (day, environmentId, sourceId) - the same dimensions as the unique key - so
|
|
269
|
+
* distinct per-environment / per-source series stay separate instead of being
|
|
270
|
+
* collapsed into one `environmentId=null` daily row. Counts sum; latency sum
|
|
271
|
+
* folds (with avg*count fallback); min/max/p95 fold across the group.
|
|
272
|
+
*/
|
|
273
|
+
export function buildDailyAggregates(
|
|
274
|
+
oldHourly: HourlyAggregateRow[],
|
|
275
|
+
): DailyAggregateValues[] {
|
|
276
|
+
const groups = new Map<string, HourlyAggregateRow[]>();
|
|
277
|
+
|
|
278
|
+
for (const row of oldHourly) {
|
|
279
|
+
const key = JSON.stringify([
|
|
280
|
+
dayStartOf(row.bucketStart).toISOString(),
|
|
281
|
+
row.environmentId,
|
|
282
|
+
row.sourceId,
|
|
283
|
+
]);
|
|
284
|
+
const existing = groups.get(key);
|
|
285
|
+
if (existing) {
|
|
286
|
+
existing.push(row);
|
|
287
|
+
} else {
|
|
288
|
+
groups.set(key, [row]);
|
|
249
289
|
}
|
|
250
|
-
dailyBuckets.get(key)!.aggregates.push(hourly);
|
|
251
290
|
}
|
|
252
291
|
|
|
253
|
-
|
|
254
|
-
for (const
|
|
292
|
+
const result: DailyAggregateValues[] = [];
|
|
293
|
+
for (const rows of groups.values()) {
|
|
255
294
|
let runCount = 0;
|
|
256
295
|
let healthyCount = 0;
|
|
257
296
|
let degradedCount = 0;
|
|
258
297
|
let unhealthyCount = 0;
|
|
259
298
|
let latencySumMs = 0;
|
|
260
299
|
|
|
261
|
-
for (const a of
|
|
300
|
+
for (const a of rows) {
|
|
262
301
|
runCount += a.runCount;
|
|
263
302
|
healthyCount += a.healthyCount;
|
|
264
303
|
degradedCount += a.degradedCount;
|
|
265
304
|
unhealthyCount += a.unhealthyCount;
|
|
266
|
-
// Use latencySumMs if available, fallback to avg*count approximation
|
|
305
|
+
// Use latencySumMs if available, fallback to avg*count approximation.
|
|
267
306
|
if (a.latencySumMs !== null) {
|
|
268
307
|
latencySumMs += a.latencySumMs;
|
|
269
308
|
} else if (a.avgLatencyMs !== null) {
|
|
@@ -271,87 +310,122 @@ async function rollupHourlyAggregates(params: RollupParams) {
|
|
|
271
310
|
}
|
|
272
311
|
}
|
|
273
312
|
|
|
274
|
-
const
|
|
275
|
-
runCount > 0 ? Math.round(latencySumMs / runCount) : undefined;
|
|
276
|
-
|
|
277
|
-
// Min/max across all hourly buckets
|
|
278
|
-
const minValues = bucket.aggregates
|
|
313
|
+
const minValues = rows
|
|
279
314
|
.map((a) => a.minLatencyMs)
|
|
280
315
|
.filter((v): v is number => v !== null);
|
|
281
|
-
const maxValues =
|
|
316
|
+
const maxValues = rows
|
|
282
317
|
.map((a) => a.maxLatencyMs)
|
|
283
318
|
.filter((v): v is number => v !== null);
|
|
284
|
-
const p95Values =
|
|
319
|
+
const p95Values = rows
|
|
285
320
|
.map((a) => a.p95LatencyMs)
|
|
286
321
|
.filter((v): v is number => v !== null);
|
|
287
|
-
const minLatencyMs =
|
|
288
|
-
minValues.length > 0 ? Math.min(...minValues) : undefined;
|
|
289
|
-
const maxLatencyMs =
|
|
290
|
-
maxValues.length > 0 ? Math.max(...maxValues) : undefined;
|
|
291
|
-
// Use max of hourly p95s as upper bound approximation
|
|
292
|
-
const p95LatencyMs =
|
|
293
|
-
p95Values.length > 0 ? Math.max(...p95Values) : undefined;
|
|
294
322
|
|
|
323
|
+
result.push({
|
|
324
|
+
bucketStart: dayStartOf(rows[0].bucketStart),
|
|
325
|
+
environmentId: rows[0].environmentId,
|
|
326
|
+
sourceId: rows[0].sourceId,
|
|
327
|
+
sourceLabel: rows[0].sourceLabel,
|
|
328
|
+
runCount,
|
|
329
|
+
healthyCount,
|
|
330
|
+
degradedCount,
|
|
331
|
+
unhealthyCount,
|
|
332
|
+
latencySumMs: latencySumMs > 0 ? latencySumMs : undefined,
|
|
333
|
+
avgLatencyMs:
|
|
334
|
+
runCount > 0 ? Math.round(latencySumMs / runCount) : undefined,
|
|
335
|
+
minLatencyMs: minValues.length > 0 ? Math.min(...minValues) : undefined,
|
|
336
|
+
maxLatencyMs: maxValues.length > 0 ? Math.max(...maxValues) : undefined,
|
|
337
|
+
// Use max of hourly p95s as an upper-bound approximation.
|
|
338
|
+
p95LatencyMs: p95Values.length > 0 ? Math.max(...p95Values) : undefined,
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
return result;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
/**
|
|
346
|
+
* Rolls up hourly aggregates older than the hourly retention period into daily buckets, then deletes the rolled-up hourly rows.
|
|
347
|
+
*/
|
|
348
|
+
async function rollupHourlyAggregates(params: RollupParams) {
|
|
349
|
+
const { db, systemId, configurationId, hourlyRetentionDays } = params;
|
|
350
|
+
|
|
351
|
+
const cutoffDate = new Date();
|
|
352
|
+
cutoffDate.setDate(cutoffDate.getDate() - hourlyRetentionDays);
|
|
353
|
+
cutoffDate.setHours(0, 0, 0, 0); // Round to day
|
|
354
|
+
|
|
355
|
+
// Get old hourly aggregates
|
|
356
|
+
const oldHourly = await db
|
|
357
|
+
.select()
|
|
358
|
+
.from(healthCheckAggregates)
|
|
359
|
+
.where(
|
|
360
|
+
and(
|
|
361
|
+
eq(healthCheckAggregates.systemId, systemId),
|
|
362
|
+
eq(healthCheckAggregates.configurationId, configurationId),
|
|
363
|
+
eq(healthCheckAggregates.bucketSize, "hourly"),
|
|
364
|
+
lt(healthCheckAggregates.bucketStart, cutoffDate),
|
|
365
|
+
),
|
|
366
|
+
);
|
|
367
|
+
|
|
368
|
+
if (oldHourly.length === 0) return;
|
|
369
|
+
|
|
370
|
+
// Fold into daily aggregates, preserving (day, environmentId, sourceId) series.
|
|
371
|
+
for (const daily of buildDailyAggregates(oldHourly)) {
|
|
372
|
+
const newLatencySum = daily.latencySumMs;
|
|
295
373
|
// Upsert the daily aggregate. A row may already exist for this
|
|
296
|
-
// (configurationId, systemId, day, daily, sourceId
|
|
297
|
-
// prior rollup ran and then late-arriving hourly buckets (e.g. from
|
|
298
|
-
//
|
|
299
|
-
//
|
|
300
|
-
const newLatencySum = latencySumMs > 0 ? latencySumMs : undefined;
|
|
374
|
+
// (configurationId, systemId, environmentId, day, daily, sourceId) tuple if
|
|
375
|
+
// a prior rollup ran and then late-arriving hourly buckets (e.g. from a
|
|
376
|
+
// satellite that was offline) were rolled up afterwards. Merge in that case
|
|
377
|
+
// rather than crashing — sums add, min/max/p95 fold.
|
|
301
378
|
await db
|
|
302
379
|
.insert(healthCheckAggregates)
|
|
303
380
|
.values({
|
|
304
381
|
configurationId,
|
|
305
382
|
systemId,
|
|
306
|
-
|
|
383
|
+
environmentId: daily.environmentId,
|
|
384
|
+
sourceId: daily.sourceId,
|
|
385
|
+
sourceLabel: daily.sourceLabel,
|
|
386
|
+
bucketStart: daily.bucketStart,
|
|
307
387
|
bucketSize: "daily",
|
|
308
|
-
runCount,
|
|
309
|
-
healthyCount,
|
|
310
|
-
degradedCount,
|
|
311
|
-
unhealthyCount,
|
|
388
|
+
runCount: daily.runCount,
|
|
389
|
+
healthyCount: daily.healthyCount,
|
|
390
|
+
degradedCount: daily.degradedCount,
|
|
391
|
+
unhealthyCount: daily.unhealthyCount,
|
|
312
392
|
latencySumMs: newLatencySum,
|
|
313
|
-
avgLatencyMs,
|
|
314
|
-
minLatencyMs,
|
|
315
|
-
maxLatencyMs,
|
|
316
|
-
p95LatencyMs,
|
|
393
|
+
avgLatencyMs: daily.avgLatencyMs,
|
|
394
|
+
minLatencyMs: daily.minLatencyMs,
|
|
395
|
+
maxLatencyMs: daily.maxLatencyMs,
|
|
396
|
+
p95LatencyMs: daily.p95LatencyMs,
|
|
317
397
|
aggregatedResult: undefined, // Cannot combine result across hours
|
|
318
398
|
})
|
|
319
399
|
.onConflictDoUpdate({
|
|
320
|
-
target: [
|
|
321
|
-
healthCheckAggregates.configurationId,
|
|
322
|
-
healthCheckAggregates.systemId,
|
|
323
|
-
healthCheckAggregates.bucketStart,
|
|
324
|
-
healthCheckAggregates.bucketSize,
|
|
325
|
-
healthCheckAggregates.sourceId,
|
|
326
|
-
],
|
|
400
|
+
target: [...DAILY_AGGREGATE_CONFLICT_TARGET],
|
|
327
401
|
set: {
|
|
328
|
-
runCount: sql`${healthCheckAggregates.runCount} + ${runCount}`,
|
|
329
|
-
healthyCount: sql`${healthCheckAggregates.healthyCount} + ${healthyCount}`,
|
|
330
|
-
degradedCount: sql`${healthCheckAggregates.degradedCount} + ${degradedCount}`,
|
|
331
|
-
unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${unhealthyCount}`,
|
|
402
|
+
runCount: sql`${healthCheckAggregates.runCount} + ${daily.runCount}`,
|
|
403
|
+
healthyCount: sql`${healthCheckAggregates.healthyCount} + ${daily.healthyCount}`,
|
|
404
|
+
degradedCount: sql`${healthCheckAggregates.degradedCount} + ${daily.degradedCount}`,
|
|
405
|
+
unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${daily.unhealthyCount}`,
|
|
332
406
|
latencySumMs: sql`COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}`,
|
|
333
|
-
avgLatencyMs: sql`CASE WHEN (${healthCheckAggregates.runCount} + ${runCount}) > 0 THEN (COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}) / (${healthCheckAggregates.runCount} + ${runCount}) ELSE ${healthCheckAggregates.avgLatencyMs} END`,
|
|
407
|
+
avgLatencyMs: sql`CASE WHEN (${healthCheckAggregates.runCount} + ${daily.runCount}) > 0 THEN (COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}) / (${healthCheckAggregates.runCount} + ${daily.runCount}) ELSE ${healthCheckAggregates.avgLatencyMs} END`,
|
|
334
408
|
minLatencyMs:
|
|
335
|
-
minLatencyMs === undefined
|
|
409
|
+
daily.minLatencyMs === undefined
|
|
336
410
|
? sql`${healthCheckAggregates.minLatencyMs}`
|
|
337
|
-
: sql`LEAST(COALESCE(${healthCheckAggregates.minLatencyMs}, ${minLatencyMs}), ${minLatencyMs})`,
|
|
411
|
+
: sql`LEAST(COALESCE(${healthCheckAggregates.minLatencyMs}, ${daily.minLatencyMs}), ${daily.minLatencyMs})`,
|
|
338
412
|
maxLatencyMs:
|
|
339
|
-
maxLatencyMs === undefined
|
|
413
|
+
daily.maxLatencyMs === undefined
|
|
340
414
|
? sql`${healthCheckAggregates.maxLatencyMs}`
|
|
341
|
-
: sql`GREATEST(COALESCE(${healthCheckAggregates.maxLatencyMs}, ${maxLatencyMs}), ${maxLatencyMs})`,
|
|
415
|
+
: sql`GREATEST(COALESCE(${healthCheckAggregates.maxLatencyMs}, ${daily.maxLatencyMs}), ${daily.maxLatencyMs})`,
|
|
342
416
|
p95LatencyMs:
|
|
343
|
-
p95LatencyMs === undefined
|
|
417
|
+
daily.p95LatencyMs === undefined
|
|
344
418
|
? sql`${healthCheckAggregates.p95LatencyMs}`
|
|
345
|
-
: sql`GREATEST(COALESCE(${healthCheckAggregates.p95LatencyMs}, ${p95LatencyMs}), ${p95LatencyMs})`,
|
|
419
|
+
: sql`GREATEST(COALESCE(${healthCheckAggregates.p95LatencyMs}, ${daily.p95LatencyMs}), ${daily.p95LatencyMs})`,
|
|
346
420
|
},
|
|
347
421
|
});
|
|
422
|
+
}
|
|
348
423
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
}
|
|
424
|
+
// Delete the processed hourly aggregates (all were folded into daily rows).
|
|
425
|
+
for (const hourly of oldHourly) {
|
|
426
|
+
await db
|
|
427
|
+
.delete(healthCheckAggregates)
|
|
428
|
+
.where(eq(healthCheckAggregates.id, hourly.id));
|
|
355
429
|
}
|
|
356
430
|
}
|
|
357
431
|
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { getTableConfig } from "drizzle-orm/pg-core";
|
|
3
|
+
import { healthCheckAggregates } from "./schema";
|
|
4
|
+
import {
|
|
5
|
+
buildDailyAggregates,
|
|
6
|
+
DAILY_AGGREGATE_CONFLICT_TARGET,
|
|
7
|
+
type HourlyAggregateRow,
|
|
8
|
+
} from "./retention-job";
|
|
9
|
+
|
|
10
|
+
function hourly(overrides: Partial<HourlyAggregateRow>): HourlyAggregateRow {
|
|
11
|
+
return {
|
|
12
|
+
bucketStart: new Date("2026-01-01T03:00:00.000Z"),
|
|
13
|
+
environmentId: null,
|
|
14
|
+
sourceId: null,
|
|
15
|
+
sourceLabel: null,
|
|
16
|
+
runCount: 1,
|
|
17
|
+
healthyCount: 1,
|
|
18
|
+
degradedCount: 0,
|
|
19
|
+
unhealthyCount: 0,
|
|
20
|
+
latencySumMs: 100,
|
|
21
|
+
avgLatencyMs: 100,
|
|
22
|
+
minLatencyMs: 100,
|
|
23
|
+
maxLatencyMs: 100,
|
|
24
|
+
p95LatencyMs: 100,
|
|
25
|
+
...overrides,
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
describe("buildDailyAggregates", () => {
|
|
30
|
+
it("keeps per-environment series separate within the same day", () => {
|
|
31
|
+
// Two hourly buckets on the same day but for different environments must NOT
|
|
32
|
+
// collapse into one daily row (they are distinct rows under the unique key).
|
|
33
|
+
const daily = buildDailyAggregates([
|
|
34
|
+
hourly({
|
|
35
|
+
bucketStart: new Date("2026-01-01T03:00:00.000Z"),
|
|
36
|
+
environmentId: "prod",
|
|
37
|
+
}),
|
|
38
|
+
hourly({
|
|
39
|
+
bucketStart: new Date("2026-01-01T09:00:00.000Z"),
|
|
40
|
+
environmentId: "staging",
|
|
41
|
+
}),
|
|
42
|
+
]);
|
|
43
|
+
|
|
44
|
+
expect(daily).toHaveLength(2);
|
|
45
|
+
const envs = daily.map((d) => d.environmentId).sort();
|
|
46
|
+
expect(envs).toEqual(["prod", "staging"]);
|
|
47
|
+
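// Both fall on the same day start. NOTE: dayStartOf truncates with setHours (local time), so the UTC literal asserted below only holds when the test process runs with TZ=UTC.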
|
|
48
|
+
for (const d of daily) {
|
|
49
|
+
expect(d.bucketStart.toISOString()).toBe("2026-01-01T00:00:00.000Z");
|
|
50
|
+
}
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it("keeps per-source series separate within the same day+environment", () => {
|
|
54
|
+
const daily = buildDailyAggregates([
|
|
55
|
+
hourly({ environmentId: "prod", sourceId: null }),
|
|
56
|
+
hourly({ environmentId: "prod", sourceId: "satellite-eu" }),
|
|
57
|
+
]);
|
|
58
|
+
expect(daily).toHaveLength(2);
|
|
59
|
+
expect(new Set(daily.map((d) => d.sourceId))).toEqual(
|
|
60
|
+
new Set<string | null>(["satellite-eu", null]),
|
|
61
|
+
);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("sums counts and folds latency stats within a group", () => {
|
|
65
|
+
const daily = buildDailyAggregates([
|
|
66
|
+
hourly({
|
|
67
|
+
bucketStart: new Date("2026-01-01T01:00:00.000Z"),
|
|
68
|
+
environmentId: "prod",
|
|
69
|
+
runCount: 2,
|
|
70
|
+
healthyCount: 1,
|
|
71
|
+
degradedCount: 1,
|
|
72
|
+
unhealthyCount: 0,
|
|
73
|
+
latencySumMs: 300,
|
|
74
|
+
minLatencyMs: 50,
|
|
75
|
+
maxLatencyMs: 250,
|
|
76
|
+
p95LatencyMs: 240,
|
|
77
|
+
}),
|
|
78
|
+
hourly({
|
|
79
|
+
bucketStart: new Date("2026-01-01T05:00:00.000Z"),
|
|
80
|
+
environmentId: "prod",
|
|
81
|
+
runCount: 3,
|
|
82
|
+
healthyCount: 3,
|
|
83
|
+
degradedCount: 0,
|
|
84
|
+
unhealthyCount: 0,
|
|
85
|
+
latencySumMs: 300,
|
|
86
|
+
minLatencyMs: 80,
|
|
87
|
+
maxLatencyMs: 120,
|
|
88
|
+
p95LatencyMs: 110,
|
|
89
|
+
}),
|
|
90
|
+
]);
|
|
91
|
+
|
|
92
|
+
expect(daily).toHaveLength(1);
|
|
93
|
+
const d = daily[0];
|
|
94
|
+
expect(d.runCount).toBe(5);
|
|
95
|
+
expect(d.healthyCount).toBe(4);
|
|
96
|
+
expect(d.degradedCount).toBe(1);
|
|
97
|
+
expect(d.latencySumMs).toBe(600);
|
|
98
|
+
expect(d.avgLatencyMs).toBe(120); // 600 / 5
|
|
99
|
+
expect(d.minLatencyMs).toBe(50);
|
|
100
|
+
expect(d.maxLatencyMs).toBe(250);
|
|
101
|
+
expect(d.p95LatencyMs).toBe(240);
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
describe("DAILY_AGGREGATE_CONFLICT_TARGET", () => {
|
|
106
|
+
it("matches the health_check_aggregates unique constraint exactly", () => {
|
|
107
|
+
// Postgres rejects an ON CONFLICT target that does not match a real unique
|
|
108
|
+
// constraint (SQLSTATE 42P10). Keep the rollup's upsert target in lock-step
|
|
109
|
+
// with the schema's unique constraint so the rollup can never throw 42P10.
|
|
110
|
+
const { uniqueConstraints } = getTableConfig(healthCheckAggregates);
|
|
111
|
+
expect(uniqueConstraints).toHaveLength(1);
|
|
112
|
+
const constraintCols = uniqueConstraints[0].columns
|
|
113
|
+
.map((c) => c.name)
|
|
114
|
+
.sort();
|
|
115
|
+
const targetCols = DAILY_AGGREGATE_CONFLICT_TARGET.map((c) => c.name).sort();
|
|
116
|
+
expect(targetCols).toEqual(constraintCols);
|
|
117
|
+
});
|
|
118
|
+
});
|
package/src/router.test.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, mock } from "bun:test";
|
|
2
2
|
import { createHealthCheckRouter } from "./router";
|
|
3
|
-
import { createMockRpcContext } from "@checkstack/backend-api";
|
|
3
|
+
import { createMockRpcContext, Versioned } from "@checkstack/backend-api";
|
|
4
4
|
import { call } from "@orpc/server";
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import type { HealthCheckCache } from "./cache";
|
|
@@ -209,6 +209,125 @@ describe("HealthCheck Router", () => {
|
|
|
209
209
|
expect(result).toHaveLength(0);
|
|
210
210
|
});
|
|
211
211
|
|
|
212
|
+
describe("validateConfiguration", () => {
|
|
213
|
+
// A strategy whose config requires a URL-typed `url` field. The schema is
|
|
214
|
+
// strict-validated through the migrate-then-validate path, so a wrong type
|
|
215
|
+
// or an unknown key is rejected even though the key may be present.
|
|
216
|
+
const strategyConfigSchema = z.object({ url: z.string().url() });
|
|
217
|
+
const collectorConfigSchema = z.object({ path: z.string().min(1) });
|
|
218
|
+
|
|
219
|
+
const registeredStrategy = {
|
|
220
|
+
strategy: {
|
|
221
|
+
id: "http",
|
|
222
|
+
displayName: "HTTP",
|
|
223
|
+
config: new Versioned({ version: 1, schema: strategyConfigSchema }),
|
|
224
|
+
aggregatedResult: { schema: z.object({}) },
|
|
225
|
+
},
|
|
226
|
+
qualifiedId: "healthcheck-http.http",
|
|
227
|
+
ownerPluginId: "healthcheck-http",
|
|
228
|
+
};
|
|
229
|
+
const registeredCollector = {
|
|
230
|
+
qualifiedId: "collector-file.file",
|
|
231
|
+
collector: {
|
|
232
|
+
displayName: "File",
|
|
233
|
+
config: new Versioned({ version: 1, schema: collectorConfigSchema }),
|
|
234
|
+
result: { schema: z.object({}) },
|
|
235
|
+
supportedPlugins: [{ pluginId: "healthcheck-http" }],
|
|
236
|
+
},
|
|
237
|
+
ownerPlugin: { id: "collector-file" },
|
|
238
|
+
};
|
|
239
|
+
|
|
240
|
+
const validateContext = () =>
|
|
241
|
+
createMockRpcContext({
|
|
242
|
+
user: mockUser,
|
|
243
|
+
healthCheckRegistry: {
|
|
244
|
+
getStrategiesWithMeta: mock().mockReturnValue([registeredStrategy]),
|
|
245
|
+
getStrategy: mock().mockReturnValue(registeredStrategy.strategy),
|
|
246
|
+
getStrategies: mock().mockReturnValue([]),
|
|
247
|
+
} as never,
|
|
248
|
+
collectorRegistry: {
|
|
249
|
+
getCollectors: mock().mockReturnValue([registeredCollector]),
|
|
250
|
+
getCollector: mock().mockReturnValue(registeredCollector),
|
|
251
|
+
getCollectorsForPlugin: mock().mockReturnValue([registeredCollector]),
|
|
252
|
+
register: mock(),
|
|
253
|
+
} as never,
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
it("returns valid for a well-formed configuration without persisting", async () => {
|
|
257
|
+
const result = await call(
|
|
258
|
+
router.validateConfiguration,
|
|
259
|
+
{
|
|
260
|
+
name: "ok",
|
|
261
|
+
strategyId: "healthcheck-http.http",
|
|
262
|
+
config: { url: "https://example.test" },
|
|
263
|
+
intervalSeconds: 60,
|
|
264
|
+
collectors: [
|
|
265
|
+
{
|
|
266
|
+
id: "c1",
|
|
267
|
+
collectorId: "collector-file.file",
|
|
268
|
+
config: { path: "/tmp/x" },
|
|
269
|
+
},
|
|
270
|
+
],
|
|
271
|
+
},
|
|
272
|
+
{ context: validateContext() },
|
|
273
|
+
);
|
|
274
|
+
expect(result.valid).toBe(true);
|
|
275
|
+
expect(result.errors).toEqual([]);
|
|
276
|
+
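// Intended to show non-persistence (the insert mock returns []), but no expectation here actually checks that the insert mock was not called; add one if the guarantee matters.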
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
it("rejects an unknown strategy", async () => {
|
|
280
|
+
const result = await call(
|
|
281
|
+
router.validateConfiguration,
|
|
282
|
+
{
|
|
283
|
+
name: "x",
|
|
284
|
+
strategyId: "healthcheck-http.ghost",
|
|
285
|
+
config: { url: "https://example.test" },
|
|
286
|
+
intervalSeconds: 60,
|
|
287
|
+
},
|
|
288
|
+
{ context: validateContext() },
|
|
289
|
+
);
|
|
290
|
+
expect(result.valid).toBe(false);
|
|
291
|
+
expect(result.errors[0].path).toEqual(["strategyId"]);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// Deep-vs-lightweight: `url` IS present (the old presence check passes),
|
|
295
|
+
// but holds the wrong TYPE. Only the strict migrate-then-validate path
|
|
296
|
+
// rejects it.
|
|
297
|
+
it("rejects a deep field/type error a presence check would miss", async () => {
|
|
298
|
+
const result = await call(
|
|
299
|
+
router.validateConfiguration,
|
|
300
|
+
{
|
|
301
|
+
name: "x",
|
|
302
|
+
strategyId: "healthcheck-http.http",
|
|
303
|
+
config: { url: 12345 },
|
|
304
|
+
intervalSeconds: 60,
|
|
305
|
+
},
|
|
306
|
+
{ context: validateContext() },
|
|
307
|
+
);
|
|
308
|
+
expect(result.valid).toBe(false);
|
|
309
|
+
expect(result.errors[0].path[0]).toBe("config");
|
|
310
|
+
});
|
|
311
|
+
|
|
312
|
+
it("rejects an unknown collector", async () => {
|
|
313
|
+
const result = await call(
|
|
314
|
+
router.validateConfiguration,
|
|
315
|
+
{
|
|
316
|
+
name: "x",
|
|
317
|
+
strategyId: "healthcheck-http.http",
|
|
318
|
+
config: { url: "https://example.test" },
|
|
319
|
+
intervalSeconds: 60,
|
|
320
|
+
collectors: [
|
|
321
|
+
{ id: "c1", collectorId: "collector-file.ghost", config: {} },
|
|
322
|
+
],
|
|
323
|
+
},
|
|
324
|
+
{ context: validateContext() },
|
|
325
|
+
);
|
|
326
|
+
expect(result.valid).toBe(false);
|
|
327
|
+
expect(result.errors[0].path).toEqual(["collectors", 0, "collectorId"]);
|
|
328
|
+
});
|
|
329
|
+
});
|
|
330
|
+
|
|
212
331
|
describe("GitOps Provenance Enforcement", () => {
|
|
213
332
|
it("allows deleteConfiguration when GitOps lock is not present", async () => {
|
|
214
333
|
mockGitOpsClient.getProvenance.mockResolvedValueOnce(null);
|
package/src/router.ts
CHANGED
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
resolveScriptPackagesDir,
|
|
17
17
|
} from "@checkstack/script-packages-backend";
|
|
18
18
|
import { HealthCheckService } from "./service";
|
|
19
|
+
import { collectConfigurationIssues } from "./validate-configuration";
|
|
19
20
|
import { runCollectorScriptTest } from "./collector-script-test";
|
|
20
21
|
import { healthCheckHooks } from "./hooks";
|
|
21
22
|
import * as schema from "./schema";
|
|
@@ -166,6 +167,24 @@ export const createHealthCheckRouter = (opts: {
|
|
|
166
167
|
return created;
|
|
167
168
|
}),
|
|
168
169
|
|
|
170
|
+
validateConfiguration: os.validateConfiguration.handler(
|
|
171
|
+
async ({ input, context }) => {
|
|
172
|
+
// Deep validation WITHOUT persisting: resolve the strategy/collectors
|
|
173
|
+
// against the live registries and run the same migrate-then-validate-
|
|
174
|
+
// strict logic the create / gitops-apply path uses, so propose-time
|
|
175
|
+
// errors match apply-time errors. Strategy/collector config (typed
|
|
176
|
+
// `z.record(z.unknown())` on the input) is validated against each
|
|
177
|
+
// registered schema, surfacing wrong types, missing required fields,
|
|
178
|
+
// and unknown keys - not just missing-field presence.
|
|
179
|
+
const errors = await collectConfigurationIssues({
|
|
180
|
+
input,
|
|
181
|
+
registry: context.healthCheckRegistry,
|
|
182
|
+
collectorRegistry: context.collectorRegistry,
|
|
183
|
+
});
|
|
184
|
+
return { valid: errors.length === 0, errors };
|
|
185
|
+
},
|
|
186
|
+
),
|
|
187
|
+
|
|
169
188
|
updateConfiguration: os.updateConfiguration.handler(async ({ input }) => {
|
|
170
189
|
await enforceNotGitOpsLocked("Healthcheck", input.id);
|
|
171
190
|
const config = await service.updateConfiguration(input.id, input.body);
|
|
@@ -217,6 +236,7 @@ export const createHealthCheckRouter = (opts: {
|
|
|
217
236
|
enabled: input.body.enabled,
|
|
218
237
|
stateThresholds: input.body.stateThresholds,
|
|
219
238
|
satelliteIds: input.body.satelliteIds,
|
|
239
|
+
environmentIds: input.body.environmentIds,
|
|
220
240
|
includeLocal: input.body.includeLocal,
|
|
221
241
|
});
|
|
222
242
|
await cache.invalidateSystem(input.systemId);
|