@checkstack/healthcheck-backend 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +253 -0
  2. package/drizzle/0018_abnormal_preak.sql +10 -0
  3. package/drizzle/meta/0018_snapshot.json +600 -0
  4. package/drizzle/meta/_journal.json +7 -0
  5. package/package.json +32 -27
  6. package/src/ai/assertion-validation.test.ts +117 -0
  7. package/src/ai/assertion-validation.ts +147 -0
  8. package/src/ai/healthcheck-capabilities.test.ts +158 -0
  9. package/src/ai/healthcheck-capabilities.ts +217 -0
  10. package/src/ai/healthcheck-delete.test.ts +81 -0
  11. package/src/ai/healthcheck-delete.ts +81 -0
  12. package/src/ai/healthcheck-projection.test.ts +36 -0
  13. package/src/ai/healthcheck-propose.test.ts +268 -0
  14. package/src/ai/healthcheck-propose.ts +290 -0
  15. package/src/ai/healthcheck-script-tools.test.ts +93 -0
  16. package/src/ai/healthcheck-script-tools.ts +179 -0
  17. package/src/ai/healthcheck-update.test.ts +123 -0
  18. package/src/ai/healthcheck-update.ts +123 -0
  19. package/src/ai/notify-subscribers.test.ts +109 -0
  20. package/src/ai/notify-subscribers.ts +176 -0
  21. package/src/ai/register-ai-tools.test.ts +41 -0
  22. package/src/ai/register-ai-tools.ts +53 -0
  23. package/src/ai/shell-env-table.test.ts +47 -0
  24. package/src/automations.test.ts +2 -1
  25. package/src/automations.ts +9 -1
  26. package/src/collector-script-test.test.ts +53 -1
  27. package/src/collector-script-test.ts +59 -7
  28. package/src/effective-environments.test.ts +93 -0
  29. package/src/effective-environments.ts +64 -0
  30. package/src/health-entity-id.ts +57 -0
  31. package/src/health-entity.test.ts +384 -6
  32. package/src/health-entity.ts +93 -35
  33. package/src/health-state.ts +41 -4
  34. package/src/healthcheck-gitops-kinds.test.ts +95 -0
  35. package/src/healthcheck-gitops-kinds.ts +56 -13
  36. package/src/index.ts +30 -0
  37. package/src/migration-chain-contract.test.ts +57 -0
  38. package/src/queue-executor.test.ts +801 -0
  39. package/src/queue-executor.ts +336 -52
  40. package/src/realtime-aggregation.test.ts +30 -0
  41. package/src/realtime-aggregation.ts +16 -0
  42. package/src/retention-job.ts +167 -93
  43. package/src/retention-rollup.test.ts +118 -0
  44. package/src/router.test.ts +120 -1
  45. package/src/router.ts +20 -0
  46. package/src/schema.ts +44 -6
  47. package/src/service.ts +199 -43
  48. package/src/state-transitions.test.ts +104 -0
  49. package/src/state-transitions.ts +39 -1
  50. package/src/validate-configuration.test.ts +205 -0
  51. package/src/validate-configuration.ts +159 -0
  52. package/tsconfig.json +9 -0
@@ -206,64 +206,103 @@ interface RollupParams {
206
206
  }
207
207
 
208
208
  /**
209
- * Rolls up hourly aggregates older than retention period into daily buckets
209
+ * The ON CONFLICT target for the daily-aggregate upsert. It MUST list exactly
210
+ * the columns of the `health_check_aggregates_bucket_unique` constraint
211
+ * (configurationId, systemId, environmentId, bucketStart, bucketSize, sourceId)
212
+ * because Postgres rejects an ON CONFLICT whose target does not match a real unique
213
+ * constraint with SQLSTATE 42P10. `retention-rollup.test.ts` asserts this stays
214
+ * in lock-step with the schema so the rollup can never throw 42P10 again.
210
215
  */
211
- async function rollupHourlyAggregates(params: RollupParams) {
212
- const { db, systemId, configurationId, hourlyRetentionDays } = params;
213
-
214
- const cutoffDate = new Date();
215
- cutoffDate.setDate(cutoffDate.getDate() - hourlyRetentionDays);
216
- cutoffDate.setHours(0, 0, 0, 0); // Round to day
217
-
218
- // Get old hourly aggregates
219
- const oldHourly = await db
220
- .select()
221
- .from(healthCheckAggregates)
222
- .where(
223
- and(
224
- eq(healthCheckAggregates.systemId, systemId),
225
- eq(healthCheckAggregates.configurationId, configurationId),
226
- eq(healthCheckAggregates.bucketSize, "hourly"),
227
- lt(healthCheckAggregates.bucketStart, cutoffDate),
228
- ),
229
- );
230
-
231
- if (oldHourly.length === 0) return;
216
+ export const DAILY_AGGREGATE_CONFLICT_TARGET = [
217
+ healthCheckAggregates.configurationId,
218
+ healthCheckAggregates.systemId,
219
+ healthCheckAggregates.environmentId,
220
+ healthCheckAggregates.bucketStart,
221
+ healthCheckAggregates.bucketSize,
222
+ healthCheckAggregates.sourceId,
223
+ ] as const;
224
+
225
+ /** Return a new Date truncated to midnight of the given timestamp's day in the process's local timezone (the input is not mutated). */
226
+ function dayStartOf(date: Date): Date {
227
+ const day = new Date(date);
228
+ day.setHours(0, 0, 0, 0);
229
+ return day;
230
+ }
232
231
 
233
- // Group by day
234
- const dailyBuckets = new Map<
235
- string,
236
- {
237
- bucketStart: Date;
238
- aggregates: typeof oldHourly;
239
- }
240
- >();
232
+ /** The subset of an hourly aggregate row the rollup math needs. */
233
+ export interface HourlyAggregateRow {
234
+ bucketStart: Date;
235
+ environmentId: string | null;
236
+ sourceId: string | null;
237
+ sourceLabel: string | null;
238
+ runCount: number;
239
+ healthyCount: number;
240
+ degradedCount: number;
241
+ unhealthyCount: number;
242
+ latencySumMs: number | null;
243
+ avgLatencyMs: number | null;
244
+ minLatencyMs: number | null;
245
+ maxLatencyMs: number | null;
246
+ p95LatencyMs: number | null;
247
+ }
241
248
 
242
- for (const hourly of oldHourly) {
243
- const dayStart = new Date(hourly.bucketStart);
244
- dayStart.setHours(0, 0, 0, 0);
245
- const key = dayStart.toISOString();
249
+ /** A computed daily aggregate ready to upsert. */
250
+ export interface DailyAggregateValues {
251
+ bucketStart: Date;
252
+ environmentId: string | null;
253
+ sourceId: string | null;
254
+ sourceLabel: string | null;
255
+ runCount: number;
256
+ healthyCount: number;
257
+ degradedCount: number;
258
+ unhealthyCount: number;
259
+ latencySumMs: number | undefined;
260
+ avgLatencyMs: number | undefined;
261
+ minLatencyMs: number | undefined;
262
+ maxLatencyMs: number | undefined;
263
+ p95LatencyMs: number | undefined;
264
+ }
246
265
 
247
- if (!dailyBuckets.has(key)) {
248
- dailyBuckets.set(key, { bucketStart: dayStart, aggregates: [] });
266
+ /**
267
+ * Fold hourly aggregates into daily ones. CRITICAL: rows are grouped by
268
+ * (day, environmentId, sourceId) - the same dimensions as the unique key - so
269
+ * distinct per-environment / per-source series stay separate instead of being
270
+ * collapsed into one `environmentId=null` daily row. Counts sum; latency sum
271
+ * folds (with avg*count fallback); min/max/p95 fold across the group.
272
+ */
273
+ export function buildDailyAggregates(
274
+ oldHourly: HourlyAggregateRow[],
275
+ ): DailyAggregateValues[] {
276
+ const groups = new Map<string, HourlyAggregateRow[]>();
277
+
278
+ for (const row of oldHourly) {
279
+ const key = JSON.stringify([
280
+ dayStartOf(row.bucketStart).toISOString(),
281
+ row.environmentId,
282
+ row.sourceId,
283
+ ]);
284
+ const existing = groups.get(key);
285
+ if (existing) {
286
+ existing.push(row);
287
+ } else {
288
+ groups.set(key, [row]);
249
289
  }
250
- dailyBuckets.get(key)!.aggregates.push(hourly);
251
290
  }
252
291
 
253
- // Create daily aggregates
254
- for (const [, bucket] of dailyBuckets) {
292
+ const result: DailyAggregateValues[] = [];
293
+ for (const rows of groups.values()) {
255
294
  let runCount = 0;
256
295
  let healthyCount = 0;
257
296
  let degradedCount = 0;
258
297
  let unhealthyCount = 0;
259
298
  let latencySumMs = 0;
260
299
 
261
- for (const a of bucket.aggregates) {
300
+ for (const a of rows) {
262
301
  runCount += a.runCount;
263
302
  healthyCount += a.healthyCount;
264
303
  degradedCount += a.degradedCount;
265
304
  unhealthyCount += a.unhealthyCount;
266
- // Use latencySumMs if available, fallback to avg*count approximation
305
+ // Use latencySumMs if available, fallback to avg*count approximation.
267
306
  if (a.latencySumMs !== null) {
268
307
  latencySumMs += a.latencySumMs;
269
308
  } else if (a.avgLatencyMs !== null) {
@@ -271,87 +310,122 @@ async function rollupHourlyAggregates(params: RollupParams) {
271
310
  }
272
311
  }
273
312
 
274
- const avgLatencyMs =
275
- runCount > 0 ? Math.round(latencySumMs / runCount) : undefined;
276
-
277
- // Min/max across all hourly buckets
278
- const minValues = bucket.aggregates
313
+ const minValues = rows
279
314
  .map((a) => a.minLatencyMs)
280
315
  .filter((v): v is number => v !== null);
281
- const maxValues = bucket.aggregates
316
+ const maxValues = rows
282
317
  .map((a) => a.maxLatencyMs)
283
318
  .filter((v): v is number => v !== null);
284
- const p95Values = bucket.aggregates
319
+ const p95Values = rows
285
320
  .map((a) => a.p95LatencyMs)
286
321
  .filter((v): v is number => v !== null);
287
- const minLatencyMs =
288
- minValues.length > 0 ? Math.min(...minValues) : undefined;
289
- const maxLatencyMs =
290
- maxValues.length > 0 ? Math.max(...maxValues) : undefined;
291
- // Use max of hourly p95s as upper bound approximation
292
- const p95LatencyMs =
293
- p95Values.length > 0 ? Math.max(...p95Values) : undefined;
294
322
 
323
+ result.push({
324
+ bucketStart: dayStartOf(rows[0].bucketStart),
325
+ environmentId: rows[0].environmentId,
326
+ sourceId: rows[0].sourceId,
327
+ sourceLabel: rows[0].sourceLabel,
328
+ runCount,
329
+ healthyCount,
330
+ degradedCount,
331
+ unhealthyCount,
332
+ latencySumMs: latencySumMs > 0 ? latencySumMs : undefined,
333
+ avgLatencyMs:
334
+ runCount > 0 ? Math.round(latencySumMs / runCount) : undefined,
335
+ minLatencyMs: minValues.length > 0 ? Math.min(...minValues) : undefined,
336
+ maxLatencyMs: maxValues.length > 0 ? Math.max(...maxValues) : undefined,
337
+ // Use max of hourly p95s as an upper-bound approximation.
338
+ p95LatencyMs: p95Values.length > 0 ? Math.max(...p95Values) : undefined,
339
+ });
340
+ }
341
+
342
+ return result;
343
+ }
344
+
345
+ /**
346
+ * Rolls up hourly aggregates older than the retention period into daily buckets, then deletes the rolled-up hourly rows.
347
+ */
348
+ async function rollupHourlyAggregates(params: RollupParams) {
349
+ const { db, systemId, configurationId, hourlyRetentionDays } = params;
350
+
351
+ const cutoffDate = new Date();
352
+ cutoffDate.setDate(cutoffDate.getDate() - hourlyRetentionDays);
353
+ cutoffDate.setHours(0, 0, 0, 0); // Round to day
354
+
355
+ // Get old hourly aggregates
356
+ const oldHourly = await db
357
+ .select()
358
+ .from(healthCheckAggregates)
359
+ .where(
360
+ and(
361
+ eq(healthCheckAggregates.systemId, systemId),
362
+ eq(healthCheckAggregates.configurationId, configurationId),
363
+ eq(healthCheckAggregates.bucketSize, "hourly"),
364
+ lt(healthCheckAggregates.bucketStart, cutoffDate),
365
+ ),
366
+ );
367
+
368
+ if (oldHourly.length === 0) return;
369
+
370
+ // Fold into daily aggregates, preserving (day, environmentId, sourceId) series.
371
+ for (const daily of buildDailyAggregates(oldHourly)) {
372
+ const newLatencySum = daily.latencySumMs;
295
373
  // Upsert the daily aggregate. A row may already exist for this
296
- // (configurationId, systemId, day, daily, sourceId=null) tuple if a
297
- // prior rollup ran and then late-arriving hourly buckets (e.g. from
298
- // a satellite that was offline) were rolled up afterwards. Merge in
299
- // that case rather than crashing — sums add, min/max/p95 fold.
300
- const newLatencySum = latencySumMs > 0 ? latencySumMs : undefined;
374
+ // (configurationId, systemId, environmentId, day, daily, sourceId) tuple if
375
+ // a prior rollup ran and then late-arriving hourly buckets (e.g. from a
376
+ // satellite that was offline) were rolled up afterwards. Merge in that case
377
+ // rather than crashing — sums add, min/max/p95 fold.
301
378
  await db
302
379
  .insert(healthCheckAggregates)
303
380
  .values({
304
381
  configurationId,
305
382
  systemId,
306
- bucketStart: bucket.bucketStart,
383
+ environmentId: daily.environmentId,
384
+ sourceId: daily.sourceId,
385
+ sourceLabel: daily.sourceLabel,
386
+ bucketStart: daily.bucketStart,
307
387
  bucketSize: "daily",
308
- runCount,
309
- healthyCount,
310
- degradedCount,
311
- unhealthyCount,
388
+ runCount: daily.runCount,
389
+ healthyCount: daily.healthyCount,
390
+ degradedCount: daily.degradedCount,
391
+ unhealthyCount: daily.unhealthyCount,
312
392
  latencySumMs: newLatencySum,
313
- avgLatencyMs,
314
- minLatencyMs,
315
- maxLatencyMs,
316
- p95LatencyMs,
393
+ avgLatencyMs: daily.avgLatencyMs,
394
+ minLatencyMs: daily.minLatencyMs,
395
+ maxLatencyMs: daily.maxLatencyMs,
396
+ p95LatencyMs: daily.p95LatencyMs,
317
397
  aggregatedResult: undefined, // Cannot combine result across hours
318
398
  })
319
399
  .onConflictDoUpdate({
320
- target: [
321
- healthCheckAggregates.configurationId,
322
- healthCheckAggregates.systemId,
323
- healthCheckAggregates.bucketStart,
324
- healthCheckAggregates.bucketSize,
325
- healthCheckAggregates.sourceId,
326
- ],
400
+ target: [...DAILY_AGGREGATE_CONFLICT_TARGET],
327
401
  set: {
328
- runCount: sql`${healthCheckAggregates.runCount} + ${runCount}`,
329
- healthyCount: sql`${healthCheckAggregates.healthyCount} + ${healthyCount}`,
330
- degradedCount: sql`${healthCheckAggregates.degradedCount} + ${degradedCount}`,
331
- unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${unhealthyCount}`,
402
+ runCount: sql`${healthCheckAggregates.runCount} + ${daily.runCount}`,
403
+ healthyCount: sql`${healthCheckAggregates.healthyCount} + ${daily.healthyCount}`,
404
+ degradedCount: sql`${healthCheckAggregates.degradedCount} + ${daily.degradedCount}`,
405
+ unhealthyCount: sql`${healthCheckAggregates.unhealthyCount} + ${daily.unhealthyCount}`,
332
406
  latencySumMs: sql`COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}`,
333
- avgLatencyMs: sql`CASE WHEN (${healthCheckAggregates.runCount} + ${runCount}) > 0 THEN (COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}) / (${healthCheckAggregates.runCount} + ${runCount}) ELSE ${healthCheckAggregates.avgLatencyMs} END`,
407
+ avgLatencyMs: sql`CASE WHEN (${healthCheckAggregates.runCount} + ${daily.runCount}) > 0 THEN (COALESCE(${healthCheckAggregates.latencySumMs}, 0) + ${newLatencySum ?? 0}) / (${healthCheckAggregates.runCount} + ${daily.runCount}) ELSE ${healthCheckAggregates.avgLatencyMs} END`,
334
408
  minLatencyMs:
335
- minLatencyMs === undefined
409
+ daily.minLatencyMs === undefined
336
410
  ? sql`${healthCheckAggregates.minLatencyMs}`
337
- : sql`LEAST(COALESCE(${healthCheckAggregates.minLatencyMs}, ${minLatencyMs}), ${minLatencyMs})`,
411
+ : sql`LEAST(COALESCE(${healthCheckAggregates.minLatencyMs}, ${daily.minLatencyMs}), ${daily.minLatencyMs})`,
338
412
  maxLatencyMs:
339
- maxLatencyMs === undefined
413
+ daily.maxLatencyMs === undefined
340
414
  ? sql`${healthCheckAggregates.maxLatencyMs}`
341
- : sql`GREATEST(COALESCE(${healthCheckAggregates.maxLatencyMs}, ${maxLatencyMs}), ${maxLatencyMs})`,
415
+ : sql`GREATEST(COALESCE(${healthCheckAggregates.maxLatencyMs}, ${daily.maxLatencyMs}), ${daily.maxLatencyMs})`,
342
416
  p95LatencyMs:
343
- p95LatencyMs === undefined
417
+ daily.p95LatencyMs === undefined
344
418
  ? sql`${healthCheckAggregates.p95LatencyMs}`
345
- : sql`GREATEST(COALESCE(${healthCheckAggregates.p95LatencyMs}, ${p95LatencyMs}), ${p95LatencyMs})`,
419
+ : sql`GREATEST(COALESCE(${healthCheckAggregates.p95LatencyMs}, ${daily.p95LatencyMs}), ${daily.p95LatencyMs})`,
346
420
  },
347
421
  });
422
+ }
348
423
 
349
- // Delete processed hourly aggregates
350
- for (const hourly of bucket.aggregates) {
351
- await db
352
- .delete(healthCheckAggregates)
353
- .where(eq(healthCheckAggregates.id, hourly.id));
354
- }
424
+ // Delete the processed hourly aggregates (all were folded into daily rows).
425
+ for (const hourly of oldHourly) {
426
+ await db
427
+ .delete(healthCheckAggregates)
428
+ .where(eq(healthCheckAggregates.id, hourly.id));
355
429
  }
356
430
  }
357
431
 
@@ -0,0 +1,118 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { getTableConfig } from "drizzle-orm/pg-core";
3
+ import { healthCheckAggregates } from "./schema";
4
+ import {
5
+ buildDailyAggregates,
6
+ DAILY_AGGREGATE_CONFLICT_TARGET,
7
+ type HourlyAggregateRow,
8
+ } from "./retention-job";
9
+
10
+ function hourly(overrides: Partial<HourlyAggregateRow>): HourlyAggregateRow {
11
+ return {
12
+ bucketStart: new Date("2026-01-01T03:00:00.000Z"),
13
+ environmentId: null,
14
+ sourceId: null,
15
+ sourceLabel: null,
16
+ runCount: 1,
17
+ healthyCount: 1,
18
+ degradedCount: 0,
19
+ unhealthyCount: 0,
20
+ latencySumMs: 100,
21
+ avgLatencyMs: 100,
22
+ minLatencyMs: 100,
23
+ maxLatencyMs: 100,
24
+ p95LatencyMs: 100,
25
+ ...overrides,
26
+ };
27
+ }
28
+
29
+ describe("buildDailyAggregates", () => {
30
+ it("keeps per-environment series separate within the same day", () => {
31
+ // Two hourly buckets on the same day but for different environments must NOT
32
+ // collapse into one daily row (they are distinct rows under the unique key).
33
+ const daily = buildDailyAggregates([
34
+ hourly({
35
+ bucketStart: new Date("2026-01-01T03:00:00.000Z"),
36
+ environmentId: "prod",
37
+ }),
38
+ hourly({
39
+ bucketStart: new Date("2026-01-01T09:00:00.000Z"),
40
+ environmentId: "staging",
41
+ }),
42
+ ]);
43
+
44
+ expect(daily).toHaveLength(2);
45
+ const envs = daily.map((d) => d.environmentId).sort();
46
+ expect(envs).toEqual(["prod", "staging"]);
47
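+ // Both fall on the same day start. dayStartOf truncates in local time, so the expected ISO value below assumes the test process runs in UTC.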
48
+ for (const d of daily) {
49
+ expect(d.bucketStart.toISOString()).toBe("2026-01-01T00:00:00.000Z");
50
+ }
51
+ });
52
+
53
+ it("keeps per-source series separate within the same day+environment", () => {
54
+ const daily = buildDailyAggregates([
55
+ hourly({ environmentId: "prod", sourceId: null }),
56
+ hourly({ environmentId: "prod", sourceId: "satellite-eu" }),
57
+ ]);
58
+ expect(daily).toHaveLength(2);
59
+ expect(new Set(daily.map((d) => d.sourceId))).toEqual(
60
+ new Set<string | null>(["satellite-eu", null]),
61
+ );
62
+ });
63
+
64
+ it("sums counts and folds latency stats within a group", () => {
65
+ const daily = buildDailyAggregates([
66
+ hourly({
67
+ bucketStart: new Date("2026-01-01T01:00:00.000Z"),
68
+ environmentId: "prod",
69
+ runCount: 2,
70
+ healthyCount: 1,
71
+ degradedCount: 1,
72
+ unhealthyCount: 0,
73
+ latencySumMs: 300,
74
+ minLatencyMs: 50,
75
+ maxLatencyMs: 250,
76
+ p95LatencyMs: 240,
77
+ }),
78
+ hourly({
79
+ bucketStart: new Date("2026-01-01T05:00:00.000Z"),
80
+ environmentId: "prod",
81
+ runCount: 3,
82
+ healthyCount: 3,
83
+ degradedCount: 0,
84
+ unhealthyCount: 0,
85
+ latencySumMs: 300,
86
+ minLatencyMs: 80,
87
+ maxLatencyMs: 120,
88
+ p95LatencyMs: 110,
89
+ }),
90
+ ]);
91
+
92
+ expect(daily).toHaveLength(1);
93
+ const d = daily[0];
94
+ expect(d.runCount).toBe(5);
95
+ expect(d.healthyCount).toBe(4);
96
+ expect(d.degradedCount).toBe(1);
97
+ expect(d.latencySumMs).toBe(600);
98
+ expect(d.avgLatencyMs).toBe(120); // 600 / 5
99
+ expect(d.minLatencyMs).toBe(50);
100
+ expect(d.maxLatencyMs).toBe(250);
101
+ expect(d.p95LatencyMs).toBe(240);
102
+ });
103
+ });
104
+
105
+ describe("DAILY_AGGREGATE_CONFLICT_TARGET", () => {
106
+ it("matches the health_check_aggregates unique constraint exactly", () => {
107
+ // Postgres rejects an ON CONFLICT target that does not match a real unique
108
+ // constraint (SQLSTATE 42P10). Keep the rollup's upsert target in lock-step
109
+ // with the schema's unique constraint so the rollup can never throw 42P10.
110
+ const { uniqueConstraints } = getTableConfig(healthCheckAggregates);
111
+ expect(uniqueConstraints).toHaveLength(1);
112
+ const constraintCols = uniqueConstraints[0].columns
113
+ .map((c) => c.name)
114
+ .sort();
115
+ const targetCols = DAILY_AGGREGATE_CONFLICT_TARGET.map((c) => c.name).sort();
116
+ expect(targetCols).toEqual(constraintCols);
117
+ });
118
+ });
@@ -1,6 +1,6 @@
1
1
  import { describe, it, expect, mock } from "bun:test";
2
2
  import { createHealthCheckRouter } from "./router";
3
- import { createMockRpcContext } from "@checkstack/backend-api";
3
+ import { createMockRpcContext, Versioned } from "@checkstack/backend-api";
4
4
  import { call } from "@orpc/server";
5
5
  import { z } from "zod";
6
6
  import type { HealthCheckCache } from "./cache";
@@ -209,6 +209,125 @@ describe("HealthCheck Router", () => {
209
209
  expect(result).toHaveLength(0);
210
210
  });
211
211
 
212
+ describe("validateConfiguration", () => {
213
+ // A strategy whose config requires a URL-typed `url` field. The schema is
214
+ // strict-validated through the migrate-then-validate path, so a wrong type
215
+ // or an unknown key is rejected even when the required `url` key is present.
216
+ const strategyConfigSchema = z.object({ url: z.string().url() });
217
+ const collectorConfigSchema = z.object({ path: z.string().min(1) });
218
+
219
+ const registeredStrategy = {
220
+ strategy: {
221
+ id: "http",
222
+ displayName: "HTTP",
223
+ config: new Versioned({ version: 1, schema: strategyConfigSchema }),
224
+ aggregatedResult: { schema: z.object({}) },
225
+ },
226
+ qualifiedId: "healthcheck-http.http",
227
+ ownerPluginId: "healthcheck-http",
228
+ };
229
+ const registeredCollector = {
230
+ qualifiedId: "collector-file.file",
231
+ collector: {
232
+ displayName: "File",
233
+ config: new Versioned({ version: 1, schema: collectorConfigSchema }),
234
+ result: { schema: z.object({}) },
235
+ supportedPlugins: [{ pluginId: "healthcheck-http" }],
236
+ },
237
+ ownerPlugin: { id: "collector-file" },
238
+ };
239
+
240
+ const validateContext = () =>
241
+ createMockRpcContext({
242
+ user: mockUser,
243
+ healthCheckRegistry: {
244
+ getStrategiesWithMeta: mock().mockReturnValue([registeredStrategy]),
245
+ getStrategy: mock().mockReturnValue(registeredStrategy.strategy),
246
+ getStrategies: mock().mockReturnValue([]),
247
+ } as never,
248
+ collectorRegistry: {
249
+ getCollectors: mock().mockReturnValue([registeredCollector]),
250
+ getCollector: mock().mockReturnValue(registeredCollector),
251
+ getCollectorsForPlugin: mock().mockReturnValue([registeredCollector]),
252
+ register: mock(),
253
+ } as never,
254
+ });
255
+
256
+ it("returns valid for a well-formed configuration without persisting", async () => {
257
+ const result = await call(
258
+ router.validateConfiguration,
259
+ {
260
+ name: "ok",
261
+ strategyId: "healthcheck-http.http",
262
+ config: { url: "https://example.test" },
263
+ intervalSeconds: 60,
264
+ collectors: [
265
+ {
266
+ id: "c1",
267
+ collectorId: "collector-file.file",
268
+ config: { path: "/tmp/x" },
269
+ },
270
+ ],
271
+ },
272
+ { context: validateContext() },
273
+ );
274
+ expect(result.valid).toBe(true);
275
+ expect(result.errors).toEqual([]);
276
+ // Non-persistence is implied rather than asserted: this test makes no expectation on the DB insert mock.
277
+ });
278
+
279
+ it("rejects an unknown strategy", async () => {
280
+ const result = await call(
281
+ router.validateConfiguration,
282
+ {
283
+ name: "x",
284
+ strategyId: "healthcheck-http.ghost",
285
+ config: { url: "https://example.test" },
286
+ intervalSeconds: 60,
287
+ },
288
+ { context: validateContext() },
289
+ );
290
+ expect(result.valid).toBe(false);
291
+ expect(result.errors[0].path).toEqual(["strategyId"]);
292
+ });
293
+
294
+ // Deep-vs-lightweight: `url` IS present (the old presence check passes),
295
+ // but holds the wrong TYPE. Only the strict migrate-then-validate path
296
+ // rejects it.
297
+ it("rejects a deep field/type error a presence check would miss", async () => {
298
+ const result = await call(
299
+ router.validateConfiguration,
300
+ {
301
+ name: "x",
302
+ strategyId: "healthcheck-http.http",
303
+ config: { url: 12345 },
304
+ intervalSeconds: 60,
305
+ },
306
+ { context: validateContext() },
307
+ );
308
+ expect(result.valid).toBe(false);
309
+ expect(result.errors[0].path[0]).toBe("config");
310
+ });
311
+
312
+ it("rejects an unknown collector", async () => {
313
+ const result = await call(
314
+ router.validateConfiguration,
315
+ {
316
+ name: "x",
317
+ strategyId: "healthcheck-http.http",
318
+ config: { url: "https://example.test" },
319
+ intervalSeconds: 60,
320
+ collectors: [
321
+ { id: "c1", collectorId: "collector-file.ghost", config: {} },
322
+ ],
323
+ },
324
+ { context: validateContext() },
325
+ );
326
+ expect(result.valid).toBe(false);
327
+ expect(result.errors[0].path).toEqual(["collectors", 0, "collectorId"]);
328
+ });
329
+ });
330
+
212
331
  describe("GitOps Provenance Enforcement", () => {
213
332
  it("allows deleteConfiguration when GitOps lock is not present", async () => {
214
333
  mockGitOpsClient.getProvenance.mockResolvedValueOnce(null);
package/src/router.ts CHANGED
@@ -16,6 +16,7 @@ import {
16
16
  resolveScriptPackagesDir,
17
17
  } from "@checkstack/script-packages-backend";
18
18
  import { HealthCheckService } from "./service";
19
+ import { collectConfigurationIssues } from "./validate-configuration";
19
20
  import { runCollectorScriptTest } from "./collector-script-test";
20
21
  import { healthCheckHooks } from "./hooks";
21
22
  import * as schema from "./schema";
@@ -166,6 +167,24 @@ export const createHealthCheckRouter = (opts: {
166
167
  return created;
167
168
  }),
168
169
 
170
+ validateConfiguration: os.validateConfiguration.handler(
171
+ async ({ input, context }) => {
172
+ // Deep validation WITHOUT persisting: resolve the strategy/collectors
173
+ // against the live registries and run the same migrate-then-validate-
174
+ // strict logic the create / gitops-apply path uses, so propose-time
175
+ // errors match apply-time errors. Strategy/collector config (typed
176
+ // `z.record(z.unknown())` on the input) is validated against each
177
+ // registered schema, surfacing wrong types, missing required fields,
178
+ // and unknown keys - not just missing-field presence.
179
+ const errors = await collectConfigurationIssues({
180
+ input,
181
+ registry: context.healthCheckRegistry,
182
+ collectorRegistry: context.collectorRegistry,
183
+ });
184
+ return { valid: errors.length === 0, errors };
185
+ },
186
+ ),
187
+
169
188
  updateConfiguration: os.updateConfiguration.handler(async ({ input }) => {
170
189
  await enforceNotGitOpsLocked("Healthcheck", input.id);
171
190
  const config = await service.updateConfiguration(input.id, input.body);
@@ -217,6 +236,7 @@ export const createHealthCheckRouter = (opts: {
217
236
  enabled: input.body.enabled,
218
237
  stateThresholds: input.body.stateThresholds,
219
238
  satelliteIds: input.body.satelliteIds,
239
+ environmentIds: input.body.environmentIds,
220
240
  includeLocal: input.body.includeLocal,
221
241
  });
222
242
  await cache.invalidateSystem(input.systemId);