npm - @checkstack/healthcheck-backend - Versions diffs - 1.4.0 → 1.6.0 - Mend

@checkstack/healthcheck-backend 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/CHANGELOG.md +303 -0
package/drizzle/0018_abnormal_preak.sql +10 -0
package/drizzle/meta/0018_snapshot.json +600 -0
package/drizzle/meta/_journal.json +7 -0
package/package.json +26 -21
package/src/ai/assertion-validation.test.ts +117 -0
package/src/ai/assertion-validation.ts +147 -0
package/src/ai/healthcheck-capabilities.test.ts +158 -0
package/src/ai/healthcheck-capabilities.ts +217 -0
package/src/ai/healthcheck-delete.test.ts +81 -0
package/src/ai/healthcheck-delete.ts +81 -0
package/src/ai/healthcheck-projection.test.ts +36 -0
package/src/ai/healthcheck-propose.test.ts +268 -0
package/src/ai/healthcheck-propose.ts +290 -0
package/src/ai/healthcheck-script-tools.test.ts +93 -0
package/src/ai/healthcheck-script-tools.ts +179 -0
package/src/ai/healthcheck-update.test.ts +123 -0
package/src/ai/healthcheck-update.ts +123 -0
package/src/ai/notify-subscribers.test.ts +109 -0
package/src/ai/notify-subscribers.ts +176 -0
package/src/ai/register-ai-tools.test.ts +41 -0
package/src/ai/register-ai-tools.ts +53 -0
package/src/ai/shell-env-table.test.ts +47 -0
package/src/automations.test.ts +2 -1
package/src/automations.ts +9 -1
package/src/collector-script-test.test.ts +53 -1
package/src/collector-script-test.ts +59 -7
package/src/effective-environments.test.ts +93 -0
package/src/effective-environments.ts +64 -0
package/src/health-entity-id.ts +57 -0
package/src/health-entity.test.ts +405 -31
package/src/health-entity.ts +99 -43
package/src/health-state.ts +41 -4
package/src/healthcheck-gitops-kinds.test.ts +95 -0
package/src/healthcheck-gitops-kinds.ts +56 -13
package/src/index.ts +33 -0
package/src/migration-chain-contract.test.ts +57 -0
package/src/queue-executor.test.ts +814 -0
package/src/queue-executor.ts +342 -50
package/src/realtime-aggregation.test.ts +30 -0
package/src/realtime-aggregation.ts +16 -0
package/src/retention-job.ts +167 -93
package/src/retention-rollup.test.ts +118 -0
package/src/router.test.ts +120 -1
package/src/router.ts +20 -0
package/src/schema.ts +44 -6
package/src/service.ts +199 -43
package/src/state-evaluator.test.ts +50 -5
package/src/state-evaluator.ts +9 -2
package/src/state-transitions.test.ts +104 -0
package/src/state-transitions.ts +39 -1
package/src/validate-configuration.test.ts +205 -0
package/src/validate-configuration.ts +159 -0
package/tsconfig.json +9 -0

package/src/health-entity.ts CHANGED Viewed

@@ -23,7 +23,7 @@
  */
 import { z } from "zod";
 import { HealthCheckStatusSchema } from "@checkstack/healthcheck-common";
-import { withXactLock, type SafeDatabase } from "@checkstack/backend-api";
+import type { AdvisoryLockService } from "@checkstack/backend-api";
 import type {
   EntityChangeDeriver,
   EntityChangePayloadMapper,
@@ -31,13 +31,9 @@ import type {
   EntityRead,
 } from "@checkstack/automation-backend";
 import type { HealthCheckService } from "./service";
-import * as schema from "./schema";
-// Re-export the change type through automation-backend's barrel (it
-// re-exports it from automation-common) so this domain needs no extra dep.
+import { parseHealthEntityId } from "./health-entity-id";
-type Db = SafeDatabase<typeof schema>;
-/** Entity kind id for the per-system aggregated health. */
+/** Entity kind id for the aggregated health (system rollup + per-environment). */
 export const HEALTH_ENTITY_KIND = "health";
 /**
@@ -126,15 +122,23 @@ function readNumber(
  * Restores the keys operators read (`trigger.payload.systemId`,
  * `.previousStatus`, …) that the generic change shape omits.
  *
- * `systemId` is the entity id; `previousStatus` is `prev.status` and `newStatus`
- * is `next.status`; `healthyChecks` / `totalChecks` come from `next`;
- * `timestamp` is the change's `occurredAt`. `systemName` is not derivable from a
- * health change (it lives in the catalog) and is OPTIONAL on the schemas, so it
- * is omitted.
+ * The entity id is now env-qualified (Phase 3b): `payload.systemId` is ALWAYS
+ * the systemId portion (so existing automations reading `trigger.payload.systemId`
+ * are unaffected — the rollup carries the bare systemId), and the NEW optional
+ * `payload.environmentId` is the env portion — present only for a per-environment
+ * change, absent (undefined) for the system rollup. `previousStatus` is
+ * `prev.status` and `newStatus` is `next.status`; `healthyChecks` / `totalChecks`
+ * come from `next`; `timestamp` is the change's `occurredAt`. `systemName` is not
+ * derivable from a health change (it lives in the catalog) and is OPTIONAL on the
+ * schemas, so it is omitted.
  */
 export const healthChangeToPayload: EntityChangePayloadMapper = (changed) => {
+  const { systemId, environmentId } = parseHealthEntityId(changed.id);
   return {
-    systemId: changed.id,
+    systemId,
+    // Present only for a per-env change; omitted for the rollup so the field
+    // is `undefined` (the optional schema accepts both).
+    ...(environmentId === null ? {} : { environmentId }),
     previousStatus: readStatus(changed.prev) ?? undefined,
     newStatus: readStatus(changed.next) ?? undefined,
     healthyChecks: readNumber(changed.next, "healthyChecks") ?? 0,
@@ -157,6 +161,12 @@ export const healthChangeToPayload: EntityChangePayloadMapper = (changed) => {
  */
 export interface HealthChangeClassification {
   systemId: string;
+  /**
+   * The environment portion of the entity id (Phase 3b). `null` for the
+   * system rollup change; the env id for a per-environment change. Cross-plugin
+   * consumers that only care about the system (SLO / dependency) can ignore it.
+   */
+  environmentId: string | null;
   previousStatus: string | null;
   newStatus: string | null;
   degraded: boolean;
@@ -168,6 +178,7 @@ export function classifyHealthChange(changed: {
   prev: Record<string, unknown> | null;
   next: Record<string, unknown> | null;
 }): HealthChangeClassification {
+  const { systemId, environmentId } = parseHealthEntityId(changed.id);
   const previousStatus = readStatus(changed.prev);
   const newStatus = readStatus(changed.next);
   const bothPresent = previousStatus !== null && newStatus !== null;
@@ -176,7 +187,8 @@ export function classifyHealthChange(changed: {
   const recovered =
     bothPresent && newStatus === "healthy" && previousStatus !== "healthy";
   return {
-    systemId: changed.id,
+    systemId,
+    environmentId,
     previousStatus,
     newStatus,
     degraded,
@@ -214,9 +226,17 @@ export function classifyHealthChange(changed: {
 export async function computeHealthEntityState(args: {
   service: HealthCheckService;
   systemId: string;
+  /**
+   * Environment to compute the view for (Phase 3b). `undefined` = the SYSTEM
+   * ROLLUP (worst status across all environments + env-less runs — the
+   * all-runs aggregate, §7.4.2). `null` = the env-less slice. A string = that
+   * environment's per-env view. The existence gate (`checkStatuses.length`) is
+   * env-independent, so a per-env view and the rollup agree on totalChecks.
+   */
+  environmentId?: string | null;
 }): Promise<HealthEntityState | undefined> {
-  const { service, systemId } = args;
-  const overview = await service.getSystemHealthStatus(systemId);
+  const { service, systemId, environmentId } = args;
+  const overview = await service.getSystemHealthStatus(systemId, environmentId);
   // No enabled check associations ⇒ no health entity for this system.
   if (overview.checkStatuses.length === 0) return undefined;
   return {
@@ -229,10 +249,16 @@ export async function computeHealthEntityState(args: {
 /**
  * Build the PLUGIN-BACKED + COMPUTED `read` accessor for the `health` entity.
- * For each systemId, assembles the view via {@link computeHealthEntityState}
- * (systems with no runs omitted). This is the single source of truth that
- * `handle.mutate` snapshots `prev` from and `get`/`getMany`/scope enrichment
- * route through — no framework `entity_state` storage.
+ *
+ * Env-aware id parsing (Phase 3b, §7.4.2): each incoming id is parsed via
+ * {@link parseHealthEntityId}. A BARE `"<systemId>"` resolves the SYSTEM
+ * ROLLUP; a `"<systemId>::<environmentId>"` resolves that environment's
+ * per-env view. The result is keyed by the ORIGINAL id, so the reactive
+ * engine, `getMany`, and scope enrichment all see the right view for the id
+ * they asked for. Systems with no enabled check associations are omitted
+ * (existence gate). No framework `entity_state` storage — compute-on-read from
+ * the durable, env-keyed `health_check_runs`, so a read returns the same answer
+ * on every pod (state-and-scale).
  */
 export function createHealthEntityRead(deps: {
   service: HealthCheckService;
@@ -242,9 +268,20 @@ export function createHealthEntityRead(deps: {
     if (ids.length === 0) return {};
     const out: Record<string, HealthEntityState> = {};
     await Promise.all(
-      ids.map(async (systemId) => {
-        const state = await computeHealthEntityState({ service, systemId });
-        if (state) out[systemId] = state;
+      ids.map(async (id) => {
+        const { systemId, environmentId } = parseHealthEntityId(id);
+        const state = await computeHealthEntityState({
+          service,
+          systemId,
+          // A bare `<systemId>` id is the ROLLUP: `parseHealthEntityId`
+          // returns `environmentId: null` for it (so the payload mapper can
+          // tell "rollup → omit environmentId"), but the rollup must read ALL
+          // runs — `undefined` — NOT the env-less slice (`null`, which filters
+          // to `env_id IS NULL`). Reserve `null` for an explicit env-less
+          // read; map the rollup's null to undefined here.
+          environmentId: environmentId === null ? undefined : environmentId,
+        });
+        if (state) out[id] = state;
       }),
     );
     return out;
@@ -298,19 +335,28 @@ export function createHealthEntityRead(deps: {
  */
 export async function writeHealthEntity(args: {
   handle: EntityHandle<HealthEntityState> | undefined;
-  systemId: string;
+  /**
+   * The `health` entity id to mutate (Phase 3b): the env-qualified
+   * `"<systemId>::<environmentId>"` for a per-env write, or the bare
+   * `"<systemId>"` for the env-less / system-rollup write. This is the id the
+   * framework diffs/emits, so it drives both the per-env and rollup
+   * `ENTITY_CHANGED`.
+   */
+  entityId: string;
   apply: () => Promise<HealthEntityState>;
   onError?: (error: unknown) => void;
   /**
-   * Optional per-`systemId` critical section wrapping the snapshot-prev +
+   * Optional per-`entityId` critical section wrapping the snapshot-prev +
    * apply + diff + emit. The executor supplies a transaction-scoped advisory
-   * lock (`withXactLock`, key `health:<systemId>`) so concurrent evaluations
-   * of one system can't double-emit a single logical transition. Identity by
-   * default (no serialization) for the unbound-handle / test paths.
+   * lock (`withXactLock`, key `health:<entityId>`) so concurrent evaluations
+   * of one (system, environment) — or of the rollup — can't double-emit a
+   * single logical transition, and per-env + rollup writes serialize against
+   * their OWN keys (distinct envs / the rollup don't block each other).
+   * Identity by default (no serialization) for the unbound-handle / test paths.
    */
   serialize?: <T>(fn: () => Promise<T>) => Promise<T>;
 }): Promise<HealthEntityState> {
-  const { handle, systemId, apply, onError, serialize } = args;
+  const { handle, entityId, apply, onError, serialize } = args;
   if (!handle) {
     // No reactivity bound — run the durable write directly.
     return apply();
@@ -323,7 +369,7 @@ export async function writeHealthEntity(args: {
     // call, and we wrap that whole call so two concurrent evals serialize.
     return await run(() =>
       handle.mutate({
-        id: systemId,
+        id: entityId,
         apply: async () => {
           durableState = await apply();
           return durableState;
@@ -340,19 +386,26 @@ export async function writeHealthEntity(args: {
   }
 }
-/** Advisory-lock key namespace for the per-system health critical section. */
-export function healthSystemLockKey(systemId: string): string {
-  return `health:${systemId}`;
+/**
+ * Advisory-lock key namespace for the per-entity health critical section. The
+ * argument is the FULL `health` entity id (Phase 3b): the bare `"<systemId>"`
+ * for the rollup or `"<systemId>::<environmentId>"` for a per-env write. Two
+ * different envs (or an env vs the rollup) get DIFFERENT keys, so they
+ * serialize independently and never block each other.
+ */
+export function healthEntityLockKey(entityId: string): string {
+  return `health:${entityId}`;
 }
 /**
- * Build the per-`systemId` serializer for {@link writeHealthEntity} backed by
+ * Build the per-`entityId` serializer for {@link writeHealthEntity} backed by
  * a transaction-scoped advisory lock (`withXactLock`, key
- * `health:<systemId>`). The returned function blocks until it holds the
- * system's lock, runs `fn` (the whole snapshot-prev + apply + diff + emit), and
+ * `health:<entityId>`). The returned function blocks until it holds the
+ * entity's lock, runs `fn` (the whole snapshot-prev + apply + diff + emit), and
  * auto-releases the lock at COMMIT/ROLLBACK. Two concurrent evaluations of one
- * system therefore serialize — exactly one logical `healthy → degraded`
- * transition emits exactly one `ENTITY_CHANGED` + one transition row.
+ * (system, environment) — or of the rollup — therefore serialize, while
+ * distinct envs proceed in parallel. Exactly one logical transition per entity
+ * emits exactly one `ENTITY_CHANGED` + one transition row.
  *
  * `fn` does its own durable writes on the outer pool; the lock only gates
  * ENTRY to the critical section, so its connection affinity is irrelevant —
@@ -360,10 +413,13 @@ export function healthSystemLockKey(systemId: string): string {
  * commits.
  */
 export function createHealthEntitySerializer(deps: {
-  db: Db;
-}): (systemId: string) => <T>(fn: () => Promise<T>) => Promise<T> {
-  const { db } = deps;
-  return (systemId) =>
+  advisoryLock: AdvisoryLockService;
+}): (entityId: string) => <T>(fn: () => Promise<T>) => Promise<T> {
+  const { advisoryLock } = deps;
+  return (entityId) =>
     <T>(fn: () => Promise<T>) =>
-      withXactLock({ db, key: healthSystemLockKey(systemId), fn: () => fn() });
+      advisoryLock.withXactLock({
+        key: healthEntityLockKey(entityId),
+        fn: () => fn(),
+      });
 }

package/src/health-state.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { and, desc, eq, gte } from "drizzle-orm";
+import { and, desc, eq, gte, isNull } from "drizzle-orm";
 import type { HealthCheckStatus } from "@checkstack/healthcheck-common";
 import type { Logger, SafeDatabase } from "@checkstack/backend-api";
 import type { InferClient } from "@checkstack/common";
@@ -122,15 +122,28 @@ export async function findLatestRun({
   db,
   systemId,
   configurationId,
+  environmentId,
 }: {
   db: Db;
   systemId: string;
   configurationId?: string;
+  /**
+   * Environment to scope the run lookup to (Phase 3b). `undefined` = any
+   * environment (rollup). `null` = env-less runs only. A string = that env.
+   */
+  environmentId?: string | null;
 }): Promise<{ latencyMs?: number; lastRunAt?: Date }> {
   const conditions = [eq(healthCheckRuns.systemId, systemId)];
   if (configurationId) {
     conditions.push(eq(healthCheckRuns.configurationId, configurationId));
   }
+  if (environmentId !== undefined) {
+    conditions.push(
+      environmentId === null
+        ? isNull(healthCheckRuns.environmentId)
+        : eq(healthCheckRuns.environmentId, environmentId),
+    );
+  }
   const [row] = await db
     .select({
@@ -161,12 +174,19 @@ export async function computeWindowedMetrics({
   db,
   systemId,
   configurationId,
+  environmentId,
   now = new Date(),
   windowHours = DEFAULT_METRICS_WINDOW_HOURS,
 }: {
   db: Db;
   systemId: string;
   configurationId?: string;
+  /**
+   * Environment to scope the windowed metrics to (Phase 3b). `undefined` =
+   * any environment (rollup). `null` = env-less aggregates only. A string =
+   * that environment's aggregate buckets only.
+   */
+  environmentId?: string | null;
   now?: Date;
   windowHours?: number;
 }): Promise<{
@@ -185,6 +205,13 @@ export async function computeWindowedMetrics({
       eq(healthCheckAggregates.configurationId, configurationId),
     );
   }
+  if (environmentId !== undefined) {
+    conditions.push(
+      environmentId === null
+        ? isNull(healthCheckAggregates.environmentId)
+        : eq(healthCheckAggregates.environmentId, environmentId),
+    );
+  }
   const buckets = await db
     .select({
@@ -284,6 +311,7 @@ export async function computeHealthState({
   db,
   systemId,
   configurationId,
+  environmentId,
   resolveStatus,
   maintenanceClient,
   logger,
@@ -293,6 +321,14 @@ export async function computeHealthState({
   db: Db;
   systemId: string;
   configurationId?: string;
+  /**
+   * Environment to scope EVERY durable read to (Phase 3b). `undefined` = the
+   * system rollup (all environments + env-less). `null` = the env-less slice.
+   * A string = that environment. `inStatusSince`, latest run, windowed
+   * metrics, and the transition count all narrow to this env so a per-env
+   * health snapshot reflects only that environment's runs/transitions.
+   */
+  environmentId?: string | null;
   /** Returns the aggregate status for the system (per-check when scoped). */
   resolveStatus: () => Promise<HealthCheckStatus>;
   maintenanceClient?: MaintenanceClient;
@@ -305,14 +341,15 @@ export async function computeHealthState({
   const [inStatusSince, latest, windowed, inMaintenance, transitionsInWindow] =
     await Promise.all([
-      findInStatusSince({ db, systemId, status }),
-      findLatestRun({ db, systemId, configurationId }),
-      computeWindowedMetrics({ db, systemId, configurationId, now }),
+      findInStatusSince({ db, systemId, status, environmentId }),
+      findLatestRun({ db, systemId, configurationId, environmentId }),
+      computeWindowedMetrics({ db, systemId, configurationId, environmentId, now }),
       resolveInMaintenance({ maintenanceClient, systemId, logger }),
       countStateTransitionsInWindow({
         db,
         systemId,
         windowMinutes: transitionWindowMinutes,
+        environmentId,
         now,
       }),
     ]);

package/src/healthcheck-gitops-kinds.test.ts CHANGED Viewed

@@ -415,6 +415,101 @@ describe("Healthcheck GitOps Kind: Healthcheck", () => {
     ).rejects.toThrow(/config validation failed/);
   });
+  it("migrates an OLD-shape authored config forward and stores the migrated value", async () => {
+    // A strategy at version 2 whose v1->v2 migration drops a removed
+    // `legacyMode` key. Authored gitops YAML still in the v1 shape (carrying
+    // `legacyMode`) must be migrated forward and applied, not rejected.
+    const v2Schema = z.object({ host: z.string() });
+    const versionedStrategy = {
+      id: "postgres",
+      displayName: "PostgreSQL",
+      description: "test",
+      config: new Versioned({
+        version: 2,
+        schema: v2Schema,
+        migrations: [
+          {
+            fromVersion: 1,
+            toVersion: 2,
+            description: "Drop removed legacyMode key",
+            migrate: ({ legacyMode: _legacyMode, ...rest }: Record<string, unknown>) =>
+              rest,
+          },
+        ],
+      }),
+    };
+    mockHCRegistry.getStrategiesWithMeta = () =>
+      [
+        { strategy: versionedStrategy, ownerPluginId: "mock", qualifiedId: "postgres" },
+      ] as any;
+    const kind = buildKind();
+    const result = await kind.reconcile({
+      entity: {
+        apiVersion: CHECKSTACK_API_VERSION,
+        kind: "Healthcheck",
+        metadata: { name: "legacy-check" },
+        spec: {
+          strategy: "postgres",
+          intervalSeconds: 30,
+          // Old v1 shape: carries the now-removed `legacyMode`.
+          config: { host: "db.legacy", legacyMode: true },
+        },
+      },
+      context: mockContext,
+    });
+    expect(result.entityId).toBe("hc-1");
+    // The MIGRATED config (legacyMode dropped) is what gets stored.
+    expect(mockService.configs[0].config).toEqual({ host: "db.legacy" });
+  });
+  it("rejects a genuine typo the migration does not account for (strict)", async () => {
+    const v2Schema = z.object({ host: z.string() });
+    const versionedStrategy = {
+      id: "postgres",
+      displayName: "PostgreSQL",
+      description: "test",
+      config: new Versioned({
+        version: 2,
+        schema: v2Schema,
+        migrations: [
+          {
+            fromVersion: 1,
+            toVersion: 2,
+            description: "Drop removed legacyMode key",
+            migrate: ({ legacyMode: _legacyMode, ...rest }: Record<string, unknown>) =>
+              rest,
+          },
+        ],
+      }),
+    };
+    mockHCRegistry.getStrategiesWithMeta = () =>
+      [
+        { strategy: versionedStrategy, ownerPluginId: "mock", qualifiedId: "postgres" },
+      ] as any;
+    const kind = buildKind();
+    await expect(
+      kind.reconcile({
+        entity: {
+          apiVersion: CHECKSTACK_API_VERSION,
+          kind: "Healthcheck",
+          metadata: { name: "typo-check" },
+          spec: {
+            strategy: "postgres",
+            intervalSeconds: 30,
+            // `hsot` is a genuine typo no migration accounts for.
+            config: { host: "db.local", hsot: "oops" },
+          },
+        },
+        context: mockContext,
+      }),
+    ).rejects.toThrow(/config validation failed/);
+  });
   it("validates collector configs against collector registry schemas", async () => {
     const kind = buildKind();

package/src/healthcheck-gitops-kinds.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import type {
 } from "@checkstack/backend-api";
 import { NotificationPolicySchema } from "@checkstack/healthcheck-common";
 import { HealthCheckService } from "./service";
+import { validateVersionedConfigStrict } from "./validate-configuration";
 import {
   DynamicOperators,
   numericField,
@@ -154,13 +155,25 @@ export function buildHealthcheckKind(
         },
       );
-      // Validate resolved config against strategy's Zod schema
-      const configValidation = strategy.config.schema.safeParse(resolvedConfig);
-      if (!configValidation.success) {
+      // Migrate-then-validate-strict: authored gitops YAML may be in an OLD
+      // config shape, so run the migration chain (assume-v1-on-read) before
+      // strict validation. Old-shape YAML still applies; genuine typos
+      // (unknown keys no migration accounts for) are still rejected. Shares the
+      // exact strict-validate path the `validateConfiguration` RPC uses, so the
+      // two agree on what counts as valid. A strategy config is always a plain
+      // object validated by the strategy's own schema, so narrowing the
+      // `unknown` result to the stored `Record` shape is safe.
+      const strategyResult = await validateVersionedConfigStrict({
+        config: strategy.config,
+        value: resolvedConfig,
+        basePath: ["config"],
+      });
+      if (!strategyResult.ok) {
         throw new Error(
-          `Strategy "${spec.strategy}" config validation failed: ${configValidation.error.message}`,
+          `Strategy "${spec.strategy}" config validation failed: ${formatIssues(strategyResult.issues)}`,
         );
       }
+      const migratedConfig = strategyResult.value as Record<string, unknown>;
       // Resolve and validate collector configs using their registry schemas
       const resolvedCollectors = spec.collectors
@@ -190,17 +203,30 @@ export function buildHealthcheckKind(
                   schema: registered.collector.config.schema,
                 });
-              const collectorConfigValidation =
-                registered.collector.config.schema.safeParse(
-                  resolvedCollectorConfig,
-                );
-              if (!collectorConfigValidation.success) {
+              // Migrate-then-validate-strict: authored gitops YAML may use an
+              // OLD collector config shape. Run the migration chain before
+              // strict validation so old-shape YAML still applies while
+              // genuine typos are still rejected. Shares the exact strict-
+              // validate path the `validateConfiguration` RPC uses. A collector
+              // config is always a plain object validated by the collector's
+              // schema, so narrowing the `unknown` result to the stored
+              // `Record` shape is safe.
+              const collectorResult = await validateVersionedConfigStrict({
+                config: registered.collector.config,
+                value: resolvedCollectorConfig,
+                basePath: ["config"],
+              });
+              if (!collectorResult.ok) {
                 throw new Error(
-                  `Collector "${c.collectorId}" config validation failed: ${collectorConfigValidation.error.message}`,
+                  `Collector "${c.collectorId}" config validation failed: ${formatIssues(collectorResult.issues)}`,
                 );
               }
+              const migratedCollectorConfig = collectorResult.value as Record<
+                string,
+                unknown
+              >;
-              return { ...c, config: resolvedCollectorConfig };
+              return { ...c, config: migratedCollectorConfig };
             }),
           )
         : undefined;
@@ -212,7 +238,7 @@ export function buildHealthcheckKind(
         await service.updateConfiguration(existingEntityId, {
           name: displayName,
           strategyId: spec.strategy,
-          config: resolvedConfig,
+          config: migratedConfig,
           intervalSeconds: spec.intervalSeconds,
           collectors: resolvedCollectors?.map((c) => ({
             id: c.collectorId,
@@ -230,7 +256,7 @@ export function buildHealthcheckKind(
       const config = await service.createConfiguration({
         name: displayName,
         strategyId: spec.strategy,
-        config: resolvedConfig,
+        config: migratedConfig,
         intervalSeconds: spec.intervalSeconds,
         collectors: resolvedCollectors?.map((c) => ({
           id: c.collectorId,
@@ -517,6 +543,23 @@ export function registerHealthcheckGitOpsDocumentation({
   }
 }
+/**
+ * Render the structured issues from {@link validateVersionedConfigStrict} into
+ * a single human-readable message for the thrown GitOps reconcile error,
+ * preserving the per-field path (e.g. `config.url: Invalid url`).
+ */
+function formatIssues(
+  issues: Array<{ path: Array<string | number>; message: string }>,
+): string {
+  return issues
+    .map((issue) =>
+      issue.path.length > 0
+        ? `${issue.path.join(".")}: ${issue.message}`
+        : issue.message,
+    )
+    .join("; ");
+}
 function unwrapZodType(type: z.ZodTypeAny): z.ZodTypeAny {
   let current = type;
   while (current) {

package/src/index.ts CHANGED Viewed

@@ -17,6 +17,12 @@ import {
   NotificationApi,
   specToRegistration,
 } from "@checkstack/notification-common";
+import {
+  aiToolExtensionPoint,
+  aiToolProjectionExtensionPoint,
+  deferredProjectionExecute,
+} from "@checkstack/ai-backend";
+import { buildHealthcheckAiTools } from "./ai/register-ai-tools";
 import {
   createBackendPlugin,
   coreServices,
@@ -198,6 +204,7 @@ export default createBackendPlugin({
         cacheManager: coreServices.cacheManager,
         config: coreServices.config,
         secretResolver: secretResolverRef,
+        advisoryLock: coreServices.advisoryLock,
       },
       // Phase 2: Register router and setup worker
       init: async ({
@@ -212,6 +219,7 @@ export default createBackendPlugin({
         cacheManager,
         config,
         secretResolver,
+        advisoryLock,
       }) => {
         logger.debug("🏥 Initializing Health Check Backend...");
@@ -232,6 +240,30 @@ export default createBackendPlugin({
           collectorRegistry,
         );
+        // Register this plugin's AI tools (propose/update/delete) into the AI
+        // registry via the extension point - owned here, not in ai-backend.
+        const aiToolExt = env.getExtensionPoint(aiToolExtensionPoint);
+        for (const tool of buildHealthcheckAiTools()) {
+          aiToolExt.registerTool(tool, pluginMetadata);
+        }
+        // Expose this plugin's OWN read-only AI projection of the existing
+        // `getConfigurations` query via aiToolProjectionExtensionPoint - owned
+        // here, not in ai-backend. The projected read tool is routed by the
+        // transport (MCP / chat) AS the principal, so `getConfigurations`'
+        // own contract access rules gate it; `deferredProjectionExecute` is
+        // the fail-closed net if a transport ever forgot to route.
+        env.getExtensionPoint(aiToolProjectionExtensionPoint).expose({
+          procedure: healthCheckContract.getConfigurations,
+          sourcePluginMetadata: pluginMetadata,
+          procedureKey: "getConfigurations",
+          name: "healthcheck.status",
+          description:
+            "List health-check configurations and their current status. Read-only.",
+          effect: "read",
+          execute: deferredProjectionExecute,
+        });
         // Create catalog client for notification delegation
         const catalogClient = rpcClient.forPlugin(CatalogApi);
@@ -258,6 +290,7 @@ export default createBackendPlugin({
         await setupHealthCheckWorker({
           notificationClient,
           db: database,
+          advisoryLock,
           registry: healthCheckRegistry,
           collectorRegistry,
           logger,