@checkstack/healthcheck-backend 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +303 -0
- package/drizzle/0018_abnormal_preak.sql +10 -0
- package/drizzle/meta/0018_snapshot.json +600 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +26 -21
- package/src/ai/assertion-validation.test.ts +117 -0
- package/src/ai/assertion-validation.ts +147 -0
- package/src/ai/healthcheck-capabilities.test.ts +158 -0
- package/src/ai/healthcheck-capabilities.ts +217 -0
- package/src/ai/healthcheck-delete.test.ts +81 -0
- package/src/ai/healthcheck-delete.ts +81 -0
- package/src/ai/healthcheck-projection.test.ts +36 -0
- package/src/ai/healthcheck-propose.test.ts +268 -0
- package/src/ai/healthcheck-propose.ts +290 -0
- package/src/ai/healthcheck-script-tools.test.ts +93 -0
- package/src/ai/healthcheck-script-tools.ts +179 -0
- package/src/ai/healthcheck-update.test.ts +123 -0
- package/src/ai/healthcheck-update.ts +123 -0
- package/src/ai/notify-subscribers.test.ts +109 -0
- package/src/ai/notify-subscribers.ts +176 -0
- package/src/ai/register-ai-tools.test.ts +41 -0
- package/src/ai/register-ai-tools.ts +53 -0
- package/src/ai/shell-env-table.test.ts +47 -0
- package/src/automations.test.ts +2 -1
- package/src/automations.ts +9 -1
- package/src/collector-script-test.test.ts +53 -1
- package/src/collector-script-test.ts +59 -7
- package/src/effective-environments.test.ts +93 -0
- package/src/effective-environments.ts +64 -0
- package/src/health-entity-id.ts +57 -0
- package/src/health-entity.test.ts +405 -31
- package/src/health-entity.ts +99 -43
- package/src/health-state.ts +41 -4
- package/src/healthcheck-gitops-kinds.test.ts +95 -0
- package/src/healthcheck-gitops-kinds.ts +56 -13
- package/src/index.ts +33 -0
- package/src/migration-chain-contract.test.ts +57 -0
- package/src/queue-executor.test.ts +814 -0
- package/src/queue-executor.ts +342 -50
- package/src/realtime-aggregation.test.ts +30 -0
- package/src/realtime-aggregation.ts +16 -0
- package/src/retention-job.ts +167 -93
- package/src/retention-rollup.test.ts +118 -0
- package/src/router.test.ts +120 -1
- package/src/router.ts +20 -0
- package/src/schema.ts +44 -6
- package/src/service.ts +199 -43
- package/src/state-evaluator.test.ts +50 -5
- package/src/state-evaluator.ts +9 -2
- package/src/state-transitions.test.ts +104 -0
- package/src/state-transitions.ts +39 -1
- package/src/validate-configuration.test.ts +205 -0
- package/src/validate-configuration.ts +159 -0
- package/tsconfig.json +9 -0
|
@@ -13,6 +13,16 @@ const passthroughCache: HealthCheckCache = {
|
|
|
13
13
|
invalidateAllSystems: async () => 0,
|
|
14
14
|
scope: {} as HealthCheckCache["scope"],
|
|
15
15
|
};
|
|
16
|
+
|
|
17
|
+
// Pass-through advisory lock: these tests don't exercise cross-pod
|
|
18
|
+
// serialization, so run the critical section directly.
|
|
19
|
+
const mockAdvisoryLock: Parameters<
|
|
20
|
+
typeof setupHealthCheckWorker
|
|
21
|
+
>[0]["advisoryLock"] = {
|
|
22
|
+
tryAcquire: async () => ({ release: async () => {} }),
|
|
23
|
+
withXactLock: <T>({ fn }: { key: string; fn: () => Promise<T> }): Promise<T> =>
|
|
24
|
+
fn(),
|
|
25
|
+
};
|
|
16
26
|
import {
|
|
17
27
|
createMockLogger,
|
|
18
28
|
createMockQueueManager,
|
|
@@ -24,6 +34,7 @@ import {
|
|
|
24
34
|
Versioned,
|
|
25
35
|
VersionedAggregated,
|
|
26
36
|
aggregatedCounter,
|
|
37
|
+
configString,
|
|
27
38
|
z,
|
|
28
39
|
} from "@checkstack/backend-api";
|
|
29
40
|
import { mock } from "bun:test";
|
|
@@ -179,6 +190,7 @@ describe("Queue-Based Health Check Executor", () => {
|
|
|
179
190
|
db: mockDb as unknown as Parameters<
|
|
180
191
|
typeof setupHealthCheckWorker
|
|
181
192
|
>[0]["db"],
|
|
193
|
+
advisoryLock: mockAdvisoryLock,
|
|
182
194
|
registry: mockRegistry,
|
|
183
195
|
collectorRegistry:
|
|
184
196
|
createMockCollectorRegistry() as unknown as Parameters<
|
|
@@ -376,6 +388,7 @@ describe("Queue-Based Health Check Executor", () => {
|
|
|
376
388
|
db: mockDb as unknown as Parameters<
|
|
377
389
|
typeof setupHealthCheckWorker
|
|
378
390
|
>[0]["db"],
|
|
391
|
+
advisoryLock: mockAdvisoryLock,
|
|
379
392
|
registry: mockRegistry,
|
|
380
393
|
collectorRegistry:
|
|
381
394
|
createMockCollectorRegistry() as unknown as Parameters<
|
|
@@ -487,6 +500,7 @@ describe("Queue-Based Health Check Executor", () => {
|
|
|
487
500
|
collector: {
|
|
488
501
|
id: "test-collector",
|
|
489
502
|
execute: collectorExecute,
|
|
503
|
+
config: new Versioned({ version: 1, schema: z.object({}) }),
|
|
490
504
|
mergeResult: mock(() => ({})),
|
|
491
505
|
},
|
|
492
506
|
})),
|
|
@@ -510,6 +524,7 @@ describe("Queue-Based Health Check Executor", () => {
|
|
|
510
524
|
db: mockDb as unknown as Parameters<
|
|
511
525
|
typeof setupHealthCheckWorker
|
|
512
526
|
>[0]["db"],
|
|
527
|
+
advisoryLock: mockAdvisoryLock,
|
|
513
528
|
registry: mockRegistry,
|
|
514
529
|
collectorRegistry: mockCollectorRegistry as unknown as Parameters<
|
|
515
530
|
typeof setupHealthCheckWorker
|
|
@@ -551,5 +566,804 @@ describe("Queue-Based Health Check Executor", () => {
|
|
|
551
566
|
system: { id: "system-1", name: "web-01" },
|
|
552
567
|
});
|
|
553
568
|
});
|
|
569
|
+
|
|
570
|
+
it("migrates a stored v1 strategy + collector config on the execution path", async () => {
|
|
571
|
+
const mockDb = createMockDb();
|
|
572
|
+
const mockLogger = createMockLogger();
|
|
573
|
+
const mockQueueManager = createMockQueueManager();
|
|
574
|
+
const mockCatalogClient = createMockCatalogClient();
|
|
575
|
+
const mockMaintenanceClient = createMockMaintenanceClient();
|
|
576
|
+
const mockIncidentClient = createMockIncidentClient();
|
|
577
|
+
const mockSignalService = createMockSignalService();
|
|
578
|
+
|
|
579
|
+
// A strategy whose config migrates v1 -> v2 by STRIPPING a moved field
|
|
580
|
+
// (`endpoint`), mirroring the real health-check reshapers. The stored
|
|
581
|
+
// config is genuinely v1 (carries `endpoint`); the executor must run the
|
|
582
|
+
// migration before handing the config to createClient.
|
|
583
|
+
let capturedStrategyConfig: unknown;
|
|
584
|
+
const strategyMigratingRegistry: HealthCheckRegistry = {
|
|
585
|
+
getStrategy: mock(() => ({
|
|
586
|
+
id: "migrating-strategy",
|
|
587
|
+
displayName: "Migrating",
|
|
588
|
+
config: new Versioned({
|
|
589
|
+
version: 2,
|
|
590
|
+
schema: z.object({ timeout: z.number() }),
|
|
591
|
+
migrations: [
|
|
592
|
+
{
|
|
593
|
+
fromVersion: 1,
|
|
594
|
+
toVersion: 2,
|
|
595
|
+
description: "strip endpoint",
|
|
596
|
+
migrate: (data: unknown): unknown => {
|
|
597
|
+
if (
|
|
598
|
+
typeof data === "object" &&
|
|
599
|
+
data !== null &&
|
|
600
|
+
"endpoint" in data
|
|
601
|
+
) {
|
|
602
|
+
const timeout = (data as { timeout?: unknown }).timeout;
|
|
603
|
+
return { timeout: typeof timeout === "number" ? timeout : 0 };
|
|
604
|
+
}
|
|
605
|
+
return data;
|
|
606
|
+
},
|
|
607
|
+
},
|
|
608
|
+
],
|
|
609
|
+
}),
|
|
610
|
+
result: new Versioned({ version: 1, schema: z.object({}) }),
|
|
611
|
+
aggregatedResult: new VersionedAggregated({
|
|
612
|
+
version: 1,
|
|
613
|
+
fields: { count: aggregatedCounter({}) },
|
|
614
|
+
}),
|
|
615
|
+
createClient: mock(async (config: unknown) => {
|
|
616
|
+
capturedStrategyConfig = config;
|
|
617
|
+
return {
|
|
618
|
+
client: { exec: mock(async () => ({})) },
|
|
619
|
+
close: mock(() => {}),
|
|
620
|
+
};
|
|
621
|
+
}),
|
|
622
|
+
mergeResult: mock(() => ({})),
|
|
623
|
+
})),
|
|
624
|
+
register: mock(() => {}),
|
|
625
|
+
getStrategies: mock(() => []),
|
|
626
|
+
getStrategiesWithMeta: mock(() => []),
|
|
627
|
+
};
|
|
628
|
+
|
|
629
|
+
// A collector whose config migrates v1 -> v2 by renaming `cmd` -> `value`.
|
|
630
|
+
let capturedCollectorConfig: unknown;
|
|
631
|
+
const collectorExecute = mock(async (params: { config?: unknown }) => {
|
|
632
|
+
capturedCollectorConfig = params.config;
|
|
633
|
+
return { result: {} };
|
|
634
|
+
});
|
|
635
|
+
const migratingCollectorRegistry = {
|
|
636
|
+
register: mock(() => {}),
|
|
637
|
+
getCollector: mock(() => ({
|
|
638
|
+
collector: {
|
|
639
|
+
id: "migrating-collector",
|
|
640
|
+
execute: collectorExecute,
|
|
641
|
+
config: new Versioned({
|
|
642
|
+
version: 2,
|
|
643
|
+
schema: z.object({ value: z.string() }),
|
|
644
|
+
migrations: [
|
|
645
|
+
{
|
|
646
|
+
fromVersion: 1,
|
|
647
|
+
toVersion: 2,
|
|
648
|
+
description: "rename cmd -> value",
|
|
649
|
+
migrate: (data: unknown): unknown => {
|
|
650
|
+
if (
|
|
651
|
+
typeof data === "object" &&
|
|
652
|
+
data !== null &&
|
|
653
|
+
"cmd" in data &&
|
|
654
|
+
!("value" in data)
|
|
655
|
+
) {
|
|
656
|
+
const cmd = (data as { cmd?: unknown }).cmd;
|
|
657
|
+
return { value: typeof cmd === "string" ? cmd : "" };
|
|
658
|
+
}
|
|
659
|
+
return data;
|
|
660
|
+
},
|
|
661
|
+
},
|
|
662
|
+
],
|
|
663
|
+
}),
|
|
664
|
+
result: new Versioned({ version: 1, schema: z.object({}) }),
|
|
665
|
+
mergeResult: mock(() => ({})),
|
|
666
|
+
},
|
|
667
|
+
})),
|
|
668
|
+
getCollectors: mock(() => []),
|
|
669
|
+
};
|
|
670
|
+
|
|
671
|
+
let selectCallCount = 0;
|
|
672
|
+
(mockDb.select as any) = mock(() => {
|
|
673
|
+
selectCallCount++;
|
|
674
|
+
if (selectCallCount === 2) {
|
|
675
|
+
return {
|
|
676
|
+
from: mock(() => ({
|
|
677
|
+
innerJoin: mock(() => ({
|
|
678
|
+
where: mock(() =>
|
|
679
|
+
Promise.resolve([
|
|
680
|
+
{
|
|
681
|
+
configId: "config-1",
|
|
682
|
+
configName: "v1 config",
|
|
683
|
+
strategyId: "migrating-strategy",
|
|
684
|
+
// Stored RAW + genuinely v1 (carries the moved field).
|
|
685
|
+
config: { endpoint: "tcp://old", timeout: 1234 },
|
|
686
|
+
collectors: [
|
|
687
|
+
{
|
|
688
|
+
id: "col-1",
|
|
689
|
+
collectorId: "migrating-collector",
|
|
690
|
+
config: { cmd: "legacy-value" },
|
|
691
|
+
},
|
|
692
|
+
],
|
|
693
|
+
interval: 30,
|
|
694
|
+
enabled: true,
|
|
695
|
+
paused: false,
|
|
696
|
+
includeLocal: true,
|
|
697
|
+
satelliteIds: [],
|
|
698
|
+
},
|
|
699
|
+
]),
|
|
700
|
+
),
|
|
701
|
+
})),
|
|
702
|
+
})),
|
|
703
|
+
};
|
|
704
|
+
}
|
|
705
|
+
return {
|
|
706
|
+
from: mock(() => ({
|
|
707
|
+
innerJoin: mock(() => ({
|
|
708
|
+
where: mock(() => Promise.resolve([])),
|
|
709
|
+
})),
|
|
710
|
+
})),
|
|
711
|
+
};
|
|
712
|
+
});
|
|
713
|
+
|
|
714
|
+
const queue =
|
|
715
|
+
mockQueueManager.getQueue<HealthCheckJobPayload>("health-checks");
|
|
716
|
+
let capturedHandler:
|
|
717
|
+
| ((job: { data: HealthCheckJobPayload }) => Promise<void>)
|
|
718
|
+
| undefined;
|
|
719
|
+
(queue.consume as any) = mock(
|
|
720
|
+
async (
|
|
721
|
+
handler: (job: { data: HealthCheckJobPayload }) => Promise<void>,
|
|
722
|
+
) => {
|
|
723
|
+
capturedHandler = handler;
|
|
724
|
+
},
|
|
725
|
+
);
|
|
726
|
+
|
|
727
|
+
await setupHealthCheckWorker({
|
|
728
|
+
db: mockDb as unknown as Parameters<
|
|
729
|
+
typeof setupHealthCheckWorker
|
|
730
|
+
>[0]["db"],
|
|
731
|
+
advisoryLock: mockAdvisoryLock,
|
|
732
|
+
registry: strategyMigratingRegistry,
|
|
733
|
+
collectorRegistry: migratingCollectorRegistry as unknown as Parameters<
|
|
734
|
+
typeof setupHealthCheckWorker
|
|
735
|
+
>[0]["collectorRegistry"],
|
|
736
|
+
logger: mockLogger,
|
|
737
|
+
queueManager: mockQueueManager,
|
|
738
|
+
signalService: mockSignalService,
|
|
739
|
+
catalogClient: mockCatalogClient as unknown as Parameters<
|
|
740
|
+
typeof setupHealthCheckWorker
|
|
741
|
+
>[0]["catalogClient"],
|
|
742
|
+
notificationClient: {
|
|
743
|
+
notifyForSubscription: () => Promise.resolve({ notifiedCount: 0 }),
|
|
744
|
+
} as unknown as Parameters<
|
|
745
|
+
typeof setupHealthCheckWorker
|
|
746
|
+
>[0]["notificationClient"],
|
|
747
|
+
maintenanceClient: mockMaintenanceClient as unknown as Parameters<
|
|
748
|
+
typeof setupHealthCheckWorker
|
|
749
|
+
>[0]["maintenanceClient"],
|
|
750
|
+
incidentClient: mockIncidentClient as unknown as Parameters<
|
|
751
|
+
typeof setupHealthCheckWorker
|
|
752
|
+
>[0]["incidentClient"],
|
|
753
|
+
getEmitHook: () => undefined,
|
|
754
|
+
cache: passthroughCache,
|
|
755
|
+
});
|
|
756
|
+
|
|
757
|
+
if (capturedHandler) {
|
|
758
|
+
await capturedHandler({
|
|
759
|
+
data: { configId: "config-1", systemId: "system-1" },
|
|
760
|
+
}).catch(() => {});
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// Strategy config reached createClient MIGRATED (endpoint stripped,
|
|
764
|
+
// timeout preserved) and VALIDATED against the v2 schema.
|
|
765
|
+
expect(capturedStrategyConfig).toEqual({ timeout: 1234 });
|
|
766
|
+
// Collector config reached execute MIGRATED (cmd renamed to value).
|
|
767
|
+
expect(collectorExecute).toHaveBeenCalled();
|
|
768
|
+
expect(capturedCollectorConfig).toEqual({ value: "legacy-value" });
|
|
769
|
+
});
|
|
770
|
+
});
|
|
771
|
+
|
|
772
|
+
describe("executeHealthCheckJob - per-environment fan-out", () => {
|
|
773
|
+
/**
|
|
774
|
+
* Drive one job with a configurable assignment `environmentIds` + catalog
|
|
775
|
+
* membership, capturing the run-context handed to the collector on EACH
|
|
776
|
+
* run. The collector executes once per fanned-out run, so the captured
|
|
777
|
+
* list is a faithful witness of "one run per effective environment".
|
|
778
|
+
*/
|
|
779
|
+
async function runFanOut({
|
|
780
|
+
environmentIds,
|
|
781
|
+
membership,
|
|
782
|
+
collectorConfig = {},
|
|
783
|
+
collectorConfigSchema = z.object({}),
|
|
784
|
+
}: {
|
|
785
|
+
environmentIds: string[] | null;
|
|
786
|
+
membership: Array<{
|
|
787
|
+
id: string;
|
|
788
|
+
name: string;
|
|
789
|
+
metadata: Record<string, unknown> | null;
|
|
790
|
+
}>;
|
|
791
|
+
/** Stored (pre-render) collector config for the single collector. */
|
|
792
|
+
collectorConfig?: Record<string, unknown>;
|
|
793
|
+
/** Schema used to detect `x-templatable` fields for the render pass. */
|
|
794
|
+
collectorConfigSchema?: z.ZodType<unknown>;
|
|
795
|
+
}): Promise<Array<{ environment?: unknown; config?: unknown }>> {
|
|
796
|
+
const mockDb = createMockDb();
|
|
797
|
+
const mockRegistry = createMockRegistry();
|
|
798
|
+
const mockLogger = createMockLogger();
|
|
799
|
+
const mockQueueManager = createMockQueueManager();
|
|
800
|
+
const mockCatalogClient = createMockCatalogClient();
|
|
801
|
+
const mockMaintenanceClient = createMockMaintenanceClient();
|
|
802
|
+
const mockIncidentClient = createMockIncidentClient();
|
|
803
|
+
const mockSignalService = createMockSignalService();
|
|
804
|
+
|
|
805
|
+
(mockCatalogClient.getSystem as any) = mock(async () => ({
|
|
806
|
+
id: "system-1",
|
|
807
|
+
name: "web-01",
|
|
808
|
+
}));
|
|
809
|
+
(mockCatalogClient as any).resolveSystemEnvironments = mock(async () =>
|
|
810
|
+
membership.map((m) => ({
|
|
811
|
+
...m,
|
|
812
|
+
description: null,
|
|
813
|
+
systemIds: [],
|
|
814
|
+
createdAt: new Date(),
|
|
815
|
+
updatedAt: new Date(),
|
|
816
|
+
})),
|
|
817
|
+
);
|
|
818
|
+
|
|
819
|
+
let selectCallCount = 0;
|
|
820
|
+
(mockDb.select as any) = mock(() => {
|
|
821
|
+
selectCallCount++;
|
|
822
|
+
if (selectCallCount === 2) {
|
|
823
|
+
return {
|
|
824
|
+
from: mock(() => ({
|
|
825
|
+
innerJoin: mock(() => ({
|
|
826
|
+
where: mock(() =>
|
|
827
|
+
Promise.resolve([
|
|
828
|
+
{
|
|
829
|
+
configId: "config-1",
|
|
830
|
+
configName: "Check",
|
|
831
|
+
strategyId: "test-strategy",
|
|
832
|
+
config: { timeout: 5000 },
|
|
833
|
+
collectors: [
|
|
834
|
+
{
|
|
835
|
+
id: "col-1",
|
|
836
|
+
collectorId: "test-collector",
|
|
837
|
+
config: collectorConfig,
|
|
838
|
+
},
|
|
839
|
+
],
|
|
840
|
+
interval: 45,
|
|
841
|
+
enabled: true,
|
|
842
|
+
paused: false,
|
|
843
|
+
includeLocal: true,
|
|
844
|
+
satelliteIds: [],
|
|
845
|
+
environmentIds,
|
|
846
|
+
},
|
|
847
|
+
]),
|
|
848
|
+
),
|
|
849
|
+
})),
|
|
850
|
+
})),
|
|
851
|
+
};
|
|
852
|
+
}
|
|
853
|
+
return {
|
|
854
|
+
from: mock(() => ({
|
|
855
|
+
innerJoin: mock(() => ({
|
|
856
|
+
where: mock(() => Promise.resolve([])),
|
|
857
|
+
})),
|
|
858
|
+
})),
|
|
859
|
+
};
|
|
860
|
+
});
|
|
861
|
+
|
|
862
|
+
const captured: Array<{ environment?: unknown; config?: unknown }> = [];
|
|
863
|
+
const collectorExecute = mock(
|
|
864
|
+
async (params: {
|
|
865
|
+
runContext?: { environment?: unknown };
|
|
866
|
+
config?: unknown;
|
|
867
|
+
}) => {
|
|
868
|
+
captured.push({
|
|
869
|
+
environment: params.runContext?.environment,
|
|
870
|
+
config: params.config,
|
|
871
|
+
});
|
|
872
|
+
return { result: {} };
|
|
873
|
+
},
|
|
874
|
+
);
|
|
875
|
+
const mockCollectorRegistry = {
|
|
876
|
+
register: mock(() => {}),
|
|
877
|
+
getCollector: mock(() => ({
|
|
878
|
+
collector: {
|
|
879
|
+
id: "test-collector",
|
|
880
|
+
execute: collectorExecute,
|
|
881
|
+
config: new Versioned({
|
|
882
|
+
version: 1,
|
|
883
|
+
schema: collectorConfigSchema,
|
|
884
|
+
}),
|
|
885
|
+
mergeResult: mock(() => ({})),
|
|
886
|
+
},
|
|
887
|
+
})),
|
|
888
|
+
getCollectors: mock(() => []),
|
|
889
|
+
};
|
|
890
|
+
|
|
891
|
+
const queue =
|
|
892
|
+
mockQueueManager.getQueue<HealthCheckJobPayload>("health-checks");
|
|
893
|
+
let capturedHandler:
|
|
894
|
+
| ((job: { data: HealthCheckJobPayload }) => Promise<void>)
|
|
895
|
+
| undefined;
|
|
896
|
+
(queue.consume as any) = mock(
|
|
897
|
+
async (
|
|
898
|
+
handler: (job: { data: HealthCheckJobPayload }) => Promise<void>,
|
|
899
|
+
) => {
|
|
900
|
+
capturedHandler = handler;
|
|
901
|
+
},
|
|
902
|
+
);
|
|
903
|
+
|
|
904
|
+
await setupHealthCheckWorker({
|
|
905
|
+
db: mockDb as unknown as Parameters<
|
|
906
|
+
typeof setupHealthCheckWorker
|
|
907
|
+
>[0]["db"],
|
|
908
|
+
advisoryLock: mockAdvisoryLock,
|
|
909
|
+
registry: mockRegistry,
|
|
910
|
+
collectorRegistry: mockCollectorRegistry as unknown as Parameters<
|
|
911
|
+
typeof setupHealthCheckWorker
|
|
912
|
+
>[0]["collectorRegistry"],
|
|
913
|
+
logger: mockLogger,
|
|
914
|
+
queueManager: mockQueueManager,
|
|
915
|
+
signalService: mockSignalService,
|
|
916
|
+
catalogClient: mockCatalogClient as unknown as Parameters<
|
|
917
|
+
typeof setupHealthCheckWorker
|
|
918
|
+
>[0]["catalogClient"],
|
|
919
|
+
notificationClient: {
|
|
920
|
+
notifyForSubscription: () => Promise.resolve({ notifiedCount: 0 }),
|
|
921
|
+
} as unknown as Parameters<
|
|
922
|
+
typeof setupHealthCheckWorker
|
|
923
|
+
>[0]["notificationClient"],
|
|
924
|
+
maintenanceClient: mockMaintenanceClient as unknown as Parameters<
|
|
925
|
+
typeof setupHealthCheckWorker
|
|
926
|
+
>[0]["maintenanceClient"],
|
|
927
|
+
incidentClient: mockIncidentClient as unknown as Parameters<
|
|
928
|
+
typeof setupHealthCheckWorker
|
|
929
|
+
>[0]["incidentClient"],
|
|
930
|
+
getEmitHook: () => undefined,
|
|
931
|
+
cache: passthroughCache,
|
|
932
|
+
});
|
|
933
|
+
|
|
934
|
+
if (capturedHandler) {
|
|
935
|
+
// Downstream persistence touches DB surfaces the lightweight mock
|
|
936
|
+
// doesn't fully model; tolerate a later throw — run-contexts are
|
|
937
|
+
// captured synchronously at collector-execute time, one per run.
|
|
938
|
+
await capturedHandler({
|
|
939
|
+
data: { configId: "config-1", systemId: "system-1" },
|
|
940
|
+
}).catch(() => {});
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
return captured;
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
it("runs once per effective environment with that env in run-context (null selector = all)", async () => {
|
|
947
|
+
const captured = await runFanOut({
|
|
948
|
+
environmentIds: null,
|
|
949
|
+
membership: [
|
|
950
|
+
{ id: "prod", name: "Production", metadata: { baseUrl: "p" } },
|
|
951
|
+
{ id: "staging", name: "Staging", metadata: { baseUrl: "s" } },
|
|
952
|
+
],
|
|
953
|
+
});
|
|
954
|
+
|
|
955
|
+
expect(captured).toHaveLength(2);
|
|
956
|
+
expect(captured[0]?.environment).toEqual({
|
|
957
|
+
id: "prod",
|
|
958
|
+
name: "Production",
|
|
959
|
+
fields: { baseUrl: "p" },
|
|
960
|
+
});
|
|
961
|
+
expect(captured[1]?.environment).toEqual({
|
|
962
|
+
id: "staging",
|
|
963
|
+
name: "Staging",
|
|
964
|
+
fields: { baseUrl: "s" },
|
|
965
|
+
});
|
|
966
|
+
});
|
|
967
|
+
|
|
968
|
+
it("renders x-templatable config fields per environment against environment.*", async () => {
|
|
969
|
+
const captured = await runFanOut({
|
|
970
|
+
environmentIds: null,
|
|
971
|
+
membership: [
|
|
972
|
+
{
|
|
973
|
+
id: "prod",
|
|
974
|
+
name: "Production",
|
|
975
|
+
metadata: { baseUrl: "https://prod.example.com" },
|
|
976
|
+
},
|
|
977
|
+
{
|
|
978
|
+
id: "staging",
|
|
979
|
+
name: "Staging",
|
|
980
|
+
metadata: { baseUrl: "https://staging.example.com" },
|
|
981
|
+
},
|
|
982
|
+
],
|
|
983
|
+
collectorConfig: { url: "{{ environment.baseUrl }}/healthz" },
|
|
984
|
+
collectorConfigSchema: z.object({
|
|
985
|
+
url: configString({ "x-templatable": true }),
|
|
986
|
+
}),
|
|
987
|
+
});
|
|
988
|
+
|
|
989
|
+
expect(captured).toHaveLength(2);
|
|
990
|
+
// Each env gets its own rendered config (per-env render pass, §6.3.3).
|
|
991
|
+
expect((captured[0]?.config as { url: string }).url).toBe(
|
|
992
|
+
"https://prod.example.com/healthz",
|
|
993
|
+
);
|
|
994
|
+
expect((captured[1]?.config as { url: string }).url).toBe(
|
|
995
|
+
"https://staging.example.com/healthz",
|
|
996
|
+
);
|
|
997
|
+
});
|
|
998
|
+
|
|
999
|
+
it("renders environment.* to empty string for an env-less run (render-empty, §11.6)", async () => {
|
|
1000
|
+
const captured = await runFanOut({
|
|
1001
|
+
environmentIds: [],
|
|
1002
|
+
membership: [
|
|
1003
|
+
{ id: "prod", name: "Production", metadata: { baseUrl: "x" } },
|
|
1004
|
+
],
|
|
1005
|
+
collectorConfig: { url: "{{ environment.baseUrl }}/healthz" },
|
|
1006
|
+
collectorConfigSchema: z.object({
|
|
1007
|
+
url: configString({ "x-templatable": true }),
|
|
1008
|
+
}),
|
|
1009
|
+
});
|
|
1010
|
+
|
|
1011
|
+
expect(captured).toHaveLength(1);
|
|
1012
|
+
expect(captured[0]?.environment).toBeUndefined();
|
|
1013
|
+
// Missing path renders empty (strict: false) — the HTTP collector's
|
|
1014
|
+
// post-render .url() check turns this into a clear config error.
|
|
1015
|
+
expect((captured[0]?.config as { url: string }).url).toBe("/healthz");
|
|
1016
|
+
});
|
|
1017
|
+
|
|
1018
|
+
it("runs only the explicit subset, intersected with membership", async () => {
|
|
1019
|
+
const captured = await runFanOut({
|
|
1020
|
+
environmentIds: ["staging"],
|
|
1021
|
+
membership: [
|
|
1022
|
+
{ id: "prod", name: "Production", metadata: {} },
|
|
1023
|
+
{ id: "staging", name: "Staging", metadata: {} },
|
|
1024
|
+
],
|
|
1025
|
+
});
|
|
1026
|
+
|
|
1027
|
+
expect(captured).toHaveLength(1);
|
|
1028
|
+
expect((captured[0]?.environment as { id: string }).id).toBe("staging");
|
|
1029
|
+
});
|
|
1030
|
+
|
|
1031
|
+
it("runs exactly once with no environment when opting out ([] selector)", async () => {
|
|
1032
|
+
const captured = await runFanOut({
|
|
1033
|
+
environmentIds: [],
|
|
1034
|
+
membership: [{ id: "prod", name: "Production", metadata: {} }],
|
|
1035
|
+
});
|
|
1036
|
+
|
|
1037
|
+
expect(captured).toHaveLength(1);
|
|
1038
|
+
expect(captured[0]?.environment).toBeUndefined();
|
|
1039
|
+
});
|
|
1040
|
+
|
|
1041
|
+
it("runs exactly once env-less when the system has no environments (null selector, empty membership)", async () => {
|
|
1042
|
+
const captured = await runFanOut({
|
|
1043
|
+
environmentIds: null,
|
|
1044
|
+
membership: [],
|
|
1045
|
+
});
|
|
1046
|
+
|
|
1047
|
+
expect(captured).toHaveLength(1);
|
|
1048
|
+
expect(captured[0]?.environment).toBeUndefined();
|
|
1049
|
+
});
|
|
1050
|
+
|
|
1051
|
+
/**
|
|
1052
|
+
* Per-environment ISOLATION regression (§7.2). When the FIRST
|
|
1053
|
+
* environment's run throws (here: its durable persist rejects, which —
|
|
1054
|
+
* with no health-entity handle bound — propagates out of
|
|
1055
|
+
* `writeHealthEntity` to the per-env catch), the loop MUST log and
|
|
1056
|
+
* continue so the SECOND environment still produces a run. One env's
|
|
1057
|
+
* failure must never abort its siblings.
|
|
1058
|
+
*/
|
|
1059
|
+
it("continues to the next environment when the first environment's run throws", async () => {
|
|
1060
|
+
const mockDb = createMockDb();
|
|
1061
|
+
const mockRegistry = createMockRegistry();
|
|
1062
|
+
const mockLogger = createMockLogger();
|
|
1063
|
+
const mockQueueManager = createMockQueueManager();
|
|
1064
|
+
const mockCatalogClient = createMockCatalogClient();
|
|
1065
|
+
const mockMaintenanceClient = createMockMaintenanceClient();
|
|
1066
|
+
const mockIncidentClient = createMockIncidentClient();
|
|
1067
|
+
const mockSignalService = createMockSignalService();
|
|
1068
|
+
|
|
1069
|
+
(mockCatalogClient.getSystem as any) = mock(async () => ({
|
|
1070
|
+
id: "system-1",
|
|
1071
|
+
name: "web-01",
|
|
1072
|
+
}));
|
|
1073
|
+
const membership = [
|
|
1074
|
+
{ id: "prod", name: "Production", metadata: {} },
|
|
1075
|
+
{ id: "staging", name: "Staging", metadata: {} },
|
|
1076
|
+
];
|
|
1077
|
+
(mockCatalogClient as any).resolveSystemEnvironments = mock(async () =>
|
|
1078
|
+
membership.map((m) => ({
|
|
1079
|
+
...m,
|
|
1080
|
+
description: null,
|
|
1081
|
+
systemIds: [],
|
|
1082
|
+
createdAt: new Date(),
|
|
1083
|
+
updatedAt: new Date(),
|
|
1084
|
+
})),
|
|
1085
|
+
);
|
|
1086
|
+
|
|
1087
|
+
let selectCallCount = 0;
|
|
1088
|
+
(mockDb.select as any) = mock(() => {
|
|
1089
|
+
selectCallCount++;
|
|
1090
|
+
if (selectCallCount === 2) {
|
|
1091
|
+
return {
|
|
1092
|
+
from: mock(() => ({
|
|
1093
|
+
innerJoin: mock(() => ({
|
|
1094
|
+
where: mock(() =>
|
|
1095
|
+
Promise.resolve([
|
|
1096
|
+
{
|
|
1097
|
+
configId: "config-1",
|
|
1098
|
+
configName: "Check",
|
|
1099
|
+
strategyId: "test-strategy",
|
|
1100
|
+
config: { timeout: 5000 },
|
|
1101
|
+
collectors: [
|
|
1102
|
+
{
|
|
1103
|
+
id: "col-1",
|
|
1104
|
+
collectorId: "test-collector",
|
|
1105
|
+
config: {},
|
|
1106
|
+
},
|
|
1107
|
+
],
|
|
1108
|
+
interval: 45,
|
|
1109
|
+
enabled: true,
|
|
1110
|
+
paused: false,
|
|
1111
|
+
includeLocal: true,
|
|
1112
|
+
satelliteIds: [],
|
|
1113
|
+
environmentIds: null,
|
|
1114
|
+
},
|
|
1115
|
+
]),
|
|
1116
|
+
),
|
|
1117
|
+
})),
|
|
1118
|
+
})),
|
|
1119
|
+
};
|
|
1120
|
+
}
|
|
1121
|
+
return {
|
|
1122
|
+
from: mock(() => ({
|
|
1123
|
+
innerJoin: mock(() => ({
|
|
1124
|
+
where: mock(() => Promise.resolve([])),
|
|
1125
|
+
})),
|
|
1126
|
+
})),
|
|
1127
|
+
};
|
|
1128
|
+
});
|
|
1129
|
+
|
|
1130
|
+
// The first environment's run insert REJECTS; the second succeeds.
|
|
1131
|
+
// With no health-entity handle bound, a failed `apply` propagates out
|
|
1132
|
+
// of `writeHealthEntity`, so this throw reaches the per-env catch.
|
|
1133
|
+
let insertCalls = 0;
|
|
1134
|
+
(mockDb.insert as any) = mock(() => ({
|
|
1135
|
+
values: mock(() => {
|
|
1136
|
+
insertCalls++;
|
|
1137
|
+
if (insertCalls === 1) {
|
|
1138
|
+
return Promise.reject(new Error("env-1 persist failed"));
|
|
1139
|
+
}
|
|
1140
|
+
return Promise.resolve();
|
|
1141
|
+
}),
|
|
1142
|
+
}));
|
|
1143
|
+
|
|
1144
|
+
const envSeen: Array<string | undefined> = [];
|
|
1145
|
+
const collectorExecute = mock(
|
|
1146
|
+
async (params: { runContext?: { environment?: { id?: string } } }) => {
|
|
1147
|
+
envSeen.push(params.runContext?.environment?.id);
|
|
1148
|
+
return { result: {} };
|
|
1149
|
+
},
|
|
1150
|
+
);
|
|
1151
|
+
const mockCollectorRegistry = {
|
|
1152
|
+
register: mock(() => {}),
|
|
1153
|
+
getCollector: mock(() => ({
|
|
1154
|
+
collector: {
|
|
1155
|
+
id: "test-collector",
|
|
1156
|
+
execute: collectorExecute,
|
|
1157
|
+
config: new Versioned({ version: 1, schema: z.object({}) }),
|
|
1158
|
+
mergeResult: mock(() => ({})),
|
|
1159
|
+
},
|
|
1160
|
+
})),
|
|
1161
|
+
getCollectors: mock(() => []),
|
|
1162
|
+
};
|
|
1163
|
+
|
|
1164
|
+
const queue =
|
|
1165
|
+
mockQueueManager.getQueue<HealthCheckJobPayload>("health-checks");
|
|
1166
|
+
let capturedHandler:
|
|
1167
|
+
| ((job: { data: HealthCheckJobPayload }) => Promise<void>)
|
|
1168
|
+
| undefined;
|
|
1169
|
+
(queue.consume as any) = mock(
|
|
1170
|
+
async (
|
|
1171
|
+
handler: (job: { data: HealthCheckJobPayload }) => Promise<void>,
|
|
1172
|
+
) => {
|
|
1173
|
+
capturedHandler = handler;
|
|
1174
|
+
},
|
|
1175
|
+
);
|
|
1176
|
+
|
|
1177
|
+
await setupHealthCheckWorker({
|
|
1178
|
+
db: mockDb as unknown as Parameters<
|
|
1179
|
+
typeof setupHealthCheckWorker
|
|
1180
|
+
>[0]["db"],
|
|
1181
|
+
advisoryLock: mockAdvisoryLock,
|
|
1182
|
+
registry: mockRegistry,
|
|
1183
|
+
collectorRegistry: mockCollectorRegistry as unknown as Parameters<
|
|
1184
|
+
typeof setupHealthCheckWorker
|
|
1185
|
+
>[0]["collectorRegistry"],
|
|
1186
|
+
logger: mockLogger,
|
|
1187
|
+
queueManager: mockQueueManager,
|
|
1188
|
+
signalService: mockSignalService,
|
|
1189
|
+
catalogClient: mockCatalogClient as unknown as Parameters<
|
|
1190
|
+
typeof setupHealthCheckWorker
|
|
1191
|
+
>[0]["catalogClient"],
|
|
1192
|
+
notificationClient: {
|
|
1193
|
+
notifyForSubscription: () => Promise.resolve({ notifiedCount: 0 }),
|
|
1194
|
+
} as unknown as Parameters<
|
|
1195
|
+
typeof setupHealthCheckWorker
|
|
1196
|
+
>[0]["notificationClient"],
|
|
1197
|
+
maintenanceClient: mockMaintenanceClient as unknown as Parameters<
|
|
1198
|
+
typeof setupHealthCheckWorker
|
|
1199
|
+
>[0]["maintenanceClient"],
|
|
1200
|
+
incidentClient: mockIncidentClient as unknown as Parameters<
|
|
1201
|
+
typeof setupHealthCheckWorker
|
|
1202
|
+
>[0]["incidentClient"],
|
|
1203
|
+
getEmitHook: () => undefined,
|
|
1204
|
+
cache: passthroughCache,
|
|
1205
|
+
});
|
|
1206
|
+
|
|
1207
|
+
if (capturedHandler) {
|
|
1208
|
+
await capturedHandler({
|
|
1209
|
+
data: { configId: "config-1", systemId: "system-1" },
|
|
1210
|
+
});
|
|
1211
|
+
}
|
|
1212
|
+
|
|
1213
|
+
// BOTH environments' collectors ran — the first env's persist failure
|
|
1214
|
+
// did not abort the loop.
|
|
1215
|
+
expect(envSeen).toEqual(["prod", "staging"]);
|
|
1216
|
+
// The failure was logged (isolated), not propagated.
|
|
1217
|
+
expect(mockLogger.error).toHaveBeenCalled();
|
|
1218
|
+
});
|
|
1219
|
+
|
|
1220
|
+
/**
 * Fail-open OBSERVABILITY (P3 review item 2).
 *
 * Scenario: the catalog client's `resolveSystemEnvironments` rejects. The
 * test then expects two things from the worker handler:
 *   1. the collector is executed exactly once, with no environment id in
 *      its run context (a single env-less run), and
 *   2. exactly one `healthcheck.environment.resolution_failed` signal is
 *      recorded whose payload carries the affected `systemId` — a
 *      counter-style signal rather than only a `logger.warn`, so a durable
 *      catalog misconfig / outage is observable.
 */
it("broadcasts ENVIRONMENT_RESOLUTION_FAILED and degrades to one env-less run when the catalog read fails", async () => {
  // Local shorthands for the repeated dependency casts further down.
  type WorkerDeps = Parameters<typeof setupHealthCheckWorker>[0];
  type JobHandler = (job: { data: HealthCheckJobPayload }) => Promise<void>;

  const mockDb = createMockDb();
  const mockRegistry = createMockRegistry();
  const mockLogger = createMockLogger();
  const mockQueueManager = createMockQueueManager();
  const mockCatalogClient = createMockCatalogClient();
  const mockMaintenanceClient = createMockMaintenanceClient();
  const mockIncidentClient = createMockIncidentClient();
  const mockSignalService = createMockSignalService();

  Object.assign(mockCatalogClient, {
    getSystem: mock(async () => ({
      id: "system-1",
      name: "web-01",
    })),
    // The catalog read REJECTS — the executor must fail open.
    resolveSystemEnvironments: mock(async () => {
      throw new Error("catalog unavailable");
    }),
  });

  // Builds the `select().from().innerJoin().where()` chain; `rows` is
  // evaluated lazily, only when `where` is finally invoked.
  const selectChain = (rows: () => unknown[]) => ({
    from: mock(() => ({
      innerJoin: mock(() => ({
        where: mock(() => Promise.resolve(rows())),
      })),
    })),
  });

  // Rows handed back by the second `select()` call.
  const configRows = () => [
    {
      configId: "config-1",
      configName: "Check",
      strategyId: "test-strategy",
      config: { timeout: 5000 },
      collectors: [
        {
          id: "col-1",
          collectorId: "test-collector",
          config: {},
        },
      ],
      interval: 45,
      enabled: true,
      paused: false,
      includeLocal: true,
      satelliteIds: [],
      environmentIds: null,
    },
  ];
  const noRows = () => [];

  // Only the second `select()` call resolves the config row; every other
  // call resolves an empty list.
  let selectCalls = 0;
  Object.assign(mockDb, {
    select: mock(() => {
      selectCalls += 1;
      return selectChain(selectCalls === 2 ? configRows : noRows);
    }),
  });

  // Records the environment id (if any) each collector run was given.
  const observedEnvIds: Array<string | undefined> = [];
  const collectorExecute = mock(
    async ({
      runContext,
    }: {
      runContext?: { environment?: { id?: string } };
    }) => {
      observedEnvIds.push(runContext?.environment?.id);
      return { result: {} };
    },
  );
  const mockCollectorRegistry = {
    register: mock(() => {}),
    getCollector: mock(() => ({
      collector: {
        id: "test-collector",
        execute: collectorExecute,
        config: new Versioned({ version: 1, schema: z.object({}) }),
        mergeResult: mock(() => ({})),
      },
    })),
    getCollectors: mock(() => []),
  };

  // Capture the job handler the worker hands to `queue.consume` so the
  // test can drive a single job through it by hand.
  const queue =
    mockQueueManager.getQueue<HealthCheckJobPayload>("health-checks");
  let capturedHandler: JobHandler | undefined;
  Object.assign(queue, {
    consume: mock(async (handler: JobHandler) => {
      capturedHandler = handler;
    }),
  });

  await setupHealthCheckWorker({
    db: mockDb as unknown as WorkerDeps["db"],
    advisoryLock: mockAdvisoryLock,
    registry: mockRegistry,
    collectorRegistry:
      mockCollectorRegistry as unknown as WorkerDeps["collectorRegistry"],
    logger: mockLogger,
    queueManager: mockQueueManager,
    signalService: mockSignalService,
    catalogClient: mockCatalogClient as unknown as WorkerDeps["catalogClient"],
    notificationClient: {
      notifyForSubscription: () => Promise.resolve({ notifiedCount: 0 }),
    } as unknown as WorkerDeps["notificationClient"],
    maintenanceClient:
      mockMaintenanceClient as unknown as WorkerDeps["maintenanceClient"],
    incidentClient:
      mockIncidentClient as unknown as WorkerDeps["incidentClient"],
    getEmitHook: () => undefined,
    cache: passthroughCache,
  });

  if (capturedHandler) {
    // A rejection from the handler is deliberately ignored here; the
    // assertions below inspect the recorded side effects instead.
    const job = { data: { configId: "config-1", systemId: "system-1" } };
    await capturedHandler(job).catch(() => {});
  }

  // Degraded to exactly one env-less run.
  expect(observedEnvIds).toEqual([undefined]);

  // The observability signal was broadcast with the failure detail.
  const failureSignals = mockSignalService.getRecordedSignalsById(
    "healthcheck.environment.resolution_failed",
  );
  expect(failureSignals).toHaveLength(1);
  const reported = failureSignals[0]?.payload as { systemId?: string };
  expect(reported.systemId).toBe("system-1");
});
|
|
554
1368
|
});
|
|
555
1369
|
});
|