@checkstack/healthcheck-backend 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +409 -0
- package/drizzle/0015_quiet_meggan.sql +12 -0
- package/drizzle/0016_complex_maginty.sql +1 -0
- package/drizzle/0017_pretty_caretaker.sql +1 -0
- package/drizzle/meta/0015_snapshot.json +764 -0
- package/drizzle/meta/0016_snapshot.json +644 -0
- package/drizzle/meta/0017_snapshot.json +563 -0
- package/drizzle/meta/_journal.json +21 -0
- package/package.json +24 -21
- package/src/automations.test.ts +6 -27
- package/src/automations.ts +32 -30
- package/src/collector-script-test.test.ts +236 -0
- package/src/collector-script-test.ts +221 -0
- package/src/health-entity.test.ts +694 -0
- package/src/health-entity.ts +367 -0
- package/src/health-state.test.ts +115 -0
- package/src/health-state.ts +333 -0
- package/src/healthcheck-gitops-kinds.test.ts +6 -32
- package/src/healthcheck-gitops-kinds.ts +4 -19
- package/src/hooks.test.ts +19 -6
- package/src/hooks.ts +13 -68
- package/src/index.ts +118 -48
- package/src/queue-executor.test.ts +13 -0
- package/src/queue-executor.ts +251 -444
- package/src/retention-job.ts +65 -1
- package/src/retention-state-transitions.test.ts +49 -0
- package/src/router.test.ts +13 -0
- package/src/router.ts +44 -0
- package/src/schema.ts +34 -54
- package/src/service-notification-policy.test.ts +28 -71
- package/src/service.ts +89 -0
- package/src/state-evaluator.test.ts +50 -5
- package/src/state-evaluator.ts +9 -2
- package/src/state-transitions.test.ts +126 -0
- package/src/state-transitions.ts +112 -0
- package/tsconfig.json +9 -0
- package/src/auto-incident-close-job.ts +0 -164
- package/src/auto-incident.test.ts +0 -196
- package/src/auto-incident.ts +0 -332
package/src/automations.test.ts
CHANGED
|
@@ -10,7 +10,6 @@ import {
|
|
|
10
10
|
assignmentArtifactType,
|
|
11
11
|
checkFailedTrigger,
|
|
12
12
|
createHealthCheckActions,
|
|
13
|
-
flappingDetectedTrigger,
|
|
14
13
|
healthCheckTriggers,
|
|
15
14
|
systemDegradedTrigger,
|
|
16
15
|
systemHealthChangedTrigger,
|
|
@@ -32,22 +31,19 @@ const ctxBase = {
|
|
|
32
31
|
};
|
|
33
32
|
|
|
34
33
|
describe("healthcheck triggers", () => {
|
|
35
|
-
it("exposes
|
|
36
|
-
expect(healthCheckTriggers).toHaveLength(
|
|
34
|
+
it("exposes four triggers in a stable order", () => {
|
|
35
|
+
expect(healthCheckTriggers).toHaveLength(4);
|
|
37
36
|
expect(healthCheckTriggers[0]).toBe(
|
|
38
|
-
systemDegradedTrigger as (typeof healthCheckTriggers)[number],
|
|
37
|
+
systemDegradedTrigger as unknown as (typeof healthCheckTriggers)[number],
|
|
39
38
|
);
|
|
40
39
|
expect(healthCheckTriggers[1]).toBe(
|
|
41
|
-
systemHealthyTrigger as (typeof healthCheckTriggers)[number],
|
|
40
|
+
systemHealthyTrigger as unknown as (typeof healthCheckTriggers)[number],
|
|
42
41
|
);
|
|
43
42
|
expect(healthCheckTriggers[2]).toBe(
|
|
44
|
-
systemHealthChangedTrigger as (typeof healthCheckTriggers)[number],
|
|
43
|
+
systemHealthChangedTrigger as unknown as (typeof healthCheckTriggers)[number],
|
|
45
44
|
);
|
|
46
45
|
expect(healthCheckTriggers[3]).toBe(
|
|
47
|
-
checkFailedTrigger as (typeof healthCheckTriggers)[number],
|
|
48
|
-
);
|
|
49
|
-
expect(healthCheckTriggers[4]).toBe(
|
|
50
|
-
flappingDetectedTrigger as (typeof healthCheckTriggers)[number],
|
|
46
|
+
checkFailedTrigger as unknown as (typeof healthCheckTriggers)[number],
|
|
51
47
|
);
|
|
52
48
|
});
|
|
53
49
|
|
|
@@ -69,23 +65,6 @@ describe("healthcheck triggers", () => {
|
|
|
69
65
|
).toBe("sys-1");
|
|
70
66
|
});
|
|
71
67
|
|
|
72
|
-
it("validates flappingDetected payload and requires transitionCount + windowMinutes", () => {
|
|
73
|
-
const ok = flappingDetectedTrigger.payloadSchema.safeParse({
|
|
74
|
-
systemId: "sys-1",
|
|
75
|
-
configurationId: "cfg-1",
|
|
76
|
-
transitionCount: 5,
|
|
77
|
-
windowMinutes: 10,
|
|
78
|
-
timestamp: "2026-05-29T12:00:00Z",
|
|
79
|
-
});
|
|
80
|
-
expect(ok.success).toBe(true);
|
|
81
|
-
|
|
82
|
-
const bad = flappingDetectedTrigger.payloadSchema.safeParse({
|
|
83
|
-
systemId: "sys-1",
|
|
84
|
-
configurationId: "cfg-1",
|
|
85
|
-
timestamp: "2026-05-29T12:00:00Z",
|
|
86
|
-
});
|
|
87
|
-
expect(bad.success).toBe(false);
|
|
88
|
-
});
|
|
89
68
|
|
|
90
69
|
it("extracts systemId as the contextKey on all three", () => {
|
|
91
70
|
const degradedOrChanged = {
|
package/src/automations.ts
CHANGED
|
@@ -30,6 +30,7 @@ import type {
|
|
|
30
30
|
ActionDefinition,
|
|
31
31
|
TriggerDefinition,
|
|
32
32
|
} from "@checkstack/automation-backend";
|
|
33
|
+
import { makeEntityDrivenTriggerSetup } from "@checkstack/automation-backend";
|
|
33
34
|
import { HealthCheckStatusSchema } from "@checkstack/healthcheck-common";
|
|
34
35
|
|
|
35
36
|
import { healthCheckHooks } from "./hooks";
|
|
@@ -79,14 +80,6 @@ const checkFailedPayloadSchema = z.object({
|
|
|
79
80
|
timestamp: z.string(),
|
|
80
81
|
});
|
|
81
82
|
|
|
82
|
-
const flappingDetectedPayloadSchema = z.object({
|
|
83
|
-
systemId: z.string(),
|
|
84
|
-
configurationId: z.string(),
|
|
85
|
-
transitionCount: z.number(),
|
|
86
|
-
windowMinutes: z.number(),
|
|
87
|
-
timestamp: z.string(),
|
|
88
|
-
});
|
|
89
|
-
|
|
90
83
|
// ─── Triggers ──────────────────────────────────────────────────────────
|
|
91
84
|
|
|
92
85
|
export const systemDegradedTrigger: TriggerDefinition<
|
|
@@ -99,8 +92,13 @@ export const systemDegradedTrigger: TriggerDefinition<
|
|
|
99
92
|
category: "Health",
|
|
100
93
|
icon: "HeartPulse",
|
|
101
94
|
payloadSchema: systemDegradedPayloadSchema,
|
|
102
|
-
|
|
95
|
+
// Entity-driven (§10.3): fired by the `health` entity change deriver via
|
|
96
|
+
// Stage-1 routing, not a hook. No-op setup keeps it in the editor catalog.
|
|
97
|
+
setup: makeEntityDrivenTriggerSetup<
|
|
98
|
+
z.infer<typeof systemDegradedPayloadSchema>
|
|
99
|
+
>(),
|
|
103
100
|
contextKey: (p) => p.systemId,
|
|
101
|
+
contextKeyLabel: "system",
|
|
104
102
|
};
|
|
105
103
|
|
|
106
104
|
export const systemHealthyTrigger: TriggerDefinition<
|
|
@@ -112,8 +110,12 @@ export const systemHealthyTrigger: TriggerDefinition<
|
|
|
112
110
|
category: "Health",
|
|
113
111
|
icon: "HeartPulse",
|
|
114
112
|
payloadSchema: systemHealthyPayloadSchema,
|
|
115
|
-
|
|
113
|
+
// Entity-driven (§10.3): fired by the `health` entity change deriver.
|
|
114
|
+
setup: makeEntityDrivenTriggerSetup<
|
|
115
|
+
z.infer<typeof systemHealthyPayloadSchema>
|
|
116
|
+
>(),
|
|
116
117
|
contextKey: (p) => p.systemId,
|
|
118
|
+
contextKeyLabel: "system",
|
|
117
119
|
};
|
|
118
120
|
|
|
119
121
|
export const systemHealthChangedTrigger: TriggerDefinition<
|
|
@@ -126,8 +128,12 @@ export const systemHealthChangedTrigger: TriggerDefinition<
|
|
|
126
128
|
category: "Health",
|
|
127
129
|
icon: "HeartPulse",
|
|
128
130
|
payloadSchema: systemHealthChangedPayloadSchema,
|
|
129
|
-
|
|
131
|
+
// Entity-driven (§10.3): fired by the `health` entity change deriver.
|
|
132
|
+
setup: makeEntityDrivenTriggerSetup<
|
|
133
|
+
z.infer<typeof systemHealthChangedPayloadSchema>
|
|
134
|
+
>(),
|
|
130
135
|
contextKey: (p) => p.systemId,
|
|
136
|
+
contextKeyLabel: "system",
|
|
131
137
|
};
|
|
132
138
|
|
|
133
139
|
export const checkFailedTrigger: TriggerDefinition<
|
|
@@ -142,28 +148,24 @@ export const checkFailedTrigger: TriggerDefinition<
|
|
|
142
148
|
payloadSchema: checkFailedPayloadSchema,
|
|
143
149
|
hook: healthCheckHooks.checkFailed,
|
|
144
150
|
contextKey: (p) => p.systemId,
|
|
151
|
+
contextKeyLabel: "system",
|
|
145
152
|
};
|
|
146
153
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
displayName: "Health Check Flapping",
|
|
152
|
-
description:
|
|
153
|
-
"Fires when N unhealthy transitions are observed within the policy window. Re-fires on every additional transition while flapping; debounce in the automation if needed.",
|
|
154
|
-
category: "Health",
|
|
155
|
-
icon: "Repeat",
|
|
156
|
-
payloadSchema: flappingDetectedPayloadSchema,
|
|
157
|
-
hook: healthCheckHooks.flappingDetected,
|
|
158
|
-
contextKey: (p) => p.systemId,
|
|
159
|
-
};
|
|
154
|
+
// The flapping trigger + its `flapping_detected` hook were removed. Flapping
|
|
155
|
+
// is now detected in the automation engine by a windowed-count gate on the
|
|
156
|
+
// `system_health_changed` trigger (raw change event + `filter` +
|
|
157
|
+
// `window: { count, minutes, refire: "once" }`) — no per-derived event.
|
|
160
158
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
159
|
+
// Each healthcheck trigger is declared with its own payload/config type
// parameters (today none of them takes a config). The registry accepts the
// `<unknown, unknown>` shape and re-validates config against each trigger's
// own `configSchema` at load, so every entry is widened on its way into the
// registration array — mirroring `registerBuiltinTriggers` in
// automation-backend. Order is part of the contract: degraded, healthy,
// health-changed, check-failed.
export const healthCheckTriggers: TriggerDefinition<unknown, unknown>[] = [
  systemDegradedTrigger,
  systemHealthyTrigger,
  systemHealthChangedTrigger,
  checkFailedTrigger,
].map((trigger) => trigger as unknown as TriggerDefinition<unknown, unknown>);
|
|
168
170
|
|
|
169
171
|
// ─── Action configs ────────────────────────────────────────────────────
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import type {
|
|
3
|
+
EsmScriptRunner,
|
|
4
|
+
ShellScriptRunner,
|
|
5
|
+
} from "@checkstack/backend-api";
|
|
6
|
+
import {
|
|
7
|
+
buildCollectorContext,
|
|
8
|
+
buildShellRunContextEnv,
|
|
9
|
+
runCollectorScriptTest,
|
|
10
|
+
} from "./collector-script-test";
|
|
11
|
+
|
|
12
|
+
/**
 * Test double for an `EsmScriptRunner`: delegates every `run` call to `impl`
 * and records the options object it was called with, in call order.
 */
function fakeEsm(impl: EsmScriptRunner["run"]): {
  runner: EsmScriptRunner;
  calls: Parameters<EsmScriptRunner["run"]>[0][];
} {
  const recorded: Parameters<EsmScriptRunner["run"]>[0][] = [];
  const runner: EsmScriptRunner = {
    run: (options) => {
      // Record first, then delegate, so a throwing impl is still captured.
      recorded.push(options);
      return impl(options);
    },
  };
  return { calls: recorded, runner };
}
|
|
19
|
+
|
|
20
|
+
/**
 * Test double for a `ShellScriptRunner`: delegates every `run` call to `impl`
 * and records the options object it was called with, in call order.
 */
function fakeShell(impl: ShellScriptRunner["run"]): {
  runner: ShellScriptRunner;
  calls: Parameters<ShellScriptRunner["run"]>[0][];
} {
  const recorded: Parameters<ShellScriptRunner["run"]>[0][] = [];
  const runner: ShellScriptRunner = {
    run: (options) => {
      // Record first, then delegate, so a throwing impl is still captured.
      recorded.push(options);
      return impl(options);
    },
  };
  return { calls: recorded, runner };
}
|
|
27
|
+
|
|
28
|
+
// Covers the run-context -> CHECKSTACK_* env mapping: full context, no
// context, and a context that carries only the system half.
describe("buildShellRunContextEnv", () => {
  const system = { id: "s1", name: "web-1" };
  const systemVars = {
    CHECKSTACK_SYSTEM_ID: "s1",
    CHECKSTACK_SYSTEM_NAME: "web-1",
  };

  test("emits check + system vars when present", () => {
    const check = { id: "c1", name: "CPU", intervalSeconds: 60 };
    expect(buildShellRunContextEnv({ check, system })).toEqual({
      CHECKSTACK_CHECK_ID: "c1",
      CHECKSTACK_CHECK_NAME: "CPU",
      // Numeric interval is expected as a string.
      CHECKSTACK_CHECK_INTERVAL_SECONDS: "60",
      ...systemVars,
    });
  });

  test("emits nothing for an empty run context", () => {
    const env = buildShellRunContextEnv(undefined);
    expect(env).toEqual({});
  });

  test("emits only the provided half", () => {
    expect(buildShellRunContextEnv({ system })).toEqual(systemVars);
  });
});
|
|
57
|
+
|
|
58
|
+
// Covers the `{ config, check?, system? }` context object handed to the
// TypeScript collector.
describe("buildCollectorContext", () => {
  test("always includes config, includes check/system only when present", () => {
    const configOnly = buildCollectorContext({ config: { threshold: 1 } });
    expect(configOnly).toEqual({ config: { threshold: 1 } });

    const withCheck = buildCollectorContext({
      config: {},
      runContext: { check: { id: "c", name: "n", intervalSeconds: 30 } },
    });
    expect(withCheck).toEqual({
      config: {},
      check: { id: "c", name: "n", intervalSeconds: 30 },
    });
  });

  test("defaults config to an empty object", () => {
    const context = buildCollectorContext({});
    expect(context).toEqual({ config: {} });
  });
});
|
|
78
|
+
|
|
79
|
+
// TypeScript (ESM runner) path: helper wiring, context construction, and the
// three failure shapes (reported error, timeout, runner rejection).
describe("runCollectorScriptTest — typescript", () => {
  test("runs with the healthcheck helper + built context", async () => {
    const esm = fakeEsm(async () => ({
      result: { success: true, value: 0.4 },
      stdout: "",
      stderr: "",
      timedOut: false,
    }));

    const out = await runCollectorScriptTest({
      input: {
        kind: "typescript",
        script: "export default { success: true }",
        config: { threshold: 0.6 },
        runContext: { check: { id: "c", name: "Load", intervalSeconds: 60 } },
        timeoutMs: 5000,
      },
      deps: { esmRunner: esm.runner },
    });

    const firstCall = esm.calls[0];
    expect(firstCall?.helperModuleName).toBe("@checkstack/healthcheck");
    expect(firstCall?.helperFunctionName).toBe("defineHealthCheck");
    expect(firstCall?.context).toEqual({
      config: { threshold: 0.6 },
      check: { id: "c", name: "Load", intervalSeconds: 60 },
    });
    expect(out.result).toEqual({ success: true, value: 0.4 });
    expect(out.error).toBeUndefined();
  });

  test("surfaces a thrown error and a timeout", async () => {
    // Runner reports an error string without timing out.
    const erroring = fakeEsm(async () => ({
      error: "boom",
      stdout: "",
      stderr: "",
      timedOut: false,
    }));
    const thrown = await runCollectorScriptTest({
      input: { kind: "typescript", script: "throw 1", timeoutMs: 1000 },
      deps: { esmRunner: erroring.runner },
    });
    expect(thrown.error).toBe("boom");

    // Runner reports a timeout and no error of its own.
    const hanging = fakeEsm(async () => ({
      stdout: "",
      stderr: "",
      timedOut: true,
    }));
    const timedOut = await runCollectorScriptTest({
      input: { kind: "typescript", script: "while(1){}", timeoutMs: 50 },
      deps: { esmRunner: hanging.runner },
    });
    expect(timedOut.timedOut).toBe(true);
    expect(timedOut.error).toBe("Script execution timed out");
  });

  test("catches an unexpected runner rejection", async () => {
    const rejecting = fakeEsm(async () => {
      throw new Error("spawn failed");
    });
    const out = await runCollectorScriptTest({
      input: { kind: "typescript", script: "x", timeoutMs: 1000 },
      deps: { esmRunner: rejecting.runner },
    });
    expect(out.error).toBe("spawn failed");
  });
});
|
|
134
|
+
|
|
135
|
+
// Shell runner path: env composition and exit-code reporting.
describe("runCollectorScriptTest — shell", () => {
  test("injects CHECKSTACK_* run-context env + merges explicit env", async () => {
    const shell = fakeShell(async () => ({
      exitCode: 0,
      stdout: "ok",
      stderr: "",
      timedOut: false,
    }));

    const out = await runCollectorScriptTest({
      input: {
        kind: "shell",
        script: "echo $CHECKSTACK_CHECK_NAME",
        runContext: { check: { id: "c", name: "Disk", intervalSeconds: 30 } },
        env: { EXTRA: "1" },
        timeoutMs: 3000,
      },
      deps: { shellRunner: shell.runner },
    });

    const passedEnv = shell.calls[0]?.env ?? {};
    expect(passedEnv.CHECKSTACK_CHECK_NAME).toBe("Disk");
    expect(passedEnv.EXTRA).toBe("1");
    expect(out.exitCode).toBe(0);
    expect(out.stdout).toBe("ok");
  });

  test("reports a non-zero exit as an error", async () => {
    const failing = fakeShell(async () => ({
      exitCode: 5,
      stdout: "",
      stderr: "bad",
      timedOut: false,
    }));

    const out = await runCollectorScriptTest({
      input: { kind: "shell", script: "exit 5", timeoutMs: 1000 },
      deps: { shellRunner: failing.runner },
    });

    expect(out.exitCode).toBe(5);
    expect(out.error).toContain("exited with code 5");
  });
});
|
|
176
|
+
|
|
177
|
+
// Secret handling in the test path: placeholders by default, user overrides
// injected but masked out of the returned result, nothing injected when no
// secret mapping is declared.
describe("runCollectorScriptTest secret placeholders + overrides (decision 4)", () => {
  // ESM runner stub that echoes back whatever env it was handed.
  const echoEnvEsm = () =>
    fakeEsm(async (opts) => ({
      result: opts.env ?? null,
      stdout: "",
      stderr: "",
      timedOut: false,
    }));

  test("injects __SECRET_<NAME>__ placeholders into the collector env by default", async () => {
    const esm = echoEnvEsm();
    await runCollectorScriptTest({
      input: {
        kind: "typescript",
        script: "export default () => ({ success: true })",
        timeoutMs: 1000,
        secretEnv: { TOKEN: "${{ secrets.api }}" },
      },
      deps: { esmRunner: esm.runner },
    });
    expect(esm.calls[0]?.env).toEqual({ TOKEN: "__SECRET_api__" });
  });

  test("injects + masks a user override (no real resolution)", async () => {
    const shell = fakeShell(async () => ({
      exitCode: 0,
      stdout: "value=override-secret",
      stderr: "",
      timedOut: false,
    }));

    const out = await runCollectorScriptTest({
      input: {
        kind: "shell",
        script: "echo value=$TOKEN",
        timeoutMs: 1000,
        secretEnv: { TOKEN: "${{ secrets.api }}" },
        secretOverrides: { api: "override-secret" },
      },
      deps: { shellRunner: shell.runner },
    });

    // The script sees the override...
    expect(shell.calls[0]?.env?.TOKEN).toBe("override-secret");
    // ...but the returned result must not carry it anywhere.
    expect(out.stdout).toBe("value=****");
    expect(JSON.stringify(out)).not.toContain("override-secret");
  });

  test("no secretEnv -> no secret env injected (least-privilege)", async () => {
    const esm = echoEnvEsm();
    await runCollectorScriptTest({
      input: {
        kind: "typescript",
        script: "export default () => ({ success: true })",
        timeoutMs: 1000,
      },
      deps: { esmRunner: esm.runner },
    });
    expect(esm.calls[0]?.env).toBeUndefined();
  });
});
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-UI testing for health-check collector scripts (the inline-script TS
|
|
3
|
+
* collector and the shell `script` collector from
|
|
4
|
+
* `@checkstack/healthcheck-script-backend`).
|
|
5
|
+
*
|
|
6
|
+
* Exercises the same sandboxed runners the real collectors use against an
|
|
7
|
+
* editable sample context, so an operator can click "Run" in the collector
|
|
8
|
+
* editor and see the result without scheduling a real check execution.
|
|
9
|
+
*
|
|
10
|
+
* Mirrors the automation `runScriptTest` design: pure, runner-injectable,
|
|
11
|
+
* never throws for ordinary script failures (they're returned in the
|
|
12
|
+
* result), central-only, time-bounded. Healthcheck *replay* from a past
|
|
13
|
+
* execution is intentionally NOT supported - `health_check_runs` persists
|
|
14
|
+
* only the result, never the script/config/check/system that produced it,
|
|
15
|
+
* so a faithful replay cannot be reconstructed. Auto-seed is the only
|
|
16
|
+
* context source (see the feature plan, open item g).
|
|
17
|
+
*/
|
|
18
|
+
import {
|
|
19
|
+
defaultEsmScriptRunner,
|
|
20
|
+
defaultShellScriptRunner,
|
|
21
|
+
type EsmScriptRunner,
|
|
22
|
+
type ShellScriptRunner,
|
|
23
|
+
} from "@checkstack/backend-api";
|
|
24
|
+
import { extractErrorMessage } from "@checkstack/common";
|
|
25
|
+
import {
|
|
26
|
+
buildTestSecretEnv,
|
|
27
|
+
maskScriptRunOutput,
|
|
28
|
+
} from "@checkstack/secrets-common";
|
|
29
|
+
|
|
30
|
+
/** Which runner a collector-script test goes through. */
export type CollectorScriptTestKind = "typescript" | "shell";

/**
 * Sample check/system metadata made available to the script under test.
 * Both halves are optional; only the provided ones are exposed.
 */
export interface CollectorTestRunContext {
  check?: { id: string; name: string; intervalSeconds: number };
  system?: { id: string; name: string };
}

/** Everything needed to run one collector-script test. */
export interface CollectorScriptTestInput {
  kind: CollectorScriptTestKind;
  /** Source text of the script to execute. */
  script: string;
  /** Collector config, surfaced to a TypeScript collector as `context.config`. */
  config?: Record<string, unknown>;
  /** Additional env vars for shell collectors; they override run-context vars. */
  env?: Record<string, string>;
  /**
   * Declared secret -> env-var mapping of the collector. Real secret values
   * are never resolved in the test path (decision 4): every declared env var
   * receives a `__SECRET_<NAME>__` placeholder unless `secretOverrides`
   * supplies a value for that secret name.
   */
  secretEnv?: Record<string, string>;
  /** Per-secret-NAME values supplied by the user; masked out of the result. */
  secretOverrides?: Record<string, string>;
  /** Directory a shell collector runs in. */
  workingDirectory?: string;
  runContext?: CollectorTestRunContext;
  /** Time limit handed to the runner, in milliseconds. */
  timeoutMs: number;
}

/** Outcome of one collector-script test run. */
export interface CollectorScriptTestResult {
  /** Value returned by the TypeScript runner; not set on the shell path. */
  result?: unknown;
  stdout: string;
  stderr: string;
  /** Exit code reported by the shell runner; not set on the TypeScript path. */
  exitCode?: number;
  /** Wall-clock time from the start of the call until the runner settled. */
  durationMs: number;
  timedOut: boolean;
  /** Failure description; left undefined when the run succeeded. */
  error?: string;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Injectable collaborators for a collector-script test. Any runner left out
 * falls back to the default runner from `@checkstack/backend-api`.
 */
export interface CollectorScriptTestDeps {
  /** Runner used for `kind: "typescript"` tests. */
  esmRunner?: EsmScriptRunner;
  /** Runner used for `kind: "shell"` tests. */
  shellRunner?: ShellScriptRunner;
  /**
   * Root directory for managed npm-package resolution, so a TypeScript
   * collector test resolves the same allowlisted packages the real collector
   * would. Leave unset when no packages are configured. Plan §4.1.
   */
  resolutionRoot?: string;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Translate the curated run-context metadata into the reserved
 * `CHECKSTACK_*` environment variables exposed to shell collectors.
 *
 * Kept local on purpose: it mirrors `runContextEnv` in
 * `@checkstack/healthcheck-script-backend`, but plugins do not import from
 * one another.
 *
 * @param runContext Sample check/system metadata, or `undefined` for none.
 * @returns Env vars for whichever halves of the context are present; an
 *   empty object when neither is. The numeric interval is stringified.
 */
export function buildShellRunContextEnv(
  runContext: CollectorTestRunContext | undefined,
): Record<string, string> {
  const check = runContext?.check;
  const system = runContext?.system;
  return {
    ...(check
      ? {
          CHECKSTACK_CHECK_ID: check.id,
          CHECKSTACK_CHECK_NAME: check.name,
          CHECKSTACK_CHECK_INTERVAL_SECONDS: String(check.intervalSeconds),
        }
      : {}),
    ...(system
      ? {
          CHECKSTACK_SYSTEM_ID: system.id,
          CHECKSTACK_SYSTEM_NAME: system.name,
        }
      : {}),
  };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Assemble the `globalThis.context` object handed to the inline-script (TS)
 * collector: `{ config, check?, system? }`. The shape is meant to match the
 * runtime collector so that a test mirrors production.
 *
 * @param input The `config` and `runContext` parts of the test input.
 * @returns An object that always has `config` (defaulting to `{}`), plus
 *   `check` and/or `system` only when the run context provides them.
 */
export function buildCollectorContext(
  input: Pick<CollectorScriptTestInput, "config" | "runContext">,
): Record<string, unknown> {
  const context: Record<string, unknown> = { config: input.config ?? {} };

  const check = input.runContext?.check;
  if (check) {
    context.check = check;
  }

  const system = input.runContext?.system;
  if (system) {
    context.system = system;
  }

  return context;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Run one collector-script test against a sample context and report the
 * outcome as data.
 *
 * Errors raised while invoking the runner are caught and returned in the
 * result's `error` field rather than thrown.
 *
 * @param input Script, kind, sample context, env/secret mappings and timeout.
 * @param deps Optional runner overrides and package resolution root; the
 *   default runners are used for anything not supplied.
 * @returns The run's output, duration, timeout flag and error (if any), with
 *   the secret-test mask values removed by `maskScriptRunOutput`.
 */
export async function runCollectorScriptTest({
  input,
  deps = {},
}: {
  input: CollectorScriptTestInput;
  deps?: CollectorScriptTestDeps;
}): Promise<CollectorScriptTestResult> {
  const startedAt = Date.now();

  // Secret env for the test path: placeholders unless the user supplied an
  // override. No real secret value is resolved here (decision 4).
  const secretTest = buildTestSecretEnv({
    secretEnv: input.secretEnv,
    secretOverrides: input.secretOverrides,
  });

  // Strips user-override values from every textual field so they cannot
  // round-trip back to the caller.
  const mask = (
    unmasked: CollectorScriptTestResult,
  ): CollectorScriptTestResult => {
    const { result, stdout, stderr, error } = unmasked;
    const scrubbed = maskScriptRunOutput({
      output: { result, stdout, stderr, error },
      values: secretTest.maskValues,
    });
    return { ...unmasked, ...scrubbed };
  };

  try {
    if (input.kind === "shell") {
      const shellRunner = deps.shellRunner ?? defaultShellScriptRunner;
      // Later spreads win: run-context vars, then operator env, then the
      // test secret env on top.
      const shellEnv = {
        ...buildShellRunContextEnv(input.runContext),
        ...input.env,
        ...secretTest.env,
      };
      const shellRes = await shellRunner.run({
        script: input.script,
        env: shellEnv,
        cwd: input.workingDirectory,
        timeoutMs: input.timeoutMs,
      });
      const durationMs = Date.now() - startedAt;

      // A timeout takes precedence over the exit code when describing the
      // failure; a zero exit code without timeout is a success.
      let error: string | undefined;
      if (shellRes.timedOut) {
        error = "Script execution timed out";
      } else if (shellRes.exitCode !== 0) {
        error = `Shell script exited with code ${shellRes.exitCode}`;
      }

      return mask({
        stdout: shellRes.stdout,
        stderr: shellRes.stderr,
        exitCode: shellRes.exitCode,
        durationMs,
        timedOut: shellRes.timedOut ? true : false,
        error,
      });
    }

    // Every other kind runs through the ESM (TypeScript) runner.
    const esmRunner = deps.esmRunner ?? defaultEsmScriptRunner;
    const esmRes = await esmRunner.run({
      script: input.script,
      context: buildCollectorContext(input),
      timeoutMs: input.timeoutMs,
      helperModuleName: "@checkstack/healthcheck",
      helperFunctionName: "defineHealthCheck",
      // Only pass `env` / `resolutionRoot` when there is something to pass.
      ...(Object.keys(secretTest.env).length > 0
        ? { env: secretTest.env }
        : {}),
      ...(deps.resolutionRoot ? { resolutionRoot: deps.resolutionRoot } : {}),
    });
    const durationMs = Date.now() - startedAt;
    const error = esmRes.timedOut ? "Script execution timed out" : esmRes.error;

    return mask({
      result: esmRes.result,
      stdout: esmRes.stdout,
      stderr: esmRes.stderr,
      durationMs,
      timedOut: esmRes.timedOut,
      error,
    });
  } catch (caught) {
    // The runner itself failed (rejected or threw): report it as a result.
    return mask({
      stdout: "",
      stderr: "",
      durationMs: Date.now() - startedAt,
      timedOut: false,
      error: extractErrorMessage(caught),
    });
  }
}
|