@checkstack/automation-backend 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/CHANGELOG.md +544 -0
  2. package/drizzle/0003_sparkling_xorn.sql +17 -0
  3. package/drizzle/0004_cultured_spyke.sql +2 -0
  4. package/drizzle/0005_classy_the_hand.sql +19 -0
  5. package/drizzle/0006_burly_wallop.sql +10 -0
  6. package/drizzle/0007_nappy_jackal.sql +1 -0
  7. package/drizzle/0008_remove_seeded_auto_incident_automations.sql +13 -0
  8. package/drizzle/0009_steady_liz_osborn.sql +12 -0
  9. package/drizzle/0010_chunky_changeling.sql +2 -0
  10. package/drizzle/meta/0003_snapshot.json +1007 -0
  11. package/drizzle/meta/0004_snapshot.json +1028 -0
  12. package/drizzle/meta/0005_snapshot.json +1164 -0
  13. package/drizzle/meta/0006_snapshot.json +1261 -0
  14. package/drizzle/meta/0007_snapshot.json +1215 -0
  15. package/drizzle/meta/0008_snapshot.json +1215 -0
  16. package/drizzle/meta/0009_snapshot.json +1328 -0
  17. package/drizzle/meta/0010_snapshot.json +1349 -0
  18. package/drizzle/meta/_journal.json +56 -0
  19. package/package.json +23 -12
  20. package/src/action-types.ts +23 -0
  21. package/src/artifact-store.ts +16 -1
  22. package/src/automation-store.test.ts +143 -0
  23. package/src/automation-store.ts +30 -8
  24. package/src/builtin-triggers.test.ts +77 -74
  25. package/src/builtin-triggers.ts +105 -108
  26. package/src/dispatch/action-kind.ts +2 -0
  27. package/src/dispatch/assemble-get-service.ts +31 -0
  28. package/src/dispatch/cancel-resurrect.test.ts +147 -0
  29. package/src/dispatch/concurrency-race.test.ts +255 -0
  30. package/src/dispatch/concurrency-scope.test.ts +166 -0
  31. package/src/dispatch/condition.ts +24 -5
  32. package/src/dispatch/dwell-queue.ts +65 -0
  33. package/src/dispatch/dwell-store.ts +154 -0
  34. package/src/dispatch/dwell.it.test.ts +142 -0
  35. package/src/dispatch/dwell.test.ts +799 -0
  36. package/src/dispatch/dwell.ts +257 -0
  37. package/src/dispatch/engine.test.ts +189 -2
  38. package/src/dispatch/engine.ts +555 -9
  39. package/src/dispatch/entity-scope.test.ts +176 -0
  40. package/src/dispatch/get-service-wiring.test.ts +318 -0
  41. package/src/dispatch/numeric.test.ts +71 -0
  42. package/src/dispatch/numeric.ts +96 -0
  43. package/src/dispatch/render.test.ts +34 -0
  44. package/src/dispatch/render.ts +31 -11
  45. package/src/dispatch/reseed-run-secrets.ts +230 -0
  46. package/src/dispatch/run-secret-registry.test.ts +189 -0
  47. package/src/dispatch/run-secret-registry.ts +247 -0
  48. package/src/dispatch/run-state-masking.test.ts +376 -0
  49. package/src/dispatch/run-state-store.ts +95 -38
  50. package/src/dispatch/run-state.ts +226 -59
  51. package/src/dispatch/scope-artifact-masking.test.ts +138 -0
  52. package/src/dispatch/secret-ref-ids.test.ts +19 -0
  53. package/src/dispatch/secret-ref-ids.ts +17 -0
  54. package/src/dispatch/snapshots.test.ts +86 -0
  55. package/src/dispatch/snapshots.ts +79 -0
  56. package/src/dispatch/stage1-router.test.ts +324 -0
  57. package/src/dispatch/stage1-router.ts +152 -0
  58. package/src/dispatch/stage1.it.test.ts +84 -0
  59. package/src/dispatch/stage2-dispatch.test.ts +285 -0
  60. package/src/dispatch/stage2-dispatch.ts +207 -0
  61. package/src/dispatch/stage2-stalled.it.test.ts +132 -0
  62. package/src/dispatch/stalled-sweeper.test.ts +197 -0
  63. package/src/dispatch/stalled-sweeper.ts +112 -5
  64. package/src/dispatch/state-scope.test.ts +234 -0
  65. package/src/dispatch/state-scope.ts +322 -0
  66. package/src/dispatch/structured-conditions.test.ts +246 -0
  67. package/src/dispatch/structured-conditions.ts +146 -0
  68. package/src/dispatch/test-fixtures.ts +306 -38
  69. package/src/dispatch/trigger-fanin.test.ts +111 -0
  70. package/src/dispatch/trigger-subscriber.ts +316 -14
  71. package/src/dispatch/types.ts +263 -8
  72. package/src/dispatch/wait-timeout-queue.ts +89 -0
  73. package/src/dispatch/wait-until-entity-wake.test.ts +544 -0
  74. package/src/dispatch/wait-until.test.ts +540 -0
  75. package/src/dispatch/wake-refs.test.ts +158 -0
  76. package/src/dispatch/wake-refs.ts +348 -0
  77. package/src/dispatch/window-gate.test.ts +513 -0
  78. package/src/dispatch/window-store.test.ts +162 -0
  79. package/src/dispatch/window-store.ts +102 -0
  80. package/src/entity/change-derivers.test.ts +148 -0
  81. package/src/entity/change-derivers.ts +143 -0
  82. package/src/entity/change-emitter.test.ts +66 -0
  83. package/src/entity/change-emitter.ts +76 -0
  84. package/src/entity/create-handle.ts +344 -0
  85. package/src/entity/cross-pod-read-consistency.it.test.ts +281 -0
  86. package/src/entity/define-entity.ts +157 -0
  87. package/src/entity/diff.test.ts +57 -0
  88. package/src/entity/diff.ts +54 -0
  89. package/src/entity/entity-store.test.ts +30 -0
  90. package/src/entity/entity-store.ts +171 -0
  91. package/src/entity/extension-point.ts +56 -0
  92. package/src/entity/fake-entity-store.ts +130 -0
  93. package/src/entity/hook.ts +19 -0
  94. package/src/entity/index.ts +50 -0
  95. package/src/entity/mutate-handle.test.ts +517 -0
  96. package/src/entity/on-entity-changed.test.ts +189 -0
  97. package/src/entity/on-entity-changed.ts +214 -0
  98. package/src/entity/registry.test.ts +181 -0
  99. package/src/entity/registry.ts +200 -0
  100. package/src/entity/stable-stringify.test.ts +55 -0
  101. package/src/entity/stable-stringify.ts +49 -0
  102. package/src/entity/wake-index.it.test.ts +251 -0
  103. package/src/entity/with-entity-write.test.ts +100 -0
  104. package/src/entity/with-entity-write.ts +69 -0
  105. package/src/entity-driven-trigger.ts +46 -0
  106. package/src/extension-points.ts +35 -0
  107. package/src/gitops-docs.test.ts +215 -0
  108. package/src/gitops-docs.ts +151 -0
  109. package/src/gitops-kinds.test.ts +174 -0
  110. package/src/gitops-kinds.ts +137 -0
  111. package/src/index.ts +355 -11
  112. package/src/migration/flapping-to-window.test.ts +123 -0
  113. package/src/migration/flapping-to-window.ts +205 -0
  114. package/src/router.test.ts +182 -1
  115. package/src/router.ts +73 -2
  116. package/src/schema.ts +236 -3
  117. package/src/script-test-replay.test.ts +88 -0
  118. package/src/script-test-replay.ts +100 -0
  119. package/src/script-test-shell-env.test.ts +41 -0
  120. package/src/script-test-shell-env.ts +89 -0
  121. package/src/script-test.test.ts +386 -0
  122. package/src/script-test.ts +258 -0
  123. package/src/trigger-registry.ts +2 -0
  124. package/src/validate-definition.test.ts +1 -0
  125. package/tsconfig.json +24 -0
@@ -0,0 +1,540 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { AutomationDefinitionSchema } from "@checkstack/automation-common";
3
+ import { createActionRegistry } from "../action-registry";
4
+ import {
5
+ checkWaitUntil,
6
+ dispatchTrigger,
7
+ WAIT_TIMEOUT_QUEUE_NAME,
8
+ } from "./engine";
9
+ import { startStalledSweeper } from "./stalled-sweeper";
10
+ import { createChangeDeriverRegistry } from "../entity/change-derivers";
11
+ import { routeEntityChange } from "./stage1-router";
12
+ import { handleDispatchJob, DISPATCH_QUEUE_NAME } from "./stage2-dispatch";
13
+ import type { EntityChanged } from "@checkstack/automation-common";
14
+ import { SYSTEM_ACTOR } from "@checkstack/common";
15
+ import {
16
+ makeDispatchDeps,
17
+ makeRecordingAction,
18
+ testPlugin,
19
+ } from "./test-fixtures";
20
+ import type { LoadedAutomation } from "./types";
21
+ import type { AutomationStore } from "../automation-store";
22
+
23
+ /** A health client whose `system` status is mutable between checks. */
24
+ function mutableHealthClient(initial: string) {
25
+ const state = { status: initial };
26
+ const stateObj = () => ({
27
+ status: state.status,
28
+ inStatusSince: new Date(),
29
+ inStatusForMs: 0,
30
+ inMaintenance: false,
31
+ evaluatedAt: new Date(),
32
+ });
33
+ return {
34
+ set: (s: string) => {
35
+ state.status = s;
36
+ },
37
+ client: {
38
+ getHealthState: async () => stateObj(),
39
+ getBulkHealthState: async ({ systemIds }: { systemIds: string[] }) => {
40
+ const states: Record<string, unknown> = {};
41
+ for (const id of systemIds) states[id] = stateObj();
42
+ return { states };
43
+ },
44
+ } as never,
45
+ };
46
+ }
47
+
48
+ /**
49
+ * A health client whose `getBulkHealthState` reports `flipped` from the Nth call
50
+ * (1-based) onward (`getHealthState` always reports `initial`) — simulates a change landing in the
51
+ * wait_until ARM WINDOW (between the lock being committed and the engine's re-evaluation).
52
+ */
53
+ function flipOnNthBulkCall(args: {
54
+ initial: string;
55
+ flipped: string;
56
+ nthCall: number;
57
+ }) {
58
+ let calls = 0;
59
+ const stateObj = (status: string) => ({
60
+ status,
61
+ inStatusSince: new Date(),
62
+ inStatusForMs: 0,
63
+ inMaintenance: false,
64
+ evaluatedAt: new Date(),
65
+ });
66
+ return {
67
+ callCount: () => calls,
68
+ client: {
69
+ getHealthState: async () => stateObj(args.initial),
70
+ getBulkHealthState: async ({ systemIds }: { systemIds: string[] }) => {
71
+ calls += 1;
72
+ const status = calls >= args.nthCall ? args.flipped : args.initial;
73
+ const states: Record<string, unknown> = {};
74
+ for (const id of systemIds) states[id] = stateObj(status);
75
+ return { states };
76
+ },
77
+ } as never,
78
+ };
79
+ }
80
+
81
+ function automation(actions: unknown[]): LoadedAutomation {
82
+ const definition = AutomationDefinitionSchema.parse({
83
+ name: "WU",
84
+ triggers: [{ event: "test.event" }],
85
+ conditions: [],
86
+ actions,
87
+ mode: "single",
88
+ max_runs: 10,
89
+ });
90
+ return { id: "auto-1", name: "WU", status: "enabled", definition };
91
+ }
92
+
93
+ /** Minimal automation store returning a single fixed automation. */
94
+ function storeFor(auto: LoadedAutomation): AutomationStore {
95
+ return {
96
+ create: async () => {
97
+ throw new Error("nope");
98
+ },
99
+ update: async () => {
100
+ throw new Error("nope");
101
+ },
102
+ delete: async () => {},
103
+ toggle: async () => {
104
+ throw new Error("nope");
105
+ },
106
+ getById: async (id) =>
107
+ id === auto.id
108
+ ? {
109
+ id: auto.id,
110
+ name: auto.name,
111
+ description: undefined,
112
+ status: auto.status,
113
+ definition: auto.definition,
114
+ managedBy: undefined,
115
+ createdAt: new Date(),
116
+ updatedAt: new Date(),
117
+ }
118
+ : undefined,
119
+ list: async () => ({ items: [], total: 0 }),
120
+ listGroups: async () => [],
121
+ findEnabledByTriggerEvent: async () => [auto],
122
+ listEnabled: async () => [auto],
123
+ };
124
+ }
125
+
126
+ const CONDITION = "health.system.status == 'healthy'";
127
+
128
+ function setup(initialStatus: string) {
129
+ const actionsReg = createActionRegistry();
130
+ const rec = makeRecordingAction();
131
+ actionsReg.register(rec.definition, testPlugin);
132
+ const health = mutableHealthClient(initialStatus);
133
+ const { deps, runs, queue, state } = makeDispatchDeps({
134
+ actions: actionsReg,
135
+ healthCheckClient: health.client,
136
+ });
137
+ return { deps, runs, queue, state, rec, health };
138
+ }
139
+
140
+ /** A `health:<id>` entity change, used to drive Stage-1 wake routing. */
141
+ function healthChange(id: string): EntityChanged {
142
+ return {
143
+ kind: "health",
144
+ id,
145
+ prev: { status: "unhealthy" },
146
+ next: { status: "healthy" },
147
+ delta: { status: "healthy" },
148
+ changedFields: ["status"],
149
+ actor: SYSTEM_ACTOR,
150
+ occurredAt: new Date().toISOString(),
151
+ };
152
+ }
153
+
154
+ describe("wait_until — immediate satisfaction", () => {
155
+ it("continues inline without suspending when already true", async () => {
156
+ const { deps, runs, rec } = setup("healthy");
157
+ const auto = automation([
158
+ { wait_until: { condition: CONDITION } },
159
+ { action: "test.record", config: { value: "after" } },
160
+ ]);
161
+ const result = await dispatchTrigger(deps, {
162
+ automation: auto,
163
+ triggerId: "test_event",
164
+ triggerEventId: "test.event",
165
+ payload: { id: "sys-1" },
166
+ contextKey: "sys-1",
167
+ });
168
+ expect(result.status).toBe("success");
169
+ expect(runs.waitLocks.size).toBe(0);
170
+ expect(rec.calls.map((c) => c.value)).toEqual(["after"]);
171
+ });
172
+ });
173
+
174
+ describe("wait_until — reactive suspend + wake-index", () => {
175
+ it("suspends with a wake-index ref and NO poll job, then a relevant change wakes + resumes", async () => {
176
+ const { deps, runs, queue, rec, health } = setup("unhealthy");
177
+ const auto = automation([
178
+ { wait_until: { condition: CONDITION } },
179
+ { action: "test.record", config: { value: "recovered" } },
180
+ ]);
181
+
182
+ const result = await dispatchTrigger(deps, {
183
+ automation: auto,
184
+ triggerId: "test_event",
185
+ triggerEventId: "test.event",
186
+ payload: { id: "sys-1" },
187
+ contextKey: "sys-1",
188
+ });
189
+ expect(result.status).toBe("waiting");
190
+ expect(runs.waitLocks.size).toBe(1);
191
+ expect(rec.calls).toHaveLength(0);
192
+
193
+ // Reactive: NO poll re-check job is enqueued (and no timeout was set).
194
+ expect(queue.jobs).toHaveLength(0);
195
+
196
+ // The wake-index recorded the health wildcard (health.system → kind:*).
197
+ const lock = [...runs.waitLocks.values()][0]!;
198
+ const refs = runs.wakeRefs.get(lock.id);
199
+ expect(refs && [...refs]).toEqual(["health:*"]);
200
+
201
+ // Stage-1 routing of a relevant ENTITY_CHANGED enqueues a Stage-2 wake.
202
+ health.set("healthy");
203
+ const changeDerivers = createChangeDeriverRegistry();
204
+ const jobs = await routeEntityChange({
205
+ deps,
206
+ automationStore: storeFor(auto),
207
+ changeDerivers,
208
+ changed: healthChange("sys-1"),
209
+ });
210
+ expect(jobs).toHaveLength(1);
211
+ expect(jobs[0]?.reason).toBe("wake");
212
+ // The Stage-2 wake job was enqueued onto the dispatch queue.
213
+ expect(queue.jobs.some((j) => j.queue === DISPATCH_QUEUE_NAME)).toBe(true);
214
+
215
+ // Run the Stage-2 wake job → re-eval (now true) → resume.
216
+ await handleDispatchJob({
217
+ deps,
218
+ automationStore: storeFor(auto),
219
+ changeDerivers,
220
+ job: jobs[0]!,
221
+ });
222
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
223
+ expect(rec.calls.map((c) => c.value)).toEqual(["recovered"]);
224
+ expect(runs.waitLocks.size).toBe(0);
225
+ });
226
+
227
+ it("a wake while the condition is still false does NOT resume (re-eval gate)", async () => {
228
+ const { deps, runs, rec } = setup("unhealthy");
229
+ const auto = automation([
230
+ { wait_until: { condition: CONDITION } },
231
+ { action: "test.record", config: { value: "recovered" } },
232
+ ]);
233
+ const result = await dispatchTrigger(deps, {
234
+ automation: auto,
235
+ triggerId: "test_event",
236
+ triggerEventId: "test.event",
237
+ payload: { id: "sys-1" },
238
+ contextKey: "sys-1",
239
+ });
240
+ expect(result.status).toBe("waiting");
241
+
242
+ const lock = [...runs.waitLocks.values()][0]!;
243
+ // Still unhealthy → re-eval false → still-waiting.
244
+ const outcome = await checkWaitUntil(deps, {
245
+ runId: result.runId,
246
+ waitLockId: lock.id,
247
+ automation: auto,
248
+ });
249
+ expect(outcome).toBe("still-waiting");
250
+ expect(rec.calls).toHaveLength(0);
251
+ expect(runs.waitLocks.size).toBe(1);
252
+ });
253
+ });
254
+
255
+ describe("wait_until — lost-wakeup arm-window guard (§17)", () => {
256
+ it("resumes inline (no stall) when the condition becomes true between arming the lock and the re-eval", async () => {
257
+ // The fast path sees `unhealthy` (call 1, at dispatch start), arms the
258
+ // wait lock + wake-index, THEN the arm-window re-eval (call 2) sees the
259
+ // system flip to `healthy` — modelling an ENTITY_CHANGED that landed
260
+ // during the arm window and was NOT routed to a wake job. Without the
261
+ // re-evaluate-on-registration guard this NO-TIMEOUT wait would stall
262
+ // forever (the sweeper only re-checks locks with a timeout).
263
+ const actionsReg = createActionRegistry();
264
+ const rec = makeRecordingAction();
265
+ actionsReg.register(rec.definition, testPlugin);
266
+ const health = flipOnNthBulkCall({
267
+ initial: "unhealthy",
268
+ flipped: "healthy",
269
+ nthCall: 2,
270
+ });
271
+ const { deps, runs, queue } = makeDispatchDeps({
272
+ actions: actionsReg,
273
+ healthCheckClient: health.client,
274
+ });
275
+
276
+ const auto = automation([
277
+ { wait_until: { condition: CONDITION } },
278
+ { action: "test.record", config: { value: "recovered" } },
279
+ ]);
280
+
281
+ const result = await dispatchTrigger(deps, {
282
+ automation: auto,
283
+ triggerId: "test_event",
284
+ triggerEventId: "test.event",
285
+ payload: { id: "sys-1" },
286
+ contextKey: "sys-1",
287
+ });
288
+
289
+ // Resumed inline: the run finished rather than stalling as `waiting`.
290
+ expect(result.status).toBe("success");
291
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
292
+ // The post-wait action ran.
293
+ expect(rec.calls.map((c) => c.value)).toEqual(["recovered"]);
294
+ // The lock (and its wake-index rows) were cleaned up.
295
+ expect(runs.waitLocks.size).toBe(0);
296
+ // No timeout job was armed (no timeout was set), and crucially no poll
297
+ // loop — the run completed purely via the inline re-eval.
298
+ expect(queue.jobs).toHaveLength(0);
299
+ // Sanity: the arm-window re-eval actually re-queried health.
300
+ expect(health.callCount()).toBeGreaterThanOrEqual(2);
301
+ });
302
+ });
303
+
304
+ describe("wait_until — timeout", () => {
305
+ it("arms a single timeout timer job (not a poll loop)", async () => {
306
+ const { deps, queue } = setup("unhealthy");
307
+ const auto = automation([
308
+ { wait_until: { condition: CONDITION, timeout_seconds: 60 } },
309
+ { action: "test.record", config: { value: "escalate" } },
310
+ ]);
311
+ await dispatchTrigger(deps, {
312
+ automation: auto,
313
+ triggerId: "test_event",
314
+ triggerEventId: "test.event",
315
+ payload: { id: "sys-1" },
316
+ contextKey: "sys-1",
317
+ });
318
+ const timeoutJobs = queue.jobs.filter(
319
+ (j) => j.queue === WAIT_TIMEOUT_QUEUE_NAME,
320
+ );
321
+ expect(timeoutJobs).toHaveLength(1);
322
+ expect(timeoutJobs[0]?.startDelay).toBeGreaterThan(0);
323
+ });
324
+
325
+ it("continues past on timeout when continue_on_timeout is true (default)", async () => {
326
+ const { deps, runs, rec } = setup("unhealthy");
327
+ const auto = automation([
328
+ { wait_until: { condition: CONDITION, timeout_seconds: 60 } },
329
+ { action: "test.record", config: { value: "escalate" } },
330
+ ]);
331
+ const result = await dispatchTrigger(deps, {
332
+ automation: auto,
333
+ triggerId: "test_event",
334
+ triggerEventId: "test.event",
335
+ payload: { id: "sys-1" },
336
+ contextKey: "sys-1",
337
+ });
338
+ expect(result.status).toBe("waiting");
339
+ const lock = [...runs.waitLocks.values()][0]!;
340
+ // Force the deadline into the past.
341
+ lock.timeoutAt = new Date(Date.now() - 1000);
342
+
343
+ const outcome = await checkWaitUntil(deps, {
344
+ runId: result.runId,
345
+ waitLockId: lock.id,
346
+ automation: auto,
347
+ });
348
+ expect(outcome).toBe("resumed");
349
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
350
+ expect(rec.calls.map((c) => c.value)).toEqual(["escalate"]);
351
+ });
352
+
353
+ it("fails the run on timeout when continue_on_timeout is false", async () => {
354
+ const { deps, runs, rec } = setup("unhealthy");
355
+ const auto = automation([
356
+ {
357
+ wait_until: {
358
+ condition: CONDITION,
359
+ timeout_seconds: 60,
360
+ continue_on_timeout: false,
361
+ },
362
+ },
363
+ { action: "test.record", config: { value: "after" } },
364
+ ]);
365
+ const result = await dispatchTrigger(deps, {
366
+ automation: auto,
367
+ triggerId: "test_event",
368
+ triggerEventId: "test.event",
369
+ payload: { id: "sys-1" },
370
+ contextKey: "sys-1",
371
+ });
372
+ const lock = [...runs.waitLocks.values()][0]!;
373
+ lock.timeoutAt = new Date(Date.now() - 1000);
374
+
375
+ const outcome = await checkWaitUntil(deps, {
376
+ runId: result.runId,
377
+ waitLockId: lock.id,
378
+ automation: auto,
379
+ });
380
+ expect(outcome).toBe("failed");
381
+ expect(runs.runs.get(result.runId)?.status).toBe("failed");
382
+ expect(rec.calls).toHaveLength(0);
383
+ });
384
+ });
385
+
386
+ describe("wait_until — timeout backstop via sweeper", () => {
387
+ it("the sweeper applies the timeout policy for an expired until lock whose timer was lost", async () => {
388
+ const { deps, runs, rec } = setup("unhealthy");
389
+ const auto = automation([
390
+ { wait_until: { condition: CONDITION, timeout_seconds: 60 } },
391
+ { action: "test.record", config: { value: "escalate" } },
392
+ ]);
393
+ const result = await dispatchTrigger(deps, {
394
+ automation: auto,
395
+ triggerId: "test_event",
396
+ triggerEventId: "test.event",
397
+ payload: { id: "sys-1" },
398
+ contextKey: "sys-1",
399
+ });
400
+ expect(result.status).toBe("waiting");
401
+
402
+ // The timeout timer job is "lost"; force the deadline into the past and
403
+ // only the sweeper runs.
404
+ const lock = [...runs.waitLocks.values()][0]!;
405
+ lock.timeoutAt = new Date(Date.now() - 1000);
406
+ const sweeper = startStalledSweeper({
407
+ deps,
408
+ automationStore: storeFor(auto),
409
+ logger: deps.logger,
410
+ });
411
+ await sweeper.sweep();
412
+ sweeper.stop();
413
+
414
+ // continue_on_timeout defaults to true → run continues past the wait.
415
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
416
+ expect(rec.calls.map((c) => c.value)).toEqual(["escalate"]);
417
+ expect(runs.waitLocks.size).toBe(0);
418
+ });
419
+ });
420
+
421
+ describe("wait_until — nested inside containers", () => {
422
+ it("resumes a wait_until nested inside a choose branch", async () => {
423
+ const { deps, runs, rec, health } = setup("unhealthy");
424
+ const auto = automation([
425
+ {
426
+ choose: [
427
+ {
428
+ when: "trigger.payload.sev == 'crit'",
429
+ sequence: [
430
+ { action: "test.record", config: { value: "opened" } },
431
+ { wait_until: { condition: CONDITION } },
432
+ { action: "test.record", config: { value: "closed" } },
433
+ ],
434
+ },
435
+ ],
436
+ },
437
+ ]);
438
+ const result = await dispatchTrigger(deps, {
439
+ automation: auto,
440
+ triggerId: "test_event",
441
+ triggerEventId: "test.event",
442
+ payload: { id: "sys-1", sev: "crit" },
443
+ contextKey: "sys-1",
444
+ });
445
+ expect(result.status).toBe("waiting");
446
+ expect(rec.calls.map((c) => c.value)).toEqual(["opened"]);
447
+
448
+ const lock = [...runs.waitLocks.values()][0]!;
449
+ expect(lock.actionPath).toBe("actions[0].choose[0].sequence[1]");
450
+
451
+ health.set("healthy");
452
+ const outcome = await checkWaitUntil(deps, {
453
+ runId: result.runId,
454
+ waitLockId: lock.id,
455
+ automation: auto,
456
+ });
457
+ expect(outcome).toBe("resumed");
458
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
459
+ expect(rec.calls.map((c) => c.value)).toEqual(["opened", "closed"]);
460
+ });
461
+
462
+ it("resumes a wait_until nested inside a repeat iteration", async () => {
463
+ const { deps, runs, rec, health } = setup("unhealthy");
464
+ const auto = automation([
465
+ {
466
+ repeat: {
467
+ count: 1,
468
+ sequence: [
469
+ { action: "test.record", config: { value: "iter" } },
470
+ { wait_until: { condition: CONDITION } },
471
+ { action: "test.record", config: { value: "done" } },
472
+ ],
473
+ },
474
+ },
475
+ ]);
476
+ const result = await dispatchTrigger(deps, {
477
+ automation: auto,
478
+ triggerId: "test_event",
479
+ triggerEventId: "test.event",
480
+ payload: { id: "sys-1" },
481
+ contextKey: "sys-1",
482
+ });
483
+ expect(result.status).toBe("waiting");
484
+ expect(rec.calls.map((c) => c.value)).toEqual(["iter"]);
485
+
486
+ const lock = [...runs.waitLocks.values()][0]!;
487
+ health.set("healthy");
488
+ const outcome = await checkWaitUntil(deps, {
489
+ runId: result.runId,
490
+ waitLockId: lock.id,
491
+ automation: auto,
492
+ });
493
+ expect(outcome).toBe("resumed");
494
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
495
+ expect(rec.calls.map((c) => c.value)).toEqual(["iter", "done"]);
496
+ });
497
+
498
+ it("resumes a wait_until nested inside a parallel branch (via sequence)", async () => {
499
+ const { deps, runs, rec, health } = setup("unhealthy");
500
+ const auto = automation([
501
+ {
502
+ parallel: [
503
+ {
504
+ sequence: [
505
+ { action: "test.record", config: { value: "branch-a" } },
506
+ { wait_until: { condition: CONDITION } },
507
+ { action: "test.record", config: { value: "a-done" } },
508
+ ],
509
+ },
510
+ { action: "test.record", config: { value: "branch-b" } },
511
+ ],
512
+ },
513
+ { action: "test.record", config: { value: "after-parallel" } },
514
+ ]);
515
+ const result = await dispatchTrigger(deps, {
516
+ automation: auto,
517
+ triggerId: "test_event",
518
+ triggerEventId: "test.event",
519
+ payload: { id: "sys-1" },
520
+ contextKey: "sys-1",
521
+ });
522
+ expect(result.status).toBe("waiting");
523
+ // branch-a's first action + branch-b ran; branch-a suspended at the wait.
524
+ expect(rec.calls.map((c) => c.value).sort()).toEqual(
525
+ ["branch-a", "branch-b"].sort(),
526
+ );
527
+
528
+ const lock = [...runs.waitLocks.values()][0]!;
529
+ health.set("healthy");
530
+ const outcome = await checkWaitUntil(deps, {
531
+ runId: result.runId,
532
+ waitLockId: lock.id,
533
+ automation: auto,
534
+ });
535
+ expect(outcome).toBe("resumed");
536
+ expect(runs.runs.get(result.runId)?.status).toBe("success");
537
+ expect(rec.calls.map((c) => c.value)).toContain("a-done");
538
+ expect(rec.calls.map((c) => c.value)).toContain("after-parallel");
539
+ });
540
+ });
@@ -0,0 +1,158 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import type { Condition } from "@checkstack/automation-common";
3
+
4
+ import { extractWakeRefs, refToString, type WakeRef } from "./wake-refs";
5
+
6
+ /** Sort + stringify refs for stable comparison. */
7
+ function refStrings(refs: ReadonlyArray<WakeRef>): string[] {
8
+ return refs.map(refToString).toSorted();
9
+ }
10
+
11
+ describe("extractWakeRefs — structured `state` conditions", () => {
12
+ it("extracts the health entity ref from a state condition", () => {
13
+ const condition: Condition = {
14
+ state: { entity: "sys-1", status: "healthy" },
15
+ };
16
+ const { refs, indeterminate } = extractWakeRefs(condition);
17
+ expect(refStrings(refs)).toEqual(["health:sys-1"]);
18
+ expect(indeterminate).toBe(false);
19
+ });
20
+
21
+ it("carries the `for` dwell without affecting the ref", () => {
22
+ const condition: Condition = {
23
+ state: { entity: "sys-2", status: "degraded", for: { minutes: 5 } },
24
+ };
25
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
26
+ "health:sys-2",
27
+ ]);
28
+ });
29
+ });
30
+
31
+ describe("extractWakeRefs — structured `numeric_state` conditions", () => {
32
+ it("extracts a ref from a health.* value path", () => {
33
+ const condition: Condition = {
34
+ numeric_state: { value: "health.system.p95_latency_ms", above: 500 },
35
+ };
36
+ // health.system has no concrete id → kind wildcard.
37
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual(["health:*"]);
38
+ });
39
+
40
+ it("extracts a concrete ref from a state.<kind>.<id>.<field> value path", () => {
41
+ const condition: Condition = {
42
+ numeric_state: {
43
+ value: "state.slo['obj-1'].budget_remaining_percent",
44
+ below: 20,
45
+ },
46
+ };
47
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual(["slo:obj-1"]);
48
+ });
49
+
50
+ it("a literal numeric value references no entity", () => {
51
+ const condition: Condition = { numeric_state: { value: 42, above: 1 } };
52
+ const { refs, indeterminate } = extractWakeRefs(condition);
53
+ expect(refs).toHaveLength(0);
54
+ // touchedState (numeric_state) but no ref → indeterminate fallback.
55
+ expect(indeterminate).toBe(true);
56
+ });
57
+ });
58
+
59
+ describe("extractWakeRefs — template-string conditions", () => {
60
+ it("extracts state.<kind>.<id> member chains (identifier ids)", () => {
61
+ const condition: Condition = "state.incident.inc9.status == 'resolved'";
62
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
63
+ "incident:inc9",
64
+ ]);
65
+ });
66
+
67
+ it("extracts state.<kind>[<id>] index chains (hyphenated ids)", () => {
68
+ const condition: Condition = "state.incident['inc-9'].status == 'resolved'";
69
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
70
+ "incident:inc-9",
71
+ ]);
72
+ });
73
+
74
+ it("extracts health.systems[<id>] index chains", () => {
75
+ const condition: Condition =
76
+ "health.systems['sys-7'].status == 'healthy'";
77
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual(["health:sys-7"]);
78
+ });
79
+
80
+ it("treats health.system as a kind wildcard (implicit context id)", () => {
81
+ const condition: Condition = "health.system.status == 'healthy'";
82
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual(["health:*"]);
83
+ });
84
+
85
+ it("collects multiple distinct refs across a boolean expression", () => {
86
+ const condition: Condition =
87
+ "state.incident['inc-1'].status == 'open' && state.health['sys-2'].status == 'degraded'";
88
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
89
+ "health:sys-2",
90
+ "incident:inc-1",
91
+ ]);
92
+ });
93
+
94
+ it("de-duplicates repeated refs", () => {
95
+ const condition: Condition =
96
+ "state.health['sys-1'].status == 'healthy' || state.health['sys-1'].latency_ms > 100";
97
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual(["health:sys-1"]);
98
+ });
99
+ });
100
+
101
+ describe("extractWakeRefs — combinators", () => {
102
+ it("recurses into and/or/not", () => {
103
+ const condition: Condition = {
104
+ and: [
105
+ { state: { entity: "sys-a", status: "healthy" } },
106
+ {
107
+ or: [
108
+ "state.slo['obj-x'].budget_remaining_percent < 10",
109
+ { not: "state.maintenance['win-1'].status == 'active'" },
110
+ ],
111
+ },
112
+ ],
113
+ };
114
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
115
+ "health:sys-a",
116
+ "maintenance:win-1",
117
+ "slo:obj-x",
118
+ ]);
119
+ });
120
+ });
121
+
122
+ describe("extractWakeRefs — wildcard fallback (uncertain id/kind)", () => {
123
+ it("wildcards the kind when the id is a dynamic key", () => {
124
+ const condition: Condition =
125
+ "state.incident[trigger.payload.incidentId].status == 'open'";
126
+ const out = extractWakeRefs(condition);
127
+ // The dynamic index yields the kind wildcard. The dynamic key itself
128
+ // (`trigger.payload.incidentId`) reads no state, so no extra ref.
129
+ expect(refStrings(out.refs)).toEqual(["incident:*"]);
130
+ });
131
+
132
+ it("wildcards when state.<kind> is read with no id", () => {
132
+ const condition: Condition = "state.health.systems != null";
133
+ // `state.health.systems` is NOT a no-id read: `systems` is taken as the id
134
+ // segment, giving a concrete ref. A bare `state.<kind>` is the wildcard case:
135
+ const bareKind: Condition = "state.incident != null";
136
+ expect(refStrings(extractWakeRefs(condition).refs)).toEqual([
137
+ "health:systems",
138
+ ]);
139
+ expect(refStrings(extractWakeRefs(bareKind).refs)).toEqual(["incident:*"]);
140
+ });
142
+
143
+ it("is indeterminate when even the kind is dynamic", () => {
144
+ const condition: Condition = "state[trigger.kind][trigger.id].status == 1";
145
+ const { refs, indeterminate } = extractWakeRefs(condition);
146
+ expect(refs).toHaveLength(0);
147
+ expect(indeterminate).toBe(true);
148
+ });
149
+
150
+ it("is not indeterminate for a template that reads no state at all", () => {
151
+ // A parseable condition with no state.* / health.* root references no
152
+ // entity at all → the result is empty but NOT indeterminate.
153
+ const noState: Condition = "trigger.payload.count > 5";
154
+ const r1 = extractWakeRefs(noState);
155
+ expect(r1.refs).toHaveLength(0);
156
+ expect(r1.indeterminate).toBe(false);
157
+ });
158
+ });