@checkstack/automation-backend 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/CHANGELOG.md +544 -0
  2. package/drizzle/0003_sparkling_xorn.sql +17 -0
  3. package/drizzle/0004_cultured_spyke.sql +2 -0
  4. package/drizzle/0005_classy_the_hand.sql +19 -0
  5. package/drizzle/0006_burly_wallop.sql +10 -0
  6. package/drizzle/0007_nappy_jackal.sql +1 -0
  7. package/drizzle/0008_remove_seeded_auto_incident_automations.sql +13 -0
  8. package/drizzle/0009_steady_liz_osborn.sql +12 -0
  9. package/drizzle/0010_chunky_changeling.sql +2 -0
  10. package/drizzle/meta/0003_snapshot.json +1007 -0
  11. package/drizzle/meta/0004_snapshot.json +1028 -0
  12. package/drizzle/meta/0005_snapshot.json +1164 -0
  13. package/drizzle/meta/0006_snapshot.json +1261 -0
  14. package/drizzle/meta/0007_snapshot.json +1215 -0
  15. package/drizzle/meta/0008_snapshot.json +1215 -0
  16. package/drizzle/meta/0009_snapshot.json +1328 -0
  17. package/drizzle/meta/0010_snapshot.json +1349 -0
  18. package/drizzle/meta/_journal.json +56 -0
  19. package/package.json +23 -12
  20. package/src/action-types.ts +23 -0
  21. package/src/artifact-store.ts +16 -1
  22. package/src/automation-store.test.ts +143 -0
  23. package/src/automation-store.ts +30 -8
  24. package/src/builtin-triggers.test.ts +77 -74
  25. package/src/builtin-triggers.ts +105 -108
  26. package/src/dispatch/action-kind.ts +2 -0
  27. package/src/dispatch/assemble-get-service.ts +31 -0
  28. package/src/dispatch/cancel-resurrect.test.ts +147 -0
  29. package/src/dispatch/concurrency-race.test.ts +255 -0
  30. package/src/dispatch/concurrency-scope.test.ts +166 -0
  31. package/src/dispatch/condition.ts +24 -5
  32. package/src/dispatch/dwell-queue.ts +65 -0
  33. package/src/dispatch/dwell-store.ts +154 -0
  34. package/src/dispatch/dwell.it.test.ts +142 -0
  35. package/src/dispatch/dwell.test.ts +799 -0
  36. package/src/dispatch/dwell.ts +257 -0
  37. package/src/dispatch/engine.test.ts +189 -2
  38. package/src/dispatch/engine.ts +555 -9
  39. package/src/dispatch/entity-scope.test.ts +176 -0
  40. package/src/dispatch/get-service-wiring.test.ts +318 -0
  41. package/src/dispatch/numeric.test.ts +71 -0
  42. package/src/dispatch/numeric.ts +96 -0
  43. package/src/dispatch/render.test.ts +34 -0
  44. package/src/dispatch/render.ts +31 -11
  45. package/src/dispatch/reseed-run-secrets.ts +230 -0
  46. package/src/dispatch/run-secret-registry.test.ts +189 -0
  47. package/src/dispatch/run-secret-registry.ts +247 -0
  48. package/src/dispatch/run-state-masking.test.ts +376 -0
  49. package/src/dispatch/run-state-store.ts +95 -38
  50. package/src/dispatch/run-state.ts +226 -59
  51. package/src/dispatch/scope-artifact-masking.test.ts +138 -0
  52. package/src/dispatch/secret-ref-ids.test.ts +19 -0
  53. package/src/dispatch/secret-ref-ids.ts +17 -0
  54. package/src/dispatch/snapshots.test.ts +86 -0
  55. package/src/dispatch/snapshots.ts +79 -0
  56. package/src/dispatch/stage1-router.test.ts +324 -0
  57. package/src/dispatch/stage1-router.ts +152 -0
  58. package/src/dispatch/stage1.it.test.ts +84 -0
  59. package/src/dispatch/stage2-dispatch.test.ts +285 -0
  60. package/src/dispatch/stage2-dispatch.ts +207 -0
  61. package/src/dispatch/stage2-stalled.it.test.ts +132 -0
  62. package/src/dispatch/stalled-sweeper.test.ts +197 -0
  63. package/src/dispatch/stalled-sweeper.ts +112 -5
  64. package/src/dispatch/state-scope.test.ts +234 -0
  65. package/src/dispatch/state-scope.ts +322 -0
  66. package/src/dispatch/structured-conditions.test.ts +246 -0
  67. package/src/dispatch/structured-conditions.ts +146 -0
  68. package/src/dispatch/test-fixtures.ts +306 -38
  69. package/src/dispatch/trigger-fanin.test.ts +111 -0
  70. package/src/dispatch/trigger-subscriber.ts +316 -14
  71. package/src/dispatch/types.ts +263 -8
  72. package/src/dispatch/wait-timeout-queue.ts +89 -0
  73. package/src/dispatch/wait-until-entity-wake.test.ts +544 -0
  74. package/src/dispatch/wait-until.test.ts +540 -0
  75. package/src/dispatch/wake-refs.test.ts +158 -0
  76. package/src/dispatch/wake-refs.ts +348 -0
  77. package/src/dispatch/window-gate.test.ts +513 -0
  78. package/src/dispatch/window-store.test.ts +162 -0
  79. package/src/dispatch/window-store.ts +102 -0
  80. package/src/entity/change-derivers.test.ts +148 -0
  81. package/src/entity/change-derivers.ts +143 -0
  82. package/src/entity/change-emitter.test.ts +66 -0
  83. package/src/entity/change-emitter.ts +76 -0
  84. package/src/entity/create-handle.ts +344 -0
  85. package/src/entity/cross-pod-read-consistency.it.test.ts +281 -0
  86. package/src/entity/define-entity.ts +157 -0
  87. package/src/entity/diff.test.ts +57 -0
  88. package/src/entity/diff.ts +54 -0
  89. package/src/entity/entity-store.test.ts +30 -0
  90. package/src/entity/entity-store.ts +171 -0
  91. package/src/entity/extension-point.ts +56 -0
  92. package/src/entity/fake-entity-store.ts +130 -0
  93. package/src/entity/hook.ts +19 -0
  94. package/src/entity/index.ts +50 -0
  95. package/src/entity/mutate-handle.test.ts +517 -0
  96. package/src/entity/on-entity-changed.test.ts +189 -0
  97. package/src/entity/on-entity-changed.ts +214 -0
  98. package/src/entity/registry.test.ts +181 -0
  99. package/src/entity/registry.ts +200 -0
  100. package/src/entity/stable-stringify.test.ts +55 -0
  101. package/src/entity/stable-stringify.ts +49 -0
  102. package/src/entity/wake-index.it.test.ts +251 -0
  103. package/src/entity/with-entity-write.test.ts +100 -0
  104. package/src/entity/with-entity-write.ts +69 -0
  105. package/src/entity-driven-trigger.ts +46 -0
  106. package/src/extension-points.ts +35 -0
  107. package/src/gitops-docs.test.ts +215 -0
  108. package/src/gitops-docs.ts +151 -0
  109. package/src/gitops-kinds.test.ts +174 -0
  110. package/src/gitops-kinds.ts +137 -0
  111. package/src/index.ts +355 -11
  112. package/src/migration/flapping-to-window.test.ts +123 -0
  113. package/src/migration/flapping-to-window.ts +205 -0
  114. package/src/router.test.ts +182 -1
  115. package/src/router.ts +73 -2
  116. package/src/schema.ts +236 -3
  117. package/src/script-test-replay.test.ts +88 -0
  118. package/src/script-test-replay.ts +100 -0
  119. package/src/script-test-shell-env.test.ts +41 -0
  120. package/src/script-test-shell-env.ts +89 -0
  121. package/src/script-test.test.ts +386 -0
  122. package/src/script-test.ts +258 -0
  123. package/src/trigger-registry.ts +2 -0
  124. package/src/validate-definition.test.ts +1 -0
  125. package/tsconfig.json +24 -0
@@ -0,0 +1,255 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import { SYSTEM_ACTOR } from "@checkstack/common";
3
+ import { AutomationDefinitionSchema } from "@checkstack/automation-common";
4
+ import type { AutomationStore } from "../automation-store";
5
+ import { createActionRegistry } from "../action-registry";
6
+ import { recoverStalledRun, resumeRun } from "./engine";
7
+ import { handleTriggerFiring } from "./trigger-subscriber";
8
+ import { makeDispatchDeps, makeRecordingAction, testPlugin } from "./test-fixtures";
9
+ import type { DispatchDeps, LoadedAutomation } from "./types";
10
+
11
+ const EVENT = "test.event";
12
+
13
/**
 * Build the fixture automation for the check-then-create race: `single` mode,
 * triggered by `EVENT`, with one action that waits on an event these tests
 * never emit, so a started run stays active.
 */
function buildAutomation(): LoadedAutomation {
  const name = "Race test";
  const waitForever = { wait_for_trigger: { event: "never.fires" } };
  return {
    id: "auto-1",
    name,
    status: "enabled",
    definition: AutomationDefinitionSchema.parse({
      name,
      triggers: [{ event: EVENT }],
      conditions: [],
      actions: [waitForever],
      mode: "single",
      max_runs: 10,
    }),
  };
}
25
+
26
/**
 * Minimal `AutomationStore` stub that always resolves `auto` for the
 * enabled-automation lookups. Mutating operations reject with "nope"
 * (except `delete`, which is a no-op); the read operations return empty
 * results.
 */
function storeFor(auto: LoadedAutomation): AutomationStore {
  const unsupported = async (): Promise<never> => {
    throw new Error("nope");
  };
  const enabled = async () => [auto];
  return {
    create: unsupported,
    update: unsupported,
    delete: async () => {},
    toggle: unsupported,
    getById: async () => undefined,
    list: async () => ({ items: [], total: 0 }),
    listGroups: async () => [],
    findEnabledByTriggerEvent: enabled,
    listEnabled: enabled,
  };
}
45
+
46
/** Number of recorded runs whose status is `pending`, `running` or `waiting`. */
function activeCount(runs: ReturnType<typeof makeDispatchDeps>["runs"]): number {
  let active = 0;
  for (const run of runs.runs.values()) {
    if (
      run.status === "pending" ||
      run.status === "running" ||
      run.status === "waiting"
    ) {
      active += 1;
    }
  }
  return active;
}
51
+
52
describe("M1 — concurrency check-then-create race (single mode)", () => {
  it("two concurrent fires create exactly one run", async () => {
    const registry = createActionRegistry();
    const recording = makeRecordingAction();
    registry.register(recording.definition, testPlugin);
    const { deps, runs } = makeDispatchDeps({
      actions: registry,
      withConcurrencyLock: true,
    });
    const auto = buildAutomation();
    // The stub store holds no state, so one instance serves both fires.
    const automationStore = storeFor(auto);

    // Stretch the gap between "is a run active?" and "create the run" with a
    // real macrotask delay. Without serialization both fires would finish the
    // check before either created a run, which is the interleaving that
    // double-runs a single-mode automation. With the lock the second fire
    // waits at lock-acquire and only checks after the first has committed, so
    // the delay is harmless. A timer (rather than a two-party barrier) keeps
    // this deadlock-free whether or not the lock is in place.
    const checkActiveRun = deps.runStore.hasActiveRun.bind(deps.runStore);
    deps.runStore.hasActiveRun = async (automationId, contextKey) => {
      const active = await checkActiveRun(automationId, contextKey);
      await new Promise((resolve) => setTimeout(resolve, 5));
      return active;
    };

    const fireOnce = () =>
      handleTriggerFiring({
        deps,
        automationStore,
        qualifiedEventId: EVENT,
        triggerPayload: { id: "sys-1" },
        actor: SYSTEM_ACTOR,
        contextKey: "sys-1",
      });

    await Promise.all([fireOnce(), fireOnce()]);

    expect(activeCount(runs)).toBe(1);
  });
});
91
+
92
+ // ─── Resume-vs-recover same-run race ─────────────────────────────────────
93
+
94
/**
 * Test stand-in for the sweeper's recover step (mirroring what
 * `stalled-sweeper.ts` is described as doing): try to take the per-run
 * advisory lock, recover only when it was obtained, and always release it
 * afterwards. Because both racing callers go through the same fake lock set,
 * the lock decides which one executes.
 *
 * @returns `{ acted: false }` when the lock was already held, otherwise
 *   `{ acted: true }` once `recoverStalledRun` has completed.
 */
async function sweeperRecover(
  deps: DispatchDeps,
  args: { runId: string; automation: LoadedAutomation },
): Promise<{ acted: boolean }> {
  const { runId } = args;
  const held = await deps.runStateStore.tryAdvisoryLock(runId);
  if (held) {
    try {
      await recoverStalledRun(deps, args);
    } finally {
      // Release on success and on failure alike.
      await held.release();
    }
    return { acted: true };
  }
  // Another instance already holds the lock for this run.
  return { acted: false };
}
114
+
115
/**
 * Register a fresh recording action on `actionsReg` and build a `single`-mode
 * automation whose only step is that `test.record` action.
 *
 * @returns the automation plus `recorded`, which reports how many times the
 *   recording action has been invoked so far.
 */
function recoverableAutomation(actionsReg: ReturnType<typeof createActionRegistry>): {
  auto: LoadedAutomation;
  recorded: () => number;
} {
  const recorder = makeRecordingAction();
  actionsReg.register(recorder.definition, testPlugin);

  const name = "Recover race";
  const auto: LoadedAutomation = {
    id: "auto-1",
    name,
    status: "enabled",
    definition: AutomationDefinitionSchema.parse({
      name,
      triggers: [{ event: EVENT }],
      conditions: [],
      actions: [
        { action: "test.record", config: { value: "after-recover" } },
      ],
      mode: "single",
      max_runs: 10,
    }),
  };

  return { auto, recorded: () => recorder.calls.length };
}
137
+
138
describe("M2 — resume-vs-recover same-run race (shared advisory lock)", () => {
  type Fixture = ReturnType<typeof makeDispatchDeps>;

  /** Insert an unfinished run row, with an empty trigger payload, into the fake run store. */
  function seedRun(
    runs: Fixture["runs"],
    run: { id: string; automationId: string; status: "running" | "waiting" },
  ): void {
    runs.runs.set(run.id, {
      id: run.id,
      automationId: run.automationId,
      triggerId: "t",
      triggerEventId: EVENT,
      triggerPayload: {},
      contextKey: null,
      status: run.status,
      errorMessage: null,
      startedAt: new Date(),
      finishedAt: null,
    });
  }

  it("two sweeper recoveries of one stalled run: exactly one executes", async () => {
    const registry = createActionRegistry();
    const { auto, recorded } = recoverableAutomation(registry);
    const { deps, runs, state } = makeDispatchDeps({ actions: registry });

    // Shape of a genuinely stalled run: status `running`, a persisted
    // snapshot, and no wait lock, which is what `recoverStalledRun` may
    // re-walk.
    const runId = "run-stalled";
    seedRun(runs, { id: runId, automationId: auto.id, status: "running" });
    state.states.set(runId, {
      scopeSnapshot: { trigger: { id: "t", event: EVENT, payload: {} } },
      lastActionPath: null, // crashed before the first step, so start from the top
      lastHeartbeatAt: new Date(0),
    });

    // Two pods sweep the same stalled run simultaneously; the shared lock
    // set (`state.locks`) picks the winner.
    const sweep = () => sweeperRecover(deps, { runId, automation: auto });
    const outcomes = await Promise.all([sweep(), sweep()]);

    const winners = outcomes.filter((outcome) => outcome.acted);
    expect(winners).toHaveLength(1);
    expect(recorded()).toBe(1); // the action ran exactly once
    expect(runs.runs.get(runId)!.status).toBe("success");
    expect(state.locks.size).toBe(0); // the winner released its lock
  });

  it("a resume racing a recover for the same waiting run: the wake wins, recover no-ops", async () => {
    const registry = createActionRegistry();
    const recorder = makeRecordingAction();
    registry.register(recorder.definition, testPlugin);

    const name = "Resume race";
    const auto: LoadedAutomation = {
      id: "auto-1",
      name,
      status: "enabled",
      definition: AutomationDefinitionSchema.parse({
        name,
        triggers: [{ event: EVENT }],
        conditions: [],
        actions: [
          { wait_for_trigger: { event: "wake.event" } },
          { action: "test.record", config: { value: "post-wait" } },
        ],
        mode: "single",
        max_runs: 10,
      }),
    };
    const { deps, runs, state } = makeDispatchDeps({ actions: registry });

    // A run deliberately parked at its wait step: status `waiting`, snapshot
    // pointing at the wait, and a wait lock. The wake path (`resumeRun`) owns
    // it, so a recover attempt has to back off.
    const runId = "run-waiting";
    const waitPath = "actions[0]";
    seedRun(runs, { id: runId, automationId: auto.id, status: "waiting" });
    state.states.set(runId, {
      scopeSnapshot: { trigger: { id: "t", event: EVENT, payload: {} } },
      lastActionPath: waitPath,
      lastHeartbeatAt: new Date(),
    });
    await deps.runStore.createWaitLock({
      runId,
      actionPath: waitPath,
      kind: "trigger",
      eventId: "wake.event",
      contextKey: null,
      filterTemplate: null,
      timeoutAt: null,
    });

    // `recoverStalledRun` is called directly here, without the
    // `sweeperRecover` lock wrapper. The intent is to mirror production,
    // where the sweeper is said to recover only `running` runs and never to
    // contend for a `waiting` run's lock. Recover therefore has to decline
    // via its own status / wait-lock guard and leave the run to `resumeRun`.
    const [resumeOut, recoverOut] = await Promise.all([
      resumeRun(deps, {
        runId,
        automation: auto,
        waitedAtPath: waitPath,
      }),
      recoverStalledRun(deps, { runId, automation: auto }),
    ]);

    // Recover declined and resume finished the run, so the post-wait action
    // executed exactly once.
    expect(recoverOut.status).toBe("waiting"); // declined, no re-walk
    expect(recorder.calls).toHaveLength(1);
    expect(resumeOut.status).toBe("success");
    expect(runs.runs.get(runId)!.status).toBe("success");
    expect(state.locks.size).toBe(0);
  });
});
@@ -0,0 +1,166 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { SYSTEM_ACTOR } from "@checkstack/common";
3
+ import {
4
+ AutomationDefinitionSchema,
5
+ type Automation,
6
+ type ConcurrencyScope,
7
+ } from "@checkstack/automation-common";
8
+ import type { AutomationStore } from "../automation-store";
9
+ import { handleTriggerFiring } from "./trigger-subscriber";
10
+ import { makeDispatchDeps, makeRecordingAction, testPlugin } from "./test-fixtures";
11
+ import { createActionRegistry } from "../action-registry";
12
+ import type { LoadedAutomation } from "./types";
13
+
14
+ const EVENT = "test.event";
15
+
16
/**
 * Build an enabled automation whose only action is a `wait_for_trigger` on an
 * event these tests never emit, so every started run remains `waiting`
 * (active) and concurrency dedup can be observed.
 *
 * @param scope value for the definition's `concurrency_scope`.
 * @param opts optional overrides; `mode` falls back to "single" and `maxRuns`
 *   to 10.
 */
function buildAutomation(
  scope: ConcurrencyScope,
  opts: { mode?: string; maxRuns?: number } = {},
): Automation {
  const name = "Concurrency test";
  const mode = opts.mode ?? "single";
  const maxRuns = opts.maxRuns ?? 10;

  const definition = AutomationDefinitionSchema.parse({
    name,
    triggers: [{ event: EVENT }],
    conditions: [],
    actions: [{ wait_for_trigger: { event: "never.fires" } }],
    mode,
    concurrency_scope: scope,
    max_runs: maxRuns,
  });

  const createdAt = new Date();
  const updatedAt = new Date();
  return { id: "auto-1", name, status: "enabled", definition, createdAt, updatedAt };
}
42
+
43
/**
 * `AutomationStore` stub backed by a single automation. Enabled-automation
 * lookups return its loaded form, `getById` resolves it for a matching id,
 * `list` returns it as the only item, `create` / `update` / `toggle` reject
 * with "nope", and `delete` does nothing.
 */
function makeStore(auto: Automation): AutomationStore {
  const { id, name, status, definition } = auto;
  const loaded: LoadedAutomation = { id, name, status, definition };

  const reject = async (): Promise<never> => {
    throw new Error("nope");
  };
  const enabledOnly = async () => [loaded];

  return {
    create: reject,
    update: reject,
    delete: async () => {},
    toggle: reject,
    getById: async (requestedId) => {
      if (requestedId !== auto.id) return undefined;
      return {
        id: auto.id,
        name: auto.name,
        description: undefined,
        status: auto.status,
        definition: auto.definition,
        managedBy: undefined,
        createdAt: new Date(),
        updatedAt: new Date(),
      };
    },
    list: async () => ({ items: [auto], total: 1 }),
    listGroups: async () => [],
    findEnabledByTriggerEvent: enabledOnly,
    listEnabled: enabledOnly,
  };
}
80
+
81
/**
 * Assemble a test harness: an action registry with one recording action,
 * dispatch deps built on it, and a `fire` helper that fires `EVENT` for a
 * given system id (used both as the payload `id` and as the context key).
 */
function setup(
  scope: ConcurrencyScope,
  opts: { mode?: string; maxRuns?: number } = {},
) {
  const registry = createActionRegistry();
  const recorder = makeRecordingAction();
  registry.register(recorder.definition, testPlugin);

  const { deps, runs } = makeDispatchDeps({ actions: registry });
  const automationStore = makeStore(buildAutomation(scope, opts));

  function fire(systemId: string) {
    return handleTriggerFiring({
      deps,
      automationStore,
      qualifiedEventId: EVENT,
      triggerPayload: { id: systemId },
      actor: SYSTEM_ACTOR,
      contextKey: systemId,
    });
  }

  return { deps, runs, fire };
}
102
+
103
/**
 * Tally the runs that are still active, i.e. in `pending`, `running` or
 * `waiting`. In these tests a started run waits forever, so it stays counted.
 */
function activeCount(
  runs: ReturnType<typeof makeDispatchDeps>["runs"],
): number {
  const activeStatuses = ["pending", "running", "waiting"];
  return [...runs.runs.values()].reduce(
    (total, run) => (activeStatuses.includes(run.status) ? total + 1 : total),
    0,
  );
}
111
+
112
describe("concurrency_scope: automation (default)", () => {
  it("single mode dedups across ALL systems (one active run total)", async () => {
    const { runs, fire } = setup("automation");
    // With per-automation `single`, the `sys-b` fire is skipped even though
    // it targets a different system, and so is the repeated `sys-a` fire.
    for (const systemId of ["sys-a", "sys-b", "sys-a"]) {
      await fire(systemId);
    }
    expect(activeCount(runs)).toBe(1);
  });
});
121
+
122
describe("concurrency_scope: context_key", () => {
  it("single mode dedups per system but runs different systems concurrently", async () => {
    const { runs, fire } = setup("context_key");
    // First pass starts one run per system; the second pass is deduped
    // because each system already has an active run.
    for (const systemId of ["sys-a", "sys-b", "sys-a", "sys-b"]) {
      await fire(systemId);
    }

    // One active run per distinct system, no duplicates.
    expect(activeCount(runs)).toBe(2);

    const activeStatuses = ["pending", "running", "waiting"];
    const activePerContext = new Map<string | null, number>();
    for (const run of runs.runs.values()) {
      if (activeStatuses.includes(run.status)) {
        const seen = activePerContext.get(run.contextKey) ?? 0;
        activePerContext.set(run.contextKey, seen + 1);
      }
    }
    expect(activePerContext.get("sys-a")).toBe(1);
    expect(activePerContext.get("sys-b")).toBe(1);
  });
});
140
+
141
describe("concurrency modes (automation scope)", () => {
  it("parallel mode allows up to max_runs concurrent runs, then caps", async () => {
    const { runs, fire } = setup("automation", { mode: "parallel", maxRuns: 2 });
    // The third fire goes over the cap of two and is skipped.
    for (const key of ["a", "b", "c"]) {
      await fire(key);
    }
    expect(activeCount(runs)).toBe(2);
  });

  it("queued mode caps at max_runs (v1 behaves like parallel)", async () => {
    const { runs, fire } = setup("automation", { mode: "queued", maxRuns: 1 });
    // The second fire goes over the cap of one and is skipped.
    for (const key of ["a", "b"]) {
      await fire(key);
    }
    expect(activeCount(runs)).toBe(1);
  });

  it("restart mode cancels the prior active run and starts fresh", async () => {
    const { runs, fire } = setup("automation", { mode: "restart" });
    await fire("a");
    const [firstRun] = runs.runs.values();
    const firstId = firstRun!.id;

    // The second fire cancels the first run and starts a new one.
    await fire("b");

    expect(runs.runs.get(firstId)?.status).toBe("cancelled");
    // Only the fresh run is still active.
    expect(activeCount(runs)).toBe(1);
  });
});
@@ -1,14 +1,16 @@
1
1
  /**
2
2
  * Condition evaluation for the dispatch engine.
3
3
  *
4
- * Conditions come in two shapes from the schema:
4
+ * Conditions come in several shapes from the schema:
5
5
  *
6
6
  * - A template string returning truthy/falsy.
7
7
  * - A combinator object — `{ and: [...] }`, `{ or: [...] }`, or
8
8
  * `{ not: condition }` — recursing into nested conditions.
9
+ * - A structured variant — `{ numeric_state }`, `{ time }`, `{ state }`.
9
10
  *
10
- * Both forms eval against the current dispatch scope through the shared
11
- * template engine.
11
+ * All forms eval against the current dispatch scope through the shared
12
+ * template engine. Structured variants additionally compute a fresh `now`
13
+ * per evaluation (the `time` variant) rather than reading scope `now`.
12
14
  */
13
15
  import {
14
16
  evaluateBoolean,
@@ -18,6 +20,12 @@ import {
18
20
  } from "@checkstack/template-engine";
19
21
  import type { Condition } from "@checkstack/automation-common";
20
22
 
23
+ import {
24
+ evaluateNumericStateCondition,
25
+ evaluateStateCondition,
26
+ evaluateTimeCondition,
27
+ } from "./structured-conditions";
28
+
21
29
  /**
22
30
  * Evaluate a condition to boolean.
23
31
  *
@@ -40,8 +48,19 @@ export function evaluateCondition(
40
48
  if ("or" in condition) {
41
49
  return condition.or.some((c) => evaluateCondition(c, context, filters));
42
50
  }
43
- // not
44
- return !evaluateCondition(condition.not, context, filters);
51
+ if ("not" in condition) {
52
+ return !evaluateCondition(condition.not, context, filters);
53
+ }
54
+ if ("numeric_state" in condition) {
55
+ return evaluateNumericStateCondition(condition, context, filters);
56
+ }
57
+ if ("time" in condition) {
58
+ // Fresh `now` per evaluation (constraint 7) — time-of-day gating must
59
+ // never read the frozen scope timestamp.
60
+ return evaluateTimeCondition(condition, new Date());
61
+ }
62
+ // state
63
+ return evaluateStateCondition(condition, context);
45
64
  }
46
65
 
47
66
  /**
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Queue consumer that fires `for:` dwell timers.
3
+ *
4
+ * `armDwell` persists a dwell row and enqueues an `automation-dwell` job
5
+ * with the matching `startDelay`. When the scheduler fires the job, this
6
+ * consumer:
7
+ *
8
+ * 1. Loads the dwell row; bails if it's gone (cancelled, re-armed under
9
+ * a different id, or already fired) — the row is the source of truth.
10
+ * 2. Delegates to `fireDwell`, which re-confirms the matched state
11
+ * still holds, then starts the run via `startRunRespectingMode`.
12
+ *
13
+ * The stalled sweeper also catches expired dwell rows in case the queue
14
+ * job is lost; both paths are idempotent via delete-on-fire.
15
+ */
16
+ import type { Logger } from "@checkstack/backend-api";
17
+
18
+ import type { AutomationStore } from "../automation-store";
19
+ import { DWELL_QUEUE_NAME, fireDwell, type DwellFireJob } from "./dwell";
20
+ import { startRunRespectingMode } from "./trigger-subscriber";
21
+ import type { DispatchDeps } from "./types";
22
+
23
/** Everything `startDwellQueueConsumer` needs to consume dwell-fire jobs. */
export interface DwellQueueConsumerArgs {
  /** Receives a debug line when a job's dwell row no longer exists. */
  logger: Logger;
  /** Forwarded to `fireDwell` when a dwell fires. */
  automationStore: AutomationStore;
  /** Dispatch dependencies; supplies `queueManager` and `dwellStore`. */
  deps: DispatchDeps;
}
28
+
29
/** Handle returned by `startDwellQueueConsumer`. */
export interface DwellQueueConsumer {
  /** Stops the underlying dwell queue; resolves once the stop call settles. */
  stop(): Promise<void>;
}
32
+
33
/**
 * Subscribe to the dwell queue and fire each dwell whose row still exists.
 *
 * For every job the dwell row is loaded by id. A missing row is logged at
 * debug level and the job ends there; otherwise the row is handed to
 * `fireDwell` together with `startRunRespectingMode`.
 *
 * @returns a handle whose `stop` stops the queue.
 */
export async function startDwellQueueConsumer(
  args: DwellQueueConsumerArgs,
): Promise<DwellQueueConsumer> {
  const { deps, automationStore, logger } = args;
  const queue = deps.queueManager.getQueue<DwellFireJob>(DWELL_QUEUE_NAME);

  await queue.consume(
    async ({ data }) => {
      const { dwellId } = data;
      const dwell = await deps.dwellStore.load(dwellId);
      if (dwell) {
        await fireDwell({
          deps,
          automationStore,
          dwell,
          startRun: startRunRespectingMode,
        });
        return;
      }
      // The row is the source of truth: no row means there is nothing to fire.
      logger.debug(
        `dwell-queue: dwell ${dwellId} no longer exists (cancelled / re-armed / already fired)`,
      );
    },
    { consumerGroup: "automation-dwell-fire", maxRetries: 3 },
  );

  return {
    async stop() {
      await queue.stop();
    },
  };
}
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Drizzle-backed implementation of `DwellStore` — pre-run `for:` dwell
3
+ * timers. Kept thin: each method maps almost 1:1 to a DB statement. The
4
+ * row is the source of truth; the queue job is just the wake signal and
5
+ * cancellation is a row delete (constraint 2).
6
+ */
7
+ import { and, eq, isNull, lte } from "drizzle-orm";
8
+ import type { SafeDatabase } from "@checkstack/backend-api";
9
+
10
+ import { automationDwellTimers } from "../schema";
11
+ import type { DwellStore, LoadedDwell, UpsertDwellInput } from "./types";
12
+
13
+ type Schema = { automationDwellTimers: typeof automationDwellTimers };
14
+
15
/** Copy the dwell-timer columns of a selected row into a `LoadedDwell`. */
function mapRow(row: typeof automationDwellTimers.$inferSelect): LoadedDwell {
  const {
    id,
    automationId,
    triggerId,
    eventId,
    contextKey,
    armedStatus,
    payloadSnapshot,
    actorSnapshot,
    fireAt,
    createdAt,
  } = row;
  return {
    id,
    automationId,
    triggerId,
    eventId,
    contextKey,
    armedStatus,
    payloadSnapshot,
    actorSnapshot,
    fireAt,
    createdAt,
  };
}
29
+
30
/**
 * Predicate matching the dwell row for `(automationId, triggerId, contextKey)`.
 * A `null` context key is matched with `IS NULL` instead of an equality test.
 */
function keyWhere(
  automationId: string,
  triggerId: string,
  contextKey: string | null,
) {
  const sameAutomation = eq(automationDwellTimers.automationId, automationId);
  const sameTrigger = eq(automationDwellTimers.triggerId, triggerId);
  if (contextKey === null) {
    return and(
      sameAutomation,
      sameTrigger,
      isNull(automationDwellTimers.contextKey),
    );
  }
  return and(
    sameAutomation,
    sameTrigger,
    eq(automationDwellTimers.contextKey, contextKey),
  );
}
44
+
45
/**
 * Create the database-backed `DwellStore`. Each method issues one or a few
 * statements against the dwell-timers table.
 */
export function createDwellStore(db: SafeDatabase<Schema>): DwellStore {
  const timers = automationDwellTimers;

  /** Read the raw row stored under a key, or `undefined` when there is none. */
  async function selectByKey(
    automationId: string,
    triggerId: string,
    contextKey: string | null,
  ) {
    const [row] = await db
      .select()
      .from(timers)
      .where(keyWhere(automationId, triggerId, contextKey))
      .limit(1);
    return row;
  }

  return {
    /**
     * Insert-if-absent. An already-armed dwell for the key is returned
     * untouched, keeping its original `fireAt`, so the `for:` window counts
     * from the first arm rather than from the latest matching event. (A real
     * recover-then-recur is expected to delete the row first, via
     * inverse-cancel / re-confirm, and so start fresh.)
     */
    async arm(input: UpsertDwellInput) {
      const { automationId, triggerId, contextKey } = input;

      // Fast path: a row already exists for the key. This also covers the
      // null-context-key case, where ON CONFLICT cannot match because NULLs
      // are distinct in a Postgres unique index.
      // NOTE(review): for a null context key this SELECT appears to be the
      // only guard against a concurrent duplicate insert; confirm whether the
      // schema or the callers serialize that case.
      const existing = await selectByKey(automationId, triggerId, contextKey);
      if (existing) {
        return { id: existing.id, created: false, fireAt: existing.fireAt };
      }

      // Nothing stored yet, so INSERT. ON CONFLICT DO NOTHING covers a
      // concurrent arm that inserted between the SELECT above and this
      // statement: `returning` then yields no row.
      const [inserted] = await db
        .insert(timers)
        .values({
          automationId: input.automationId,
          triggerId: input.triggerId,
          eventId: input.eventId,
          contextKey: input.contextKey,
          armedStatus: input.armedStatus,
          payloadSnapshot: input.payloadSnapshot,
          actorSnapshot: input.actorSnapshot,
          fireAt: input.fireAt,
        })
        .onConflictDoNothing({
          target: [timers.automationId, timers.triggerId, timers.contextKey],
        })
        .returning();
      if (inserted) {
        return { id: inserted.id, created: true, fireAt: inserted.fireAt };
      }

      // Lost the race: another arm inserted first, so report its row instead.
      const winner = await selectByKey(automationId, triggerId, contextKey);
      if (!winner) throw new Error("arm dwell: row vanished after conflict");
      return { id: winner.id, created: false, fireAt: winner.fireAt };
    },

    /** Load a dwell by id; `undefined` when no such row exists. */
    async load(id) {
      const [found] = await db
        .select()
        .from(timers)
        .where(eq(timers.id, id))
        .limit(1);
      if (!found) return undefined;
      return mapRow(found);
    },

    /** Load the dwell stored under a key; `undefined` when there is none. */
    async findByKey(automationId, triggerId, contextKey) {
      const found = await selectByKey(automationId, triggerId, contextKey);
      if (!found) return undefined;
      return mapRow(found);
    },

    /**
     * Delete a dwell by id and report whether this call removed a row.
     * `RETURNING id` turns the delete into an atomic claim: only the caller
     * whose statement actually removed the row gets it back, so `fireDwell`
     * can use the result to keep two pods (or the sweeper and the queue
     * consumer) from firing the same dwell.
     */
    async delete(id) {
      const claimed = await db
        .delete(timers)
        .where(eq(timers.id, id))
        .returning({ id: timers.id });
      return claimed.length !== 0;
    },

    /** Delete whatever dwell is stored under the key. */
    async deleteByKey(automationId, triggerId, contextKey) {
      await db
        .delete(timers)
        .where(keyWhere(automationId, triggerId, contextKey));
    },

    /** Delete every dwell belonging to the automation. */
    async deleteForAutomation(automationId) {
      await db.delete(timers).where(eq(timers.automationId, automationId));
    },

    /** Return all dwells whose `fireAt` is at or before `now`. */
    async sweepExpired(now) {
      const due = await db
        .select()
        .from(timers)
        .where(lte(timers.fireAt, now));
      return due.map((row) => mapRow(row));
    },
  };
}