@checkstack/automation-backend 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +453 -0
  2. package/drizzle/0000_acoustic_diamondback.sql +80 -0
  3. package/drizzle/0001_mute_vindicator.sql +12 -0
  4. package/drizzle/0002_silky_omega_red.sql +12 -0
  5. package/drizzle/meta/0000_snapshot.json +688 -0
  6. package/drizzle/meta/0001_snapshot.json +785 -0
  7. package/drizzle/meta/0002_snapshot.json +861 -0
  8. package/drizzle/meta/_journal.json +27 -0
  9. package/drizzle.config.ts +12 -0
  10. package/package.json +41 -0
  11. package/src/action-registry.ts +83 -0
  12. package/src/action-types.ts +324 -0
  13. package/src/artifact-store.ts +140 -0
  14. package/src/artifact-type-registry.ts +64 -0
  15. package/src/automation-store.ts +227 -0
  16. package/src/builtin-actions.test.ts +185 -0
  17. package/src/builtin-actions.ts +132 -0
  18. package/src/builtin-triggers.test.ts +264 -0
  19. package/src/builtin-triggers.ts +365 -0
  20. package/src/dispatch/action-kind.ts +44 -0
  21. package/src/dispatch/condition.ts +61 -0
  22. package/src/dispatch/delay-queue.ts +91 -0
  23. package/src/dispatch/engine.test.ts +1198 -0
  24. package/src/dispatch/engine.ts +1672 -0
  25. package/src/dispatch/path-nav.ts +65 -0
  26. package/src/dispatch/render.test.ts +75 -0
  27. package/src/dispatch/render.ts +136 -0
  28. package/src/dispatch/run-state-store.ts +143 -0
  29. package/src/dispatch/run-state.ts +298 -0
  30. package/src/dispatch/scope.test.ts +40 -0
  31. package/src/dispatch/scope.ts +125 -0
  32. package/src/dispatch/stalled-sweeper.ts +164 -0
  33. package/src/dispatch/test-fixtures.ts +558 -0
  34. package/src/dispatch/trigger-subscriber.ts +397 -0
  35. package/src/dispatch/types.ts +259 -0
  36. package/src/extension-points.ts +88 -0
  37. package/src/index.ts +379 -0
  38. package/src/migration/from-webhook-subscriptions.test.ts +237 -0
  39. package/src/migration/from-webhook-subscriptions.ts +398 -0
  40. package/src/registries.test.ts +357 -0
  41. package/src/router.test.ts +724 -0
  42. package/src/router.ts +556 -0
  43. package/src/schema.ts +310 -0
  44. package/src/trigger-registry.ts +99 -0
  45. package/src/validate-definition.test.ts +306 -0
  46. package/src/validate-definition.ts +304 -0
  47. package/tsconfig.json +41 -0
package/src/schema.ts ADDED
@@ -0,0 +1,310 @@
1
+ import {
2
+ pgTable,
3
+ text,
4
+ timestamp,
5
+ jsonb,
6
+ integer,
7
+ index,
8
+ } from "drizzle-orm/pg-core";
9
+
10
/**
 * `automations` table — the top-level entity of the automation plugin.
 *
 * A row stores the complete automation (triggers, conditions, actions and
 * mode) as a single JSON document in `definition`. That document is
 * expected to have passed `AutomationDefinitionSchema` validation before
 * being written.
 *
 * Secondary indexes exist on `status` and on `managed_by`.
 */
export const automations = pgTable(
  "automations",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    name: text("name").notNull(),
    description: text("description"),
    /** Either "enabled" or "disabled"; defaults to "enabled". */
    status: text("status").notNull().default("enabled"),
    /** The AutomationDefinition JSON (zod-validated on write). */
    definition: jsonb("definition").notNull().$type<Record<string, unknown>>(),
    /**
     * Where the row came from. "migrated-subscription:<id>" marks rows
     * created by the webhook-subscription auto-migration;
     * "gitops:<provider>" marks declaratively managed automations.
     * Nullable.
     */
    managedBy: text("managed_by"),
    createdAt: timestamp("created_at").defaultNow().notNull(),
    updatedAt: timestamp("updated_at").defaultNow().notNull(),
  },
  (table) => {
    return {
      statusIdx: index("automations_status_idx").on(table.status),
      managedByIdx: index("automations_managed_by_idx").on(table.managedBy),
    };
  },
);
43
+
44
/**
 * `automation_runs` table — one row for every dispatch of an automation.
 *
 * Each row records which trigger started the run, the payload that trigger
 * carried, and the durable context key that scopes the run (typically an
 * `incidentId`). Rows are removed together with their parent automation
 * (`ON DELETE CASCADE`).
 */
export const automationRuns = pgTable(
  "automation_runs",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    /** Owning automation. */
    automationId: text("automation_id")
      .notNull()
      .references(() => automations.id, { onDelete: "cascade" }),
    /** Trigger discriminator picked when the automation was defined. */
    triggerId: text("trigger_id").notNull(),
    /** Fully qualified event id, e.g. "incident.incident.created". */
    triggerEventId: text("trigger_event_id").notNull(),
    /** Payload delivered by the trigger. */
    triggerPayload: jsonb("trigger_payload")
      .notNull()
      .$type<Record<string, unknown>>(),
    /** Durable key tying the run to a domain entity; may be null. */
    contextKey: text("context_key"),
    /**
     * One of "pending" | "running" | "waiting" | "success" | "failed" |
     * "cancelled" | "skipped". Defaults to "pending".
     */
    status: text("status").notNull().default("pending"),
    errorMessage: text("error_message"),
    startedAt: timestamp("started_at").defaultNow().notNull(),
    finishedAt: timestamp("finished_at"),
  },
  (table) => {
    return {
      automationIdx: index("automation_runs_automation_idx").on(
        table.automationId,
        table.startedAt,
      ),
      statusIdx: index("automation_runs_status_idx").on(table.status),
      contextKeyIdx: index("automation_runs_context_key_idx").on(
        table.automationId,
        table.contextKey,
      ),
    };
  },
);
85
+
86
/**
 * `automation_run_steps` table — one row for each action node a run
 * attempted.
 *
 * `actionPath` holds the node's hierarchical position in the action tree
 * (for example "actions[0].choose[1].then[2]"), which lets log entries be
 * matched back to the corresponding editor card. Rows are removed together
 * with their parent run (`ON DELETE CASCADE`).
 */
export const automationRunSteps = pgTable(
  "automation_run_steps",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    /** Owning run. */
    runId: text("run_id")
      .notNull()
      .references(() => automationRuns.id, { onDelete: "cascade" }),
    actionPath: text("action_path").notNull(),
    /** Action id assigned by the operator, when one was given. */
    actionId: text("action_id"),
    /** Node kind discriminator: "action" | "choose" | "parallel" | … */
    actionKind: text("action_kind").notNull(),
    /** Fully qualified action id; set for provider-action steps. */
    providerActionId: text("provider_action_id"),
    /**
     * One of "pending" | "running" | "success" | "failed" | "skipped" |
     * "waiting". Defaults to "pending".
     */
    status: text("status").notNull().default("pending"),
    /** Attempt counter; starts at 0. */
    attempts: integer("attempts").notNull().default(0),
    errorMessage: text("error_message"),
    resultPayload: jsonb("result_payload").$type<Record<string, unknown>>(),
    startedAt: timestamp("started_at").defaultNow().notNull(),
    finishedAt: timestamp("finished_at"),
  },
  (table) => {
    return {
      runIdx: index("automation_run_steps_run_idx").on(table.runId),
    };
  },
);
120
+
121
/**
 * `automation_artifacts` table — typed payloads that actions persist so
 * later actions can look them up.
 *
 * Two lookup shapes are supported by the indexes below:
 *   - `(automationId, contextKey, artifactType)`: "the most recent
 *     jira.issue artifact for incident X in automation Y".
 *   - `(automationId, actionId)`: "whatever action Z produced".
 *
 * Rows are removed together with their parent automation, run or step
 * (`ON DELETE CASCADE` on all three foreign keys).
 */
export const automationArtifacts = pgTable(
  "automation_artifacts",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    automationId: text("automation_id")
      .notNull()
      .references(() => automations.id, { onDelete: "cascade" }),
    runId: text("run_id")
      .notNull()
      .references(() => automationRuns.id, { onDelete: "cascade" }),
    stepId: text("step_id")
      .notNull()
      .references(() => automationRunSteps.id, { onDelete: "cascade" }),
    /** Action id assigned by the operator, when one was given. */
    actionId: text("action_id"),
    /** Fully qualified artifact type, e.g. "jira.issue". */
    artifactType: text("artifact_type").notNull(),
    /** Artifact body; checked against the type's schema when written. */
    data: jsonb("data").notNull().$type<Record<string, unknown>>(),
    /** Durable lookup key (typically an `incidentId`). */
    contextKey: text("context_key"),
    /** Timestamp set once a downstream close action resolves the artifact. */
    closedAt: timestamp("closed_at"),
    createdAt: timestamp("created_at").defaultNow().notNull(),
  },
  (table) => {
    return {
      /**
       * Serves "latest jira.issue for incident X" — the default lookup
       * performed when an action declares `consumes`.
       */
      contextLookupIdx: index("automation_artifacts_context_lookup_idx").on(
        table.automationId,
        table.contextKey,
        table.artifactType,
        table.createdAt,
      ),
      /** Serves per-action lookups (`artifacts.<actionId>` references). */
      actionLookupIdx: index("automation_artifacts_action_lookup_idx").on(
        table.automationId,
        table.actionId,
        table.createdAt,
      ),
      /** Lets close actions filter for artifacts that are still open. */
      openIdx: index("automation_artifacts_open_idx").on(
        table.automationId,
        table.closedAt,
      ),
    };
  },
);
177
+
178
/**
 * `automation_wait_locks` table — durable bookkeeping for suspended runs.
 *
 * Two kinds of lock are stored:
 *   - `kind = "trigger"`: the classic `wait_for_trigger`. The trigger
 *     fan-in resumes the run when a matching event arrives.
 *   - `kind = "delay"`: a durable stand-in for `setTimeout`. The dispatch
 *     engine persists the lock and enqueues an `automation-delay` queue job
 *     with `startDelay`; the queue consumer resumes the run when the job
 *     fires. The stalled-run sweeper additionally picks up expired delay
 *     locks in case the queue job was lost.
 *
 * Both kinds survive process restarts and horizontal scaling because the
 * lock row is the source of truth; the queue / hook only acts as the
 * wake-up signal.
 *
 * NOTE(review): the column constraints are looser than the description —
 * `event_id` is NOT NULL even for delay locks and `timeout_at` is nullable
 * even though delay locks are described as requiring it. Presumably the
 * writer enforces this; confirm against the dispatch engine.
 */
export const automationWaitLocks = pgTable(
  "automation_wait_locks",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    /** Suspended run; the lock is deleted with it (`ON DELETE CASCADE`). */
    runId: text("run_id")
      .notNull()
      .references(() => automationRuns.id, { onDelete: "cascade" }),
    /** Path of the suspended node; resumption continues at its next sibling. */
    actionPath: text("action_path").notNull(),
    /**
     * Lock discriminator: "trigger" (wait_for_trigger) or "delay"
     * (queue-backed sleep). Defaults to "trigger".
     */
    kind: text("kind").notNull().default("trigger"),
    /** Fully qualified awaited event id; only meaningful for kind = "trigger". */
    eventId: text("event_id").notNull(),
    /** Optional context-key filter (e.g. the same incidentId). */
    contextKey: text("context_key"),
    /** Optional template that has to evaluate truthy on the arriving payload. */
    filterTemplate: text("filter_template"),
    /**
     * Absolute deadline.
     *   - kind = "trigger": optional; when set, the sweeper fails the run
     *     once the deadline has passed.
     *   - kind = "delay": required; the firing time after which the run
     *     should resume even when the queue job was lost.
     */
    timeoutAt: timestamp("timeout_at"),
    createdAt: timestamp("created_at").defaultNow().notNull(),
  },
  (table) => {
    return {
      /** Answers "are there wait locks for this incoming event + context?" */
      eventLookupIdx: index("automation_wait_locks_event_lookup_idx").on(
        table.eventId,
        table.contextKey,
      ),
      /** Backs the periodic timeout sweep. */
      timeoutIdx: index("automation_wait_locks_timeout_idx").on(
        table.timeoutAt,
      ),
      /** Backs the run-detail UI's "what are we waiting on?" view. */
      runIdx: index("automation_wait_locks_run_idx").on(table.runId),
    };
  },
);
232
+
233
/**
 * `automation_run_state` table — durable execution state, one row per
 * active or waiting run (keyed by `run_id`).
 *
 * The row is rewritten after each successfully completed step, and once
 * more when the run suspends, so that another process can pick the run up
 * exactly where the previous one stopped. It is cleared when the run
 * reaches a terminal status.
 *
 *   - `scopeSnapshot`: JSON-encoded variable scope (`trigger`,
 *     `variables`, `artifacts`, plus helpers) — enough to seed a fresh
 *     `DispatchContext` on resume.
 *   - `lastActionPath`: path of the most recently completed action; the
 *     walker continues with the sibling that follows it.
 *   - `lastHeartbeatAt`: bumped on every step write; the stalled-run
 *     sweeper reads it to spot runs that look dead.
 */
export const automationRunState = pgTable(
  "automation_run_state",
  {
    /** Primary key and foreign key to the run; deleted with the run. */
    runId: text("run_id")
      .primaryKey()
      .references(() => automationRuns.id, { onDelete: "cascade" }),
    scopeSnapshot: jsonb("scope_snapshot")
      .notNull()
      .$type<Record<string, unknown>>(),
    lastActionPath: text("last_action_path"),
    lastHeartbeatAt: timestamp("last_heartbeat_at").defaultNow().notNull(),
    updatedAt: timestamp("updated_at").defaultNow().notNull(),
  },
  (table) => {
    return {
      /**
       * Backs the stalled-run sweeper, which scans for runs whose
       * heartbeat is older than its threshold.
       */
      heartbeatIdx: index("automation_run_state_heartbeat_idx").on(
        table.lastHeartbeatAt,
      ),
    };
  },
);
273
+
274
/**
 * `automation_migration_failures` table — webhook subscriptions the
 * one-time migration could not turn into automations.
 *
 * Operators list these rows through the `listMigrationFailures` RPC and
 * acknowledge each one after recreating the source subscription as an
 * automation. Acknowledged rows are dropped, which keeps the queue
 * self-cleaning.
 *
 * Under normal operation the table stays empty, so it costs nothing on
 * greenfield installs.
 */
export const automationMigrationFailures = pgTable(
  "automation_migration_failures",
  {
    /** Primary key; filled with `crypto.randomUUID()` when not supplied. */
    id: text("id")
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),

    /** `webhook_subscriptions.id` of the source row; unique per failure. */
    subscriptionId: text("subscription_id").notNull().unique(),

    /** The subscription's `name`, kept for display in the admin UI. */
    subscriptionName: text("subscription_name").notNull(),

    /** The source `providerId`, e.g. `integration-teams.teams`. */
    providerId: text("provider_id").notNull(),

    /** The source `eventId` the subscription was bound to. */
    eventId: text("event_id").notNull(),

    /** Short reason: a code or a one-line summary. */
    reason: text("reason").notNull(),

    /** Detailed error message recorded when the migration ran. */
    detail: text("detail"),

    /** The original `providerConfig` JSON, for postmortem / manual rebuild. */
    providerConfig: jsonb("provider_config")
      .notNull()
      .$type<Record<string, unknown>>(),

    createdAt: timestamp("created_at").defaultNow().notNull(),
  },
);
@@ -0,0 +1,99 @@
1
+ import type { PluginMetadata } from "@checkstack/common";
2
+ import { toJsonSchema } from "@checkstack/backend-api";
3
+ import type {
4
+ RegisteredTrigger,
5
+ TriggerDefinition,
6
+ } from "./action-types";
7
+
8
/**
 * Contract of the automation trigger registry.
 *
 * Plugins add triggers through `automationTriggerExtensionPoint`. The
 * dispatch engine later reads the registry in `afterPluginsReady` to wire
 * up hook subscriptions and custom setups.
 */
export interface TriggerRegistry {
  /**
   * Adds a trigger on behalf of a plugin.
   *
   * The implementation returned by `createTriggerRegistry` stores the
   * trigger under `<pluginId>.<definition.id>` and throws when that id is
   * already taken or when the definition has neither `hook` nor `setup`.
   *
   * @param definition - The trigger as declared by the plugin.
   * @param pluginMetadata - Metadata of the registering plugin.
   */
  register<TPayload, TConfig = void>(
    definition: TriggerDefinition<TPayload, TConfig>,
    pluginMetadata: PluginMetadata,
  ): void;

  /** Returns every registered trigger. */
  getTriggers(): RegisteredTrigger[];

  /** Returns the trigger with the given fully qualified id, if any. */
  getTrigger(qualifiedId: string): RegisteredTrigger | undefined;

  /** Returns the triggers grouped by category, for UI listings. */
  getTriggersByCategory(): Map<string, RegisteredTrigger[]>;

  /** Tells whether a trigger with the given fully qualified id exists. */
  hasTrigger(qualifiedId: string): boolean;
}
28
+
29
/**
 * Builds an in-memory `TriggerRegistry`.
 *
 * Triggers are kept in a `Map` keyed by their fully qualified id
 * (`<pluginId>.<triggerId>`), so listings follow registration order.
 *
 * @returns A registry whose state is private to the returned object.
 */
export function createTriggerRegistry(): TriggerRegistry {
  const byQualifiedId = new Map<string, RegisteredTrigger>();

  /**
   * Validates and stores a trigger definition.
   *
   * @throws Error when the qualified id is already registered.
   * @throws Error when the definition has neither `hook` nor `setup`.
   */
  function register<TPayload, TConfig = void>(
    definition: TriggerDefinition<TPayload, TConfig>,
    pluginMetadata: PluginMetadata,
  ): void {
    const { pluginId } = pluginMetadata;
    const qualifiedId = `${pluginId}.${definition.id}`;

    if (byQualifiedId.has(qualifiedId)) {
      throw new Error(
        `Trigger ${qualifiedId} already registered — likely a duplicate registration in ${pluginId}.`,
      );
    }

    // Defence in depth: without a hook or a setup callback nothing could
    // ever fire this trigger.
    const reachable = definition.hook || definition.setup;
    if (!reachable) {
      throw new Error(
        `Trigger ${qualifiedId} has neither a hook nor a setup callback; it cannot fire.`,
      );
    }

    // JSON-schema renderings are computed once, at registration time.
    const payloadJsonSchema = toJsonSchema(definition.payloadSchema);
    const configJsonSchema = definition.configSchema
      ? toJsonSchema(definition.configSchema)
      : undefined;

    const entry: RegisteredTrigger<TPayload, TConfig> = {
      // Copied verbatim from the plugin's definition.
      id: definition.id,
      displayName: definition.displayName,
      description: definition.description,
      category: definition.category ?? "Uncategorized",
      icon: definition.icon,
      payloadSchema: definition.payloadSchema,
      configSchema: definition.configSchema,
      contextKey: definition.contextKey,
      hook: definition.hook,
      setup: definition.setup,
      // Derived by the registry.
      qualifiedId,
      ownerPluginId: pluginId,
      payloadJsonSchema,
      configJsonSchema,
    };

    byQualifiedId.set(qualifiedId, entry as RegisteredTrigger);
  }

  /** Snapshot of all registered triggers as a fresh array. */
  function getTriggers(): RegisteredTrigger[] {
    return Array.from(byQualifiedId.values());
  }

  /** Single trigger by fully qualified id, or `undefined` when unknown. */
  function getTrigger(qualifiedId: string): RegisteredTrigger | undefined {
    return byQualifiedId.get(qualifiedId);
  }

  /** Groups triggers by category, falling back to "Uncategorized". */
  function getTriggersByCategory(): Map<string, RegisteredTrigger[]> {
    const grouped = new Map<string, RegisteredTrigger[]>();
    byQualifiedId.forEach((trigger) => {
      const key = trigger.category ?? "Uncategorized";
      let bucket = grouped.get(key);
      if (bucket === undefined) {
        bucket = [];
        grouped.set(key, bucket);
      }
      bucket.push(trigger);
    });
    return grouped;
  }

  /** Whether a trigger with the given fully qualified id is registered. */
  function hasTrigger(qualifiedId: string): boolean {
    return byQualifiedId.has(qualifiedId);
  }

  return {
    register,
    getTriggers,
    getTrigger,
    getTriggersByCategory,
    hasTrigger,
  };
}