@checkstack/automation-backend 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/CHANGELOG.md +544 -0
  2. package/drizzle/0003_sparkling_xorn.sql +17 -0
  3. package/drizzle/0004_cultured_spyke.sql +2 -0
  4. package/drizzle/0005_classy_the_hand.sql +19 -0
  5. package/drizzle/0006_burly_wallop.sql +10 -0
  6. package/drizzle/0007_nappy_jackal.sql +1 -0
  7. package/drizzle/0008_remove_seeded_auto_incident_automations.sql +13 -0
  8. package/drizzle/0009_steady_liz_osborn.sql +12 -0
  9. package/drizzle/0010_chunky_changeling.sql +2 -0
  10. package/drizzle/meta/0003_snapshot.json +1007 -0
  11. package/drizzle/meta/0004_snapshot.json +1028 -0
  12. package/drizzle/meta/0005_snapshot.json +1164 -0
  13. package/drizzle/meta/0006_snapshot.json +1261 -0
  14. package/drizzle/meta/0007_snapshot.json +1215 -0
  15. package/drizzle/meta/0008_snapshot.json +1215 -0
  16. package/drizzle/meta/0009_snapshot.json +1328 -0
  17. package/drizzle/meta/0010_snapshot.json +1349 -0
  18. package/drizzle/meta/_journal.json +56 -0
  19. package/package.json +23 -12
  20. package/src/action-types.ts +23 -0
  21. package/src/artifact-store.ts +16 -1
  22. package/src/automation-store.test.ts +143 -0
  23. package/src/automation-store.ts +30 -8
  24. package/src/builtin-triggers.test.ts +77 -74
  25. package/src/builtin-triggers.ts +105 -108
  26. package/src/dispatch/action-kind.ts +2 -0
  27. package/src/dispatch/assemble-get-service.ts +31 -0
  28. package/src/dispatch/cancel-resurrect.test.ts +147 -0
  29. package/src/dispatch/concurrency-race.test.ts +255 -0
  30. package/src/dispatch/concurrency-scope.test.ts +166 -0
  31. package/src/dispatch/condition.ts +24 -5
  32. package/src/dispatch/dwell-queue.ts +65 -0
  33. package/src/dispatch/dwell-store.ts +154 -0
  34. package/src/dispatch/dwell.it.test.ts +142 -0
  35. package/src/dispatch/dwell.test.ts +799 -0
  36. package/src/dispatch/dwell.ts +257 -0
  37. package/src/dispatch/engine.test.ts +189 -2
  38. package/src/dispatch/engine.ts +555 -9
  39. package/src/dispatch/entity-scope.test.ts +176 -0
  40. package/src/dispatch/get-service-wiring.test.ts +318 -0
  41. package/src/dispatch/numeric.test.ts +71 -0
  42. package/src/dispatch/numeric.ts +96 -0
  43. package/src/dispatch/render.test.ts +34 -0
  44. package/src/dispatch/render.ts +31 -11
  45. package/src/dispatch/reseed-run-secrets.ts +230 -0
  46. package/src/dispatch/run-secret-registry.test.ts +189 -0
  47. package/src/dispatch/run-secret-registry.ts +247 -0
  48. package/src/dispatch/run-state-masking.test.ts +376 -0
  49. package/src/dispatch/run-state-store.ts +95 -38
  50. package/src/dispatch/run-state.ts +226 -59
  51. package/src/dispatch/scope-artifact-masking.test.ts +138 -0
  52. package/src/dispatch/secret-ref-ids.test.ts +19 -0
  53. package/src/dispatch/secret-ref-ids.ts +17 -0
  54. package/src/dispatch/snapshots.test.ts +86 -0
  55. package/src/dispatch/snapshots.ts +79 -0
  56. package/src/dispatch/stage1-router.test.ts +324 -0
  57. package/src/dispatch/stage1-router.ts +152 -0
  58. package/src/dispatch/stage1.it.test.ts +84 -0
  59. package/src/dispatch/stage2-dispatch.test.ts +285 -0
  60. package/src/dispatch/stage2-dispatch.ts +207 -0
  61. package/src/dispatch/stage2-stalled.it.test.ts +132 -0
  62. package/src/dispatch/stalled-sweeper.test.ts +197 -0
  63. package/src/dispatch/stalled-sweeper.ts +112 -5
  64. package/src/dispatch/state-scope.test.ts +234 -0
  65. package/src/dispatch/state-scope.ts +322 -0
  66. package/src/dispatch/structured-conditions.test.ts +246 -0
  67. package/src/dispatch/structured-conditions.ts +146 -0
  68. package/src/dispatch/test-fixtures.ts +306 -38
  69. package/src/dispatch/trigger-fanin.test.ts +111 -0
  70. package/src/dispatch/trigger-subscriber.ts +316 -14
  71. package/src/dispatch/types.ts +263 -8
  72. package/src/dispatch/wait-timeout-queue.ts +89 -0
  73. package/src/dispatch/wait-until-entity-wake.test.ts +544 -0
  74. package/src/dispatch/wait-until.test.ts +540 -0
  75. package/src/dispatch/wake-refs.test.ts +158 -0
  76. package/src/dispatch/wake-refs.ts +348 -0
  77. package/src/dispatch/window-gate.test.ts +513 -0
  78. package/src/dispatch/window-store.test.ts +162 -0
  79. package/src/dispatch/window-store.ts +102 -0
  80. package/src/entity/change-derivers.test.ts +148 -0
  81. package/src/entity/change-derivers.ts +143 -0
  82. package/src/entity/change-emitter.test.ts +66 -0
  83. package/src/entity/change-emitter.ts +76 -0
  84. package/src/entity/create-handle.ts +344 -0
  85. package/src/entity/cross-pod-read-consistency.it.test.ts +281 -0
  86. package/src/entity/define-entity.ts +157 -0
  87. package/src/entity/diff.test.ts +57 -0
  88. package/src/entity/diff.ts +54 -0
  89. package/src/entity/entity-store.test.ts +30 -0
  90. package/src/entity/entity-store.ts +171 -0
  91. package/src/entity/extension-point.ts +56 -0
  92. package/src/entity/fake-entity-store.ts +130 -0
  93. package/src/entity/hook.ts +19 -0
  94. package/src/entity/index.ts +50 -0
  95. package/src/entity/mutate-handle.test.ts +517 -0
  96. package/src/entity/on-entity-changed.test.ts +189 -0
  97. package/src/entity/on-entity-changed.ts +214 -0
  98. package/src/entity/registry.test.ts +181 -0
  99. package/src/entity/registry.ts +200 -0
  100. package/src/entity/stable-stringify.test.ts +55 -0
  101. package/src/entity/stable-stringify.ts +49 -0
  102. package/src/entity/wake-index.it.test.ts +251 -0
  103. package/src/entity/with-entity-write.test.ts +100 -0
  104. package/src/entity/with-entity-write.ts +69 -0
  105. package/src/entity-driven-trigger.ts +46 -0
  106. package/src/extension-points.ts +35 -0
  107. package/src/gitops-docs.test.ts +215 -0
  108. package/src/gitops-docs.ts +151 -0
  109. package/src/gitops-kinds.test.ts +174 -0
  110. package/src/gitops-kinds.ts +137 -0
  111. package/src/index.ts +355 -11
  112. package/src/migration/flapping-to-window.test.ts +123 -0
  113. package/src/migration/flapping-to-window.ts +205 -0
  114. package/src/router.test.ts +182 -1
  115. package/src/router.ts +73 -2
  116. package/src/schema.ts +236 -3
  117. package/src/script-test-replay.test.ts +88 -0
  118. package/src/script-test-replay.ts +100 -0
  119. package/src/script-test-shell-env.test.ts +41 -0
  120. package/src/script-test-shell-env.ts +89 -0
  121. package/src/script-test.test.ts +386 -0
  122. package/src/script-test.ts +258 -0
  123. package/src/trigger-registry.ts +2 -0
  124. package/src/validate-definition.test.ts +1 -0
  125. package/tsconfig.json +24 -0
@@ -21,10 +21,14 @@ import type {
21
21
  import { SYSTEM_ACTOR, type Actor } from "@checkstack/common";
22
22
 
23
23
  import type { AutomationStore } from "../automation-store";
24
+ import { evaluate, parseCondition } from "@checkstack/template-engine";
25
+
24
26
  import { dispatchTrigger, resumeRun } from "./engine";
25
27
  import { evaluateCondition } from "./condition";
26
28
  import { renderString } from "./render";
27
29
  import { buildInitialScope } from "./scope";
30
+ import { enrichScopeWithState } from "./state-scope";
31
+ import { armDwell, type StartRunFromDwell } from "./dwell";
28
32
  import type {
29
33
  DispatchDeps,
30
34
  LoadedAutomation,
@@ -147,7 +151,7 @@ export async function setupTriggerSubscriptions(
147
151
  };
148
152
  }
149
153
 
150
- interface HandleTriggerFiringArgs {
154
+ export interface HandleTriggerFiringArgs {
151
155
  deps: DispatchDeps;
152
156
  automationStore: AutomationStore;
153
157
  qualifiedEventId: string;
@@ -156,7 +160,7 @@ interface HandleTriggerFiringArgs {
156
160
  contextKey: string | null;
157
161
  }
158
162
 
159
- async function handleTriggerFiring(
163
+ export async function handleTriggerFiring(
160
164
  args: HandleTriggerFiringArgs,
161
165
  ): Promise<void> {
162
166
  // ── Step 1: resume any waiting runs ──
@@ -182,6 +186,65 @@ async function handleTriggerFiring(
182
186
  });
183
187
  }
184
188
  }
189
+
190
+ // ── Step 3: eager inverse-cancel ──
191
+ // A state-change event may be the natural inverse of an armed dwell
192
+ // (e.g. `system.healthy` cancels a `system.degraded` + for: dwell on
193
+ // the same automation + system). The expiry re-confirm would catch
194
+ // this anyway, but cancelling now deletes the dwell row so its queue
195
+ // job no-ops promptly instead of waking and re-checking later.
196
+ await cancelStaleDwells(args);
197
+ }
198
+
199
+ /**
200
+ * For every automation referencing the firing event with a `for:` dwell
201
+ * armed on the same context key, re-confirm the system's current status;
202
+ * if it no longer matches the dwell's `armedStatus`, cancel the dwell.
203
+ * Bounded to the matching automations and skipped entirely when no
204
+ * health client is wired (nothing to re-confirm against).
205
+ */
206
+ async function cancelStaleDwells(
207
+ args: HandleTriggerFiringArgs,
208
+ ): Promise<void> {
209
+ const client = args.deps.healthCheckClient;
210
+ if (!client || args.contextKey === null) return;
211
+
212
+ const matches = await args.automationStore.findEnabledByTriggerEvent(
213
+ args.qualifiedEventId,
214
+ );
215
+
216
+ let currentStatus: string | undefined;
217
+ for (const automation of matches) {
218
+ for (const trigger of automation.definition.triggers) {
219
+ if (!trigger.for) continue;
220
+ const triggerId = trigger.id ?? deriveTriggerId(trigger);
221
+ const dwell = await args.deps.dwellStore.findByKey(
222
+ automation.id,
223
+ triggerId,
224
+ args.contextKey,
225
+ );
226
+ if (!dwell || dwell.armedStatus === null) continue;
227
+
228
+ // Resolve current status once per firing (cheap memoised lookup).
229
+ if (currentStatus === undefined) {
230
+ try {
231
+ const state = await client.getHealthState({
232
+ systemId: args.contextKey,
233
+ });
234
+ currentStatus = state.status;
235
+ } catch {
236
+ return; // can't re-confirm — leave the dwell for expiry.
237
+ }
238
+ }
239
+
240
+ if (currentStatus !== dwell.armedStatus) {
241
+ await args.deps.dwellStore.delete(dwell.id);
242
+ args.deps.logger.debug(
243
+ `Cancelled dwell ${dwell.id} (${automation.id}/${triggerId}): system ${args.contextKey} left status "${dwell.armedStatus}" (now "${currentStatus}")`,
244
+ );
245
+ }
246
+ }
247
+ }
185
248
  }
186
249
 
187
250
  async function wakeWaitingRuns(args: HandleTriggerFiringArgs): Promise<void> {
@@ -202,6 +265,12 @@ async function wakeWaitingRuns(args: HandleTriggerFiringArgs): Promise<void> {
202
265
  actor: args.actor,
203
266
  startedAt: new Date(),
204
267
  });
268
+ await enrichScopeWithState({
269
+ scope: ctx,
270
+ client: args.deps.healthCheckClient,
271
+ logger: args.deps.logger,
272
+ contextKey: args.contextKey,
273
+ });
205
274
  const pass = evaluateCondition(
206
275
  lock.filterTemplate,
207
276
  ctx,
@@ -237,6 +306,39 @@ async function wakeWaitingRuns(args: HandleTriggerFiringArgs): Promise<void> {
237
306
  }
238
307
  }
239
308
 
309
+ /**
310
+ * Stage-2 entry (reactive automation engine §13.3): start fresh runs for ONE
311
+ * already-resolved automation whose trigger references `eventId`, using the
312
+ * entity-change as the trigger payload. Mirrors the per-automation inner of
313
+ * `handleTriggerFiring` step 2, but scoped to a single automation so the
314
+ * Stage-2 fan-out job (one automation + one entity change) runs in isolation.
315
+ *
316
+ * Each matching trigger goes through `maybeStartRun` (config gate, filter,
317
+ * dwell, concurrency mode) exactly as the hook-driven path does.
318
+ */
319
+ export async function startRunsForAutomationEvent(args: {
320
+ deps: DispatchDeps;
321
+ automation: LoadedAutomation;
322
+ eventId: string;
323
+ triggerPayload: Record<string, unknown>;
324
+ actor: Actor;
325
+ contextKey: string | null;
326
+ }): Promise<void> {
327
+ for (const trigger of args.automation.definition.triggers.filter(
328
+ (t) => t.event === args.eventId,
329
+ )) {
330
+ await maybeStartRun({
331
+ deps: args.deps,
332
+ automation: args.automation,
333
+ trigger,
334
+ triggerPayload: args.triggerPayload,
335
+ actor: args.actor,
336
+ contextKey: args.contextKey,
337
+ eventId: args.eventId,
338
+ });
339
+ }
340
+ }
341
+
240
342
  interface MaybeStartRunArgs {
241
343
  deps: DispatchDeps;
242
344
  automation: LoadedAutomation;
@@ -248,20 +350,51 @@ interface MaybeStartRunArgs {
248
350
  }
249
351
 
250
352
  async function maybeStartRun(args: MaybeStartRunArgs): Promise<void> {
251
- // Trigger-level filter check.
353
+ // Structured config gate (e.g. numeric_state's above/below threshold).
354
+ // Runs before the operator's template filter. A registered trigger that
355
+ // declares `evaluateConfig` decides per-automation whether this payload
356
+ // fires, using the trigger's typed `config`.
357
+ const registered = args.deps.registries.triggers.getTrigger(args.eventId);
358
+ if (registered?.evaluateConfig) {
359
+ let pass: boolean;
360
+ try {
361
+ pass = registered.evaluateConfig(
362
+ args.triggerPayload,
363
+ args.trigger.config,
364
+ );
365
+ } catch (error) {
366
+ args.deps.logger.warn(
367
+ `Trigger config gate threw; skipping firing: ${(error as Error).message}`,
368
+ );
369
+ return;
370
+ }
371
+ if (!pass) return;
372
+ }
373
+
374
+ // Trigger-level filter gates BOTH the immediate run and arming a dwell.
375
+ // (Conditions, by contrast, gate the run itself and are evaluated at
376
+ // fire time so a dwell re-checks them after the duration.)
252
377
  if (args.trigger.filter) {
253
- const ctx = buildInitialScope({
378
+ const filterScope = buildInitialScope({
254
379
  triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
255
380
  triggerEventId: args.eventId,
256
381
  payload: args.triggerPayload,
257
382
  actor: args.actor,
258
383
  startedAt: new Date(),
259
384
  });
385
+ await enrichScopeWithState({
386
+ scope: filterScope,
387
+ client: args.deps.healthCheckClient,
388
+ logger: args.deps.logger,
389
+ contextKey: args.contextKey,
390
+ usesState: args.automation.definition.uses_state,
391
+ transitionWindowMinutes: args.automation.definition.state_window_minutes,
392
+ });
260
393
  let pass: boolean;
261
394
  try {
262
395
  pass = evaluateCondition(
263
396
  args.trigger.filter,
264
- ctx,
397
+ filterScope,
265
398
  args.deps.filters,
266
399
  );
267
400
  } catch (error) {
@@ -273,20 +406,100 @@ async function maybeStartRun(args: MaybeStartRunArgs): Promise<void> {
273
406
  if (!pass) return;
274
407
  }
275
408
 
276
- // Top-level conditions gate the run.
277
- if (args.automation.definition.conditions.length > 0) {
278
- const ctx = buildInitialScope({
409
+ // Windowed-count / rate gate — runs AFTER the structured config gate + the
410
+ // operator's `filter` (so only QUALIFYING occurrences count) and BEFORE the
411
+ // `for:` dwell (so a window can compose with a dwell). Records this
412
+ // occurrence in the durable append log and counts rows in the trailing
413
+ // window; fires per the re-fire policy.
414
+ //
415
+ // Cross-pod: the work-queue claim gives exactly one INSERT per emission, and
416
+ // the COUNT is a pure DB read, so every pod agrees on whether the threshold
417
+ // was crossed (state-and-scale rule). No process-local state.
418
+ if (args.trigger.window) {
419
+ // Partition key the count buckets by. Defaults to the trigger's built-in
420
+ // context key (e.g. systemId); `partitionBy` overrides it with a bare
421
+ // expression evaluated against the SAME scope `filter` uses. An
422
+ // empty/undefined result or an eval error falls back to the built-in key
423
+ // (never accidental global counting).
424
+ const partitionKey = await resolvePartitionKey(args);
425
+ let fired: boolean;
426
+ try {
427
+ fired = await args.deps.windowStore.recordAndCount({
428
+ automationId: args.automation.id,
429
+ triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
430
+ eventId: args.eventId,
431
+ contextKey: partitionKey,
432
+ occurredAt: new Date(),
433
+ windowMinutes: args.trigger.window.minutes,
434
+ threshold: args.trigger.window.count,
435
+ refire: args.trigger.window.refire,
436
+ });
437
+ } catch (error) {
438
+ args.deps.logger.warn(
439
+ `Trigger window gate failed; skipping firing: ${(error as Error).message}`,
440
+ );
441
+ return;
442
+ }
443
+ if (!fired) return;
444
+ }
445
+
446
+ // `for:` dwell — arm (or re-arm) instead of starting the run now. The
447
+ // run starts only if the matched state still holds after the duration.
448
+ if (args.trigger.for) {
449
+ await armDwell({
450
+ deps: args.deps,
451
+ automation: args.automation,
452
+ trigger: args.trigger,
279
453
  triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
454
+ eventId: args.eventId,
455
+ contextKey: args.contextKey,
456
+ triggerPayload: args.triggerPayload,
457
+ actor: args.actor,
458
+ });
459
+ return;
460
+ }
461
+
462
+ await startRunRespectingMode({
463
+ deps: args.deps,
464
+ automation: args.automation,
465
+ trigger: args.trigger,
466
+ triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
467
+ eventId: args.eventId,
468
+ contextKey: args.contextKey,
469
+ triggerPayload: args.triggerPayload,
470
+ actor: args.actor,
471
+ });
472
+ }
473
+
474
+ /**
475
+ * Evaluate the automation's pre-run conditions (against freshly-enriched
476
+ * scope) and, if they pass, dispatch a run honouring the concurrency
477
+ * mode. Shared by the immediate trigger path and the dwell-fire path
478
+ * (so a dwell re-checks conditions at expiry, not at arm time).
479
+ */
480
+ export const startRunRespectingMode: StartRunFromDwell = async (args) => {
481
+ // Top-level conditions gate the run, evaluated against enriched scope.
482
+ if (args.automation.definition.conditions.length > 0) {
483
+ const gateScope = buildInitialScope({
484
+ triggerId: args.triggerId,
280
485
  triggerEventId: args.eventId,
281
486
  payload: args.triggerPayload,
282
487
  actor: args.actor,
283
488
  startedAt: new Date(),
284
489
  });
490
+ await enrichScopeWithState({
491
+ scope: gateScope,
492
+ client: args.deps.healthCheckClient,
493
+ logger: args.deps.logger,
494
+ contextKey: args.contextKey,
495
+ usesState: args.automation.definition.uses_state,
496
+ transitionWindowMinutes: args.automation.definition.state_window_minutes,
497
+ });
285
498
  for (const condition of args.automation.definition.conditions) {
286
499
  try {
287
500
  const pass = evaluateCondition(
288
501
  condition,
289
- ctx,
502
+ gateScope,
290
503
  args.deps.filters,
291
504
  );
292
505
  if (!pass) return;
@@ -301,14 +514,14 @@ async function maybeStartRun(args: MaybeStartRunArgs): Promise<void> {
301
514
  automationId: args.automation.id,
302
515
  mode: args.automation.definition.mode,
303
516
  maxRuns: args.automation.definition.max_runs,
304
- triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
517
+ triggerId: args.triggerId,
305
518
  triggerEventId: args.eventId,
306
519
  triggerPayload: args.triggerPayload,
307
520
  actor: args.actor,
308
521
  contextKey: args.contextKey,
309
522
  automation: args.automation,
310
523
  });
311
- }
524
+ };
312
525
 
313
526
  interface RespectConcurrencyArgs {
314
527
  deps: DispatchDeps;
@@ -325,10 +538,43 @@ interface RespectConcurrencyArgs {
325
538
 
326
539
  async function respectConcurrencyMode(
327
540
  args: RespectConcurrencyArgs,
541
+ ): Promise<void> {
542
+ // Per the automation's concurrency scope, the active-run bucket is
543
+ // either the whole automation (`undefined` → no context filter) or just
544
+ // the incoming context key. Passing `undefined` keeps the original
545
+ // per-automation behaviour for the default scope.
546
+ const scopeKey =
547
+ args.automation.definition.concurrency_scope === "context_key"
548
+ ? args.contextKey
549
+ : undefined;
550
+
551
+ // Serialize the check-then-create. Without a lock, two concurrent fires
552
+ // (two trigger events, a dwell-fire racing a fresh fire, or two pods) can
553
+ // both read "no active run" and both `dispatchTrigger`, double-running a
554
+ // `single`-mode automation. The lock is keyed on (automationId, scope) so
555
+ // it doesn't serialize unrelated automations or distinct context keys.
556
+ const lockKey = `automation.concurrency:${args.automationId}:${
557
+ scopeKey ?? "@@all"
558
+ }`;
559
+ const run = args.deps.withConcurrencyLock
560
+ ? <T>(fn: () => Promise<T>) => args.deps.withConcurrencyLock!(lockKey, fn)
561
+ : <T>(fn: () => Promise<T>) => fn();
562
+
563
+ await run(async () => {
564
+ await respectConcurrencyModeInner(args, scopeKey);
565
+ });
566
+ }
567
+
568
+ async function respectConcurrencyModeInner(
569
+ args: RespectConcurrencyArgs,
570
+ scopeKey: string | null | undefined,
328
571
  ): Promise<void> {
329
572
  switch (args.mode) {
330
573
  case "single": {
331
- const active = await args.deps.runStore.hasActiveRun(args.automationId);
574
+ const active = await args.deps.runStore.hasActiveRun(
575
+ args.automationId,
576
+ scopeKey,
577
+ );
332
578
  if (active) {
333
579
  args.deps.logger.debug(
334
580
  `Skipping trigger for ${args.automationId} — single mode and a run is active`,
@@ -338,7 +584,10 @@ async function respectConcurrencyMode(
338
584
  break;
339
585
  }
340
586
  case "parallel": {
341
- const count = await args.deps.runStore.countActiveRuns(args.automationId);
587
+ const count = await args.deps.runStore.countActiveRuns(
588
+ args.automationId,
589
+ scopeKey,
590
+ );
342
591
  if (count >= args.maxRuns) {
343
592
  args.deps.logger.debug(
344
593
  `Skipping trigger for ${args.automationId} — parallel limit reached (${count}/${args.maxRuns})`,
@@ -352,7 +601,10 @@ async function respectConcurrencyMode(
352
601
  // queueing requires its own coordination queue, which we add in a
353
602
  // follow-up. Behaviour stays correct (no double-fire) under the
354
603
  // existing work-queue mode.
355
- const count = await args.deps.runStore.countActiveRuns(args.automationId);
604
+ const count = await args.deps.runStore.countActiveRuns(
605
+ args.automationId,
606
+ scopeKey,
607
+ );
356
608
  if (count >= args.maxRuns) return;
357
609
  break;
358
610
  }
@@ -360,6 +612,7 @@ async function respectConcurrencyMode(
360
612
  const cancelled = await args.deps.runStore.cancelActiveRuns(
361
613
  args.automationId,
362
614
  "restart — superseded by newer trigger",
615
+ scopeKey,
363
616
  );
364
617
  if (cancelled.length > 0) {
365
618
  args.deps.logger.debug(
@@ -386,6 +639,55 @@ async function respectConcurrencyMode(
386
639
  // is convenient for future filter expressions.
387
640
  void renderString;
388
641
 
642
+ /**
643
+ * Resolve the partition key the windowed-count gate buckets the occurrence
644
+ * count by.
645
+ *
646
+ * - No `window.partitionBy` → the trigger's built-in context key
647
+ * (`args.contextKey`, e.g. systemId). Existing behaviour, unchanged.
648
+ * - `window.partitionBy` set → evaluate it as a BARE expression (same flavour
649
+ * as `filter`, no `{{ }}`) against the SAME scope `filter` uses, then
650
+ * coerce the result to a string.
651
+ * - The evaluated value is null/undefined/empty, OR evaluation throws →
652
+ * fall back to `args.contextKey` (never accidental global counting). An
653
+ * eval error is logged, matching the gate's fail-open posture.
654
+ */
655
+ async function resolvePartitionKey(
656
+ args: MaybeStartRunArgs,
657
+ ): Promise<string | null> {
658
+ const expression = args.trigger.window?.partitionBy;
659
+ if (expression === undefined) return args.contextKey;
660
+
661
+ try {
662
+ const scope = buildInitialScope({
663
+ triggerId: args.trigger.id ?? deriveTriggerId(args.trigger),
664
+ triggerEventId: args.eventId,
665
+ payload: args.triggerPayload,
666
+ actor: args.actor,
667
+ startedAt: new Date(),
668
+ });
669
+ await enrichScopeWithState({
670
+ scope,
671
+ client: args.deps.healthCheckClient,
672
+ logger: args.deps.logger,
673
+ contextKey: args.contextKey,
674
+ usesState: args.automation.definition.uses_state,
675
+ transitionWindowMinutes: args.automation.definition.state_window_minutes,
676
+ });
677
+ const value = evaluate(parseCondition(expression), scope, {
678
+ filters: args.deps.filters,
679
+ });
680
+ if (value === null || value === undefined) return args.contextKey;
681
+ const key = String(value).trim();
682
+ return key.length > 0 ? key : args.contextKey;
683
+ } catch (error) {
684
+ args.deps.logger.warn(
685
+ `Trigger window partitionBy failed to evaluate; falling back to the built-in context key: ${(error as Error).message}`,
686
+ );
687
+ return args.contextKey;
688
+ }
689
+ }
690
+
389
691
  /**
390
692
  * Derive a stable trigger id from the trigger declaration when the
391
693
  * operator hasn't assigned one. Slugifies the event id; collisions