@hogsend/engine 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,721 @@
1
+ import { ConcurrencyLimitStrategy } from "@hatchet-dev/typescript-sdk/v1/index.js";
2
+ import type { JsonObject } from "@hatchet-dev/typescript-sdk/v1/types.js";
3
+ import {
4
+ type BucketMeta,
5
+ type ConditionEval,
6
+ type DurationObject,
7
+ durationToMs,
8
+ evaluateCondition,
9
+ } from "@hogsend/core";
10
+ import {
11
+ bucketMemberships,
12
+ contacts,
13
+ createDatabase,
14
+ type Database,
15
+ userEvents,
16
+ } from "@hogsend/db";
17
+ import {
18
+ and,
19
+ eq,
20
+ gte,
21
+ inArray,
22
+ isNotNull,
23
+ isNull,
24
+ lte,
25
+ sql,
26
+ } from "drizzle-orm";
27
+ import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
28
+ import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
29
+ import { emitBucketTransition } from "../lib/bucket-emit.js";
30
+ import { hatchet } from "../lib/hatchet.js";
31
+ import type { Logger } from "../lib/logger.js";
32
+ import { createLogger } from "../lib/logger.js";
33
+
34
+ /** Per-tick row cap for the chunked reconcile paths: the composite per-member re-evaluation and the reconcile-joins candidate query (Section 6.4). */
35
+ const BATCH_SIZE = 500;
36
+
37
+ /** The reserved prefix every bucket transition event carries. */
38
+ const BUCKET_EVENT_PREFIX = "bucket:";
39
+
40
/**
 * Cron-driven reconciliation of bucket memberships that only the clock can
 * change (Section 6.4).
 *
 * A rolling `EventCondition.within` window can move a user OUT of a bucket
 * without any inbound event, which the real-time path cannot observe. Each run
 * walks every enabled bucket and, for those a clock can affect, performs:
 *
 *  1. criteria-driven leaves (`reconcileBucketLeaves`) for time-based buckets;
 *  2. optional absence joins (`reconcileBucketJoins`) when the bucket sets
 *     `reconcileJoins`;
 *  3. unconditional TTL leaves (`reconcileBucketTtlLeaves`) when the bucket
 *     declares `maxDwell`.
 *
 * `db` and `logger` are built from `process.env` inside the run, and the bucket
 * and journey registries are read from their process-wide singletons.
 *
 * Emission is gated on the `RETURNING` rows of the compare-and-swap performed
 * by the helpers, and uses the same deterministic idempotency key shape as the
 * real-time path (`bucket:<id>:<userId>:<kind>:<entryCount>`), so a concurrent
 * ingest tick and this cron converge on a single emission (Section 6.3).
 *
 * Concurrency is deliberately NON-cancelling: `GROUP_ROUND_ROBIN` with
 * `maxRuns: 1` on one static key makes an overlapping run queue behind the
 * in-flight one instead of cancelling it, so a slow sweep is allowed to finish.
 *
 * A failure while processing one bucket is logged and does not stop the sweep
 * of the remaining buckets.
 *
 * @returns `{ reconciled, joined }` — the number of memberships left and joined
 *          by this run.
 */
export const bucketReconcileTask = hatchet.task({
  name: "bucket-reconcile",
  onCrons: [process.env.BUCKET_RECONCILE_CRON ?? "*/5 * * * *"],
  retries: 1,
  executionTimeout: "120s",
  concurrency: {
    // One global group: the next sweep waits its turn, it is never cancelled.
    expression: "'bucket-reconcile'",
    maxRuns: 1,
    limitStrategy: ConcurrencyLimitStrategy.GROUP_ROUND_ROBIN,
  },
  fn: async () => {
    const { db } = createDatabase({ url: process.env.DATABASE_URL ?? "" });
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");
    const registry = getBucketRegistrySingleton();
    const journeyRegistry = getJourneyRegistrySingleton();

    const totals = { reconciled: 0, joined: 0 };

    for (const bucket of registry.getEnabled()) {
      // Manual buckets, and buckets without criteria, are never recomputed.
      const isManual = bucket.kind === "manual";
      if (isManual || !bucket.criteria) continue;

      // Only buckets a clock can flip are swept: a time-based criteria window
      // (explicit flag or inferred from `within`) and/or a `maxDwell` TTL.
      const timeBased = isTimeBased(bucket);
      const hasTtl = Boolean(bucket.maxDwell);
      if (!timeBased && !hasTtl) continue;

      const sweep = { db, logger, journeyRegistry, bucket };

      try {
        if (timeBased) {
          totals.reconciled += await reconcileBucketLeaves(sweep);

          // Absence joins are opt-in (default off) to bound the cost of the
          // sweep (Section 6.4).
          if (bucket.reconcileJoins) {
            totals.joined += await reconcileBucketJoins(sweep);
          }
        }

        // TTL leaves ignore the criteria entirely; re-entry afterwards is
        // governed by the bucket's `reentry` policy.
        if (hasTtl) {
          totals.reconciled += await reconcileBucketTtlLeaves(sweep);
        }
      } catch (err) {
        logger.error("Bucket reconcile failed", {
          bucketId: bucket.id,
          error: err instanceof Error ? err.message : String(err),
        });
      }
    }

    const { reconciled, joined } = totals;
    logger.info("Bucket reconcile sweep complete", { reconciled, joined });
    return { reconciled, joined };
  },
});
145
+
146
/**
 * Payload of the `bucket:arm-expiry` event that arms one fast-expiry timer for
 * a single membership row.
 */
export interface BucketArmExpiryInput extends JsonObject {
  /** Id of the membership row the timer was armed for (matched in the CAS). */
  rowId: string;
  /** Id of the bucket, used to look the bucket up in the registry on wake. */
  bucketId: string;
  /** User the membership belongs to. */
  userId: string;
  /** User email forwarded to the transition emit; may be null. */
  userEmail: string | null;
  /** ISO timestamp of the armed deadline — the CAS epoch. */
  armedExpiresAt: string;
  /** ms from arming to the deadline (the durable sleep). */
  msUntilExpiry: number;
}

/**
 * Optional per-user fast-expiry durable timer (Section 6.5). Armed on JOIN for
 * `meta.fastExpiry` buckets, it durably sleeps until the membership's
 * `expiresAt` deadline and then leaves through a SINGLE atomic CAS keyed on the
 * ARMED `expiresAt` — never read-then-act. If a concurrent real-time event
 * re-armed the window (a new `expiresAt`), the CAS matches zero rows and the
 * stale timer no-ops WITHOUT emitting a spurious `bucket:left`. The cron
 * remains the authoritative backstop for any timer lost to worker churn.
 *
 * This is one SHARED durable task keyed on `bucket:arm-expiry`: arming is per
 * event payload, not per-bucket task instance. It is registered once by
 * `selectBucketTasks` if ANY enabled bucket opts in (Section 9.4), and the
 * `bucket:`-prefixed event name is recursion-guarded by
 * `checkBucketMembership`.
 *
 * @returns `{ status: "left", rowId }` when the membership was flipped, or
 *          `{ status: "skipped", reason }` with reason `bucket_unregistered`,
 *          `still_member` or `re_armed_or_already_left`.
 */
export const bucketExpiryTask = hatchet.durableTask({
  name: "bucket-expiry",
  onEvents: [`${BUCKET_EVENT_PREFIX}arm-expiry`],
  retries: 0,
  fn: async (input: BucketArmExpiryInput, ctx) => {
    const { db } = createDatabase({ url: process.env.DATABASE_URL ?? "" });
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");
    const registry = getBucketRegistrySingleton();
    const journeyRegistry = getJourneyRegistrySingleton();

    // Durable sleep until the armed deadline (duration given in ms).
    await ctx.sleepFor(input.msUntilExpiry);

    const bucket = registry.get(input.bucketId);
    if (!bucket?.criteria) {
      return { status: "skipped", reason: "bucket_unregistered" };
    }

    // Re-check the criteria on wake: a user who re-qualified while the timer
    // slept (e.g. fired the event again) must stay in the bucket.
    const stillMember = await evaluateCondition({
      condition: bucket.criteria,
      ctx: { db, userId: input.userId, journeyContext: {} },
    });
    if (stillMember) {
      return { status: "skipped", reason: "still_member" };
    }

    // The CAS only matches the row while it is still active AND still carries
    // the deadline this timer was armed with (Section 6.5).
    const armedDeadline = new Date(input.armedExpiresAt);
    const stillArmed = and(
      eq(bucketMemberships.id, input.rowId),
      eq(bucketMemberships.status, "active"),
      eq(bucketMemberships.expiresAt, armedDeadline),
    );

    const [flipped] = await db
      .update(bucketMemberships)
      .set({
        status: "left",
        leftAt: new Date(),
        lastEvaluatedAt: new Date(),
        updatedAt: new Date(),
      })
      .where(stillArmed)
      .returning({
        id: bucketMemberships.id,
        entryCount: bucketMemberships.entryCount,
      });

    // Zero rows: the window was re-armed or the membership already left.
    if (!flipped) {
      return { status: "skipped", reason: "re_armed_or_already_left" };
    }

    await emitBucketTransition({
      db,
      registry: journeyRegistry,
      hatchet,
      logger,
      kind: "left",
      bucket,
      userId: input.userId,
      userEmail: input.userEmail,
      epoch: flipped.entryCount,
      source: "reconcile",
    });

    return { status: "left", rowId: flipped.id };
  },
});
242
+
243
/**
 * Criteria-driven leaves for one time-based bucket.
 *
 * A single `event` criterion is resolved with the set-based SHOULD-LEAVE query
 * (`selectEventLeavers`), which is the authoritative evaluation for that shape —
 * no per-member `evaluateCondition`. Any other criterion shape is delegated to
 * the chunked per-member fallback (`reconcileCompositeLeaves`, Section 6.4).
 *
 * @param opts.db              Database handle used for the queries.
 * @param opts.logger          Logger forwarded to the leave/emit helpers.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket being reconciled; `criteria` must be set.
 * @returns the number of memberships actually flipped to `left`.
 */
async function reconcileBucketLeaves(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket } = opts;
  const criteria = bucket.criteria as ConditionEval;

  // Anything that is not a single event criterion takes the documented
  // O(active members) per-member fallback.
  if (criteria.type !== "event") {
    return reconcileCompositeLeaves({ db, logger, journeyRegistry, bucket });
  }

  const userIds = await selectEventLeavers(db, bucket, criteria);
  if (userIds.length === 0) {
    return 0;
  }

  return bulkLeave({ db, logger, journeyRegistry, bucket, userIds });
}
276
+
277
/**
 * Computes the SHOULD-LEAVE user ids for a single-event criterion.
 *
 * For every active, non-deleted membership of the bucket whose contact is also
 * not deleted, the number of matching events is counted (restricted to the
 * rolling window when the criterion has a `within`), and the per-shape decision
 * is delegated to `shouldLeaveByCount` (Section 6.4). Members with no matching
 * event are treated as having a count of 0.
 *
 * @param db       Database handle.
 * @param bucket   Bucket whose active members are inspected.
 * @param criteria The bucket's single `event` criterion.
 * @returns user ids of the members that should leave.
 */
async function selectEventLeavers(
  db: Database,
  bucket: BucketMeta,
  criteria: Extract<ConditionEval, { type: "event" }>,
): Promise<string[]> {
  // Start of the rolling window, or null when the criterion is unwindowed.
  let windowStart: Date | null = null;
  if (criteria.within) {
    windowStart = new Date(Date.now() - durationToMs(criteria.within));
  }

  // Active, non-deleted memberships of this bucket.
  const members = db
    .select({ userId: bucketMemberships.userId })
    .from(bucketMemberships)
    .where(
      and(
        eq(bucketMemberships.bucketId, bucket.id),
        eq(bucketMemberships.status, "active"),
        isNull(bucketMemberships.deletedAt),
      ),
    )
    .as("members");

  // Per-user count of the criterion's event, limited to the window if any.
  const eventFilters = [eq(userEvents.event, criteria.eventName)];
  if (windowStart) {
    eventFilters.push(gte(userEvents.occurredAt, windowStart));
  }
  const counted = db
    .select({
      userId: userEvents.userId,
      cnt: sql<number>`count(*)::int`.as("cnt"),
    })
    .from(userEvents)
    .where(and(...eventFilters))
    .groupBy(userEvents.userId)
    .as("counted");

  // LEFT JOIN keeps members without events (count coalesced to 0); the INNER
  // JOIN on contacts restricts the set to live contacts (GDPR — Section 8.6).
  const memberCounts = await db
    .select({
      userId: members.userId,
      cnt: sql<number>`coalesce(${counted.cnt}, 0)`,
    })
    .from(members)
    .leftJoin(counted, eq(members.userId, counted.userId))
    .innerJoin(contacts, eq(contacts.externalId, members.userId))
    .where(isNull(contacts.deletedAt));

  const leavers: string[] = [];
  for (const row of memberCounts) {
    if (shouldLeaveByCount(criteria, Number(row.cnt))) {
      leavers.push(row.userId);
    }
  }
  return leavers;
}
336
+
337
/**
 * Decides, from the (windowed) event count, whether a member no longer
 * satisfies a single-event criterion and therefore should leave (Section 6.4).
 *
 * - `not_exists`: leave once at least one event is present.
 * - `exists`: leave once no event is present.
 * - `count`: leave when `windowedCount <operator> value` no longer holds; when
 *   the operator or value is missing, leave only at a count of 0.
 * - any other check or operator: never leave.
 *
 * @param criteria      The single `event` criterion being evaluated.
 * @param windowedCount Number of matching events counted for the member.
 * @returns true when the member should leave the bucket.
 */
function shouldLeaveByCount(
  criteria: Extract<ConditionEval, { type: "event" }>,
  windowedCount: number,
): boolean {
  const { check } = criteria;

  // Absence bucket: the event reappearing ends the membership.
  if (check === "not_exists") return windowedCount > 0;

  // Presence bucket: the event dropping out of the window ends it.
  if (check === "exists") return windowedCount === 0;

  if (check !== "count") return false;

  const { operator, value } = criteria;
  if (!operator || value === undefined) {
    return windowedCount === 0;
  }

  // `holds` is whether the count still satisfies the operator; the member
  // leaves exactly when it does not.
  let holds: boolean;
  if (operator === "gt") {
    holds = windowedCount > value;
  } else if (operator === "gte") {
    holds = windowedCount >= value;
  } else if (operator === "lt") {
    holds = windowedCount < value;
  } else if (operator === "lte") {
    holds = windowedCount <= value;
  } else if (operator === "eq") {
    holds = windowedCount === value;
  } else {
    return false;
  }
  return !holds;
}
376
+
377
/**
 * Composite/multi-condition time-based fallback: re-evaluates up to
 * `BATCH_SIZE` active members per tick with `evaluateCondition`, oldest
 * `lastEvaluatedAt` first (never-evaluated rows first), and leaves the ones
 * whose criteria no longer match (Section 6.4).
 *
 * Only active, non-deleted memberships with a non-deleted contact are
 * considered. After evaluation, `lastEvaluatedAt` is bumped for the whole chunk
 * (leavers and stable members alike) so the next tick moves on to the next
 * oldest members.
 *
 * A failure while evaluating ONE member is logged and that member is treated
 * as "not a leaver" for this tick. Without this isolation a single member whose
 * evaluation throws would abort the chunk before the cursor is bumped, and the
 * same oldest-first chunk would be selected again on every tick, so no member
 * of the bucket would ever be reconciled.
 *
 * @param opts.db              Database handle.
 * @param opts.logger          Logger for per-member failures and the emit.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket being reconciled; `criteria` must be set.
 * @returns the number of memberships actually flipped to `left`.
 */
async function reconcileCompositeLeaves(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket } = opts;
  const criteria = bucket.criteria as ConditionEval;

  const members = await db
    .select({
      userId: bucketMemberships.userId,
    })
    .from(bucketMemberships)
    .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
    .where(
      and(
        eq(bucketMemberships.bucketId, bucket.id),
        eq(bucketMemberships.status, "active"),
        isNull(bucketMemberships.deletedAt),
        isNull(contacts.deletedAt),
      ),
    )
    .orderBy(sql`${bucketMemberships.lastEvaluatedAt} asc nulls first`)
    .limit(BATCH_SIZE);

  if (members.length === 0) return 0;

  const leaverIds: string[] = [];
  const evaluatedIds: string[] = [];
  for (const { userId } of members) {
    // Recorded before evaluating so a failing member still advances the cursor.
    evaluatedIds.push(userId);
    try {
      const isMember = await evaluateCondition({
        condition: criteria,
        ctx: { db, userId, journeyContext: {} },
      });
      if (!isMember) leaverIds.push(userId);
    } catch (err) {
      logger.error("Bucket composite evaluation failed", {
        bucketId: bucket.id,
        userId,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  // Bump lastEvaluatedAt for the whole chunk so the next tick advances the
  // cursor (including stable members, which are NOT leavers).
  await db
    .update(bucketMemberships)
    .set({ lastEvaluatedAt: new Date() })
    .where(
      and(
        eq(bucketMemberships.bucketId, bucket.id),
        eq(bucketMemberships.status, "active"),
        inArray(bucketMemberships.userId, evaluatedIds),
      ),
    );

  if (leaverIds.length === 0) return 0;
  return bulkLeave({ db, logger, journeyRegistry, bucket, userIds: leaverIds });
}
437
+
438
/**
 * Unconditional max-dwell TTL leave for one bucket.
 *
 * Selects active, non-deleted memberships whose `maxDwellAt` is set and not
 * later than now, restricted to contacts that are not deleted (GDPR), and hands
 * them to `bulkLeave`. The criteria are NOT re-evaluated here, unlike the
 * criteria SHOULD-LEAVE path. Whether the user may re-join afterwards is
 * governed by the bucket's `reentry` policy on their next qualifying event.
 *
 * @param opts.db              Database handle.
 * @param opts.logger          Logger forwarded to `bulkLeave`.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket whose expired memberships are left.
 * @returns the number of memberships actually flipped to `left`.
 */
async function reconcileBucketTtlLeaves(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket } = opts;

  const now = new Date();
  const pastDeadline = and(
    eq(bucketMemberships.bucketId, bucket.id),
    eq(bucketMemberships.status, "active"),
    isNull(bucketMemberships.deletedAt),
    isNotNull(bucketMemberships.maxDwellAt),
    lte(bucketMemberships.maxDwellAt, now),
    isNull(contacts.deletedAt),
  );

  const overdue = await db
    .select({ userId: bucketMemberships.userId })
    .from(bucketMemberships)
    .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
    .where(pastDeadline);

  if (overdue.length === 0) {
    return 0;
  }

  const userIds = overdue.map((membership) => membership.userId);
  return bulkLeave({ db, logger, journeyRegistry, bucket, userIds });
}
478
+
479
/**
 * Bulk compare-and-swap a set of active members to `left`, then emit
 * `bucket:left` for each row the UPDATE actually flipped. Emission is gated on
 * `RETURNING`: the loser of a concurrent race mutates zero rows and never emits
 * (Section 6.3).
 *
 * minDwell defers: when the bucket declares `minDwell`, only rows whose
 * `enteredAt` is at least `minDwell` old are flipped; younger rows are left
 * untouched for a later tick.
 *
 * The user ids are de-duplicated and processed in fixed-size chunks, because
 * callers may pass an unbounded list and each id becomes one bind parameter of
 * the `IN (...)` list (PostgreSQL caps the parameters of one statement at
 * 65535). The CAS is therefore atomic per chunk rather than for the whole set;
 * the per-row guarantee (`status = 'active'` guard + `RETURNING`) is unchanged.
 *
 * A failing emit for one row is logged and does not prevent the emits for the
 * remaining rows: those rows are already `left` and would never be selected
 * again, so aborting the loop would silently drop their transitions.
 *
 * @param opts.db              Database handle.
 * @param opts.logger          Logger for emit failures and the emit itself.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket the members leave.
 * @param opts.userIds         Candidate leavers; may be empty or contain
 *                             duplicates.
 * @returns the number of memberships actually flipped to `left`.
 */
async function bulkLeave(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
  userIds: string[];
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket, userIds } = opts;

  // Ids per UPDATE statement; keeps the IN-list far below the parameter cap.
  const LEAVE_CHUNK_SIZE = 1000;

  const uniqueIds = [...new Set(userIds)];
  if (uniqueIds.length === 0) return 0;

  const dwellMs = bucket.minDwell ? durationToMs(bucket.minDwell) : 0;

  let flippedTotal = 0;
  for (let start = 0; start < uniqueIds.length; start += LEAVE_CHUNK_SIZE) {
    const chunk = uniqueIds.slice(start, start + LEAVE_CHUNK_SIZE);
    const now = new Date();
    const dwellCutoff = dwellMs > 0 ? new Date(now.getTime() - dwellMs) : null;

    // Flip only rows that are still active (the CAS guard) and, when minDwell
    // applies, old enough. RETURNING carries userEmail + entryCount for the
    // emit.
    const flipped = await db
      .update(bucketMemberships)
      .set({
        status: "left",
        leftAt: now,
        lastEvaluatedAt: now,
        updatedAt: now,
      })
      .where(
        and(
          eq(bucketMemberships.bucketId, bucket.id),
          eq(bucketMemberships.status, "active"),
          isNull(bucketMemberships.deletedAt),
          inArray(bucketMemberships.userId, chunk),
          // minDwell: only leave rows that have existed at least minDwell.
          dwellCutoff ? lte(bucketMemberships.enteredAt, dwellCutoff) : undefined,
        ),
      )
      .returning({
        id: bucketMemberships.id,
        userId: bucketMemberships.userId,
        userEmail: bucketMemberships.userEmail,
        entryCount: bucketMemberships.entryCount,
      });

    flippedTotal += flipped.length;

    for (const row of flipped) {
      try {
        await emitBucketTransition({
          db,
          registry: journeyRegistry,
          hatchet,
          logger,
          kind: "left",
          bucket,
          userId: row.userId,
          userEmail: row.userEmail,
          epoch: row.entryCount,
          source: "reconcile",
        });
      } catch (err) {
        logger.error("Bucket left emit failed", {
          bucketId: bucket.id,
          membershipId: row.id,
          userId: row.userId,
          error: err instanceof Error ? err.message : String(err),
        });
      }
    }
  }

  return flippedTotal;
}
543
+
544
/**
 * reconcileJoins for absence buckets: materializes NEW members the real-time
 * path cannot see.
 *
 * Only a single `event` criterion with `check: "not_exists"` is handled; every
 * other shape returns 0 without querying. Candidates are non-deleted contacts
 * that have NOT fired the event (inside the window when `within` is set) and
 * have no active, non-deleted membership of the bucket. At most `BATCH_SIZE`
 * candidates are taken per call, and each is inserted through
 * `reconcileJoinOne`.
 *
 * @param opts.db              Database handle.
 * @param opts.logger          Logger forwarded to the emit.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket being reconciled; `criteria` must be set.
 * @returns the number of candidates that actually became members.
 */
async function reconcileBucketJoins(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket } = opts;
  const criteria = bucket.criteria as ConditionEval;

  // Composite and positive criteria are caught real-time on event arrival;
  // only the single-event absence shape has a tractable set-based JOIN query.
  if (criteria.type !== "event") return 0;
  if (criteria.check !== "not_exists") return 0;

  let windowStart: Date | null = null;
  if (criteria.within) {
    windowStart = new Date(Date.now() - durationToMs(criteria.within));
  }

  // Users who DID fire the event (inside the window) — excluded below.
  const firedFilters = [eq(userEvents.event, criteria.eventName)];
  if (windowStart) {
    firedFilters.push(gte(userEvents.occurredAt, windowStart));
  }
  const present = db
    .select({ userId: userEvents.userId })
    .from(userEvents)
    .where(and(...firedFilters))
    .groupBy(userEvents.userId)
    .as("present");

  // Users who already hold an active membership — excluded below.
  const activeMembers = db
    .select({ userId: bucketMemberships.userId })
    .from(bucketMemberships)
    .where(
      and(
        eq(bucketMemberships.bucketId, bucket.id),
        eq(bucketMemberships.status, "active"),
        isNull(bucketMemberships.deletedAt),
      ),
    )
    .as("active_members");

  // Anti-joins: keep live contacts with no row on either LEFT JOIN side.
  const candidates = await db
    .select({
      userId: contacts.externalId,
      email: contacts.email,
    })
    .from(contacts)
    .leftJoin(present, eq(present.userId, contacts.externalId))
    .leftJoin(activeMembers, eq(activeMembers.userId, contacts.externalId))
    .where(
      and(
        isNull(contacts.deletedAt),
        isNull(present.userId),
        isNull(activeMembers.userId),
      ),
    )
    .limit(BATCH_SIZE);

  let joinedCount = 0;
  for (const { userId, email } of candidates) {
    const didJoin = await reconcileJoinOne({
      db,
      logger,
      journeyRegistry,
      bucket,
      userId,
      userEmail: email ?? null,
    });
    if (didJoin) {
      joinedCount += 1;
    }
  }
  return joinedCount;
}
628
+
629
/**
 * Inserts ONE reconcile-discovered membership and emits `bucket:entered` for
 * it.
 *
 * The entry count (also used as the emit epoch) is 1 + the number of existing
 * membership rows for this user and bucket. The insert uses
 * `onConflictDoNothing`, and the emit is gated on the insert returning exactly
 * one row, so a conflicting insert emits nothing.
 *
 * @param opts.db              Database handle.
 * @param opts.logger          Logger forwarded to the emit.
 * @param opts.journeyRegistry Journey registry forwarded to the emit.
 * @param opts.bucket          Bucket the user joins.
 * @param opts.userId          User joining the bucket.
 * @param opts.userEmail       User email stored on the row and forwarded to
 *                             the emit; may be null.
 * @returns true when a row was inserted and the transition emitted.
 */
async function reconcileJoinOne(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
  userId: string;
  userEmail: string | null;
}): Promise<boolean> {
  const { db, logger, journeyRegistry, bucket, userId, userEmail } = opts;

  const priorRows = await db
    .select({ priorCount: sql<number>`count(*)::int` })
    .from(bucketMemberships)
    .where(
      and(
        eq(bucketMemberships.userId, userId),
        eq(bucketMemberships.bucketId, bucket.id),
      ),
    );
  const epoch = Number(priorRows[0]?.priorCount ?? 0) + 1;

  // Deadlines for the new row, both measured from the moment of the insert.
  const expiresAt = computeReconcileExpiresAt(bucket);
  let maxDwellAt: Date | null = null;
  if (bucket.maxDwell) {
    maxDwellAt = new Date(Date.now() + durationToMs(bucket.maxDwell));
  }

  const inserted = await db
    .insert(bucketMemberships)
    .values({
      userId,
      userEmail,
      bucketId: bucket.id,
      status: "active",
      source: "reconcile",
      entryCount: epoch,
      expiresAt,
      maxDwellAt,
      lastEvaluatedAt: new Date(),
    })
    .onConflictDoNothing()
    .returning({ id: bucketMemberships.id });

  // No row returned: the insert conflicted, so there is nothing to emit.
  const wonInsert = inserted.length === 1;
  if (!wonInsert) {
    return false;
  }

  await emitBucketTransition({
    db,
    registry: journeyRegistry,
    hatchet,
    logger,
    kind: "entered",
    bucket,
    userId,
    userEmail,
    epoch,
    source: "reconcile",
  });

  return true;
}
692
+
693
/**
 * Computes the `expiresAt` stored on a reconcile-created membership.
 *
 * Returns now + the first `within` window found in the criteria when the
 * bucket is flagged `timeBased` or `fastExpiry`; returns null when the bucket
 * has no criteria, carries neither flag, or has no `within` window.
 *
 * NOTE(review): only the explicit `timeBased` flag is honoured here, whereas
 * `isTimeBased` also infers it from a `within` window — confirm this matches
 * how the real-time path computes `expiresAt`.
 */
function computeReconcileExpiresAt(bucket: BucketMeta): Date | null {
  const { criteria, timeBased, fastExpiry } = bucket;
  if (!criteria || !(timeBased || fastExpiry)) {
    return null;
  }

  const within = firstWithin(criteria);
  return within ? new Date(Date.now() + durationToMs(within)) : null;
}
701
+
702
/**
 * Tells whether a clock alone can change membership of the bucket's criteria:
 * true when the bucket is flagged `timeBased`, or when its criteria tree
 * contains at least one `within` window.
 */
function isTimeBased(bucket: BucketMeta): boolean {
  if (bucket.timeBased) {
    return true;
  }
  return bucket.criteria ? firstWithin(bucket.criteria) != null : false;
}
708
+
709
/**
 * Returns the first `within` window found in a criteria tree, visiting nodes
 * depth-first in declaration order, or null when the tree has none.
 *
 * Only `event` nodes can carry a window and only `composite` nodes are
 * descended into; every other node type is skipped.
 */
function firstWithin(criteria: ConditionEval): DurationObject | null {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending: ConditionEval[] = [criteria];

  for (;;) {
    const node = pending.pop();
    if (!node) {
      return null;
    }

    if (node.type === "event" && node.within) {
      return node.within;
    }

    if (node.type === "composite") {
      const children = [...node.conditions].reverse();
      for (const child of children) {
        pending.push(child);
      }
    }
  }
}