@hogsend/engine 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,15 +3,17 @@ import type { JsonObject } from "@hatchet-dev/typescript-sdk/v1/types.js";
3
3
  import {
4
4
  type BucketMeta,
5
5
  type ConditionEval,
6
- type DurationObject,
6
+ collectPropertyNames,
7
7
  durationToMs,
8
8
  evaluateCondition,
9
9
  } from "@hogsend/core";
10
10
  import {
11
+ bucketConfigs,
11
12
  bucketMemberships,
12
13
  contacts,
13
14
  createDatabase,
14
15
  type Database,
16
+ importJobs,
15
17
  userEvents,
16
18
  } from "@hogsend/db";
17
19
  import {
@@ -22,21 +24,29 @@ import {
22
24
  isNotNull,
23
25
  isNull,
24
26
  lte,
27
+ or,
25
28
  sql,
26
29
  } from "drizzle-orm";
30
+ import { shouldEmitJoin } from "../buckets/check-membership.js";
31
+ import {
32
+ BUCKET_EVENT_PREFIX,
33
+ computeExpiresAt,
34
+ computeMaxDwellAt,
35
+ countPriorMemberships,
36
+ firstWithin,
37
+ shouldLeaveByCount,
38
+ } from "../buckets/membership-epoch.js";
27
39
  import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
28
40
  import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
29
41
  import { emitBucketTransition } from "../lib/bucket-emit.js";
30
42
  import { hatchet } from "../lib/hatchet.js";
31
43
  import type { Logger } from "../lib/logger.js";
32
44
  import { createLogger } from "../lib/logger.js";
45
+ import { FIRST_TIME_FORMAT } from "./bucket-backfill.js";
33
46
 
34
47
  /** Chunk size for the composite-only per-member re-evaluation path (Section 6.4). */
35
48
  const BATCH_SIZE = 500;
36
49
 
37
- /** The reserved prefix every bucket transition event carries. */
38
- const BUCKET_EVENT_PREFIX = "bucket:";
39
-
40
50
  /**
41
51
  * Engine-owned cron reconciliation for TIME-BASED bucket leaves (Section 6.4).
42
52
  *
@@ -103,11 +113,16 @@ export const bucketReconcileTask = hatchet.task({
103
113
  bucket,
104
114
  });
105
115
 
106
- // reconcileJoins (default off) materializes absence joins the
107
- // real-time path cannot see (e.g. went-dormant — the
108
- // NOT-EXISTS-within-window case). Kept off for non-absence buckets to
109
- // bound cost (Section 6.4).
110
- if (bucket.reconcileJoins) {
116
+ // reconcileJoins materializes absence joins the real-time path
117
+ // cannot see (e.g. went-dormant — the NOT-EXISTS-within-window case).
118
+ // An explicit `reconcileJoins` overrides; when omitted it is INFERRED
119
+ // true ONLY for the two SAFE set-based shapes — a single-event windowed
120
+ // `not_exists` and the lapsed-active composite (Fix #3) — whose SQL
121
+ // candidate set is exact. Other absence composites (OR-of-absence,
122
+ // absence + property/count) need an explicit opt-in and run the
123
+ // BATCH_SIZE-bounded per-member confirm, keeping the sweep O(active
124
+ // members) for everything else (Section 6.4).
125
+ if (shouldReconcileJoins(bucket)) {
111
126
  joined += await reconcileBucketJoins({
112
127
  db,
113
128
  logger,
@@ -120,7 +135,7 @@ export const bucketReconcileTask = hatchet.task({
120
135
  // Unconditional max-dwell TTL: force-leave members past
121
136
  // enteredAt + maxDwell REGARDLESS of whether criteria still match. Runs
122
137
  // for time-based AND pure-property dynamic buckets. Re-entry afterwards
123
- // is governed by the bucket's `reentry` policy (per-bucket time-box vs
138
+ // is governed by the bucket's `entryLimit` policy (per-bucket time-box vs
124
139
  // periodic flush).
125
140
  if (bucket.maxDwell) {
126
141
  reconciled += await reconcileBucketTtlLeaves({
@@ -187,10 +202,16 @@ export const bucketExpiryTask = hatchet.durableTask({
187
202
  }
188
203
 
189
204
  // On wake, re-confirm the criteria still says "should leave". If the user
190
- // re-qualified (e.g. fired the event again), do not leave.
205
+ // re-qualified (e.g. fired the event again), do not leave. Load merged
206
+ // contact properties iff a property leg needs them so property predicates
207
+ // match the real-time path instead of evaluating against undefined.
208
+ const journeyContext =
209
+ collectPropertyNames(bucket.criteria).length > 0
210
+ ? await loadContactProperties(db, input.userId)
211
+ : {};
191
212
  const stillMember = await evaluateCondition({
192
213
  condition: bucket.criteria,
193
- ctx: { db, userId: input.userId, journeyContext: {} },
214
+ ctx: { db, userId: input.userId, journeyContext },
194
215
  });
195
216
  if (stillMember) {
196
217
  return { status: "skipped", reason: "still_member" };
@@ -334,46 +355,6 @@ async function selectEventLeavers(
334
355
  .map((r) => r.userId);
335
356
  }
336
357
 
337
- /**
338
- * SHOULD-LEAVE decision from the windowed count, per criterion shape (Section
339
- * 6.4). A member is a leaver when the criterion is NO LONGER satisfied.
340
- */
341
- function shouldLeaveByCount(
342
- criteria: Extract<ConditionEval, { type: "event" }>,
343
- windowedCount: number,
344
- ): boolean {
345
- switch (criteria.check) {
346
- case "not_exists":
347
- // Absence bucket: SHOULD LEAVE when an event REAPPEARS in the window.
348
- return windowedCount > 0;
349
- case "exists":
350
- // Positive existence: SHOULD LEAVE when NOT EXISTS in the window.
351
- return windowedCount === 0;
352
- case "count": {
353
- // SHOULD LEAVE when the windowed count NO LONGER satisfies the operator.
354
- if (!criteria.operator || criteria.value === undefined) {
355
- return windowedCount === 0;
356
- }
357
- switch (criteria.operator) {
358
- case "gt":
359
- return !(windowedCount > criteria.value);
360
- case "gte":
361
- return !(windowedCount >= criteria.value);
362
- case "lt":
363
- return !(windowedCount < criteria.value);
364
- case "lte":
365
- return !(windowedCount <= criteria.value);
366
- case "eq":
367
- return !(windowedCount === criteria.value);
368
- default:
369
- return false;
370
- }
371
- }
372
- default:
373
- return false;
374
- }
375
- }
376
-
377
358
  /**
378
359
  * Composite/multi-condition time-based fallback — chunked per-member
379
360
  * `evaluateCondition` keyed on `lastEvaluatedAt` so the oldest-evaluated members
@@ -388,9 +369,14 @@ async function reconcileCompositeLeaves(opts: {
388
369
  const { db, logger, journeyRegistry, bucket } = opts;
389
370
  const criteria = bucket.criteria as ConditionEval;
390
371
 
372
+ // Pull contact properties alongside members iff a property leg needs them, so
373
+ // property predicates in a composite evaluate against MERGED contact state —
374
+ // the SAME state the real-time path reads — instead of always-undefined.
375
+ const needsProps = collectPropertyNames(criteria).length > 0;
391
376
  const members = await db
392
377
  .select({
393
378
  userId: bucketMemberships.userId,
379
+ properties: contacts.properties,
394
380
  })
395
381
  .from(bucketMemberships)
396
382
  .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
@@ -409,9 +395,12 @@ async function reconcileCompositeLeaves(opts: {
409
395
  const evaluatedIds: string[] = [];
410
396
  for (const member of members) {
411
397
  evaluatedIds.push(member.userId);
398
+ const journeyContext = needsProps
399
+ ? ((member.properties as Record<string, unknown> | null) ?? {})
400
+ : {};
412
401
  const isMember = await evaluateCondition({
413
402
  condition: criteria,
414
- ctx: { db, userId: member.userId, journeyContext: {} },
403
+ ctx: { db, userId: member.userId, journeyContext },
415
404
  });
416
405
  if (!isMember) leaverIds.push(member.userId);
417
406
  }
@@ -440,7 +429,7 @@ async function reconcileCompositeLeaves(opts: {
440
429
  * members whose `maxDwellAt` deadline has passed (GDPR: live contacts only) and
441
430
  * force-leaves them through the shared `bulkLeave` CAS — with NO criteria
442
431
  * re-evaluation, unlike the criteria SHOULD-LEAVE path. Emits `bucket:left`;
443
- * whether the user can re-join afterwards is governed by the bucket's `reentry`
432
+ * whether the user can re-join afterwards is governed by the bucket's `entryLimit`
444
433
  * policy on their next qualifying event (the per-bucket time-box vs flush knob).
445
434
  */
446
435
  async function reconcileBucketTtlLeaves(opts: {
@@ -543,10 +532,41 @@ async function bulkLeave(opts: {
543
532
 
544
533
  /**
545
534
  * reconcileJoins (absence buckets): materialize NEW members the real-time path
546
- * cannot see. For a `not_exists within W` (absence) criterion, a user JOINS when
547
- * they have NO such event in the window — i.e. the set-based JOIN query. Inserts a
548
- * fresh active row (RETURNING-gated, partial-active unique index) and emits
549
- * `bucket:entered` for each genuine new member.
535
+ * cannot see — a user who STOPS doing X fires no event, so only the clock can
536
+ * enroll them. ONE bounded (BATCH_SIZE per tick) path handles every shape, but
537
+ * the per-candidate handling splits on whether the SQL candidate set is EXACT:
538
+ *
539
+ * - SET-BASED / EXACT (no per-member confirm, Fix #3) — the SAFE shapes the
540
+ * engine auto-infers `reconcileJoins` on:
541
+ * (a) SINGLE-EVENT `not_exists within W` — the `present` windowed anti-join
542
+ * makes the candidate query exact, and
543
+ * (b) the LAPSED-ACTIVE composite `all(event(X).exists(),
544
+ * event(X).within(W).not_exists())` — ever-fired X satisfies the exists()
545
+ * leg and the present-in-X's-window anti-join satisfies the not_exists()
546
+ * leg, so EVERY returned row is a true matcher.
547
+ * Because each matcher becomes an active member, the next tick excludes it →
548
+ * the `externalId asc` page advances naturally and the scan cannot starve.
549
+ * - PER-MEMBER CONFIRM (non-exact superset) — any OTHER absence-containing
550
+ * composite (an OR of absence legs, or absence mixed with property/count legs)
551
+ * reached ONLY via an EXPLICIT `reconcileJoins: true`. The candidate query is a
552
+ * cheap superset, so each candidate is confirmed with `evaluateCondition`
553
+ * (correct AND/OR) before it is materialized. This path is BATCH_SIZE-bounded
554
+ * per tick: a wide non-matching prefix can keep genuine matchers off the page
555
+ * indefinitely (a clean cursor would require a per-candidate examined-stamp =
556
+ * a schema change), so the bound is LOGGED once per sweep rather than silently
557
+ * starving (Fix #3).
558
+ *
559
+ * In all cases the candidate set is the exists-ever floor over ALL windowed
560
+ * `not_exists` legs (the UNION of their ever-fired sets — so an OR of absence
561
+ * legs never silently drops a user who only fired the OTHER leg), MINUS users
562
+ * present in EVERY absence leg's window (always-safe to exclude: such a user
563
+ * fails every not_exists leg, so they qualify via none — this drops the
564
+ * currently-active prefix so the bounded scan reaches genuinely-dormant users
565
+ * and converges), MINUS current active members. Deterministic `externalId asc`
566
+ * pages the cohort across ticks (convergence in ceil(candidates / BATCH_SIZE)).
567
+ *
568
+ * Composite NON-absence and positive shapes are caught real-time on event
569
+ * arrival, so they short-circuit to 0 here.
550
570
  */
551
571
  async function reconcileBucketJoins(opts: {
552
572
  db: Database;
@@ -557,28 +577,39 @@ async function reconcileBucketJoins(opts: {
557
577
  const { db, logger, journeyRegistry, bucket } = opts;
558
578
  const criteria = bucket.criteria as ConditionEval;
559
579
 
560
- // Only single-event absence criteria have a tractable set-based JOIN query; the
561
- // composite/positive cases are already caught real-time on event arrival.
562
- if (criteria.type !== "event" || criteria.check !== "not_exists") {
580
+ // First-deploy guard (Fix #2): the JOIN path must NOT emit `bucket:entered`
581
+ // for historically-dormant users while a brand-new bucket's first-time
582
+ // backfill is still claiming them silently. The backfill materializes
583
+ // historical members WITHOUT live emission (the Customer.io rule); if the
584
+ // cron's absence-join scan runs concurrently it would re-discover the SAME
585
+ // dormant cohort and emit for them — a historical blast. So skip the join
586
+ // path entirely until the first-time backfill has persisted its
587
+ // criteriaHash. The transition skipped→active-joins happens when the backfill
588
+ // task finishes and calls persistCriteriaHash (bucket-backfill.ts), at which
589
+ // point bucket_configs.criteriaHash is non-null and no first-time job is in
590
+ // flight. (The LEAVE + maxDwell TTL paths are unaffected — see the caller.)
591
+ if (await firstTimeBackfillIncomplete(db, bucket)) {
592
+ logger.info("Bucket join reconcile skipped (first-time backfill pending)", {
593
+ bucketId: bucket.id,
594
+ });
563
595
  return 0;
564
596
  }
565
597
 
566
- const cutoff = criteria.within
567
- ? new Date(Date.now() - durationToMs(criteria.within))
568
- : null;
569
-
570
- // Users who have fired the event inside the window (they are NOT candidates).
571
- const present = db
572
- .select({ userId: userEvents.userId })
598
+ // Every windowed not_exists leg (the shapes a clock can JOIN). No absence leg →
599
+ // nothing for the cron to materialize (positive shapes are caught live).
600
+ const absenceLegs = collectAbsenceLegs(criteria);
601
+ if (absenceLegs.length === 0) return 0;
602
+
603
+ // Exists-ever floor: contacts who fired ANY absence-leg event AT LEAST ONCE
604
+ // (no window). UNIONing across legs keeps an OR-of-absence bucket from
605
+ // dropping a user who only ever fired one of the legs. Excludes brand-new
606
+ // never-active signups and bounds the scan to the once-active cohort.
607
+ const everFiredEvents = Array.from(new Set(absenceLegs.map((l) => l.event)));
608
+ const everFired = db
609
+ .selectDistinct({ userId: userEvents.userId })
573
610
  .from(userEvents)
574
- .where(
575
- and(
576
- eq(userEvents.event, criteria.eventName),
577
- cutoff ? gte(userEvents.occurredAt, cutoff) : undefined,
578
- ),
579
- )
580
- .groupBy(userEvents.userId)
581
- .as("present");
611
+ .where(inArray(userEvents.event, everFiredEvents))
612
+ .as("ever_fired");
582
613
 
583
614
  // Users who already have an active membership (skip — they are members).
584
615
  const activeMembers = db
@@ -593,26 +624,93 @@ async function reconcileBucketJoins(opts: {
593
624
  )
594
625
  .as("active_members");
595
626
 
596
- // Candidates: live contacts NOT present in the window AND not already members.
597
- const candidates = await db
627
+ // Present-in-ALL-windows exclusion: a user who fired EVERY absence-leg event
628
+ // inside that leg's window fails every not_exists leg, so they cannot qualify
629
+ // (AND or OR). Dropping them is always-safe AND breaks the prefix-lock — the
630
+ // currently-active cohort (which fails the criteria anyway) is excluded so the
631
+ // bounded scan reaches real dormant users. For a single absence leg this is
632
+ // exactly the single-event `present` anti-join; the SQL is then exact.
633
+ //
634
+ // The exclusion is only applied when every leg has a DISTINCT event, so the
635
+ // `count(distinct event) = #legs` test exactly means "present in each leg's
636
+ // window". Two legs on the SAME event with different windows would let the
637
+ // wider window over-exclude a user who is absent in the tighter (joinable)
638
+ // window, so that pathological shape skips the exclusion and relies on the
639
+ // per-member confirm + paging (no over-exclusion, just no early prune).
640
+ const distinctLegEvents = new Set(absenceLegs.map((l) => l.event));
641
+ const canExclude = distinctLegEvents.size === absenceLegs.length;
642
+ const presentInAll = canExclude
643
+ ? selectPresentInAllWindows(db, absenceLegs)
644
+ : null;
645
+
646
+ const baseQuery = db
598
647
  .select({
599
648
  userId: contacts.externalId,
600
649
  email: contacts.email,
601
650
  })
602
651
  .from(contacts)
603
- .leftJoin(present, eq(present.userId, contacts.externalId))
604
- .leftJoin(activeMembers, eq(activeMembers.userId, contacts.externalId))
605
- .where(
606
- and(
607
- isNull(contacts.deletedAt),
608
- isNull(present.userId),
609
- isNull(activeMembers.userId),
610
- ),
611
- )
652
+ .innerJoin(everFired, eq(everFired.userId, contacts.externalId))
653
+ .leftJoin(activeMembers, eq(activeMembers.userId, contacts.externalId));
654
+
655
+ const candidates = await (presentInAll
656
+ ? baseQuery
657
+ .leftJoin(presentInAll, eq(presentInAll.userId, contacts.externalId))
658
+ .where(
659
+ and(
660
+ isNull(contacts.deletedAt),
661
+ isNull(activeMembers.userId),
662
+ isNull(presentInAll.userId),
663
+ ),
664
+ )
665
+ : baseQuery.where(
666
+ and(isNull(contacts.deletedAt), isNull(activeMembers.userId)),
667
+ )
668
+ )
669
+ .orderBy(sql`${contacts.externalId} asc`)
612
670
  .limit(BATCH_SIZE);
613
671
 
672
+ // SET-BASED / EXACT shapes (Fix #3) — every candidate row is a true matcher,
673
+ // so the per-member confirm is skipped entirely:
674
+ // (a) a single absence leg makes the candidate query exact (present-in-all =
675
+ // the one leg's present anti-join), and
676
+ // (b) the lapsed-active composite — ever-fired X satisfies the exists() leg
677
+ // and the present-in-X-window exclusion satisfies the not_exists() leg.
678
+ // Any OTHER composite (OR-of-absence, absence + property/count) is a non-exact
679
+ // superset that needs the full `evaluateCondition` confirm for correct AND/OR.
680
+ const exact =
681
+ (criteria.type === "event" && absenceLegs.length === 1) ||
682
+ isLapsedActiveComposite(criteria) != null;
683
+
684
+ // Merged contact properties feed property legs in the per-member confirm so
685
+ // an absence+property composite evaluates the SAME way it does on the
686
+ // real-time path (which reads merged contact state). Empty when no confirm
687
+ // runs (exact path) or no property leg exists.
688
+ const needsProps = !exact && collectPropertyNames(criteria).length > 0;
689
+
690
+ // The non-exact per-member path is BATCH_SIZE-bounded per tick with no
691
+ // examined-cursor (a clean cursor would need a schema change). Log the bound
692
+ // ONCE per sweep so a wide non-matching prefix that delays genuine matchers is
693
+ // visible rather than a silent starve (Fix #3).
694
+ if (!exact && candidates.length >= BATCH_SIZE) {
695
+ logger.warn(
696
+ "Bucket composite-join confirm is bounded to BATCH_SIZE/tick (explicit reconcileJoins); matchers behind a wide non-matching prefix may take multiple ticks to enroll",
697
+ { bucketId: bucket.id, batchSize: BATCH_SIZE },
698
+ );
699
+ }
700
+
614
701
  let joined = 0;
615
702
  for (const candidate of candidates) {
703
+ if (!exact) {
704
+ const journeyContext = needsProps
705
+ ? await loadContactProperties(db, candidate.userId)
706
+ : {};
707
+ const isMember = await evaluateCondition({
708
+ condition: criteria,
709
+ ctx: { db, userId: candidate.userId, journeyContext },
710
+ });
711
+ if (!isMember) continue;
712
+ }
713
+
616
714
  const transitioned = await reconcileJoinOne({
617
715
  db,
618
716
  logger,
@@ -626,6 +724,90 @@ async function reconcileBucketJoins(opts: {
626
724
  return joined;
627
725
  }
628
726
 
727
+ /**
728
+ * True while a bucket's first-time backfill has NOT completed — the gate that
729
+ * keeps the cron JOIN path from emitting a historical blast on first deploy
730
+ * (Fix #2). Two signals, either of which means "not yet safe to join-reconcile":
731
+ *
732
+ * 1. `bucket_configs.criteriaHash IS NULL` (or no row at all) — the first-time
733
+ * backfill task persists this hash on completion (persistCriteriaHash in
734
+ * bucket-backfill.ts), so a null/absent hash means the backfill has not yet
735
+ * finished claiming the historical cohort silently.
736
+ * 2. A first-time backfill `import_jobs` row is in flight — `fileName =
737
+ * bucket.id AND format = FIRST_TIME_FORMAT AND status IN
738
+ * ('pending','processing')`. This covers the boot window AFTER a prior run
739
+ * persisted a hash but BEFORE a freshly-enqueued first-time job runs (and
740
+ * the general in-flight case), so a concurrent cron tick never races the
741
+ * backfill's silent materialization.
742
+ *
743
+ * The transition skipped→active-joins is monotonic: once the backfill completes,
744
+ * the hash is non-null AND its job leaves the in-flight set, so the next cron
745
+ * tick proceeds with the absence-join scan as normal.
746
+ */
747
+ async function firstTimeBackfillIncomplete(
748
+ db: Database,
749
+ bucket: BucketMeta,
750
+ ): Promise<boolean> {
751
+ // (1) criteriaHash not yet persisted → backfill hasn't finished.
752
+ const config = await db.query.bucketConfigs.findFirst({
753
+ where: eq(bucketConfigs.bucketId, bucket.id),
754
+ });
755
+ if (!config || config.criteriaHash == null) return true;
756
+
757
+ // (2) a first-time backfill job is still pending/processing for this bucket.
758
+ const inFlight = await db
759
+ .select({ id: importJobs.id })
760
+ .from(importJobs)
761
+ .where(
762
+ and(
763
+ eq(importJobs.fileName, bucket.id),
764
+ eq(importJobs.format, FIRST_TIME_FORMAT),
765
+ inArray(importJobs.status, ["pending", "processing"]),
766
+ ),
767
+ )
768
+ .limit(1);
769
+ return inFlight.length > 0;
770
+ }
771
+
772
+ /**
773
+ * A subquery of users who fired EVERY absence leg's event inside that leg's
774
+ * rolling window — the intersection across legs. Such a user fails every
775
+ * not_exists leg, so they qualify via none and are always-safe to exclude from
776
+ * candidates. PRECONDITION: every leg has a DISTINCT event (the caller enforces
777
+ * this), so `count(distinct event) = #legs` exactly means "present in each leg's
778
+ * window".
779
+ */
780
+ function selectPresentInAllWindows(db: Database, legs: AbsenceLeg[]) {
781
+ // OR together each leg's "fired this event inside its window" predicate, then
782
+ // require a distinct match for EVERY leg (count(distinct event) = #legs).
783
+ const perLeg = legs.map((leg) =>
784
+ and(
785
+ eq(userEvents.event, leg.event),
786
+ leg.cutoff ? gte(userEvents.occurredAt, leg.cutoff) : undefined,
787
+ ),
788
+ );
789
+ return db
790
+ .select({ userId: userEvents.userId })
791
+ .from(userEvents)
792
+ .where(or(...perLeg))
793
+ .groupBy(userEvents.userId)
794
+ .having(sql`count(distinct ${userEvents.event}) >= ${legs.length}`)
795
+ .as("present_all");
796
+ }
797
+
798
+ /** The merged stored properties of a contact (for property-leg evaluation). */
799
+ async function loadContactProperties(
800
+ db: Database,
801
+ userId: string,
802
+ ): Promise<Record<string, unknown>> {
803
+ const [contact] = await db
804
+ .select({ properties: contacts.properties })
805
+ .from(contacts)
806
+ .where(eq(contacts.externalId, userId))
807
+ .limit(1);
808
+ return (contact?.properties as Record<string, unknown> | null) ?? {};
809
+ }
810
+
629
811
  /**
630
812
  * Insert ONE reconcile-discovered join (RETURNING-gated on the partial-active
631
813
  * unique index) and emit `bucket:entered`. entryCount = 1 + prior memberships.
@@ -640,16 +822,9 @@ async function reconcileJoinOne(opts: {
640
822
  }): Promise<boolean> {
641
823
  const { db, logger, journeyRegistry, bucket, userId, userEmail } = opts;
642
824
 
643
- const [counted] = await db
644
- .select({ priorCount: sql<number>`count(*)::int` })
645
- .from(bucketMemberships)
646
- .where(
647
- and(
648
- eq(bucketMemberships.userId, userId),
649
- eq(bucketMemberships.bucketId, bucket.id),
650
- ),
651
- );
652
- const priorCount = Number(counted?.priorCount ?? 0);
825
+ // entryCount ordinal = 1 + ALL prior memberships (active + left). Shared with
826
+ // the real-time join path so the ordinal never drifts between the two writers.
827
+ const priorCount = await countPriorMemberships(db, bucket.id, userId);
653
828
  const epoch = priorCount + 1;
654
829
 
655
830
  const inserted = await db
@@ -661,10 +836,8 @@ async function reconcileJoinOne(opts: {
661
836
  status: "active",
662
837
  source: "reconcile",
663
838
  entryCount: epoch,
664
- expiresAt: computeReconcileExpiresAt(bucket),
665
- maxDwellAt: bucket.maxDwell
666
- ? new Date(Date.now() + durationToMs(bucket.maxDwell))
667
- : null,
839
+ expiresAt: computeExpiresAt(bucket),
840
+ maxDwellAt: computeMaxDwellAt(bucket),
668
841
  lastEvaluatedAt: new Date(),
669
842
  })
670
843
  .onConflictDoNothing()
@@ -674,31 +847,34 @@ async function reconcileJoinOne(opts: {
674
847
  return false;
675
848
  }
676
849
 
677
- await emitBucketTransition({
678
- db,
679
- registry: journeyRegistry,
680
- hatchet,
681
- logger,
682
- kind: "entered",
683
- bucket,
684
- userId,
685
- userEmail,
686
- epoch,
687
- source: "reconcile",
688
- });
850
+ // The active row is always written (Studio size must reflect reality) and the
851
+ // epoch always advances via the real insert; only the bucket:entered emission
852
+ // is gated by the entryLimit policy — mirrors the real-time join path so the
853
+ // cron-discovered join cannot bypass entryLimit (Section 6.3).
854
+ if (await shouldEmitJoin({ db, bucket, userId, priorCount })) {
855
+ await emitBucketTransition({
856
+ db,
857
+ registry: journeyRegistry,
858
+ hatchet,
859
+ logger,
860
+ kind: "entered",
861
+ bucket,
862
+ userId,
863
+ userEmail,
864
+ epoch,
865
+ source: "reconcile",
866
+ });
867
+ } else {
868
+ logger.info("Bucket join emit suppressed by entryLimit policy", {
869
+ bucketId: bucket.id,
870
+ userId,
871
+ entryLimit: bucket.entryLimit ?? "unlimited",
872
+ });
873
+ }
689
874
 
690
875
  return true;
691
876
  }
692
877
 
693
- /** now + within for time-based / fastExpiry buckets; null otherwise. */
694
- function computeReconcileExpiresAt(bucket: BucketMeta): Date | null {
695
- if (!bucket.criteria) return null;
696
- if (!bucket.timeBased && !bucket.fastExpiry) return null;
697
- const within = firstWithin(bucket.criteria);
698
- if (!within) return null;
699
- return new Date(Date.now() + durationToMs(within));
700
- }
701
-
702
878
  /** A bucket is time-based if flagged OR its criteria carry a `within` window. */
703
879
  function isTimeBased(bucket: BucketMeta): boolean {
704
880
  if (bucket.timeBased) return true;
@@ -706,16 +882,129 @@ function isTimeBased(bucket: BucketMeta): boolean {
706
882
  return firstWithin(bucket.criteria) != null;
707
883
  }
708
884
 
709
- /** Find the first EventCondition.within in a criteria tree (depth-first). */
710
- function firstWithin(criteria: ConditionEval): DurationObject | null {
711
- if (criteria.type === "event" && criteria.within) {
712
- return criteria.within;
885
+ /**
886
+ * Resolve the JOIN-reconciliation decision for a bucket (tri-state on
887
+ * `reconcileJoins`):
888
+ * - `false` → hard OFF (explicit cost-bounding override; the absence join is
889
+ * skipped even for an absence-shaped bucket).
890
+ * - `true` → explicit ON (unchanged 0.2.0 opt-in behavior).
891
+ * - `undefined` → INFERRED, but ONLY for the two SAFE set-based shapes the cron
892
+ * can JOIN with an EXACT SQL candidate set (every returned row a true matcher,
893
+ * so no per-member confirm → no starvation, Fix #3):
894
+ * (a) a single-event windowed `not_exists` criterion, and
895
+ * (b) the lapsed-active composite `all(event(X).exists(),
896
+ * event(X).within(W).not_exists())` (see {@link isLapsedActiveComposite}).
897
+ * ANY OTHER absence-containing composite (an OR of absence legs, or absence
898
+ * mixed with extra property/count legs) is NOT auto-inferred — its candidate
899
+ * set is a non-exact superset that needs a per-member confirm, which is
900
+ * BATCH_SIZE-bounded per tick and can starve, so it requires an explicit
901
+ * `reconcileJoins: true` opt-in. Non-absence time-based buckets still skip the
902
+ * join scan (their joins are caught real-time).
903
+ */
904
+ function shouldReconcileJoins(bucket: BucketMeta): boolean {
905
+ if (bucket.reconcileJoins === false) return false;
906
+ if (bucket.reconcileJoins === true) return true;
907
+ if (!bucket.criteria) return false;
908
+ return isSafeAbsenceShape(bucket.criteria);
909
+ }
910
+
911
+ /**
912
+ * The two SAFE absence shapes whose cron-JOIN candidate set is EXACT in SQL
913
+ * alone — the only shapes the engine AUTO-INFERS `reconcileJoins` on (Fix #3):
914
+ * (a) a single-event windowed `not_exists` criterion, and
915
+ * (b) the lapsed-active composite (see {@link isLapsedActiveComposite}).
916
+ * Every other absence-containing composite is a non-exact superset and must opt
917
+ * in explicitly (the per-member confirm path is BATCH_SIZE-bounded per tick).
918
+ */
919
+ function isSafeAbsenceShape(criteria: ConditionEval): boolean {
920
+ if (
921
+ criteria.type === "event" &&
922
+ criteria.check === "not_exists" &&
923
+ criteria.within != null
924
+ ) {
925
+ return true;
713
926
  }
714
- if (criteria.type === "composite") {
715
- for (const child of criteria.conditions) {
716
- const found = firstWithin(child);
717
- if (found) return found;
927
+ return isLapsedActiveComposite(criteria) != null;
928
+ }
929
+
930
+ /** The recognized lapsed-active composite (shape (b)): event + window cutoff. */
931
+ interface LapsedActiveShape {
932
+ event: string;
933
+ /** now - within for the not_exists leg's window. */
934
+ cutoff: Date;
935
+ }
936
+
937
+ /**
938
+ * Recognize shape (b) — the flagship "went-dormant" composite — and return its
939
+ * (event, window cutoff), else null. It is a composite AND of EXACTLY two legs on
940
+ * the SAME event X: an `event(X).exists()` ever-fired anchor (no window) and an
941
+ * `event(X).within(W).not_exists()` windowed-absence leg. Because the candidate
942
+ * SQL (ever-fired X, MINUS present-in-X's-window, MINUS active members) satisfies
943
+ * BOTH legs of the AND for every returned row, the set is EXACT — no per-member
944
+ * `evaluateCondition` is needed and the page advances naturally (matchers become
945
+ * active members → excluded next tick), so it cannot starve (Fix #3).
946
+ */
947
+ function isLapsedActiveComposite(
948
+ criteria: ConditionEval,
949
+ ): LapsedActiveShape | null {
950
+ if (
951
+ criteria.type !== "composite" ||
952
+ criteria.operator !== "and" ||
953
+ criteria.conditions.length !== 2
954
+ ) {
955
+ return null;
956
+ }
957
+
958
+ const existsLeg = criteria.conditions.find(
959
+ (c) => c.type === "event" && c.check === "exists" && c.within == null,
960
+ );
961
+ const notExistsLeg = criteria.conditions.find(
962
+ (c) => c.type === "event" && c.check === "not_exists" && c.within != null,
963
+ );
964
+ if (
965
+ existsLeg?.type !== "event" ||
966
+ notExistsLeg?.type !== "event" ||
967
+ existsLeg.eventName !== notExistsLeg.eventName ||
968
+ notExistsLeg.within == null
969
+ ) {
970
+ return null;
971
+ }
972
+
973
+ return {
974
+ event: notExistsLeg.eventName,
975
+ cutoff: new Date(Date.now() - durationToMs(notExistsLeg.within)),
976
+ };
977
+ }
978
+
979
+ /** One windowed `not_exists` leg: the event + its window cutoff instant. */
980
+ interface AbsenceLeg {
981
+ event: string;
982
+ /** now - within for the leg's window; null only if within is somehow unset. */
983
+ cutoff: Date | null;
984
+ }
985
+
986
+ /**
987
+ * Every windowed `not_exists` leg in a criteria tree (depth-first) — "stopped
988
+ * doing X in the last N", the only shapes a clock can materialize a JOIN for. An
989
+ * UNBOUNDED not_exists (no window) is degenerate and not auto-joinable (the
990
+ * schema already rejects pure-unbounded-negation buckets), so it is skipped.
991
+ * Collecting ALL legs (not just the first) keeps an OR-of-absence composite from
992
+ * silently dropping users who only ever fired one of the legs.
993
+ */
994
+ function collectAbsenceLegs(criteria: ConditionEval): AbsenceLeg[] {
995
+ if (criteria.type === "event") {
996
+ if (criteria.check === "not_exists" && criteria.within != null) {
997
+ return [
998
+ {
999
+ event: criteria.eventName,
1000
+ cutoff: new Date(Date.now() - durationToMs(criteria.within)),
1001
+ },
1002
+ ];
718
1003
  }
1004
+ return [];
1005
+ }
1006
+ if (criteria.type === "composite") {
1007
+ return criteria.conditions.flatMap(collectAbsenceLegs);
719
1008
  }
720
- return null;
1009
+ return [];
721
1010
  }