@hogsend/engine 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/buckets/bucket-access.ts +213 -0
- package/src/buckets/bucket-reactions.ts +225 -0
- package/src/buckets/check-membership.ts +35 -15
- package/src/buckets/define-bucket.ts +79 -8
- package/src/buckets/registry.ts +81 -0
- package/src/container.ts +69 -4
- package/src/env.ts +4 -0
- package/src/index.ts +27 -0
- package/src/journeys/journey-context.ts +5 -1
- package/src/lib/boot.ts +12 -2
- package/src/lib/bucket-emit.ts +49 -7
- package/src/lib/contacts.ts +1083 -18
- package/src/lib/email-service-types.ts +8 -0
- package/src/lib/ingestion.ts +63 -33
- package/src/lib/mailer.ts +1 -0
- package/src/lib/preferences.ts +106 -0
- package/src/lib/tracked.ts +159 -34
- package/src/lib/tracking-events.ts +1 -1
- package/src/lists/define-list.ts +81 -0
- package/src/lists/registry-singleton.ts +39 -0
- package/src/lists/registry.ts +95 -0
- package/src/middleware/api-key.ts +33 -7
- package/src/middleware/rate-limit.ts +73 -49
- package/src/routes/_shared.ts +30 -0
- package/src/routes/admin/api-keys.ts +1 -1
- package/src/routes/admin/buckets.ts +39 -9
- package/src/routes/admin/bulk.ts +7 -3
- package/src/routes/admin/contacts.ts +66 -57
- package/src/routes/admin/events.ts +65 -0
- package/src/routes/admin/journeys.ts +3 -1
- package/src/routes/admin/preferences.ts +2 -2
- package/src/routes/admin/reporting.ts +3 -3
- package/src/routes/admin/timeline.ts +5 -2
- package/src/routes/campaigns/index.ts +252 -0
- package/src/routes/contacts/index.ts +188 -0
- package/src/routes/email/preferences.ts +27 -3
- package/src/routes/email/unsubscribe.ts +7 -49
- package/src/routes/emails/index.ts +133 -0
- package/src/routes/events/index.ts +119 -0
- package/src/routes/index.ts +52 -2
- package/src/routes/lists/index.ts +222 -0
- package/src/worker.ts +25 -2
- package/src/workflows/bucket-backfill.ts +122 -22
- package/src/workflows/bucket-reconcile.ts +225 -12
- package/src/workflows/import-contacts.ts +28 -20
- package/src/workflows/send-campaign.ts +589 -0
- package/src/routes/ingest.ts +0 -71
package/src/worker.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import type { DefinedBucket } from "./buckets/define-bucket.js";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
selectBucketReactionTasks,
|
|
4
|
+
selectBucketTasks,
|
|
5
|
+
} from "./buckets/registry.js";
|
|
3
6
|
import type { HogsendClient } from "./container.js";
|
|
4
7
|
import type { DefinedJourney } from "./journeys/define-journey.js";
|
|
5
8
|
import { selectJourneyTasks } from "./journeys/registry.js";
|
|
@@ -14,6 +17,10 @@ import {
|
|
|
14
17
|
import { bucketReconcileTask } from "./workflows/bucket-reconcile.js";
|
|
15
18
|
import { checkAlertsTask } from "./workflows/check-alerts.js";
|
|
16
19
|
import { importContactsTask } from "./workflows/import-contacts.js";
|
|
20
|
+
import {
|
|
21
|
+
reapStuckCampaignsTask,
|
|
22
|
+
sendCampaignTask,
|
|
23
|
+
} from "./workflows/send-campaign.js";
|
|
17
24
|
import { sendEmailTask } from "./workflows/send-email.js";
|
|
18
25
|
|
|
19
26
|
export interface CreateWorkerOptions {
|
|
@@ -46,15 +53,27 @@ export function createWorker(opts: CreateWorkerOptions): Worker {
|
|
|
46
53
|
// reconcile cron (bucketReconcileTask) is ALWAYS registered in baseWorkflows
|
|
47
54
|
// below (Section 10), regardless of fastExpiry.
|
|
48
55
|
const bucketTasks = selectBucketTasks(opts.buckets ?? [], enabledBuckets);
|
|
56
|
+
// Reaction journeys generated by `bucket.on()` desugar to real durable tasks.
|
|
57
|
+
// They are bucket-owned, so they are gated by ENABLED_BUCKETS (NOT
|
|
58
|
+
// ENABLED_JOURNEYS) and wired directly here rather than via the journeys[]
|
|
59
|
+
// array (Section 9). Throws loudly on a reaction-id collision.
|
|
60
|
+
const bucketReactionTasks = selectBucketReactionTasks(
|
|
61
|
+
opts.buckets ?? [],
|
|
62
|
+
enabledBuckets,
|
|
63
|
+
journeys.map((j) => j.meta.id),
|
|
64
|
+
);
|
|
49
65
|
|
|
50
66
|
const baseWorkflows = [
|
|
51
67
|
sendEmailTask,
|
|
52
68
|
importContactsTask,
|
|
69
|
+
sendCampaignTask,
|
|
70
|
+
reapStuckCampaignsTask,
|
|
53
71
|
checkAlertsTask,
|
|
54
72
|
bucketReconcileTask,
|
|
55
73
|
bucketBackfillTask,
|
|
56
74
|
...journeyTasks,
|
|
57
75
|
...bucketTasks,
|
|
76
|
+
...bucketReactionTasks,
|
|
58
77
|
];
|
|
59
78
|
const workflows = [
|
|
60
79
|
...baseWorkflows,
|
|
@@ -94,8 +113,12 @@ export function createWorker(opts: CreateWorkerOptions): Worker {
|
|
|
94
113
|
client: container,
|
|
95
114
|
journeyTasks: journeyTasks.length,
|
|
96
115
|
bucketTasks: bucketTasks.length,
|
|
116
|
+
bucketReactionTasks: bucketReactionTasks.length,
|
|
97
117
|
builtinTasks:
|
|
98
|
-
baseWorkflows.length -
|
|
118
|
+
baseWorkflows.length -
|
|
119
|
+
journeyTasks.length -
|
|
120
|
+
bucketTasks.length -
|
|
121
|
+
bucketReactionTasks.length,
|
|
99
122
|
});
|
|
100
123
|
|
|
101
124
|
// Publish liveness so the API + Studio can show "worker connected"
|
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
importJobs,
|
|
16
16
|
userEvents,
|
|
17
17
|
} from "@hogsend/db";
|
|
18
|
-
import { and, eq, gt, gte, inArray, isNull, sql } from "drizzle-orm";
|
|
18
|
+
import { and, eq, gt, gte, inArray, isNull, max, sql } from "drizzle-orm";
|
|
19
19
|
import {
|
|
20
20
|
computeExpiresAt,
|
|
21
21
|
computeMaxDwellAt,
|
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
|
|
25
25
|
import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
|
|
26
26
|
import { emitBucketTransition } from "../lib/bucket-emit.js";
|
|
27
|
+
import { contactKeySql } from "../lib/contacts.js";
|
|
27
28
|
import { hatchet } from "../lib/hatchet.js";
|
|
28
29
|
import type { Logger } from "../lib/logger.js";
|
|
29
30
|
import { createLogger } from "../lib/logger.js";
|
|
@@ -209,6 +210,18 @@ async function backfillJoins(opts: {
|
|
|
209
210
|
// unset value would never be force-left.
|
|
210
211
|
const maxDwellAt = computeMaxDwellAt(bucket);
|
|
211
212
|
|
|
213
|
+
// Historical dwell anchor (Section 6.3 / LOCKED DECISION 1). For a
|
|
214
|
+
// windowed/event criterion the anchor is `max(occurredAt)` of the qualifying
|
|
215
|
+
// event = "when they became dormant" (e.g. went-dormant = the last
|
|
216
|
+
// `app_opened`). The dwell gate reads `coalesce(dwellAnchorAt, enteredAt)`, so
|
|
217
|
+
// backfilled members start the dwell clock at their real historical instant
|
|
218
|
+
// rather than the deploy-time `enteredAt`. Shapes with no cheap per-matcher
|
|
219
|
+
// timestamp leave the anchor NULL (fall back to enteredAt). The live join path
|
|
220
|
+
// (handleJoin) never sets dwellAnchorAt, so post-deploy joins clock from their
|
|
221
|
+
// real enteredAt. Computed batched per chunk (one GROUP BY max(occurredAt),
|
|
222
|
+
// mirroring the priorCounts GROUP BY) — never per-user serial queries.
|
|
223
|
+
const anchorEvent = resolveDwellAnchorEvent(criteria);
|
|
224
|
+
|
|
212
225
|
// Fix C (DEFERRED): backfilled fastExpiry rows are NOT armed with a
|
|
213
226
|
// bucket:arm-expiry durable timer here — they are picked up by the next cron
|
|
214
227
|
// sweep instead (reconcileBucketLeaves / reconcileBucketTtlLeaves are the
|
|
@@ -219,16 +232,20 @@ async function backfillJoins(opts: {
|
|
|
219
232
|
for (let i = 0; i < matcherIds.length; i += BATCH_SIZE) {
|
|
220
233
|
const chunk = matcherIds.slice(i, i + BATCH_SIZE);
|
|
221
234
|
|
|
222
|
-
// userEmail backfilled from the contacts row where available.
|
|
235
|
+
// userEmail backfilled from the contacts row where available. The chunk
|
|
236
|
+
// holds the RESOLVED key (coalesce(external_id, anonymous_id, id)) — for an
|
|
237
|
+
// email-only / anonymous contact that is the anonymous_id or the uuid id, NOT
|
|
238
|
+
// the (null) external_id. Looking up by `contacts.externalId` would miss
|
|
239
|
+
// those rows and write a NULL userEmail despite the contact having an email,
|
|
240
|
+
// so we key the lookup + the map by the SAME coalesce expression the chunk
|
|
241
|
+
// carries (matches reconcileBucketJoins, which reads userId + email off one
|
|
242
|
+
// contacts row).
|
|
243
|
+
const resolvedKey = contactKeySql();
|
|
223
244
|
const chunkContacts = await db
|
|
224
|
-
.select({
|
|
245
|
+
.select({ userKey: resolvedKey, email: contacts.email })
|
|
225
246
|
.from(contacts)
|
|
226
|
-
.where(
|
|
227
|
-
|
|
228
|
-
);
|
|
229
|
-
const emailByUser = new Map(
|
|
230
|
-
chunkContacts.map((c) => [c.externalId, c.email]),
|
|
231
|
-
);
|
|
247
|
+
.where(and(inArray(resolvedKey, chunk), isNull(contacts.deletedAt)));
|
|
248
|
+
const emailByUser = new Map(chunkContacts.map((c) => [c.userKey, c.email]));
|
|
232
249
|
|
|
233
250
|
// Fix A: entryCount = 1 + prior memberships for each (user, bucket), the
|
|
234
251
|
// same monotonic ordinal the live join computes (check-membership.ts). On a
|
|
@@ -253,6 +270,35 @@ async function backfillJoins(opts: {
|
|
|
253
270
|
priorCounts.map((r) => [r.userId, Number(r.cnt)]),
|
|
254
271
|
);
|
|
255
272
|
|
|
273
|
+
// Batched dwell-anchor derivation (LOCKED DECISION 1): one GROUP BY
|
|
274
|
+
// max(occurredAt) over the qualifying event for THIS chunk, mirroring the
|
|
275
|
+
// priorCounts GROUP BY above (never per-user serial queries). Only computed
|
|
276
|
+
// when the criteria shape exposes a cheap per-matcher anchor event; an empty
|
|
277
|
+
// map leaves dwellAnchorAt NULL → the dwell gate falls back to enteredAt.
|
|
278
|
+
let anchorByUser = new Map<string, Date>();
|
|
279
|
+
if (anchorEvent != null) {
|
|
280
|
+
const anchors = await db
|
|
281
|
+
.select({
|
|
282
|
+
userId: userEvents.userId,
|
|
283
|
+
lastAt: max(userEvents.occurredAt),
|
|
284
|
+
})
|
|
285
|
+
.from(userEvents)
|
|
286
|
+
.where(
|
|
287
|
+
and(
|
|
288
|
+
eq(userEvents.event, anchorEvent),
|
|
289
|
+
inArray(userEvents.userId, chunk),
|
|
290
|
+
),
|
|
291
|
+
)
|
|
292
|
+
.groupBy(userEvents.userId);
|
|
293
|
+
anchorByUser = new Map(
|
|
294
|
+
anchors
|
|
295
|
+
.filter(
|
|
296
|
+
(r): r is { userId: string; lastAt: Date } => r.lastAt != null,
|
|
297
|
+
)
|
|
298
|
+
.map((r) => [r.userId, r.lastAt]),
|
|
299
|
+
);
|
|
300
|
+
}
|
|
301
|
+
|
|
256
302
|
const rows = chunk.map((userId) => ({
|
|
257
303
|
userId,
|
|
258
304
|
userEmail: emailByUser.get(userId) ?? null,
|
|
@@ -262,6 +308,8 @@ async function backfillJoins(opts: {
|
|
|
262
308
|
entryCount: 1 + (priorByUser.get(userId) ?? 0),
|
|
263
309
|
expiresAt: computeExpiresAt(bucket),
|
|
264
310
|
maxDwellAt,
|
|
311
|
+
// Historical dwell anchor where derivable; NULL otherwise (→ enteredAt).
|
|
312
|
+
dwellAnchorAt: anchorByUser.get(userId) ?? null,
|
|
265
313
|
lastEvaluatedAt: new Date(),
|
|
266
314
|
}));
|
|
267
315
|
|
|
@@ -365,6 +413,7 @@ async function reevalLeaves(opts: {
|
|
|
365
413
|
userEmail: row.userEmail,
|
|
366
414
|
epoch: row.entryCount,
|
|
367
415
|
source: "backfill",
|
|
416
|
+
reason: "criteria",
|
|
368
417
|
});
|
|
369
418
|
}
|
|
370
419
|
leftCount += flipped.length;
|
|
@@ -412,7 +461,9 @@ async function selectEventMatchers(
|
|
|
412
461
|
.as("present");
|
|
413
462
|
|
|
414
463
|
const rows = await db
|
|
415
|
-
.select({
|
|
464
|
+
.select({
|
|
465
|
+
userId: contactKeySql(),
|
|
466
|
+
})
|
|
416
467
|
.from(contacts)
|
|
417
468
|
.innerJoin(everFired, eq(everFired.userId, contacts.externalId))
|
|
418
469
|
.leftJoin(present, eq(present.userId, contacts.externalId))
|
|
@@ -452,12 +503,15 @@ async function selectEventMatchers(
|
|
|
452
503
|
* a per-contact `evaluateCondition` loop over live contacts. Property
|
|
453
504
|
* sub-conditions evaluate against the contact's merged properties.
|
|
454
505
|
*
|
|
455
|
-
* KEYSET PAGINATION by `contacts.
|
|
456
|
-
*
|
|
457
|
-
*
|
|
458
|
-
*
|
|
459
|
-
*
|
|
460
|
-
*
|
|
506
|
+
* KEYSET PAGINATION by `contacts.id` in BATCH_SIZE pages: each page selects
|
|
507
|
+
* `WHERE id > :cursor ORDER BY id ASC LIMIT BATCH_SIZE`, evaluates the criteria
|
|
508
|
+
* per contact, then advances the cursor to the last `id` of the page — repeating
|
|
509
|
+
* until a short page ends the scan. The whole contacts table is never held in
|
|
510
|
+
* memory at once. Paging on `id` (the non-null unique PK) — NOT `external_id`,
|
|
511
|
+
* which is nullable (email-only / anonymous contacts) and would drop every
|
|
512
|
+
* null-external_id row and order NULLs unstably. (reconcileBucketJoins is not a
|
|
513
|
+
* keyset scan — it relies on matchers dropping out as they become active
|
|
514
|
+
* members — so this no longer mirrors it.)
|
|
461
515
|
*/
|
|
462
516
|
async function selectCompositeMatchers(
|
|
463
517
|
db: Database,
|
|
@@ -469,17 +523,18 @@ async function selectCompositeMatchers(
|
|
|
469
523
|
for (;;) {
|
|
470
524
|
const page = await db
|
|
471
525
|
.select({
|
|
472
|
-
|
|
526
|
+
id: contacts.id,
|
|
527
|
+
userId: contactKeySql(),
|
|
473
528
|
properties: contacts.properties,
|
|
474
529
|
})
|
|
475
530
|
.from(contacts)
|
|
476
531
|
.where(
|
|
477
532
|
and(
|
|
478
533
|
isNull(contacts.deletedAt),
|
|
479
|
-
cursor != null ? gt(contacts.
|
|
534
|
+
cursor != null ? gt(contacts.id, cursor) : undefined,
|
|
480
535
|
),
|
|
481
536
|
)
|
|
482
|
-
.orderBy(sql`${contacts.
|
|
537
|
+
.orderBy(sql`${contacts.id} asc`)
|
|
483
538
|
.limit(BATCH_SIZE);
|
|
484
539
|
|
|
485
540
|
for (const contact of page) {
|
|
@@ -487,23 +542,68 @@ async function selectCompositeMatchers(
|
|
|
487
542
|
condition: criteria,
|
|
488
543
|
ctx: {
|
|
489
544
|
db,
|
|
490
|
-
userId: contact.
|
|
545
|
+
userId: contact.userId,
|
|
491
546
|
journeyContext:
|
|
492
547
|
(contact.properties as Record<string, unknown> | null) ?? {},
|
|
493
548
|
},
|
|
494
549
|
});
|
|
495
|
-
if (isMember) matchers.push(contact.
|
|
550
|
+
if (isMember) matchers.push(contact.userId);
|
|
496
551
|
}
|
|
497
552
|
|
|
498
553
|
// A short page (fewer than a full batch) means the scan is exhausted.
|
|
499
554
|
if (page.length < BATCH_SIZE) break;
|
|
500
|
-
cursor = page[page.length - 1]?.
|
|
555
|
+
cursor = page[page.length - 1]?.id ?? null;
|
|
501
556
|
if (cursor == null) break;
|
|
502
557
|
}
|
|
503
558
|
|
|
504
559
|
return matchers;
|
|
505
560
|
}
|
|
506
561
|
|
|
562
|
+
/**
 * Pick the event whose latest occurrence (`max(occurredAt)`) serves as the
 * historical dwell anchor for a backfilled member, i.e. "when they became
 * dormant" (LOCKED DECISION 1 / Section 6.3).
 *
 * Only two criteria shapes yield an event name:
 *
 *   1. A single `event` criterion that carries a `within` window → its own
 *      `eventName`.
 *   2. The lapsed-active composite: an `and` of exactly two legs on the SAME
 *      event X, one unwindowed `exists` leg and one windowed `not_exists` leg
 *      → event X. (Mirrors isLapsedActiveComposite in bucket-reconcile.ts.)
 *
 * Every other shape has no single cheap per-matcher timestamp and returns
 * `null`; the caller then leaves the anchor NULL so the dwell gate falls back
 * to `enteredAt`.
 *
 * @param criteria - The bucket's criteria tree.
 * @returns The anchor event name, or `null` when no anchor is derivable.
 */
function resolveDwellAnchorEvent(criteria: ConditionEval): string | null {
  // Shape 1: a lone event criterion is anchorable only when it is windowed.
  if (criteria.type === "event") {
    if (criteria.within == null) return null;
    return criteria.eventName;
  }

  // Shape 2 requires a two-leg AND composite; anything else has no anchor.
  if (criteria.type !== "composite") return null;
  if (criteria.operator !== "and") return null;
  if (criteria.conditions.length !== 2) return null;

  const legs = criteria.conditions;

  // The unwindowed exists() leg: "has ever done X".
  const everDidLeg = legs.find(
    (leg) =>
      leg.type === "event" && leg.check === "exists" && leg.within == null,
  );
  // The windowed not_exists() leg: "has not done X within W".
  const lapsedLeg = legs.find(
    (leg) =>
      leg.type === "event" && leg.check === "not_exists" && leg.within != null,
  );

  if (everDidLeg?.type !== "event") return null;
  if (lapsedLeg?.type !== "event") return null;

  // Both legs must describe the same event, otherwise there is no single anchor.
  return everDidLeg.eventName === lapsedLeg.eventName
    ? lapsedLeg.eventName
    : null;
}
|
|
606
|
+
|
|
507
607
|
/**
|
|
508
608
|
* Upsert the bucket's current criteria fingerprint onto `bucket_configs` (Section
|
|
509
609
|
* 6.6 B). Mirrors the admin enable/disable onConflictDoUpdate target.
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
durationToMs,
|
|
8
8
|
evaluateCondition,
|
|
9
9
|
} from "@hogsend/core";
|
|
10
|
+
import type { JourneyMeta } from "@hogsend/core/types";
|
|
10
11
|
import {
|
|
11
12
|
bucketConfigs,
|
|
12
13
|
bucketMemberships,
|
|
@@ -27,6 +28,7 @@ import {
|
|
|
27
28
|
or,
|
|
28
29
|
sql,
|
|
29
30
|
} from "drizzle-orm";
|
|
31
|
+
import type { BucketLeaveReason } from "../buckets/bucket-reactions.js";
|
|
30
32
|
import { shouldEmitJoin } from "../buckets/check-membership.js";
|
|
31
33
|
import {
|
|
32
34
|
BUCKET_EVENT_PREFIX,
|
|
@@ -39,6 +41,7 @@ import {
|
|
|
39
41
|
import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
|
|
40
42
|
import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
|
|
41
43
|
import { emitBucketTransition } from "../lib/bucket-emit.js";
|
|
44
|
+
import { contactKeySql } from "../lib/contacts.js";
|
|
42
45
|
import { hatchet } from "../lib/hatchet.js";
|
|
43
46
|
import type { Logger } from "../lib/logger.js";
|
|
44
47
|
import { createLogger } from "../lib/logger.js";
|
|
@@ -97,12 +100,21 @@ export const bucketReconcileTask = hatchet.task({
|
|
|
97
100
|
// kind:"manual" buckets are NEVER auto-recomputed (early-continue).
|
|
98
101
|
if (bucket.kind === "manual" || !bucket.criteria) continue;
|
|
99
102
|
|
|
100
|
-
// Process a bucket here iff a clock can flip its membership
|
|
101
|
-
// criteria window (criteria-driven
|
|
102
|
-
// `maxDwell` TTL (membership-age-driven
|
|
103
|
-
//
|
|
103
|
+
// Process a bucket here iff a clock can flip its membership OR fire a
|
|
104
|
+
// membership-age dwell: a TIME-BASED criteria window (criteria-driven
|
|
105
|
+
// leaves/joins), an unconditional `maxDwell` TTL (membership-age-driven
|
|
106
|
+
// leaves), OR a `dwell` reaction (membership-age-driven fire). timeBased is
|
|
107
|
+
// honoured explicitly OR inferred from a `within` window. The dwell-only
|
|
108
|
+
// bucket falls through and runs ONLY the dwell pass (the criteria pass is
|
|
109
|
+
// behind `if (timeBased)`, the TTL pass behind `if (bucket.maxDwell)`).
|
|
104
110
|
const timeBased = isTimeBased(bucket);
|
|
105
|
-
|
|
111
|
+
const dwellReactions = journeyRegistry
|
|
112
|
+
.getAll()
|
|
113
|
+
.filter(
|
|
114
|
+
(j) => j.sourceBucketId === bucket.id && j.reactionKind === "dwell",
|
|
115
|
+
);
|
|
116
|
+
const hasDwell = dwellReactions.length > 0;
|
|
117
|
+
if (!timeBased && !bucket.maxDwell && !hasDwell) continue;
|
|
106
118
|
|
|
107
119
|
try {
|
|
108
120
|
if (timeBased) {
|
|
@@ -145,6 +157,21 @@ export const bucketReconcileTask = hatchet.task({
|
|
|
145
157
|
bucket,
|
|
146
158
|
});
|
|
147
159
|
}
|
|
160
|
+
|
|
161
|
+
// Dwell pass — runs AFTER the TTL pass (ordering is load-bearing: a
|
|
162
|
+
// member force-left by maxDwell earlier this iteration is status='left'
|
|
163
|
+
// here, so the dwell scan's status='active' filter excludes it). Fires
|
|
164
|
+
// `bucket:dwell:<id>:<label>` over the continuously-dwelling active
|
|
165
|
+
// population at cron resolution (Section 6.4–6.6).
|
|
166
|
+
if (hasDwell) {
|
|
167
|
+
reconciled += await reconcileBucketDwell({
|
|
168
|
+
db,
|
|
169
|
+
logger,
|
|
170
|
+
journeyRegistry,
|
|
171
|
+
bucket,
|
|
172
|
+
dwellReactions,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
148
175
|
} catch (err) {
|
|
149
176
|
logger.error("Bucket reconcile failed", {
|
|
150
177
|
bucketId: bucket.id,
|
|
@@ -255,6 +282,8 @@ export const bucketExpiryTask = hatchet.durableTask({
|
|
|
255
282
|
userEmail: input.userEmail,
|
|
256
283
|
epoch: flipped.entryCount,
|
|
257
284
|
source: "reconcile",
|
|
285
|
+
// Fast-expiry is a criteria re-confirm leave (Section 6.7).
|
|
286
|
+
reason: "criteria",
|
|
258
287
|
});
|
|
259
288
|
|
|
260
289
|
return { status: "left", rowId: flipped.id };
|
|
@@ -287,6 +316,7 @@ async function reconcileBucketLeaves(opts: {
|
|
|
287
316
|
journeyRegistry,
|
|
288
317
|
bucket,
|
|
289
318
|
userIds: leaverIds,
|
|
319
|
+
reason: "criteria",
|
|
290
320
|
});
|
|
291
321
|
}
|
|
292
322
|
|
|
@@ -421,7 +451,14 @@ async function reconcileCompositeLeaves(opts: {
|
|
|
421
451
|
}
|
|
422
452
|
|
|
423
453
|
if (leaverIds.length === 0) return 0;
|
|
424
|
-
return bulkLeave({
|
|
454
|
+
return bulkLeave({
|
|
455
|
+
db,
|
|
456
|
+
logger,
|
|
457
|
+
journeyRegistry,
|
|
458
|
+
bucket,
|
|
459
|
+
userIds: leaverIds,
|
|
460
|
+
reason: "criteria",
|
|
461
|
+
});
|
|
425
462
|
}
|
|
426
463
|
|
|
427
464
|
/**
|
|
@@ -462,6 +499,7 @@ async function reconcileBucketTtlLeaves(opts: {
|
|
|
462
499
|
journeyRegistry,
|
|
463
500
|
bucket,
|
|
464
501
|
userIds: expired.map((r) => r.userId),
|
|
502
|
+
reason: "maxDwell",
|
|
465
503
|
});
|
|
466
504
|
}
|
|
467
505
|
|
|
@@ -478,8 +516,10 @@ async function bulkLeave(opts: {
|
|
|
478
516
|
journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
|
|
479
517
|
bucket: BucketMeta;
|
|
480
518
|
userIds: string[];
|
|
519
|
+
/** Why these members leave — TTL passes "maxDwell", criteria passes "criteria". */
|
|
520
|
+
reason: BucketLeaveReason;
|
|
481
521
|
}): Promise<number> {
|
|
482
|
-
const { db, logger, journeyRegistry, bucket, userIds } = opts;
|
|
522
|
+
const { db, logger, journeyRegistry, bucket, userIds, reason } = opts;
|
|
483
523
|
|
|
484
524
|
const dwellMs = bucket.minDwell ? durationToMs(bucket.minDwell) : 0;
|
|
485
525
|
const dwellCutoff = dwellMs > 0 ? new Date(Date.now() - dwellMs) : null;
|
|
@@ -524,12 +564,171 @@ async function bulkLeave(opts: {
|
|
|
524
564
|
userEmail: row.userEmail,
|
|
525
565
|
epoch: row.entryCount,
|
|
526
566
|
source: "reconcile",
|
|
567
|
+
reason,
|
|
527
568
|
});
|
|
528
569
|
}
|
|
529
570
|
|
|
530
571
|
return flipped.length;
|
|
531
572
|
}
|
|
532
573
|
|
|
574
|
+
/**
 * Dwell pass for one bucket (Section 6.4–6.6). Fires `bucket:dwell:<id>:<label>`
 * over the EXISTING continuously-dwelling active population at cron resolution —
 * its unique value over `on("enter") + ctx.sleep`. Routed through
 * `emitBucketTransition` (NOT a raw push) for `userEvents`/exitOn/history/analytics
 * parity (Section 6.1).
 *
 * How it works:
 *
 *  - PUSH FIRST (at-least-once; the deterministic idempotencyKey + the userEvents
 *    dedup absorb a same-sweep retry), THEN stamp `dwellState` (the inter-sweep
 *    "already fired this membership" gate). The stamp's `status='active'` clause
 *    makes the leave/fastExpiry interop correct (a row flipped to `left` between
 *    SELECT and UPDATE no-ops).
 *  - The dwell clock is `coalesce(dwellAnchorAt, enteredAt)` — backfilled members
 *    use their derived historical anchor (LOCKED DECISION 1), live joins use
 *    enteredAt (anchor NULL).
 *  - Memberships are joined to `contacts` on the RESOLVED contact key
 *    (`contactKeySql()`), the same expression reconcileBucketJoins uses. Joining
 *    on `contacts.externalId` would silently exclude every email-only / anonymous
 *    member (NULL external_id), so they would never receive a dwell fire.
 *  - Candidates are ordered `lastEvaluatedAt asc nulls first` (oldest-served-first)
 *    and the stamp bumps `lastEvaluatedAt`. For one-shot `after` reactions,
 *    memberships that already fired are excluded IN SQL: they are never
 *    re-stamped, so leaving them in the page would let them occupy the head of
 *    the ordering forever and starve unfired members past BATCH_SIZE. Hitting
 *    BATCH_SIZE is logged once per reaction per sweep (visibility, not silent).
 *  - First-deploy quiet window: reuse `firstTimeBackfillIncomplete` so the
 *    pre-existing/backfilled population is not blasted before the first-time
 *    backfill has settled.
 *
 * `every` is fires-at-most-once-per-sweep, coalescing (one catch-up fire after a
 * multi-interval outage); `dwellCount` is the deterministic interval ordinal
 * `floor((sweepInstant - anchor) / offsetMs)` (gap-stable, NOT a fire count). For
 * `after` the ordinal is always 1 (one-shot).
 *
 * @param opts.db - Database handle used for the candidate scan and the stamp.
 * @param opts.logger - Logger for the BATCH_SIZE-bound warning.
 * @param opts.journeyRegistry - Registry forwarded to `emitBucketTransition`.
 * @param opts.bucket - The bucket whose active members are scanned.
 * @param opts.dwellReactions - Dwell reactions for this bucket; entries without
 *   a `dwellSchedule`, or with neither `after` nor `every`, are skipped.
 * @returns The number of dwell transitions emitted in this sweep.
 */
async function reconcileBucketDwell(opts: {
  db: Database;
  logger: Logger;
  journeyRegistry: ReturnType<typeof getJourneyRegistrySingleton>;
  bucket: BucketMeta;
  dwellReactions: JourneyMeta[];
}): Promise<number> {
  const { db, logger, journeyRegistry, bucket, dwellReactions } = opts;

  // First-deploy quiet window: do not blast the pre-existing/backfilled
  // population before the first-time backfill has settled (reuse the guard).
  if (await firstTimeBackfillIncomplete(db, bucket)) return 0;

  // Captured once per invocation and reused for the ordinal. The ordinal is
  // floor((sweepInstant - anchor) / offsetMs), so it is grid-quantized: a Hatchet
  // retry (a fresh fn() invocation, seconds–minutes later) lands in the SAME
  // interval window and recomputes the SAME ordinal → SAME idempotencyKey →
  // absorbed by the userEvents dedup. `after` is always ordinal 1 (fully stable).
  // Residual edge: an `every` retry that straddles an interval boundary yields a
  // new ordinal and thus one extra dwell fire — bounded to a single duplicate by
  // the key, and only possible for sub-retry-window intervals. Documented, not
  // load-bearing for the common (hours/days) intervals.
  const sweepInstant = Date.now();
  // The stamp value is the sweep instant, identical for every row of the sweep.
  const firedAtJson = `"${new Date(sweepInstant).toISOString()}"`;

  // Memberships key on the resolved contact key (external_id ?? anonymous_id ??
  // id), so the contacts join must use the same expression — see the doc comment.
  const contactKey = contactKeySql();
  // The dwell clock: historical anchor when present, otherwise enteredAt.
  const anchorSql = sql<Date>`coalesce(${bucketMemberships.dwellAnchorAt}, ${bucketMemberships.enteredAt})`;

  let fired = 0;

  for (const reaction of dwellReactions) {
    const schedule = reaction.dwellSchedule;
    if (!schedule) continue;
    const { label, after, every } = schedule;
    const offsetMs = after ?? every;
    if (offsetMs == null) continue;
    const cutoff = new Date(sweepInstant - offsetMs);

    // One-shot (`after`) reactions: drop memberships whose dwellState already
    // carries a non-empty stamp for this label. This matches the in-loop
    // `lastFired != null` skip below, but keeps already-fired rows from
    // consuming the BATCH_SIZE page (they are never re-stamped, so their
    // lastEvaluatedAt would otherwise pin them to the front of the ordering).
    const notYetFired =
      after != null
        ? sql`nullif(${bucketMemberships.dwellState} ->> ${label}::text, '') is null`
        : undefined;

    // Continuous-member gate. Oldest-served-first (Section 6.5).
    const candidates = await db
      .select({
        id: bucketMemberships.id,
        userId: bucketMemberships.userId,
        userEmail: bucketMemberships.userEmail,
        entryCount: bucketMemberships.entryCount,
        anchor: anchorSql,
        dwellState: bucketMemberships.dwellState,
      })
      .from(bucketMemberships)
      .innerJoin(contacts, eq(bucketMemberships.userId, contactKey))
      .where(
        and(
          eq(bucketMemberships.bucketId, bucket.id),
          eq(bucketMemberships.status, "active"),
          isNull(bucketMemberships.deletedAt),
          isNull(contacts.deletedAt),
          // Fold the comparison into the fragment with an explicit cast: a JS
          // Date passed to lte() against a raw sql`coalesce(...)` fragment has no
          // column type to drive param encoding, so the pg driver throws on the
          // Date (and the per-bucket try/catch would silently swallow it → 0
          // dwell fires). Binding the ISO string + ::timestamptz is well-typed.
          sql`${anchorSql} <= ${cutoff.toISOString()}::timestamptz`,
          notYetFired,
        ),
      )
      .orderBy(sql`${bucketMemberships.lastEvaluatedAt} asc nulls first`)
      .limit(BATCH_SIZE);

    if (candidates.length >= BATCH_SIZE) {
      logger.warn("Bucket dwell pass bounded to BATCH_SIZE/tick", {
        bucketId: bucket.id,
        label,
        batchSize: BATCH_SIZE,
      });
    }

    for (const m of candidates) {
      const state = (m.dwellState ?? {}) as Record<string, string>;
      const lastFired = state[label] ? Date.parse(state[label]) : null;
      // The raw coalesce fragment may come back as a string or a Date; new Date()
      // accepts both.
      const anchorMs = new Date(m.anchor).getTime();

      if (after != null) {
        // one-shot: already fired for this membership → skip. (Defensive: the
        // SQL gate above already excludes these rows.)
        if (lastFired != null) continue;
      } else {
        // every: not yet due since the last fire (or the anchor).
        const since = lastFired ?? anchorMs;
        if (sweepInstant - since < offsetMs) continue;
      }

      // Deterministic per (membership, sweepInstant) so a retry recomputes it.
      const ordinal =
        after != null ? 1 : Math.floor((sweepInstant - anchorMs) / offsetMs);

      // PUSH FIRST (at-least-once; idempotencyKey + userEvents dedup absorb
      // retries), THEN stamp. emitBucketTransition handles the
      // userEvents/exitOn/analytics parity.
      await emitBucketTransition({
        db,
        registry: journeyRegistry,
        hatchet,
        logger,
        kind: "dwell",
        bucket,
        userId: m.userId,
        userEmail: m.userEmail,
        epoch: m.entryCount,
        source: "reconcile",
        dwellLabel: label,
        dwellOrdinal: ordinal,
      });

      // Stamp the membership (inter-sweep gate). status='active' clause = leave
      // interop (a row flipped to 'left' between SELECT and UPDATE no-ops).
      // NOTE(review): the `{label}` text[] path literal assumes labels contain no
      // commas, braces or quotes — confirm labels are validated where defined.
      await db
        .update(bucketMemberships)
        .set({
          dwellState: sql`jsonb_set(coalesce(${bucketMemberships.dwellState}, '{}'::jsonb), ${`{${label}}`}, ${firedAtJson}::jsonb)`,
          lastEvaluatedAt: new Date(),
          updatedAt: new Date(),
        })
        .where(
          and(
            eq(bucketMemberships.id, m.id),
            eq(bucketMemberships.status, "active"),
          ),
        );
      fired += 1;
    }
  }

  return fired;
}
|
|
731
|
+
|
|
533
732
|
/**
|
|
534
733
|
* reconcileJoins (absence buckets): materialize NEW members the real-time path
|
|
535
734
|
* cannot see — a user who STOPS doing X fires no event, so only the clock can
|
|
@@ -643,18 +842,27 @@ async function reconcileBucketJoins(opts: {
|
|
|
643
842
|
? selectPresentInAllWindows(db, absenceLegs)
|
|
644
843
|
: null;
|
|
645
844
|
|
|
845
|
+
// The membership/event tables key on the RESOLVED string key (external_id ??
|
|
846
|
+
// anonymous_id ?? contact.id), NOT necessarily external_id — email-only /
|
|
847
|
+
// anonymous contacts have a NULL external_id and are keyed on their uuid /
|
|
848
|
+
// anonymous_id. Joining on contacts.externalId would force external_id NOT NULL
|
|
849
|
+
// for every candidate (the coalesce would collapse to external_id) and silently
|
|
850
|
+
// drop exactly the dormant email-only contacts this cron exists to reconcile.
|
|
851
|
+
// Join on the SAME coalesce expression so the projected key matches the join.
|
|
852
|
+
const contactKey = contactKeySql();
|
|
853
|
+
|
|
646
854
|
const baseQuery = db
|
|
647
855
|
.select({
|
|
648
|
-
userId:
|
|
856
|
+
userId: contactKey,
|
|
649
857
|
email: contacts.email,
|
|
650
858
|
})
|
|
651
859
|
.from(contacts)
|
|
652
|
-
.innerJoin(everFired, eq(everFired.userId,
|
|
653
|
-
.leftJoin(activeMembers, eq(activeMembers.userId,
|
|
860
|
+
.innerJoin(everFired, eq(everFired.userId, contactKey))
|
|
861
|
+
.leftJoin(activeMembers, eq(activeMembers.userId, contactKey));
|
|
654
862
|
|
|
655
863
|
const candidates = await (presentInAll
|
|
656
864
|
? baseQuery
|
|
657
|
-
.leftJoin(presentInAll, eq(presentInAll.userId,
|
|
865
|
+
.leftJoin(presentInAll, eq(presentInAll.userId, contactKey))
|
|
658
866
|
.where(
|
|
659
867
|
and(
|
|
660
868
|
isNull(contacts.deletedAt),
|
|
@@ -666,7 +874,12 @@ async function reconcileBucketJoins(opts: {
|
|
|
666
874
|
and(isNull(contacts.deletedAt), isNull(activeMembers.userId)),
|
|
667
875
|
)
|
|
668
876
|
)
|
|
669
|
-
|
|
877
|
+
// Deterministic scan order for the bounded re-run (no keyset cursor; the
|
|
878
|
+
// scan advances as reconciled matchers become active members and drop out).
|
|
879
|
+
// Order by contacts.id (the non-null unique PK) so the scan is null-safe and
|
|
880
|
+
// stable even for null-external_id contacts now that the join is on the
|
|
881
|
+
// coalesce key.
|
|
882
|
+
.orderBy(sql`${contacts.id} asc`)
|
|
670
883
|
.limit(BATCH_SIZE);
|
|
671
884
|
|
|
672
885
|
// SET-BASED / EXACT shapes (Fix #3) — every candidate row is a true matcher,
|