@hogsend/engine 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/package.json +7 -6
  2. package/src/app.ts +36 -1
  3. package/src/buckets/check-membership.ts +34 -15
  4. package/src/container.ts +33 -0
  5. package/src/env.ts +29 -0
  6. package/src/index.ts +47 -1
  7. package/src/journeys/define-journey.ts +26 -2
  8. package/src/journeys/journey-context.ts +5 -1
  9. package/src/lib/boot.ts +1 -1
  10. package/src/lib/bucket-emit.ts +47 -2
  11. package/src/lib/contacts.ts +1105 -18
  12. package/src/lib/email-service-types.ts +8 -0
  13. package/src/lib/ingestion.ts +63 -33
  14. package/src/lib/mailer.ts +88 -0
  15. package/src/lib/outbound.ts +216 -0
  16. package/src/lib/preferences.ts +137 -0
  17. package/src/lib/tracked.ts +204 -37
  18. package/src/lib/tracking-events.ts +67 -2
  19. package/src/lib/webhook-signing.ts +151 -0
  20. package/src/lists/define-list.ts +81 -0
  21. package/src/lists/registry-singleton.ts +39 -0
  22. package/src/lists/registry.ts +95 -0
  23. package/src/middleware/api-key.ts +33 -7
  24. package/src/middleware/rate-limit.ts +73 -49
  25. package/src/routes/_shared.ts +30 -0
  26. package/src/routes/admin/api-keys.ts +1 -1
  27. package/src/routes/admin/bulk.ts +7 -3
  28. package/src/routes/admin/contacts.ts +108 -59
  29. package/src/routes/admin/events.ts +65 -0
  30. package/src/routes/admin/index.ts +2 -0
  31. package/src/routes/admin/journeys.ts +3 -1
  32. package/src/routes/admin/preferences.ts +2 -2
  33. package/src/routes/admin/reporting.ts +3 -3
  34. package/src/routes/admin/timeline.ts +5 -2
  35. package/src/routes/admin/webhooks.ts +466 -0
  36. package/src/routes/campaigns/index.ts +252 -0
  37. package/src/routes/contacts/index.ts +231 -0
  38. package/src/routes/email/preferences.ts +27 -3
  39. package/src/routes/email/unsubscribe.ts +7 -49
  40. package/src/routes/emails/index.ts +133 -0
  41. package/src/routes/events/index.ts +119 -0
  42. package/src/routes/index.ts +52 -2
  43. package/src/routes/lists/index.ts +258 -0
  44. package/src/routes/tracking/click.ts +59 -18
  45. package/src/routes/tracking/open.ts +62 -24
  46. package/src/routes/webhooks/sources.ts +69 -10
  47. package/src/webhook-sources/define-webhook-source.ts +57 -5
  48. package/src/webhook-sources/presets/clerk.ts +185 -0
  49. package/src/webhook-sources/presets/index.ts +80 -0
  50. package/src/webhook-sources/presets/segment.ts +120 -0
  51. package/src/webhook-sources/presets/stripe.ts +147 -0
  52. package/src/webhook-sources/presets/supabase.ts +131 -0
  53. package/src/webhook-sources/verify.ts +172 -0
  54. package/src/worker.ts +12 -0
  55. package/src/workflows/bucket-backfill.ts +32 -21
  56. package/src/workflows/bucket-reconcile.ts +20 -5
  57. package/src/workflows/deliver-webhook.ts +399 -0
  58. package/src/workflows/import-contacts.ts +28 -20
  59. package/src/workflows/send-campaign.ts +589 -0
  60. package/src/routes/ingest.ts +0 -71
@@ -0,0 +1,147 @@
1
+ import { z } from "zod";
2
+ import type { IngestEvent } from "../../lib/ingestion.js";
3
+ import { defineWebhookSource } from "../define-webhook-source.js";
4
+
5
+ /**
6
+ * Stripe webhook preset.
7
+ *
8
+ * Auth: Stripe's `stripe-signature: t=<ts>,v1=<hex>` header, verified with
9
+ * `node:crypto` (NO `stripe` SDK — decision #14). Set `STRIPE_WEBHOOK_SECRET`
10
+ * (the `whsec_…` endpoint secret) to auto-enable at `POST /v1/webhooks/stripe`.
11
+ *
12
+ * Event mapping (decision #16, normalized to the outbound vocabulary):
13
+ * - `customer.created` → `contact.created`
14
+ * - `customer.updated` → `contact.updated`
15
+ * - `customer.deleted` → `contact.deleted` (EVENT only — decision #15)
16
+ * - `customer.subscription.<action>` → `subscription.<action>`
17
+ * - `invoice.<action>` → `invoice.<action>`
18
+ *
19
+ * Identity: `userId = obj.id` for customers, `obj.customer` for subscriptions /
20
+ * invoices. `idempotencyKey = payload.id` (the Stripe event id) so at-least-once
21
+ * redelivery dedupes on `user_events.idempotencyKey`.
22
+ *
23
+ * D2 split: customer profile (`name`, `phone`, `metadata`) → `contactProperties`
24
+ * ONLY; everything else → `eventProperties` ONLY.
25
+ */
26
+
27
/**
 * Fields read from `data.object` of a Stripe event. Every declared field is
 * optional and/or nullable, so a payload is not rejected just because one of
 * them is absent.
 */
const stripeObjectFields = {
  id: z.string().optional(),
  object: z.string().optional(),
  customer: z.string().nullish(),
  email: z.string().nullish(),
  name: z.string().nullish(),
  phone: z.string().nullish(),
  metadata: z.record(z.string(), z.unknown()).nullish(),
};

/**
 * Schema for the object embedded in a Stripe event. Keys that are not declared
 * above are validated with `z.unknown()` via `catchall` instead of being
 * rejected.
 */
const stripeObjectSchema = z
  .object(stripeObjectFields)
  .catchall(z.unknown());
38
+
39
/**
 * Schema for the `data` envelope of a Stripe event: the embedded object plus
 * any other keys, which are accepted as `unknown`.
 */
const stripeEventDataSchema = z
  .object({ object: stripeObjectSchema })
  .catchall(z.unknown());

/**
 * Schema for a whole Stripe webhook payload. `id` and `type` are required
 * strings; `object` is optional; undeclared top-level keys are accepted as
 * `unknown`.
 */
const stripeWebhookSchema = z
  .object({
    id: z.string(),
    type: z.string(),
    object: z.string().optional(),
    data: stripeEventDataSchema,
  })
  .catchall(z.unknown());

/** Payload type handed to `transform`, inferred from the schema above. */
type StripePayload = z.infer<typeof stripeWebhookSchema>;
53
+
54
/**
 * Webhook source definition for Stripe.
 *
 * `auth` declares signature auth with the `stripe` scheme, the
 * `STRIPE_WEBHOOK_SECRET` env key and the `stripe-signature` header; how those
 * fields are consumed is decided by `defineWebhookSource` (not visible here).
 *
 * `transform` turns a schema-validated payload into an `IngestEvent`. It
 * returns `null` when the event type is not one of the handled families or
 * when no non-empty customer id can be resolved from the embedded object.
 */
export const stripeSource = defineWebhookSource({
  meta: {
    id: "stripe",
    name: "Stripe",
    description:
      "Receives Stripe customer/subscription/invoice webhooks (signature-verified).",
  },
  auth: {
    type: "signature",
    scheme: "stripe",
    envKey: "STRIPE_WEBHOOK_SECRET",
    header: "stripe-signature",
  },
  schema: stripeWebhookSchema,
  async transform(payload: StripePayload): Promise<IngestEvent | null> {
    const SUBSCRIPTION_PREFIX = "customer.subscription.";
    const INVOICE_PREFIX = "invoice.";

    const { id: stripeEventId, type: stripeType } = payload;
    const target = payload.data.object;

    // Resolve the normalized event name and the customer id in one pass.
    let event: string;
    let userId: string | undefined;
    // Only customer.created / customer.updated carry a profile to merge.
    let mergesProfile = false;

    switch (stripeType) {
      case "customer.created":
      case "customer.updated":
        event =
          stripeType === "customer.created"
            ? "contact.created"
            : "contact.updated";
        userId = target.id;
        mergesProfile = true;
        break;
      case "customer.deleted":
        // Emitted as an event only: no contact properties are attached.
        event = "contact.deleted";
        userId = target.id;
        break;
      default: {
        // Prefix families: the suffix after the prefix is kept as the action.
        if (stripeType.startsWith(SUBSCRIPTION_PREFIX)) {
          event = `subscription.${stripeType.slice(SUBSCRIPTION_PREFIX.length)}`;
        } else if (stripeType.startsWith(INVOICE_PREFIX)) {
          event = `invoice.${stripeType.slice(INVOICE_PREFIX.length)}`;
        } else {
          return null;
        }
        // Subscriptions and invoices identify the customer via `customer`.
        userId =
          typeof target.customer === "string" ? target.customer : undefined;
      }
    }

    // A missing or empty customer id means there is nothing to attribute.
    if (!userId) {
      return null;
    }

    const userEmail = typeof target.email === "string" ? target.email : "";

    const eventProperties: Record<string, unknown> = {
      source: "stripe",
      stripeCustomerId: userId,
      stripeEventId,
      _stripeEvent: stripeType,
      stripeObject: target.object,
    };

    // Metadata is spread first so the explicit fields below win on a key clash.
    const contactProperties: Record<string, unknown> = mergesProfile
      ? { ...(target.metadata ?? {}) }
      : {};
    if (mergesProfile) {
      if (typeof target.name === "string") {
        contactProperties.name = target.name;
      }
      if (typeof target.phone === "string") {
        contactProperties.phone = target.phone;
      }
      contactProperties.stripeCustomerId = userId;
    }

    return {
      event,
      userId,
      userEmail,
      eventProperties,
      contactProperties,
      // The Stripe event id doubles as the idempotency key.
      idempotencyKey: stripeEventId,
    };
  },
});
@@ -0,0 +1,131 @@
1
+ import { z } from "zod";
2
+ import type { IngestEvent } from "../../lib/ingestion.js";
3
+ import { defineWebhookSource } from "../define-webhook-source.js";
4
+
5
+ /**
6
+ * Supabase `auth.users` webhook preset.
7
+ *
8
+ * Auth: Svix-signed when Supabase's "Send HTTP Request" hook is configured with
9
+ * a signing secret; falls back to the plain `x-supabase-webhook-secret` shared
10
+ * secret (via `fallbackMatchHeader`) for the database-webhook trigger path. Set
11
+ * `SUPABASE_WEBHOOK_SECRET` to auto-enable at `POST /v1/webhooks/supabase`.
12
+ *
13
+ * Only `schema === "auth" && table === "users"` rows are processed (other tables
14
+ * are skipped). Event mapping (decision #16, normalized):
15
+ * - `INSERT` → `contact.created`
16
+ * - `UPDATE` → `contact.updated`
17
+ * - `DELETE` → `contact.deleted` (EVENT only — decision #15)
18
+ *
19
+ * D2 split: profile fields → `contactProperties` ONLY; behavioral/source fields
20
+ * → `eventProperties` ONLY.
21
+ */
22
+
23
/**
 * Columns read from a Supabase user row. All of them are optional and/or
 * nullable, so a row is not rejected because one is missing.
 */
const supabaseUserRowFields = {
  id: z.string().optional(),
  email: z.string().nullish(),
  phone: z.string().nullish(),
  email_confirmed_at: z.string().nullish(),
  raw_user_meta_data: z.record(z.string(), z.unknown()).nullish(),
};

/**
 * Schema for a user row carried in `record` / `old_record`. Columns not
 * declared above are accepted as `unknown` via `catchall`.
 */
const supabaseUserRowSchema = z
  .object(supabaseUserRowFields)
  .catchall(z.unknown());
32
+
33
/** The three mutation kinds the webhook schema accepts. */
const supabaseMutationSchema = z.enum(["INSERT", "UPDATE", "DELETE"]);

/**
 * Schema for a Supabase webhook payload: mutation kind, table and schema
 * names, and the optional/nullable new and old rows. Undeclared top-level keys
 * are accepted as `unknown`.
 */
const supabaseWebhookSchema = z
  .object({
    type: supabaseMutationSchema,
    table: z.string(),
    schema: z.string(),
    record: supabaseUserRowSchema.nullish(),
    old_record: supabaseUserRowSchema.nullish(),
  })
  .catchall(z.unknown());

/** Payload type handed to `transform`, inferred from the schema above. */
type SupabasePayload = z.infer<typeof supabaseWebhookSchema>;
44
+
45
/**
 * Webhook source definition for Supabase user-row mutations.
 *
 * `auth` declares signature auth with the `svix` scheme, the
 * `SUPABASE_WEBHOOK_SECRET` env key, the `svix-signature` header and a
 * `fallbackMatchHeader` of `x-supabase-webhook-secret`; how those fields are
 * consumed is decided by `defineWebhookSource` (not visible here).
 *
 * `transform` returns `null` for any payload that is not an `auth.users`
 * mutation, that has no usable row, or whose row has no non-empty `id`.
 */
export const supabaseSource = defineWebhookSource({
  meta: {
    id: "supabase",
    name: "Supabase",
    description:
      "Receives Supabase auth.users INSERT/UPDATE/DELETE webhooks (Svix-signed or shared-secret).",
  },
  auth: {
    type: "signature",
    scheme: "svix",
    envKey: "SUPABASE_WEBHOOK_SECRET",
    header: "svix-signature",
    fallbackMatchHeader: "x-supabase-webhook-secret",
  },
  schema: supabaseWebhookSchema,
  async transform(payload: SupabasePayload): Promise<IngestEvent | null> {
    // Anything outside auth.users is ignored.
    const isAuthUsers = payload.schema === "auth" && payload.table === "users";
    if (!isAuthUsers) {
      return null;
    }

    const mutation = payload.type;
    let event: string;
    if (mutation === "INSERT") {
      event = "contact.created";
    } else if (mutation === "UPDATE") {
      event = "contact.updated";
    } else if (mutation === "DELETE") {
      event = "contact.deleted";
    } else {
      // Not reachable for schema-validated input; kept as a defensive exit.
      return null;
    }
    const isDelete = mutation === "DELETE";

    // Prefer `old_record` for deletes and `record` otherwise, falling back to
    // whichever of the two is present.
    const row = isDelete
      ? (payload.old_record ?? payload.record)
      : (payload.record ?? payload.old_record);
    if (!row) {
      return null;
    }

    const userId = row.id;
    if (!userId) {
      return null;
    }

    const userEmail = typeof row.email === "string" ? row.email : "";

    const eventProperties: Record<string, unknown> = {
      source: "supabase",
      supabaseUserId: userId,
      _supabaseEvent: mutation,
    };

    // Deletes are emitted with empty contact properties. For inserts/updates
    // the user metadata is spread first so the explicit fields win on a clash.
    const contactProperties: Record<string, unknown> = isDelete
      ? {}
      : { ...(row.raw_user_meta_data ?? {}) };
    if (!isDelete) {
      if (typeof row.phone === "string") {
        contactProperties.phone = row.phone;
      }
      contactProperties.emailVerified = Boolean(row.email_confirmed_at);
      contactProperties.supabaseUserId = userId;
    }

    return {
      event,
      userId,
      userEmail,
      eventProperties,
      contactProperties,
    };
  },
});
@@ -0,0 +1,172 @@
1
+ import { createHmac, timingSafeEqual } from "node:crypto";
2
+ import { Webhook } from "svix";
3
+
4
+ /**
5
+ * The signature verification schemes understood by `defineWebhookSource`'s
6
+ * `auth.type: "signature"` variant. Each preset (Clerk, Supabase, Stripe,
7
+ * Segment) maps to one of these; the route resolves the secret from
8
+ * `env[auth.envKey]` and calls {@link verifySignature} BEFORE parsing/handing
9
+ * the payload to `transform()`.
10
+ *
11
+ * - `"svix"` — Standard Webhooks / Svix header set (`svix-id` /
12
+ * `svix-timestamp` / `svix-signature`). Reuses
13
+ * `svix`'s `Webhook.verify` (the same machinery `plugin-resend`
14
+ * uses for inbound Resend webhooks).
15
+ * - `"stripe"` — `stripe-signature: t=<ts>,v1=<hex>[,v1=<hex>...]`. Computes
16
+ * `HMAC_SHA256(secret, `${t}.${rawBody}`)` with `node:crypto`
17
+ * (NO `stripe` SDK), constant-time compares each `v1` candidate,
18
+ * and enforces the 5-minute timestamp tolerance.
19
+ * - `"hmac-hex"` — Generic `HMAC_SHA256(secret, rawBody)` rendered as lowercase
20
+ * hex, constant-time compared against the header value (e.g.
21
+ * Segment's `x-signature`).
22
+ */
23
export type SignatureScheme =
  | "svix"
  | "stripe"
  | "hmac-hex";

/** Inputs shared by every signature verifier in this module. */
export interface VerifySignatureArgs {
  /** Request body as a string; the HMAC-based schemes hash this value. */
  rawBody: string;
  /** Request headers; keys are lowercased by the verifiers before lookup. */
  headers: Record<string, string>;
  /** Signing secret used to verify the signature. */
  secret: string;
}

/** Maximum allowed clock difference for Stripe timestamps: 5 minutes. */
const STRIPE_TOLERANCE_SECONDS = 300;
32
+
33
/**
 * Return a copy of `headers` with every key lowercased, so lookups such as
 * `svix-id`, `stripe-signature` or `x-signature` succeed whatever casing the
 * caller passed in. If two keys differ only by case, the one iterated last
 * wins.
 *
 * @param headers Header record, possibly with Title-Case keys.
 * @returns A new record keyed by the lowercased header names.
 */
function lowerHeaders(headers: Record<string, string>): Record<string, string> {
  const normalized: Record<string, string> = {};
  Object.entries(headers).forEach(([name, value]) => {
    normalized[name.toLowerCase()] = value;
  });
  return normalized;
}
44
+
45
/**
 * Compare two strings by their UTF-8 bytes using `timingSafeEqual`.
 *
 * `timingSafeEqual` throws when the buffers differ in length, so a byte-length
 * mismatch is answered with `false` up front; callers therefore fail closed
 * instead of seeing an exception. Inputs of equal byte length are compared in
 * constant time.
 *
 * @param a First string.
 * @param b Second string.
 * @returns `true` only when both strings encode to identical bytes.
 */
function safeEqual(a: string, b: string): boolean {
  const left = Buffer.from(a, "utf8");
  const right = Buffer.from(b, "utf8");
  return left.length === right.length && timingSafeEqual(left, right);
}
57
+
58
/**
 * Verify a Svix-style signature using the `svix` library's `Webhook.verify`.
 *
 * @param args Raw body, headers and secret for the request.
 * @returns `false` when any of `svix-id`, `svix-timestamp` or `svix-signature`
 *   is missing or empty, or when constructing/verifying with `Webhook` throws;
 *   `true` otherwise.
 */
function verifySvix(args: VerifySignatureArgs): boolean {
  const {
    "svix-id": messageId,
    "svix-timestamp": messageTimestamp,
    "svix-signature": messageSignature,
  } = lowerHeaders(args.headers);

  if (!(messageId && messageTimestamp && messageSignature)) {
    return false;
  }

  try {
    // Any throw from the constructor or from verify() is a failed check.
    new Webhook(args.secret).verify(args.rawBody, {
      "svix-id": messageId,
      "svix-timestamp": messageTimestamp,
      "svix-signature": messageSignature,
    });
  } catch {
    return false;
  }
  return true;
}
80
+
81
/**
 * Verify a `stripe-signature: t=<ts>,v1=<hex>[,v1=<hex>...]` header.
 *
 * The expected signature is the hex HMAC-SHA256 of `${t}.${rawBody}` under
 * `args.secret`. The check passes when any `v1` candidate equals it and the
 * timestamp is within `STRIPE_TOLERANCE_SECONDS` of the current time.
 *
 * @param args Raw body, headers and secret for the request.
 * @returns `false` when the header is missing, has no `t` or no `v1` entry,
 *   the timestamp is not a finite number or is outside the tolerance window,
 *   or no candidate matches; `true` otherwise.
 */
function verifyStripe(args: VerifySignatureArgs): boolean {
  const header = lowerHeaders(args.headers)["stripe-signature"];
  if (!header) {
    return false;
  }

  // Split into key/value pairs; segments without "=" are ignored.
  const fields: Array<[string, string]> = [];
  for (const segment of header.split(",")) {
    const separator = segment.indexOf("=");
    if (separator >= 0) {
      fields.push([
        segment.slice(0, separator).trim(),
        segment.slice(separator + 1).trim(),
      ]);
    }
  }

  // The last `t` wins; every `v1` is a candidate. Other keys are ignored.
  const signedAt = fields
    .filter(([key]) => key === "t")
    .map(([, value]) => value)
    .pop();
  const candidates = fields
    .filter(([key]) => key === "v1")
    .map(([, value]) => value);

  if (!signedAt || candidates.length === 0) {
    return false;
  }

  const signedAtSeconds = Number(signedAt);
  if (!Number.isFinite(signedAtSeconds)) {
    return false;
  }

  // Reject timestamps too far from now, in either direction.
  const driftSeconds = Math.abs(
    Math.floor(Date.now() / 1000) - signedAtSeconds,
  );
  if (driftSeconds > STRIPE_TOLERANCE_SECONDS) {
    return false;
  }

  // The signed payload uses the timestamp exactly as it appeared in the header.
  const expected = createHmac("sha256", args.secret)
    .update(`${signedAt}.${args.rawBody}`)
    .digest("hex");

  return candidates.some((candidate) => safeEqual(candidate, expected));
}
126
+
127
/**
 * Verify a generic hex HMAC-SHA256 signature of the raw body.
 *
 * @param args Raw body, headers and secret for the request.
 * @param headerName Header carrying the hex digest; matched case-insensitively.
 * @returns `false` when the header is missing or empty, or when its trimmed
 *   value differs from the hex digest of `rawBody` under `secret`.
 */
function verifyHmacHex(args: VerifySignatureArgs, headerName: string): boolean {
  const presented = lowerHeaders(args.headers)[headerName.toLowerCase()];
  if (!presented) {
    return false;
  }

  const digest = createHmac("sha256", args.secret)
    .update(args.rawBody)
    .digest("hex");

  // `digest("hex")` is lowercase, so an uppercase header value will not match.
  return safeEqual(presented.trim(), digest);
}
140
+
141
/**
 * Verify an inbound provider webhook signature for the given scheme.
 *
 * FAILS CLOSED: returns `false` whenever a required header is missing, the
 * signature does not match, or `scheme` is not one of the known values (for
 * example an untyped caller passing an arbitrary string at runtime).
 *
 * This function does not check that `args.secret` is set. Callers are expected
 * to reject the request themselves when the secret is not configured, rather
 * than treat an unset secret as an open pass-through.
 *
 * @param scheme Which verification scheme to apply.
 * @param args Raw body, headers and secret for the request.
 * @param headerName Header carrying the hex digest for `"hmac-hex"`; defaults
 *   to `x-signature`. `"svix"` and `"stripe"` read their own well-known
 *   headers and ignore it.
 * @returns `true` only when the signature verifies under the chosen scheme.
 */
export function verifySignature(
  scheme: SignatureScheme,
  args: VerifySignatureArgs,
  headerName?: string,
): boolean {
  switch (scheme) {
    case "svix":
      return verifySvix(args);
    case "stripe":
      return verifyStripe(args);
    case "hmac-hex":
      return verifyHmacHex(args, headerName ?? "x-signature");
    default: {
      // Compile-time exhaustiveness guard. At runtime an unknown scheme must
      // be rejected: returning the guard variable itself would hand back the
      // scheme string, which is truthy and would read as "verified".
      const _exhaustive: never = scheme;
      void _exhaustive;
      return false;
    }
  }
}
package/src/worker.ts CHANGED
@@ -16,7 +16,15 @@ import {
16
16
  } from "./workflows/bucket-backfill.js";
17
17
  import { bucketReconcileTask } from "./workflows/bucket-reconcile.js";
18
18
  import { checkAlertsTask } from "./workflows/check-alerts.js";
19
+ import {
20
+ deliverWebhookTask,
21
+ reapDueWebhookDeliveriesTask,
22
+ } from "./workflows/deliver-webhook.js";
19
23
  import { importContactsTask } from "./workflows/import-contacts.js";
24
+ import {
25
+ reapStuckCampaignsTask,
26
+ sendCampaignTask,
27
+ } from "./workflows/send-campaign.js";
20
28
  import { sendEmailTask } from "./workflows/send-email.js";
21
29
 
22
30
  export interface CreateWorkerOptions {
@@ -62,6 +70,10 @@ export function createWorker(opts: CreateWorkerOptions): Worker {
62
70
  const baseWorkflows = [
63
71
  sendEmailTask,
64
72
  importContactsTask,
73
+ sendCampaignTask,
74
+ reapStuckCampaignsTask,
75
+ deliverWebhookTask,
76
+ reapDueWebhookDeliveriesTask,
65
77
  checkAlertsTask,
66
78
  bucketReconcileTask,
67
79
  bucketBackfillTask,
@@ -24,6 +24,7 @@ import {
24
24
  import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
25
25
  import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
26
26
  import { emitBucketTransition } from "../lib/bucket-emit.js";
27
+ import { contactKeySql } from "../lib/contacts.js";
27
28
  import { hatchet } from "../lib/hatchet.js";
28
29
  import type { Logger } from "../lib/logger.js";
29
30
  import { createLogger } from "../lib/logger.js";
@@ -231,16 +232,20 @@ async function backfillJoins(opts: {
231
232
  for (let i = 0; i < matcherIds.length; i += BATCH_SIZE) {
232
233
  const chunk = matcherIds.slice(i, i + BATCH_SIZE);
233
234
 
234
- // userEmail backfilled from the contacts row where available.
235
+ // userEmail backfilled from the contacts row where available. The chunk
236
+ // holds the RESOLVED key (coalesce(external_id, anonymous_id, id)) — for an
237
+ // email-only / anonymous contact that is the anonymous_id or the uuid id, NOT
238
+ // the (null) external_id. Looking up by `contacts.externalId` would miss
239
+ // those rows and write a NULL userEmail despite the contact having an email,
240
+ // so we key the lookup + the map by the SAME coalesce expression the chunk
241
+ // carries (matches reconcileBucketJoins, which reads userId + email off one
242
+ // contacts row).
243
+ const resolvedKey = contactKeySql();
235
244
  const chunkContacts = await db
236
- .select({ externalId: contacts.externalId, email: contacts.email })
245
+ .select({ userKey: resolvedKey, email: contacts.email })
237
246
  .from(contacts)
238
- .where(
239
- and(inArray(contacts.externalId, chunk), isNull(contacts.deletedAt)),
240
- );
241
- const emailByUser = new Map(
242
- chunkContacts.map((c) => [c.externalId, c.email]),
243
- );
247
+ .where(and(inArray(resolvedKey, chunk), isNull(contacts.deletedAt)));
248
+ const emailByUser = new Map(chunkContacts.map((c) => [c.userKey, c.email]));
244
249
 
245
250
  // Fix A: entryCount = 1 + prior memberships for each (user, bucket), the
246
251
  // same monotonic ordinal the live join computes (check-membership.ts). On a
@@ -456,7 +461,9 @@ async function selectEventMatchers(
456
461
  .as("present");
457
462
 
458
463
  const rows = await db
459
- .select({ userId: contacts.externalId })
464
+ .select({
465
+ userId: contactKeySql(),
466
+ })
460
467
  .from(contacts)
461
468
  .innerJoin(everFired, eq(everFired.userId, contacts.externalId))
462
469
  .leftJoin(present, eq(present.userId, contacts.externalId))
@@ -496,12 +503,15 @@ async function selectEventMatchers(
496
503
  * a per-contact `evaluateCondition` loop over live contacts. Property
497
504
  * sub-conditions evaluate against the contact's merged properties.
498
505
  *
499
- * KEYSET PAGINATION by `contacts.externalId` in BATCH_SIZE pages (mirrors
500
- * reconcileBucketJoins' `externalId asc` paging): each page selects
501
- * `WHERE externalId > :cursor ORDER BY externalId ASC LIMIT BATCH_SIZE`,
502
- * evaluates the criteria per contact, then advances the cursor to the last
503
- * externalId of the page repeating until a short page ends the scan. The whole
504
- * contacts table is never held in memory at once.
506
+ * KEYSET PAGINATION by `contacts.id` in BATCH_SIZE pages: each page selects
507
+ * `WHERE id > :cursor ORDER BY id ASC LIMIT BATCH_SIZE`, evaluates the criteria
508
+ * per contact, then advances the cursor to the last `id` of the page — repeating
509
+ * until a short page ends the scan. The whole contacts table is never held in
510
+ * memory at once. Paging on `id` (the non-null unique PK) NOT `external_id`,
511
+ * which is nullable (email-only / anonymous contacts) and would drop every
512
+ * null-external_id row and order NULLs unstably. (reconcileBucketJoins is not a
513
+ * keyset scan — it relies on matchers dropping out as they become active
514
+ * members — so this no longer mirrors it.)
505
515
  */
506
516
  async function selectCompositeMatchers(
507
517
  db: Database,
@@ -513,17 +523,18 @@ async function selectCompositeMatchers(
513
523
  for (;;) {
514
524
  const page = await db
515
525
  .select({
516
- externalId: contacts.externalId,
526
+ id: contacts.id,
527
+ userId: contactKeySql(),
517
528
  properties: contacts.properties,
518
529
  })
519
530
  .from(contacts)
520
531
  .where(
521
532
  and(
522
533
  isNull(contacts.deletedAt),
523
- cursor != null ? gt(contacts.externalId, cursor) : undefined,
534
+ cursor != null ? gt(contacts.id, cursor) : undefined,
524
535
  ),
525
536
  )
526
- .orderBy(sql`${contacts.externalId} asc`)
537
+ .orderBy(sql`${contacts.id} asc`)
527
538
  .limit(BATCH_SIZE);
528
539
 
529
540
  for (const contact of page) {
@@ -531,17 +542,17 @@ async function selectCompositeMatchers(
531
542
  condition: criteria,
532
543
  ctx: {
533
544
  db,
534
- userId: contact.externalId,
545
+ userId: contact.userId,
535
546
  journeyContext:
536
547
  (contact.properties as Record<string, unknown> | null) ?? {},
537
548
  },
538
549
  });
539
- if (isMember) matchers.push(contact.externalId);
550
+ if (isMember) matchers.push(contact.userId);
540
551
  }
541
552
 
542
553
  // A short page (fewer than a full batch) means the scan is exhausted.
543
554
  if (page.length < BATCH_SIZE) break;
544
- cursor = page[page.length - 1]?.externalId ?? null;
555
+ cursor = page[page.length - 1]?.id ?? null;
545
556
  if (cursor == null) break;
546
557
  }
547
558
 
@@ -41,6 +41,7 @@ import {
41
41
  import { getBucketRegistrySingleton } from "../buckets/registry-singleton.js";
42
42
  import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
43
43
  import { emitBucketTransition } from "../lib/bucket-emit.js";
44
+ import { contactKeySql } from "../lib/contacts.js";
44
45
  import { hatchet } from "../lib/hatchet.js";
45
46
  import type { Logger } from "../lib/logger.js";
46
47
  import { createLogger } from "../lib/logger.js";
@@ -841,18 +842,27 @@ async function reconcileBucketJoins(opts: {
841
842
  ? selectPresentInAllWindows(db, absenceLegs)
842
843
  : null;
843
844
 
845
+ // The membership/event tables key on the RESOLVED string key (external_id ??
846
+ // anonymous_id ?? contact.id), NOT necessarily external_id — email-only /
847
+ // anonymous contacts have a NULL external_id and are keyed on their uuid /
848
+ // anonymous_id. Joining on contacts.externalId would force external_id NOT NULL
849
+ // for every candidate (the coalesce would collapse to external_id) and silently
850
+ // drop exactly the dormant email-only contacts this cron exists to reconcile.
851
+ // Join on the SAME coalesce expression so the projected key matches the join.
852
+ const contactKey = contactKeySql();
853
+
844
854
  const baseQuery = db
845
855
  .select({
846
- userId: contacts.externalId,
856
+ userId: contactKey,
847
857
  email: contacts.email,
848
858
  })
849
859
  .from(contacts)
850
- .innerJoin(everFired, eq(everFired.userId, contacts.externalId))
851
- .leftJoin(activeMembers, eq(activeMembers.userId, contacts.externalId));
860
+ .innerJoin(everFired, eq(everFired.userId, contactKey))
861
+ .leftJoin(activeMembers, eq(activeMembers.userId, contactKey));
852
862
 
853
863
  const candidates = await (presentInAll
854
864
  ? baseQuery
855
- .leftJoin(presentInAll, eq(presentInAll.userId, contacts.externalId))
865
+ .leftJoin(presentInAll, eq(presentInAll.userId, contactKey))
856
866
  .where(
857
867
  and(
858
868
  isNull(contacts.deletedAt),
@@ -864,7 +874,12 @@ async function reconcileBucketJoins(opts: {
864
874
  and(isNull(contacts.deletedAt), isNull(activeMembers.userId)),
865
875
  )
866
876
  )
867
- .orderBy(sql`${contacts.externalId} asc`)
877
+ // Deterministic scan order for the bounded re-run (no keyset cursor; the
878
+ // scan advances as reconciled matchers become active members and drop out).
879
+ // Order by contacts.id (the non-null unique PK) so the scan is null-safe and
880
+ // stable even for null-external_id contacts now that the join is on the
881
+ // coalesce key.
882
+ .orderBy(sql`${contacts.id} asc`)
868
883
  .limit(BATCH_SIZE);
869
884
 
870
885
  // SET-BASED / EXACT shapes (Fix #3) — every candidate row is a true matcher,