@hogsend/engine 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/package.json +7 -6
  2. package/src/app.ts +36 -1
  3. package/src/buckets/check-membership.ts +34 -15
  4. package/src/container.ts +33 -0
  5. package/src/env.ts +29 -0
  6. package/src/index.ts +47 -1
  7. package/src/journeys/define-journey.ts +26 -2
  8. package/src/journeys/journey-context.ts +5 -1
  9. package/src/lib/boot.ts +1 -1
  10. package/src/lib/bucket-emit.ts +47 -2
  11. package/src/lib/contacts.ts +1105 -18
  12. package/src/lib/email-service-types.ts +8 -0
  13. package/src/lib/ingestion.ts +63 -33
  14. package/src/lib/mailer.ts +88 -0
  15. package/src/lib/outbound.ts +216 -0
  16. package/src/lib/preferences.ts +137 -0
  17. package/src/lib/tracked.ts +204 -37
  18. package/src/lib/tracking-events.ts +67 -2
  19. package/src/lib/webhook-signing.ts +151 -0
  20. package/src/lists/define-list.ts +81 -0
  21. package/src/lists/registry-singleton.ts +39 -0
  22. package/src/lists/registry.ts +95 -0
  23. package/src/middleware/api-key.ts +33 -7
  24. package/src/middleware/rate-limit.ts +73 -49
  25. package/src/routes/_shared.ts +30 -0
  26. package/src/routes/admin/api-keys.ts +1 -1
  27. package/src/routes/admin/bulk.ts +7 -3
  28. package/src/routes/admin/contacts.ts +108 -59
  29. package/src/routes/admin/events.ts +65 -0
  30. package/src/routes/admin/index.ts +2 -0
  31. package/src/routes/admin/journeys.ts +3 -1
  32. package/src/routes/admin/preferences.ts +2 -2
  33. package/src/routes/admin/reporting.ts +3 -3
  34. package/src/routes/admin/timeline.ts +5 -2
  35. package/src/routes/admin/webhooks.ts +466 -0
  36. package/src/routes/campaigns/index.ts +252 -0
  37. package/src/routes/contacts/index.ts +231 -0
  38. package/src/routes/email/preferences.ts +27 -3
  39. package/src/routes/email/unsubscribe.ts +7 -49
  40. package/src/routes/emails/index.ts +133 -0
  41. package/src/routes/events/index.ts +119 -0
  42. package/src/routes/index.ts +52 -2
  43. package/src/routes/lists/index.ts +258 -0
  44. package/src/routes/tracking/click.ts +59 -18
  45. package/src/routes/tracking/open.ts +62 -24
  46. package/src/routes/webhooks/sources.ts +69 -10
  47. package/src/webhook-sources/define-webhook-source.ts +57 -5
  48. package/src/webhook-sources/presets/clerk.ts +185 -0
  49. package/src/webhook-sources/presets/index.ts +80 -0
  50. package/src/webhook-sources/presets/segment.ts +120 -0
  51. package/src/webhook-sources/presets/stripe.ts +147 -0
  52. package/src/webhook-sources/presets/supabase.ts +131 -0
  53. package/src/webhook-sources/verify.ts +172 -0
  54. package/src/worker.ts +12 -0
  55. package/src/workflows/bucket-backfill.ts +32 -21
  56. package/src/workflows/bucket-reconcile.ts +20 -5
  57. package/src/workflows/deliver-webhook.ts +399 -0
  58. package/src/workflows/import-contacts.ts +28 -20
  59. package/src/workflows/send-campaign.ts +589 -0
  60. package/src/routes/ingest.ts +0 -71
@@ -0,0 +1,589 @@
1
+ import {
2
+ bucketMemberships,
3
+ campaigns,
4
+ contacts,
5
+ type Database,
6
+ emailPreferences,
7
+ } from "@hogsend/db";
8
+ import type { TemplateName } from "@hogsend/email";
9
+ import { and, eq, gt, inArray, isNull, lt, sql } from "drizzle-orm";
10
+ import { normalizeEmail } from "../lib/contacts.js";
11
+ import { getDb } from "../lib/db.js";
12
+ import { getEmailService } from "../lib/email.js";
13
+ import { hatchet } from "../lib/hatchet.js";
14
+ import { createLogger } from "../lib/logger.js";
15
+ import { getListRegistry } from "../lists/registry-singleton.js";
16
+
17
+ /** Rows fetched per keyset page and recipients dispatched per concurrent send batch. */
18
+ const CHUNK_SIZE = 100;
19
+
20
+ /** A resolved recipient — `email` is mandatory because every send is addressed to it. */
21
+ interface CampaignRecipient {
22
+ email: string; // used as `to`/`userEmail` and in the per-send idempotency key
23
+ userId?: string; // forwarded to the email service as `userId` when known
24
+ }
25
+
26
+ /** Statuses at which the send task returns early (`skipped: true`) instead of sending — only `sent`. */
27
+ const TERMINAL_STATUSES = ["sent"] as const;
28
+
29
+ /**
30
+ * Built-in durable campaign / broadcast task (Loops "campaign" parity). Sends a
31
+ * single template to every subscribed member of a list (or every active member
32
+ * of a bucket).
33
+ *
34
+ * Retry-safety: each send carries an idempotency key
35
+ * `campaign:<campaignId>:<email>` (email_sends.idempotency_key, migration 0015),
36
+ * so a Hatchet retry re-runs the whole loop but every already-dispatched send
37
+ * short-circuits to its prior row instead of dispatching a duplicate provider
38
+ * call. Counts are derived as-you-go from each `send()` result status — which is
39
+ * itself idempotency-aware (a retried send returns the prior row's status), so
40
+ * the tallies stay consistent across re-attempts. Final counts overwrite (not
41
+ * increment) the row, so a retry re-derives them from scratch rather than
42
+ * double-counting.
43
+ *
44
+ * Resume-on-retry: the terminal guard short-circuits ONLY a `sent` campaign — a
45
+ * `failed`/`sending` row is NOT terminal, so a Hatchet retry (or the reaper's
46
+ * re-enqueue) re-resolves the audience and re-loops. Already-dispatched sends
47
+ * no-op via the idempotency key, so the re-run safely completes the TAIL of a
48
+ * partial send instead of abandoning it. The catch block therefore does NOT
49
+ * stamp `failed` before re-throwing — that would make the retry short-circuit
50
+ * and silently under-deliver. A run that exhausts its retries is reaped to
51
+ * `failed`/re-enqueued by {@link reapStuckCampaignsTask}.
52
+ */
53
+ export const sendCampaignTask = hatchet.task({
54
+ name: "send-campaign",
55
+ // ONE durability re-attempt for a worker crash/timeout — the per-send
56
+ // idempotency key makes a re-run safe (no double-send). Not a transient-retry
57
+ // loop: the provider owns its own send backoff.
58
+ retries: 1,
59
+ executionTimeout: "600s",
60
+ fn: async (input: { campaignId: string }) => {
61
+ const db = getDb();
62
+ const logger = createLogger(process.env.LOG_LEVEL ?? "info");
63
+ const emailService = getEmailService();
64
+
65
+ const rows = await db
66
+ .select()
67
+ .from(campaigns)
68
+ .where(eq(campaigns.id, input.campaignId))
69
+ .limit(1);
70
+ const campaign = rows[0];
71
+ if (!campaign) {
72
+ logger.warn("send-campaign: campaign not found", {
73
+ campaignId: input.campaignId,
74
+ });
75
+ return { status: "failed", reason: "not_found" as const }; // returns rather than throws
76
+ }
77
+
78
+ // Terminal guard — a duplicate/late enqueue must not re-send. ONLY `sent`
79
+ // is terminal: a `failed`/`sending` row is intentionally re-runnable so a
80
+ // Hatchet retry (or a reaper re-enqueue) re-resolves the audience and
81
+ // completes the unsent TAIL of a partial send; already-sent recipients are
82
+ // expected to no-op via the per-send idempotency key.
83
+ if ((TERMINAL_STATUSES as readonly string[]).includes(campaign.status)) {
84
+ return { status: campaign.status, skipped: true };
85
+ }
86
+
87
+ await db
88
+ .update(campaigns)
89
+ .set({ status: "sending", startedAt: new Date(), updatedAt: new Date() }) // NOTE(review): a re-run overwrites the original startedAt
90
+ .where(eq(campaigns.id, input.campaignId));
91
+
92
+ let sentCount = 0;
93
+ let skippedCount = 0;
94
+ let failedCount = 0;
95
+ let totalRecipients = 0;
96
+
97
+ const flushCounts = async (): Promise<void> => { // writes absolute tallies (overwrite, never increment)
98
+ await db
99
+ .update(campaigns)
100
+ .set({
101
+ totalRecipients,
102
+ sentCount,
103
+ skippedCount,
104
+ failedCount,
105
+ updatedAt: new Date(),
106
+ })
107
+ .where(eq(campaigns.id, input.campaignId));
108
+ };
109
+
110
+ try {
111
+ const recipients =
112
+ campaign.audienceKind === "bucket"
113
+ ? resolveBucketRecipients(db, campaign.audienceId)
114
+ : resolveListRecipients(db, campaign.audienceId);
115
+
116
+ let chunk: CampaignRecipient[] = [];
117
+ for await (const recipient of recipients) {
118
+ chunk.push(recipient);
119
+ if (chunk.length < CHUNK_SIZE) continue;
120
+ await sendChunk();
121
+ }
122
+ // Flush whatever is left over after the last full chunk.
123
+ if (chunk.length > 0) await sendChunk();
124
+
125
+ async function sendChunk(): Promise<void> { // function declaration: hoisted within this block, so the calls above resolve
126
+ const batch = chunk;
127
+ chunk = [];
128
+ totalRecipients += batch.length;
129
+
130
+ const results = await Promise.allSettled(
131
+ batch.map((r) =>
132
+ emailService.send({
133
+ template: campaign?.templateKey as TemplateName,
134
+ props: (campaign?.props ?? {}) as never,
135
+ to: r.email,
136
+ userId: r.userId,
137
+ userEmail: r.email,
138
+ subject: campaign?.subject ?? undefined,
139
+ from: campaign?.fromEmail ?? undefined,
140
+ // A list's audienceId IS a real subscription category, so pass it
141
+ // through for suppression + the unsubscribe link. A bucket's
142
+ // audienceId is NOT a category — forcing it here would mint an
143
+ // unsubscribe link keyed on the bucket id (`categories[bucketId] =
144
+ // false`) that the bucket resolver never honors (it only checks
145
+ // unsubscribedAll/suppressed), silently no-op'ing the unsubscribe.
146
+ // For a bucket, pass undefined so the template's OWN declared
147
+ // category (e.g. `product-updates`) drives both suppression and a
148
+ // real, honored List-Unsubscribe target.
149
+ category:
150
+ campaign?.audienceKind === "bucket"
151
+ ? undefined
152
+ : campaign?.audienceId,
153
+ // Keyed on campaign id + recipient email so a retried send maps to its prior row.
154
+ idempotencyKey: `campaign:${input.campaignId}:${r.email}`,
155
+ }),
156
+ ),
157
+ );
158
+
159
+ for (const result of results) {
160
+ if (result.status === "rejected") { // send() threw/rejected → counted as a failure
161
+ failedCount++;
162
+ continue;
163
+ }
164
+ const status = result.value.status;
165
+ if (status === "sent") {
166
+ sentCount++;
167
+ } else {
168
+ // Any other resolved status (suppressed | unsubscribed | skipped, i.e.
169
+ // frequency-capped) is tallied as skipped, not as a delivery failure.
170
+ skippedCount++;
171
+ }
172
+ }
173
+
174
+ await flushCounts(); // persist progress after every chunk (also bumps updatedAt)
175
+ }
176
+
177
+ await db
178
+ .update(campaigns)
179
+ .set({
180
+ status: "sent",
181
+ completedAt: new Date(),
182
+ totalRecipients,
183
+ sentCount,
184
+ skippedCount,
185
+ failedCount,
186
+ updatedAt: new Date(),
187
+ })
188
+ .where(eq(campaigns.id, input.campaignId));
189
+
190
+ logger.info("send-campaign: complete", {
191
+ campaignId: input.campaignId,
192
+ totalRecipients,
193
+ sentCount,
194
+ skippedCount,
195
+ failedCount,
196
+ });
197
+
198
+ return {
199
+ status: "sent" as const,
200
+ totalRecipients,
201
+ sentCount,
202
+ skippedCount,
203
+ failedCount,
204
+ };
205
+ } catch (error) {
206
+ // Do NOT stamp `failed` here. A `failed` stamp before the re-throw makes
207
+ // the single Hatchet retry hit the terminal guard and short-circuit
208
+ // WITHOUT sending the remaining recipients — silently abandoning the tail
209
+ // of a partial send. Instead we persist the progress counts, leave the
210
+ // status `sending` (re-runnable), and re-throw so the genuine retry
211
+ // re-enters the loop and finishes the unsent tail (already-sent recipients
212
+ // no-op via their idempotency key). A run that EXHAUSTS its retries is
213
+ // transitioned to `failed` (or re-enqueued) by `reapStuckCampaignsTask`.
214
+ await db
215
+ .update(campaigns)
216
+ .set({
217
+ totalRecipients,
218
+ sentCount,
219
+ skippedCount,
220
+ failedCount,
221
+ updatedAt: new Date(),
222
+ })
223
+ .where(eq(campaigns.id, input.campaignId));
224
+
225
+ logger.error("send-campaign: errored mid-run (will retry)", {
226
+ campaignId: input.campaignId,
227
+ error: error instanceof Error ? error.message : String(error),
228
+ });
229
+ throw error; // re-throw so the task run is reported as failed
230
+ }
231
+ },
232
+ });
233
+
234
+ /**
235
+ * How long a campaign may sit in a non-terminal in-flight state (`queued` /
236
+ * `sending`) before the reaper treats it as STALE and re-drives it. Must be
237
+ * comfortably longer than the send task's `executionTimeout` (600s) so a
238
+ * still-running send is not re-enqueued underneath itself (the per-send
239
+ * idempotency key makes an overlap harmless anyway). Env override is in ms.
240
+ */
241
+ const STALE_AFTER_MS =
242
+ // Unset, empty, non-numeric, zero or negative overrides use the default.
243
+ Math.max(0, Number(process.env.CAMPAIGN_STALE_AFTER_MS)) || 15 * 60 * 1000;
244
+
245
+ /**
246
+ * After a campaign has sat in a non-terminal in-flight state this long (measured
247
+ * from `updatedAt`) it is declared `failed` rather than re-enqueued forever.
248
+ * NOTE(review): every send-task flush AND every reaper re-enqueue bumps
249
+ * `updatedAt`, restarting this clock — confirm a poison campaign can reach it.
250
+ */
251
+ const GIVE_UP_AFTER_MS =
252
+ // Unset, empty, non-numeric, zero or negative overrides use the default.
253
+ Math.max(0, Number(process.env.CAMPAIGN_GIVE_UP_AFTER_MS)) || 6 * 60 * 60 * 1000;
254
+
255
+ /**
256
+ * Engine-owned reaper cron for campaigns left in a non-terminal in-flight state
257
+ * with no live run to finish them (closes the "stuck forever" gap):
258
+ *
259
+ * - A `sending` campaign whose worker was hard-killed (OOM/SIGKILL/pod
260
+ * eviction) or whose run exceeded `executionTimeout` AFTER its retry — the JS
261
+ * catch never ran, so the row is stuck `sending` with no live run.
262
+ * - A `queued` campaign whose enqueue threw at create time (broker down /
263
+ * network) — the row was committed but no run was ever created (orphan).
264
+ *
265
+ * Recovery is a fire-and-forget RE-ENQUEUE of `sendCampaignTask` via
266
+ * `runNoWait` (safe: the per-send idempotency key no-ops already-sent
267
+ * recipients and the re-run completes the unsent tail). A campaign that stays
268
+ * stuck past `GIVE_UP_AFTER_MS` is declared `failed` and surfaces to operators.
269
+ *
270
+ * Self-bootstraps `db` (memoized `getDb()` singleton) / `logger` from
271
+ * `process.env` (cron runs have no request container), cloned from
272
+ * `bucket-reconcile.ts`. NOTE(review): no `concurrency` option is set on this
273
+ * task, so single-flight (non-overlapping) sweeps are not enforced here.
274
+ */
275
+ export const reapStuckCampaignsTask = hatchet.task({
276
+ name: "reap-stuck-campaigns",
277
+ onCrons: [process.env.CAMPAIGN_REAPER_CRON ?? "*/5 * * * *"],
278
+ retries: 1,
279
+ executionTimeout: "120s",
280
+ fn: async () => {
281
+ const db = getDb();
282
+ const logger = createLogger(process.env.LOG_LEVEL ?? "info");
283
+
284
+ const now = Date.now();
285
+ const staleBefore = new Date(now - STALE_AFTER_MS);
286
+ const giveUpBefore = new Date(now - GIVE_UP_AFTER_MS);
287
+
288
+ // (1) Declare poison campaigns `failed` first (stuck past the give-up
289
+ // window), so they are not re-enqueued below.
290
+ const failedRows = await db
291
+ .update(campaigns)
292
+ .set({ status: "failed", completedAt: new Date(), updatedAt: new Date() })
293
+ .where(
294
+ and(
295
+ inArray(campaigns.status, ["queued", "sending"]),
296
+ lt(campaigns.updatedAt, giveUpBefore),
297
+ ),
298
+ )
299
+ .returning({ id: campaigns.id });
300
+
301
+ // (2) Re-enqueue stale-but-not-poison in-flight campaigns. The CAS bumps
302
+ // `updatedAt` so the same row is not re-picked on the very next tick before
303
+ // the re-driven run makes progress. NOTE(review): that bump also restarts the
304
+ // give-up clock used in (1) — confirm poison campaigns can still reach it.
305
+ const staleRows = await db
306
+ .update(campaigns)
307
+ .set({ updatedAt: new Date() })
308
+ .where(
309
+ and(
310
+ inArray(campaigns.status, ["queued", "sending"]),
311
+ lt(campaigns.updatedAt, staleBefore),
312
+ ),
313
+ )
314
+ .returning({ id: campaigns.id });
315
+
316
+ for (const row of staleRows) {
317
+ try {
318
+ await sendCampaignTask.runNoWait({ campaignId: row.id }); // enqueue only — `run` would await the whole send
319
+ } catch (err) {
320
+ logger.warn("reap-stuck-campaigns: re-enqueue failed", {
321
+ campaignId: row.id,
322
+ error: err instanceof Error ? err.message : String(err),
323
+ });
324
+ }
325
+ }
326
+
327
+ if (failedRows.length > 0 || staleRows.length > 0) {
328
+ logger.info("reap-stuck-campaigns: swept", {
329
+ failed: failedRows.length,
330
+ reEnqueued: staleRows.length,
331
+ });
332
+ }
333
+
334
+ return {
335
+ failed: failedRows.length,
336
+ reEnqueued: staleRows.length,
337
+ };
338
+ },
339
+ });
340
+
341
+ /**
342
+ * Single-sourced keyset-pagination control flow shared by every recipient
343
+ * resolver. Owns the cursor lifecycle (init → page → empty/short-page break →
344
+ * advance) so the paging invariants live in ONE place; each resolver supplies
345
+ * only its `page(cursor)` query (which owns its own `where`/`orderBy`/`limit`),
346
+ * a `cursorOf(row)` extractor for the keyset column, and a `map(row)` that turns
347
+ * a row into a recipient (or `undefined` to skip it, e.g. a null email or an
348
+ * opt-in row that isn't actually subscribed). Breaks on an empty page OR a page
349
+ * shorter than `CHUNK_SIZE` (the last page), then advances to the last row's
350
+ * cursor — bailing if that cursor is missing to avoid an infinite loop.
351
+ */
352
+ async function* keysetPaginate<Row>(opts: {
353
+ page: (cursor: string | undefined) => Promise<Row[]>;
354
+ cursorOf: (row: Row) => string | undefined;
355
+ map: (row: Row) => CampaignRecipient | undefined;
356
+ }): AsyncGenerator<CampaignRecipient> {
357
+ let after: string | undefined; // undefined → first page
358
+ do {
359
+ const batch = await opts.page(after);
360
+ // Yield every row the resolver maps to a recipient; `undefined` means skip.
361
+ for (const entry of batch) {
362
+ const mapped = opts.map(entry);
363
+ if (mapped) yield mapped;
364
+ }
365
+ // An empty or short page is the last page — nothing further to fetch.
366
+ if (batch.length < CHUNK_SIZE) return;
367
+ // Advance the keyset to the final row of this (full) page.
368
+ after = opts.cursorOf(batch[batch.length - 1] as Row);
369
+ // A missing cursor ends the loop instead of re-reading the first page.
370
+ } while (after);
371
+ }
372
+
373
+ /**
374
+ * Active, non-deleted members of a bucket, joined to a live contact for the
375
+ * email — mirrors the bucket-access member query. Paged by the keyset cursor on
376
+ * `bucket_memberships.id`.
377
+ *
378
+ * Compliance: `bucket_memberships.userEmail` is written verbatim from the RAW
379
+ * event payload on the realtime join path (un-normalized, unlike
380
+ * `contacts.email`), so the recipient email is NORMALIZED (`normalizeEmail`)
381
+ * before it is yielded — otherwise a mixed-case membership email
382
+ * (`User@Example.com`) would not case-match its NORMALIZED `email_preferences`
383
+ * row (`user@example.com`) and the mailer's case-sensitive suppression check
384
+ * would MISS the row, leaking a marketing blast to a suppressed/unsubscribed
385
+ * contact (CAN-SPAM/GDPR). A BLANK membership email (empty / whitespace-only)
386
+ * is treated as absent and falls back to the contact email, in both the SQL
387
+ * pre-filter and the mapper. Defense-in-depth: this resolver ALSO pre-filters
388
+ * `unsubscribedAll`/`suppressed` at the audience layer via a correlated NOT
389
+ * EXISTS against `email_preferences` on the NORMALIZED email, so a globally-
390
+ * unsubscribed / suppressed member is excluded up front rather than relying
391
+ * solely on the per-send mailer check.
392
+ */
393
+ async function* resolveBucketRecipients(
394
+ db: Database,
395
+ bucketId: string,
396
+ ): AsyncGenerator<CampaignRecipient> {
397
+ // The recipient's normalized email — the membership email may be mixed-case or
398
+ // blank (written verbatim from the raw event); the contact email is the fallback.
399
+ const recipientEmail = sql<string>`lower(trim(coalesce(nullif(trim(${bucketMemberships.userEmail}), ''), ${contacts.email})))`;
400
+
401
+ yield* keysetPaginate({
402
+ page: (cursor) => {
403
+ const conditions = [
404
+ eq(bucketMemberships.bucketId, bucketId),
405
+ eq(bucketMemberships.status, "active"),
406
+ isNull(bucketMemberships.deletedAt),
407
+ isNull(contacts.deletedAt),
408
+ // Exclude globally-unsubscribed / suppressed members up front via a
409
+ // correlated NOT EXISTS (an EXISTS subquery, NOT a JOIN, so a member
410
+ // with two prefs rows sharing the email is not fanned out into
411
+ // duplicate recipients). An absent prefs row matches nothing → the
412
+ // member is included (subscribed-by-default), mirroring the list
413
+ // resolver's stance. Keyed on lower(email) so a mixed-case membership
414
+ // email still matches its normalized prefs row (CAN-SPAM/GDPR: see the
415
+ // fn docstring).
416
+ sql`not exists (
417
+ select 1 from ${emailPreferences}
418
+ where lower(${emailPreferences.email}) = ${recipientEmail}
419
+ and (${emailPreferences.unsubscribedAll} = true
420
+ or ${emailPreferences.suppressed} = true)
421
+ )`,
422
+ ];
423
+ if (cursor) conditions.push(gt(bucketMemberships.id, cursor));
424
+
425
+ return db
426
+ .select({
427
+ id: bucketMemberships.id,
428
+ userId: bucketMemberships.userId,
429
+ membershipEmail: bucketMemberships.userEmail,
430
+ contactEmail: contacts.email,
431
+ })
432
+ .from(bucketMemberships)
433
+ .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
434
+ .where(and(...conditions))
435
+ .orderBy(bucketMemberships.id)
436
+ .limit(CHUNK_SIZE);
437
+ },
438
+ cursorOf: (row) => row.id,
439
+ map: (row) => {
440
+ const raw = row.membershipEmail?.trim() || row.contactEmail; // blank membership email → contact email
441
+ if (!raw) return undefined;
442
+ // Normalize so the recipient matches the normalized email_preferences
443
+ // keyspace the mailer's suppression check queries (see fn docstring).
444
+ return { email: normalizeEmail(raw), userId: row.userId };
445
+ },
446
+ });
447
+ }
448
+
449
+ /**
450
+ * Subscribed recipients of a list. A list shares the
451
+ * `email_preferences.categories` JSONB namespace, so subscription is the LOCKED
452
+ * polarity rule (`ListRegistry.isSubscribed`). The resolution STRATEGY depends
453
+ * on the list's default polarity so the audience matches that single source of
454
+ * truth EXACTLY — the same rule the mailer's per-send suppression check applies:
455
+ *
456
+ * - OPT-OUT list (`defaultOptIn: true`, e.g. a newsletter): a contact is
457
+ * subscribed UNLESS `categories[id] === false`. The audience is therefore
458
+ * "all contacts minus those who opted out", INCLUDING the common case of a
459
+ * contact with NO preferences row at all (subscribed by default). Scanning
460
+ * `email_preferences` alone would silently under-deliver to roughly only the
461
+ * subset that touched the preference center, so we resolve from `contacts`
462
+ * with a NOT EXISTS filter on `email_preferences` that excludes opted-out /
463
+ * unsubscribed / suppressed rows.
464
+ *
465
+ * - OPT-IN list (`defaultOptIn: false`, must explicitly join): a contact is
466
+ * subscribed only when `categories[id] === true` — an explicit membership
467
+ * signal is REQUIRED. The audience is exactly the `email_preferences` rows
468
+ * carrying that explicit `true`, so a `contacts`-wide scan would be both
469
+ * wasteful and wrong (it would reach contacts who never opted in). We scan
470
+ * `email_preferences` directly.
471
+ *
472
+ * Either way globally-unsubscribed (`unsubscribedAll`) and suppressed
473
+ * (bounce/complaint) contacts are excluded up front — the mailer's own check
474
+ * would catch them, but skipping here avoids a wasted send + a `failed`
475
+ * email_sends row.
476
+ */
477
+ async function* resolveListRecipients(
478
+ db: Database,
479
+ listId: string,
480
+ ): AsyncGenerator<CampaignRecipient> {
481
+ // Default polarity picks the strategy: an opt-OUT list (subscribed by default)
482
+ // scans `contacts`; an opt-IN list scans `email_preferences`.
483
+ const optOutList = getListRegistry().isSubscribedByDefault(listId);
484
+ const resolver = optOutList
485
+ ? resolveOptOutListRecipients
486
+ : resolveOptInListRecipients;
487
+
488
+ yield* resolver(db, listId);
489
+ }
490
+
491
+ /**
492
+ * Opt-IN list resolver (`defaultOptIn: false`): an explicit `categories[id] ===
493
+ * true` is required, so the `email_preferences` scan is both correct and the
494
+ * narrowest possible audience. Paged by the keyset cursor on
495
+ * `email_preferences.id`.
496
+ */
497
+ async function* resolveOptInListRecipients(
498
+ db: Database,
499
+ listId: string,
500
+ ): AsyncGenerator<CampaignRecipient> {
501
+ const registry = getListRegistry();
502
+ const columns = {
503
+ id: emailPreferences.id,
504
+ userId: emailPreferences.userId,
505
+ email: emailPreferences.email,
506
+ categories: emailPreferences.categories,
507
+ };
508
+ yield* keysetPaginate({
509
+ page: (cursor) =>
510
+ db
511
+ .select(columns)
512
+ .from(emailPreferences)
513
+ .where(
514
+ and(
515
+ eq(emailPreferences.unsubscribedAll, false),
516
+ eq(emailPreferences.suppressed, false),
517
+ cursor ? gt(emailPreferences.id, cursor) : undefined,
518
+ ),
519
+ )
520
+ .orderBy(emailPreferences.id)
521
+ .limit(CHUNK_SIZE),
522
+ cursorOf: (row) => row.id,
523
+ // Rows the registry does not consider subscribed to this list are skipped.
524
+ map: (row) =>
525
+ registry.isSubscribed((row.categories ?? {}) as Record<string, boolean>, listId)
526
+ ? { email: normalizeEmail(row.email), userId: row.userId }
527
+ : undefined,
528
+ });
529
+ }
530
+
531
+ /**
532
+ * Opt-OUT list resolver (`defaultOptIn: true`): yields every live contact that
533
+ * has an email EXCEPT those who explicitly opted out of this list, are globally
534
+ * unsubscribed, or are suppressed. Scans `contacts` and anti-joins
535
+ * `email_preferences` with NOT EXISTS (a contact with NO prefs row is subscribed
536
+ * by default and MUST be reachable); paged by the keyset cursor on `contacts.id`.
537
+ */
538
+ async function* resolveOptOutListRecipients(
539
+ db: Database,
540
+ listId: string,
541
+ ): AsyncGenerator<CampaignRecipient> {
542
+ const loweredContactEmail = sql<string>`lower(${contacts.email})`;
543
+ // Anti-join on email_preferences, written as a correlated NOT EXISTS rather
544
+ // than a JOIN so a contact whose email maps to several prefs rows is never
545
+ // fanned out into duplicate recipients. No prefs row → nothing matches → the
546
+ // contact stays in the audience (subscribed by default). A prefs row blocks
547
+ // the contact when it is globally unsubscribed, suppressed, or carries an
548
+ // explicit `categories[listId] === false` for THIS list.
549
+ const notBlocked = sql`not exists (
550
+ select 1 from ${emailPreferences}
551
+ where lower(${emailPreferences.email}) = ${loweredContactEmail}
552
+ and (${emailPreferences.unsubscribedAll} = true
553
+ or ${emailPreferences.suppressed} = true
554
+ or (${emailPreferences.categories} ->> ${listId})::boolean = false)
555
+ )`;
556
+ const columns = {
557
+ id: contacts.id,
558
+ userId: contacts.externalId,
559
+ contactId: contacts.id,
560
+ email: contacts.email,
561
+ };
562
+
563
+ yield* keysetPaginate({
564
+ page: (cursor) =>
565
+ db
566
+ .select(columns)
567
+ .from(contacts)
568
+ .where(
569
+ and(
570
+ isNull(contacts.deletedAt),
571
+ sql`${contacts.email} is not null`,
572
+ notBlocked,
573
+ cursor ? gt(contacts.id, cursor) : undefined,
574
+ ),
575
+ )
576
+ .orderBy(contacts.id)
577
+ .limit(CHUNK_SIZE),
578
+ cursorOf: (row) => row.id,
579
+ // userId falls back to the contact id when there is no external id, mirroring
580
+ // the email_sends user_id fallback (externalId ?? contactId).
581
+ map: (row) =>
582
+ row.email
583
+ ? {
584
+ email: normalizeEmail(row.email),
585
+ userId: row.userId ?? row.contactId,
586
+ }
587
+ : undefined,
588
+ });
589
+ }
@@ -1,71 +0,0 @@
1
- import { createRoute, OpenAPIHono, z } from "@hono/zod-openapi";
2
- import type { AppEnv } from "../app.js";
3
- import { ingestEvent } from "../lib/ingestion.js";
4
-
5
- const ingestRequestSchema = z.object({
6
- event: z.string().min(1),
7
- userId: z.string().min(1),
8
- userEmail: z.string().email().optional(),
9
- properties: z.record(z.string(), z.unknown()).optional(),
10
- idempotencyKey: z.string().optional(),
11
- timestamp: z.string().datetime().optional(),
12
- });
13
-
14
- const ingestResponseSchema = z.object({
15
- stored: z.boolean(),
16
- exits: z.array(
17
- z.object({
18
- journeyId: z.string(),
19
- stateId: z.string(),
20
- exited: z.boolean(),
21
- }),
22
- ),
23
- });
24
-
25
- const ingestRoute = createRoute({
26
- method: "post",
27
- path: "/",
28
- tags: ["Ingestion"],
29
- summary: "Ingest an event",
30
- description:
31
- "Receives events from direct API calls. Stores the event, pushes it to Hatchet for journey routing, and processes exit conditions.",
32
- request: {
33
- body: {
34
- content: {
35
- "application/json": { schema: ingestRequestSchema },
36
- },
37
- },
38
- },
39
- responses: {
40
- 202: {
41
- content: {
42
- "application/json": { schema: ingestResponseSchema },
43
- },
44
- description: "Event accepted and dispatched",
45
- },
46
- },
47
- });
48
-
49
- export const ingestRouter = new OpenAPIHono<AppEnv>().openapi(
50
- ingestRoute,
51
- async (c) => {
52
- const body = c.req.valid("json");
53
- const { db, registry, hatchet, logger } = c.get("container");
54
-
55
- const result = await ingestEvent({
56
- db,
57
- registry,
58
- hatchet,
59
- logger,
60
- event: {
61
- event: body.event,
62
- userId: body.userId,
63
- userEmail: body.userEmail ?? "",
64
- properties: body.properties ?? {},
65
- idempotencyKey: body.idempotencyKey,
66
- },
67
- });
68
-
69
- return c.json(result, 202);
70
- },
71
- );