@hogsend/engine 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/buckets/check-membership.ts +34 -15
- package/src/container.ts +33 -0
- package/src/env.ts +4 -0
- package/src/index.ts +13 -0
- package/src/journeys/journey-context.ts +5 -1
- package/src/lib/boot.ts +1 -1
- package/src/lib/bucket-emit.ts +2 -2
- package/src/lib/contacts.ts +1083 -18
- package/src/lib/email-service-types.ts +8 -0
- package/src/lib/ingestion.ts +63 -33
- package/src/lib/mailer.ts +1 -0
- package/src/lib/preferences.ts +106 -0
- package/src/lib/tracked.ts +159 -34
- package/src/lib/tracking-events.ts +1 -1
- package/src/lists/define-list.ts +81 -0
- package/src/lists/registry-singleton.ts +39 -0
- package/src/lists/registry.ts +95 -0
- package/src/middleware/api-key.ts +33 -7
- package/src/middleware/rate-limit.ts +73 -49
- package/src/routes/_shared.ts +30 -0
- package/src/routes/admin/api-keys.ts +1 -1
- package/src/routes/admin/bulk.ts +7 -3
- package/src/routes/admin/contacts.ts +66 -57
- package/src/routes/admin/events.ts +65 -0
- package/src/routes/admin/journeys.ts +3 -1
- package/src/routes/admin/preferences.ts +2 -2
- package/src/routes/admin/reporting.ts +3 -3
- package/src/routes/admin/timeline.ts +5 -2
- package/src/routes/campaigns/index.ts +252 -0
- package/src/routes/contacts/index.ts +188 -0
- package/src/routes/email/preferences.ts +27 -3
- package/src/routes/email/unsubscribe.ts +7 -49
- package/src/routes/emails/index.ts +133 -0
- package/src/routes/events/index.ts +119 -0
- package/src/routes/index.ts +52 -2
- package/src/routes/lists/index.ts +222 -0
- package/src/worker.ts +6 -0
- package/src/workflows/bucket-backfill.ts +32 -21
- package/src/workflows/bucket-reconcile.ts +20 -5
- package/src/workflows/import-contacts.ts +28 -20
- package/src/workflows/send-campaign.ts +589 -0
- package/src/routes/ingest.ts +0 -71
|
@@ -0,0 +1,589 @@
|
|
|
1
|
+
import {
|
|
2
|
+
bucketMemberships,
|
|
3
|
+
campaigns,
|
|
4
|
+
contacts,
|
|
5
|
+
type Database,
|
|
6
|
+
emailPreferences,
|
|
7
|
+
} from "@hogsend/db";
|
|
8
|
+
import type { TemplateName } from "@hogsend/email";
|
|
9
|
+
import { and, eq, gt, inArray, isNull, lt, sql } from "drizzle-orm";
|
|
10
|
+
import { normalizeEmail } from "../lib/contacts.js";
|
|
11
|
+
import { getDb } from "../lib/db.js";
|
|
12
|
+
import { getEmailService } from "../lib/email.js";
|
|
13
|
+
import { hatchet } from "../lib/hatchet.js";
|
|
14
|
+
import { createLogger } from "../lib/logger.js";
|
|
15
|
+
import { getListRegistry } from "../lists/registry-singleton.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Number of rows fetched per recipient page, and the number of recipients
 * dispatched together in one send batch.
 */
const CHUNK_SIZE = 100;

/**
 * One resolved send target. `email` is always present; `userId` is optional
 * because not every recipient source carries one.
 */
interface CampaignRecipient {
  email: string;
  userId?: string;
}

/**
 * Campaign statuses treated as terminal: a run that finds its campaign in one
 * of these states returns early instead of sending again. Only `sent`
 * qualifies.
 */
const TERMINAL_STATUSES = ["sent"] as const;
|
|
28
|
+
|
|
29
|
+
/**
 * Built-in durable campaign / broadcast task. Sends one template to every
 * recipient resolved for the campaign's audience: the members of a bucket when
 * `audienceKind === "bucket"`, otherwise the subscribers of a list.
 *
 * Retry-safety: every send carries the idempotency key
 * `campaign:<campaignId>:<email>`. The design relies on `emailService.send`
 * resolving a repeated key to its prior result rather than dispatching again,
 * so a re-run of the whole loop does not double-send. Counters are rebuilt from
 * zero on each run and OVERWRITE the row (never increment), so a retry
 * re-derives them instead of double-counting.
 *
 * Resume-on-retry: only a `sent` campaign short-circuits. A `failed` or
 * `sending` row is deliberately re-runnable so a Hatchet retry (or the
 * reaper's re-enqueue) re-resolves the audience and finishes the unsent tail.
 * For that reason the error path does NOT stamp `failed` before re-throwing;
 * a run that exhausts its retries is handled by `reapStuckCampaignsTask`.
 *
 * @param input.campaignId Primary key of the `campaigns` row to send.
 * @returns `{ status: "failed", reason: "not_found" }` when the row is missing,
 *   `{ status, skipped: true }` when the campaign is already terminal, or the
 *   final tallies with `status: "sent"`.
 * @throws Re-throws any error raised while resolving or sending, after a
 *   best-effort flush of the progress counters.
 */
export const sendCampaignTask = hatchet.task({
  name: "send-campaign",
  // ONE durability re-attempt for a worker crash/timeout — the per-send
  // idempotency key makes a re-run safe. Not a transient-retry loop.
  retries: 1,
  executionTimeout: "600s",
  fn: async (input: { campaignId: string }) => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");
    const emailService = getEmailService();

    const rows = await db
      .select()
      .from(campaigns)
      .where(eq(campaigns.id, input.campaignId))
      .limit(1);
    const campaign = rows[0];
    if (!campaign) {
      logger.warn("send-campaign: campaign not found", {
        campaignId: input.campaignId,
      });
      return { status: "failed", reason: "not_found" as const };
    }

    // Already terminal — a duplicate/late enqueue must not re-send.
    if ((TERMINAL_STATUSES as readonly string[]).includes(campaign.status)) {
      return { status: campaign.status, skipped: true };
    }

    await db
      .update(campaigns)
      .set({ status: "sending", startedAt: new Date(), updatedAt: new Date() })
      .where(eq(campaigns.id, input.campaignId));

    let sentCount = 0;
    let skippedCount = 0;
    let failedCount = 0;
    let totalRecipients = 0;

    // Persists the running tallies. Bumping `updatedAt` here is what the
    // reaper reads as a progress heartbeat.
    const flushCounts = async (): Promise<void> => {
      await db
        .update(campaigns)
        .set({
          totalRecipients,
          sentCount,
          skippedCount,
          failedCount,
          updatedAt: new Date(),
        })
        .where(eq(campaigns.id, input.campaignId));
    };

    // Dispatches one batch concurrently, folds the outcomes into the tallies
    // and flushes them. Declared before the loop (rather than as a hoisted
    // function declaration) so `campaign` keeps its narrowed, non-null type.
    const sendBatch = async (batch: CampaignRecipient[]): Promise<void> => {
      totalRecipients += batch.length;

      const results = await Promise.allSettled(
        batch.map((r) =>
          emailService.send({
            template: campaign.templateKey as TemplateName,
            props: (campaign.props ?? {}) as never,
            to: r.email,
            userId: r.userId,
            userEmail: r.email,
            subject: campaign.subject ?? undefined,
            from: campaign.fromEmail ?? undefined,
            // A list's audienceId doubles as the subscription category, so it
            // is passed through. A bucket id is not a category: passing
            // undefined leaves the category decision to the email service.
            category:
              campaign.audienceKind === "bucket"
                ? undefined
                : campaign.audienceId,
            // Dedupes a retried send to its prior row.
            idempotencyKey: `campaign:${input.campaignId}:${r.email}`,
          }),
        ),
      );

      for (const result of results) {
        if (result.status === "rejected") {
          failedCount++;
        } else if (result.value.status === "sent") {
          sentCount++;
        } else {
          // Any other fulfilled status is counted as skipped, not as a
          // delivery failure.
          skippedCount++;
        }
      }

      await flushCounts();
    };

    try {
      const recipients =
        campaign.audienceKind === "bucket"
          ? resolveBucketRecipients(db, campaign.audienceId)
          : resolveListRecipients(db, campaign.audienceId);

      let chunk: CampaignRecipient[] = [];
      for await (const recipient of recipients) {
        chunk.push(recipient);
        if (chunk.length < CHUNK_SIZE) continue;
        const full = chunk;
        chunk = [];
        await sendBatch(full);
      }
      // Final partial chunk.
      if (chunk.length > 0) await sendBatch(chunk);

      await db
        .update(campaigns)
        .set({
          status: "sent",
          completedAt: new Date(),
          totalRecipients,
          sentCount,
          skippedCount,
          failedCount,
          updatedAt: new Date(),
        })
        .where(eq(campaigns.id, input.campaignId));

      logger.info("send-campaign: complete", {
        campaignId: input.campaignId,
        totalRecipients,
        sentCount,
        skippedCount,
        failedCount,
      });

      return {
        status: "sent" as const,
        totalRecipients,
        sentCount,
        skippedCount,
        failedCount,
      };
    } catch (error) {
      // Log the real failure FIRST, so it is never lost if the progress flush
      // below also fails (e.g. the database itself is the cause).
      logger.error("send-campaign: errored mid-run (will retry)", {
        campaignId: input.campaignId,
        error: error instanceof Error ? error.message : String(error),
      });

      // Best-effort progress flush. The status is intentionally left as
      // `sending` (re-runnable) — stamping `failed` here would make the retry
      // skip the unsent tail. A flush failure must not replace the original
      // error that is re-thrown to Hatchet.
      try {
        await flushCounts();
      } catch (flushError) {
        logger.warn("send-campaign: could not persist progress counts", {
          campaignId: input.campaignId,
          error:
            flushError instanceof Error
              ? flushError.message
              : String(flushError),
        });
      }

      throw error;
    }
  },
});
|
|
233
|
+
|
|
234
|
+
/**
 * Reads a millisecond duration from an environment variable value.
 *
 * Falls back to `fallbackMs` when the variable is unset, blank, not a finite
 * number, or not strictly positive. Plain `Number(raw)` would turn an empty
 * string into `0` and garbage into `NaN`; a zero window would make the reaper
 * treat every in-flight campaign as stale (or as given-up) immediately.
 *
 * @param raw The raw environment value, or `undefined` when unset.
 * @param fallbackMs Default used whenever `raw` is not a usable duration.
 * @returns A finite, strictly positive number of milliseconds.
 */
function positiveMsFromEnv(raw: string | undefined, fallbackMs: number): number {
  if (raw === undefined || raw.trim() === "") return fallbackMs;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}

/**
 * How long a campaign may sit in a non-terminal in-flight state (`queued` /
 * `sending`) before the reaper treats it as STALE and re-drives it. Should be
 * comfortably longer than the send task's `executionTimeout` (600s) so a long
 * but still-running send is not re-enqueued underneath itself. Overridable via
 * `CAMPAIGN_STALE_AFTER_MS`; defaults to 15 minutes.
 */
const STALE_AFTER_MS = positiveMsFromEnv(
  process.env.CAMPAIGN_STALE_AFTER_MS,
  15 * 60 * 1000,
);

/**
 * After a campaign has sat in a non-terminal in-flight state this long
 * (measured from `updatedAt`) it is declared `failed` rather than re-enqueued
 * forever, so a poison campaign stops being re-driven and surfaces to
 * operators. Overridable via `CAMPAIGN_GIVE_UP_AFTER_MS`; defaults to 6 hours.
 */
const GIVE_UP_AFTER_MS = positiveMsFromEnv(
  process.env.CAMPAIGN_GIVE_UP_AFTER_MS,
  6 * 60 * 60 * 1000,
);
|
|
254
|
+
|
|
255
|
+
/**
 * Reaper cron for campaigns left in a non-terminal in-flight state (`queued` /
 * `sending`) with no live run to finish them — for example a `sending`
 * campaign whose worker was killed before its catch block ran, or a `queued`
 * campaign whose enqueue never reached the broker.
 *
 * Each sweep does two things, in order:
 *   1. Rows untouched for longer than `GIVE_UP_AFTER_MS` are marked `failed`,
 *      so they stop being re-driven.
 *   2. Remaining rows untouched for longer than `STALE_AFTER_MS` get their
 *      `updatedAt` bumped and a fresh `sendCampaignTask` run enqueued.
 *
 * The re-enqueue is fire-and-forget (`runNoWait`): this task has a 120s
 * execution timeout while a send may run for up to 600s, so awaiting each
 * child's result would stall the sweep and time it out.
 *
 * `db` and `logger` are built from `process.env` / `getDb()` inside the task.
 *
 * NOTE(review): an earlier comment described non-cancelling single-flight
 * concurrency for this task, but no `concurrency` option is configured here —
 * confirm whether one is intended.
 *
 * @returns `failed`: campaigns declared failed this sweep; `reEnqueued`:
 *   campaigns for which a new send run was successfully enqueued.
 */
export const reapStuckCampaignsTask = hatchet.task({
  name: "reap-stuck-campaigns",
  onCrons: [process.env.CAMPAIGN_REAPER_CRON ?? "*/5 * * * *"],
  retries: 1,
  executionTimeout: "120s",
  fn: async () => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");

    const now = Date.now();
    const staleBefore = new Date(now - STALE_AFTER_MS);
    const giveUpBefore = new Date(now - GIVE_UP_AFTER_MS);

    // (1) Declare poison campaigns `failed` first, so step (2) — which only
    // matches `queued`/`sending` — no longer sees them.
    const failedRows = await db
      .update(campaigns)
      .set({ status: "failed", completedAt: new Date(), updatedAt: new Date() })
      .where(
        and(
          inArray(campaigns.status, ["queued", "sending"]),
          lt(campaigns.updatedAt, giveUpBefore),
        ),
      )
      .returning({ id: campaigns.id });

    // (2) Claim stale-but-not-poison campaigns. Bumping `updatedAt` keeps the
    // same row from being re-picked on the very next tick before the
    // re-driven run has made progress.
    const staleRows = await db
      .update(campaigns)
      .set({ updatedAt: new Date() })
      .where(
        and(
          inArray(campaigns.status, ["queued", "sending"]),
          lt(campaigns.updatedAt, staleBefore),
        ),
      )
      .returning({ id: campaigns.id });

    // Enqueue without awaiting the send's result; count only the enqueues
    // that were actually accepted.
    let reEnqueued = 0;
    for (const row of staleRows) {
      try {
        await sendCampaignTask.runNoWait({ campaignId: row.id });
        reEnqueued++;
      } catch (err) {
        // Best-effort: the row stays in-flight and is picked up again once it
        // ages past the stale window.
        logger.warn("reap-stuck-campaigns: re-enqueue failed", {
          campaignId: row.id,
          error: err instanceof Error ? err.message : String(err),
        });
      }
    }

    if (failedRows.length > 0 || staleRows.length > 0) {
      logger.info("reap-stuck-campaigns: swept", {
        failed: failedRows.length,
        reEnqueued,
      });
    }

    return {
      failed: failedRows.length,
      reEnqueued,
    };
  },
});
|
|
340
|
+
|
|
341
|
+
/**
 * Keyset-pagination driver shared by the recipient resolvers. It owns the
 * cursor lifecycle so each resolver only supplies its query and row mapping.
 *
 * Loop: fetch a page for the current cursor; stop on an empty page; yield every
 * row that `map` turns into a recipient; stop when the page is shorter than
 * `pageSize` (it was the last one); otherwise advance to the last row's cursor.
 * Iteration also stops when that cursor is missing or identical to the
 * previous one, so a non-advancing query cannot loop forever.
 *
 * @param opts.page Fetches the page that follows `cursor` (`undefined` for the
 *   first page). The callback owns its own filtering, ordering and limit; that
 *   limit must equal `pageSize` for the short-page test to be valid.
 * @param opts.cursorOf Extracts the keyset value from a row.
 * @param opts.map Converts a row to a recipient, or returns `undefined` to
 *   skip the row.
 * @param opts.pageSize Row count of a full page. Defaults to `CHUNK_SIZE`.
 * @returns An async generator of recipients in page order.
 */
async function* keysetPaginate<Row>(opts: {
  page: (cursor: string | undefined) => Promise<Row[]>;
  cursorOf: (row: Row) => string | undefined;
  map: (row: Row) => CampaignRecipient | undefined;
  pageSize?: number;
}): AsyncGenerator<CampaignRecipient> {
  const pageSize = opts.pageSize ?? CHUNK_SIZE;
  let cursor: string | undefined;

  while (true) {
    const rows = await opts.page(cursor);
    if (rows.length === 0) break;

    for (const row of rows) {
      const recipient = opts.map(row);
      if (recipient) yield recipient;
    }

    // A short page is the last page.
    if (rows.length < pageSize) break;

    const next = opts.cursorOf(rows[rows.length - 1] as Row);
    // Missing or non-advancing cursor: bail rather than re-fetch the same page.
    if (!next || next === cursor) break;
    cursor = next;
  }
}
|
|
372
|
+
|
|
373
|
+
/**
 * Yields the recipients of a bucket: memberships that are `active` and not
 * soft-deleted, inner-joined to a non-deleted contact on
 * `contacts.externalId = bucket_memberships.userId`. Paged by keyset cursor on
 * `bucket_memberships.id`.
 *
 * Email choice: the membership's own email is preferred and the contact's
 * email is the fallback. A BLANK membership email (empty or whitespace-only) is
 * treated as absent, in SQL and in the row mapping alike, so such a member
 * still resolves to the contact's address instead of being dropped.
 *
 * Compliance: the chosen email is trimmed and lower-cased in SQL and passed
 * through `normalizeEmail` before it is yielded, so a mixed-case membership
 * email still lines up with the lower-cased `email_preferences` comparison.
 * Members with a preferences row flagged `unsubscribedAll` or `suppressed` are
 * excluded up front by a correlated NOT EXISTS; a member with no preferences
 * row is included.
 *
 * @param db Database handle used for the paged queries.
 * @param bucketId Bucket whose active members are resolved.
 * @returns An async generator of `{ email, userId }` recipients.
 */
async function* resolveBucketRecipients(
  db: Database,
  bucketId: string,
): AsyncGenerator<CampaignRecipient> {
  // Normalized recipient email as seen by SQL. `nullif(trim(..), '')` turns a
  // blank membership email into NULL so `coalesce` falls through to the
  // contact email, matching the fallback applied in `map` below.
  const recipientEmail = sql<string>`lower(trim(coalesce(nullif(trim(${bucketMemberships.userEmail}), ''), ${contacts.email})))`;

  yield* keysetPaginate({
    page: (cursor) => {
      const conditions = [
        eq(bucketMemberships.bucketId, bucketId),
        eq(bucketMemberships.status, "active"),
        isNull(bucketMemberships.deletedAt),
        isNull(contacts.deletedAt),
        // Correlated NOT EXISTS rather than a JOIN, so several preference
        // rows sharing one email cannot fan a member out into duplicates.
        sql`not exists (
          select 1 from ${emailPreferences}
          where lower(${emailPreferences.email}) = ${recipientEmail}
            and (${emailPreferences.unsubscribedAll} = true
              or ${emailPreferences.suppressed} = true)
        )`,
      ];
      if (cursor) conditions.push(gt(bucketMemberships.id, cursor));

      return db
        .select({
          id: bucketMemberships.id,
          userId: bucketMemberships.userId,
          membershipEmail: bucketMemberships.userEmail,
          contactEmail: contacts.email,
        })
        .from(bucketMemberships)
        .innerJoin(contacts, eq(contacts.externalId, bucketMemberships.userId))
        .where(and(...conditions))
        .orderBy(bucketMemberships.id)
        .limit(CHUNK_SIZE);
    },
    cursorOf: (row) => row.id,
    map: (row) => {
      // `||` (not `??`) so an empty/whitespace membership email falls back to
      // the contact email instead of short-circuiting to "no email".
      const raw = row.membershipEmail?.trim() || row.contactEmail;
      if (!raw) return undefined;
      return { email: normalizeEmail(raw), userId: row.userId };
    },
  });
}
|
|
448
|
+
|
|
449
|
+
/**
 * Yields the subscribed recipients of a list by delegating to the resolver
 * that matches the list's default polarity, as reported by
 * `ListRegistry.isSubscribedByDefault`:
 *
 * - subscribed by default (an opt-OUT list): `resolveOptOutListRecipients`,
 *   which starts from contacts and removes those who opted out.
 * - not subscribed by default (an opt-IN list): `resolveOptInListRecipients`,
 *   which starts from preference rows and keeps explicit subscribers.
 *
 * @param db Database handle forwarded to the chosen resolver.
 * @param listId Identifier of the list being resolved.
 * @returns An async generator of recipients for that list.
 */
async function* resolveListRecipients(
  db: Database,
  listId: string,
): AsyncGenerator<CampaignRecipient> {
  const isOptOutList = getListRegistry().isSubscribedByDefault(listId);

  yield* isOptOutList
    ? resolveOptOutListRecipients(db, listId)
    : resolveOptInListRecipients(db, listId);
}
|
|
490
|
+
|
|
491
|
+
/**
 * Resolver for opt-IN lists. Scans `email_preferences` rows that are neither
 * globally unsubscribed nor suppressed, keyset-paged on `email_preferences.id`,
 * and keeps only the rows that `ListRegistry.isSubscribed` accepts for this
 * list. Rows it rejects are skipped but still advance the cursor.
 *
 * @param db Database handle used for the paged queries.
 * @param listId Identifier of the list whose subscribers are resolved.
 * @returns An async generator of `{ email, userId }` recipients, with the
 *   email passed through `normalizeEmail`.
 */
async function* resolveOptInListRecipients(
  db: Database,
  listId: string,
): AsyncGenerator<CampaignRecipient> {
  const registry = getListRegistry();

  // One page of candidate preference rows strictly after `after`.
  const fetchPage = (after: string | undefined) => {
    const filters = [
      eq(emailPreferences.unsubscribedAll, false),
      eq(emailPreferences.suppressed, false),
      ...(after ? [gt(emailPreferences.id, after)] : []),
    ];

    return db
      .select({
        id: emailPreferences.id,
        userId: emailPreferences.userId,
        email: emailPreferences.email,
        categories: emailPreferences.categories,
      })
      .from(emailPreferences)
      .where(and(...filters))
      .orderBy(emailPreferences.id)
      .limit(CHUNK_SIZE);
  };

  yield* keysetPaginate({
    page: fetchPage,
    cursorOf: ({ id }) => id,
    map: ({ categories, email, userId }) => {
      const flags = (categories ?? {}) as Record<string, boolean>;
      return registry.isSubscribed(flags, listId)
        ? { email: normalizeEmail(email), userId }
        : undefined;
    },
  });
}
|
|
530
|
+
|
|
531
|
+
/**
 * Resolver for opt-OUT lists. Starts from every non-deleted contact that has
 * an email and removes those with a preferences row (matched on the
 * lower-cased email) that is globally unsubscribed, suppressed, or carries
 * `categories[listId] = false`. A contact with no preferences row is kept.
 * Keyset-paged on `contacts.id`.
 *
 * @param db Database handle used for the paged queries.
 * @param listId Identifier of the list whose audience is resolved.
 * @returns An async generator of recipients; `userId` is the contact's
 *   external id, falling back to the contact id when there is none.
 */
async function* resolveOptOutListRecipients(
  db: Database,
  listId: string,
): AsyncGenerator<CampaignRecipient> {
  const contactEmail = sql<string>`lower(${contacts.email})`;

  // One page of eligible contacts strictly after `after`.
  const fetchPage = (after: string | undefined) => {
    const filters = [
      isNull(contacts.deletedAt),
      sql`${contacts.email} is not null`,
      // NOT EXISTS instead of a JOIN: several preference rows for one email
      // must not multiply the contact into duplicate recipients.
      sql`not exists (
          select 1 from ${emailPreferences}
          where lower(${emailPreferences.email}) = ${contactEmail}
            and (${emailPreferences.unsubscribedAll} = true
              or ${emailPreferences.suppressed} = true
              or (${emailPreferences.categories} ->> ${listId})::boolean = false)
        )`,
      ...(after ? [gt(contacts.id, after)] : []),
    ];

    return db
      .select({
        id: contacts.id,
        userId: contacts.externalId,
        contactId: contacts.id,
        email: contacts.email,
      })
      .from(contacts)
      .where(and(...filters))
      .orderBy(contacts.id)
      .limit(CHUNK_SIZE);
  };

  yield* keysetPaginate({
    page: fetchPage,
    cursorOf: ({ id }) => id,
    map: ({ email, userId, contactId }) =>
      email
        ? // External id when present, otherwise the contact id, as the
          // recipient's send identity.
          { email: normalizeEmail(email), userId: userId ?? contactId }
        : undefined,
  });
}
|
package/src/routes/ingest.ts
DELETED
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
import { createRoute, OpenAPIHono, z } from "@hono/zod-openapi";
|
|
2
|
-
import type { AppEnv } from "../app.js";
|
|
3
|
-
import { ingestEvent } from "../lib/ingestion.js";
|
|
4
|
-
|
|
5
|
-
const ingestRequestSchema = z.object({
|
|
6
|
-
event: z.string().min(1),
|
|
7
|
-
userId: z.string().min(1),
|
|
8
|
-
userEmail: z.string().email().optional(),
|
|
9
|
-
properties: z.record(z.string(), z.unknown()).optional(),
|
|
10
|
-
idempotencyKey: z.string().optional(),
|
|
11
|
-
timestamp: z.string().datetime().optional(),
|
|
12
|
-
});
|
|
13
|
-
|
|
14
|
-
const ingestResponseSchema = z.object({
|
|
15
|
-
stored: z.boolean(),
|
|
16
|
-
exits: z.array(
|
|
17
|
-
z.object({
|
|
18
|
-
journeyId: z.string(),
|
|
19
|
-
stateId: z.string(),
|
|
20
|
-
exited: z.boolean(),
|
|
21
|
-
}),
|
|
22
|
-
),
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
const ingestRoute = createRoute({
|
|
26
|
-
method: "post",
|
|
27
|
-
path: "/",
|
|
28
|
-
tags: ["Ingestion"],
|
|
29
|
-
summary: "Ingest an event",
|
|
30
|
-
description:
|
|
31
|
-
"Receives events from direct API calls. Stores the event, pushes it to Hatchet for journey routing, and processes exit conditions.",
|
|
32
|
-
request: {
|
|
33
|
-
body: {
|
|
34
|
-
content: {
|
|
35
|
-
"application/json": { schema: ingestRequestSchema },
|
|
36
|
-
},
|
|
37
|
-
},
|
|
38
|
-
},
|
|
39
|
-
responses: {
|
|
40
|
-
202: {
|
|
41
|
-
content: {
|
|
42
|
-
"application/json": { schema: ingestResponseSchema },
|
|
43
|
-
},
|
|
44
|
-
description: "Event accepted and dispatched",
|
|
45
|
-
},
|
|
46
|
-
},
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
export const ingestRouter = new OpenAPIHono<AppEnv>().openapi(
|
|
50
|
-
ingestRoute,
|
|
51
|
-
async (c) => {
|
|
52
|
-
const body = c.req.valid("json");
|
|
53
|
-
const { db, registry, hatchet, logger } = c.get("container");
|
|
54
|
-
|
|
55
|
-
const result = await ingestEvent({
|
|
56
|
-
db,
|
|
57
|
-
registry,
|
|
58
|
-
hatchet,
|
|
59
|
-
logger,
|
|
60
|
-
event: {
|
|
61
|
-
event: body.event,
|
|
62
|
-
userId: body.userId,
|
|
63
|
-
userEmail: body.userEmail ?? "",
|
|
64
|
-
properties: body.properties ?? {},
|
|
65
|
-
idempotencyKey: body.idempotencyKey,
|
|
66
|
-
},
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
return c.json(result, 202);
|
|
70
|
-
},
|
|
71
|
-
);
|