@hogsend/engine 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/package.json +7 -6
  2. package/src/app.ts +36 -1
  3. package/src/container.ts +80 -8
  4. package/src/destinations/define-destination.ts +104 -0
  5. package/src/destinations/presets/index.ts +94 -0
  6. package/src/destinations/presets/posthog.ts +71 -0
  7. package/src/destinations/presets/segment.ts +75 -0
  8. package/src/destinations/presets/slack.ts +66 -0
  9. package/src/destinations/presets/webhook.ts +37 -0
  10. package/src/destinations/registry-singleton.ts +78 -0
  11. package/src/env.ts +40 -0
  12. package/src/index.ts +59 -1
  13. package/src/journeys/define-journey.ts +26 -3
  14. package/src/journeys/journey-context.ts +1 -17
  15. package/src/lib/analytics-singleton.ts +7 -0
  16. package/src/lib/bucket-emit.ts +45 -0
  17. package/src/lib/contacts.ts +28 -6
  18. package/src/lib/mailer.ts +102 -0
  19. package/src/lib/outbound.ts +223 -0
  20. package/src/lib/preferences.ts +31 -0
  21. package/src/lib/seed-posthog-destination.ts +93 -0
  22. package/src/lib/tracked.ts +45 -3
  23. package/src/lib/tracking-events.ts +77 -10
  24. package/src/lib/webhook-signing.ts +152 -0
  25. package/src/routes/admin/contacts.ts +43 -3
  26. package/src/routes/admin/index.ts +2 -0
  27. package/src/routes/admin/webhooks.ts +557 -0
  28. package/src/routes/contacts/index.ts +48 -5
  29. package/src/routes/lists/index.ts +41 -5
  30. package/src/routes/tracking/click.ts +58 -22
  31. package/src/routes/tracking/open.ts +53 -22
  32. package/src/routes/webhooks/sources.ts +69 -10
  33. package/src/webhook-sources/define-webhook-source.ts +57 -5
  34. package/src/webhook-sources/presets/clerk.ts +185 -0
  35. package/src/webhook-sources/presets/index.ts +80 -0
  36. package/src/webhook-sources/presets/segment.ts +120 -0
  37. package/src/webhook-sources/presets/stripe.ts +147 -0
  38. package/src/webhook-sources/presets/supabase.ts +131 -0
  39. package/src/webhook-sources/verify.ts +172 -0
  40. package/src/worker.ts +6 -0
  41. package/src/workflows/deliver-webhook.ts +484 -0
@@ -0,0 +1,484 @@
1
+ import {
2
+ deadLetterQueue,
3
+ webhookDeliveries,
4
+ webhookEndpoints,
5
+ } from "@hogsend/db";
6
+ import { and, eq, lt, or, sql } from "drizzle-orm";
7
+ import type {
8
+ DestinationEnvelope,
9
+ DestinationTransformResult,
10
+ } from "../destinations/define-destination.js";
11
+ import { webhookDestination } from "../destinations/presets/webhook.js";
12
+ import { getDestinationRegistry } from "../destinations/registry-singleton.js";
13
+ import { getDb } from "../lib/db.js";
14
+ import { hatchet } from "../lib/hatchet.js";
15
+ import { createLogger } from "../lib/logger.js";
16
+
17
+ /**
18
+ * Outbound webhook delivery — the durable per-(event × endpoint) POST attempt
19
+ * plus the reaper cron that schedules retries and recovers orphaned `sending`
20
+ * rows.
21
+ *
22
+ * Delivery model (Section 1.5, LOCKED decision 5/6): one `webhook_deliveries`
23
+ * row + one `runNoWait` per endpoint (independent retry/backoff/dead-letter),
24
+ * with a 1-minute reaper cron as the retry scheduler AND the orphan-`sending`
25
+ * recovery — mirroring `reapStuckCampaignsTask`. Hatchet's own retry is OFF
26
+ * (`retries: 0`); `nextRetryAt` is the single retry clock.
27
+ *
28
+ * The task resolves a delivery-time DESTINATION TRANSFORM by `endpoint.kind`
29
+ * (default "webhook") from the process destination registry, applied to the
30
+ * FROZEN `payload` envelope on the row + the LIVE endpoint read at delivery
31
+ * time. For "webhook" the transform signs with the live secret, so a
32
+ * rotate-secret invalidates in-flight deliveries to a compromised secret
33
+ * (acceptable under at-least-once) and the `body` is the EXACT bytes POSTed —
34
+ * never re-serialized between sign and send (Open Risk 8). A keyed destination
35
+ * (e.g. "posthog") rewrites url/headers/body; a `null` result skips delivery
36
+ * (successful no-op); a transform throw (bad config) is a NON-retryable
37
+ * permanent failure (straight to DLQ, like a persistent 4xx). All
38
+ * retry/backoff/DLQ/reaper/CAS logic operates on the ROW, not the wire.
39
+ */
40
+
41
/** Statuses that are TERMINAL — a duplicate/late enqueue must not re-deliver. */
const TERMINAL_STATUSES = ["delivered", "failed", "discarded"] as const;

/**
 * Read a numeric tuning knob from `process.env`, falling back to `fallback`
 * when the variable is unset, blank, non-numeric, non-finite or negative.
 *
 * A bare `Number(process.env.X ?? fallback)` turns a typo into `NaN` (and an
 * empty string into `0`), which silently breaks timeouts, backoff deadlines and
 * the attempt budget. Rejecting bad values here keeps every knob usable.
 *
 * @param name - Environment variable name.
 * @param fallback - Value used when the variable is missing or invalid.
 * @returns The parsed non-negative finite number, or `fallback`.
 */
function readNumericEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/** Max delivery attempts before the row is dead-lettered (env-tunable). */
const MAX_ATTEMPTS = readNumericEnv("OUTBOUND_WEBHOOK_MAX_ATTEMPTS", 8);
/** Per-attempt POST timeout (AbortController), ms. */
const TIMEOUT_MS = readNumericEnv("OUTBOUND_WEBHOOK_TIMEOUT_MS", 15000);
/** Exponential backoff base, ms. delay = BASE * 2^attempt + jitter(0..BASE). */
const BASE_DELAY_MS = readNumericEnv("OUTBOUND_WEBHOOK_BASE_DELAY_MS", 5000);
/** Backoff ceiling, ms (default 6h). */
const MAX_DELAY_MS = readNumericEnv(
  "OUTBOUND_WEBHOOK_MAX_DELAY_MS",
  6 * 60 * 60 * 1000,
);
/** A `sending` row older than this (no live run) is re-driven by the reaper. */
const STUCK_AFTER_MS = readNumericEnv(
  "OUTBOUND_WEBHOOK_STUCK_AFTER_MS",
  5 * 60 * 1000,
);

/** Response-body snippet cap persisted for forensics (≤1KB). */
const SNIPPET_MAX = 1024;
63
+
64
/**
 * Delay before the next retry, in milliseconds.
 *
 * Grows exponentially with the attempt number, adds a random jitter in
 * `[0, BASE_DELAY_MS)` so simultaneous failures do not retry in lockstep, and
 * is clamped to `MAX_DELAY_MS`:
 *   min(BASE * 2^attempt + jitter(0..BASE), MAX_DELAY).
 *
 * @param attempt - The already-incremented attempt count, so the first retry
 *   after one failed attempt waits roughly `BASE * 2` rather than near zero.
 * @returns The delay to wait before the next attempt, in ms.
 */
function backoffMs(attempt: number): number {
  const exponentialMs = BASE_DELAY_MS * Math.pow(2, attempt);
  const jitterMs = Math.floor(Math.random() * BASE_DELAY_MS);
  const uncappedMs = exponentialMs + jitterMs;
  return Math.min(uncappedMs, MAX_DELAY_MS);
}
75
+
76
/**
 * Decide whether an HTTP response status warrants another delivery attempt.
 *
 * Retryable: `408 Request Timeout`, `429 Too Many Requests`, and every status
 * `>= 500`. Everything else (including a persistent 4xx such as `400` or
 * `410`) is not retryable, so a misconfigured or decommissioned endpoint fails
 * fast instead of consuming the whole attempt budget.
 *
 * Network errors and timeouts carry no HTTP status; the caller treats those as
 * retryable itself and never passes them here.
 *
 * @param status - The HTTP status code returned by the endpoint.
 * @returns `true` when the delivery should be retried.
 */
function isRetryableStatus(status: number): boolean {
  const isRetryableClientError = status === 408 || status === 429;
  const isServerError = status >= 500;
  return isRetryableClientError || isServerError;
}
91
+
92
/**
 * One durable delivery attempt for a single `webhook_deliveries` row.
 *
 * `retries: 0` — the reaper (driven off `nextRetryAt`) is the retry scheduler,
 * NOT Hatchet's own backoff (which would double up on the reaper's). The CAS to
 * `sending` (step 3) prevents an overlapping reaper re-drive from double-POSTing
 * the same row.
 *
 * Input: `{ deliveryId }` — the id of the `webhook_deliveries` row to attempt.
 *
 * Result (one of):
 *   - `{ status: "skipped", reason: "not_found" | "lost_cas" }`
 *   - `{ status: <terminal status>, skipped: true }` for an already-terminal row
 *   - `{ status: "discarded", reason: "endpoint_disabled" | "endpoint_deleted" }`
 *   - `{ status: "delivered", skipped: true }` when the transform returned null
 *   - `{ status: "delivered", responseStatus }`
 *   - `{ status: "pending", attemptCount, nextRetryAt }` when a retry is scheduled
 *   - `{ status: "failed", attemptCount, fastFail }` when dead-lettered
 */
export const deliverWebhookTask = hatchet.task({
  name: "deliver-webhook",
  retries: 0,
  executionTimeout: "30s",
  fn: async (input: { deliveryId: string }) => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");

    // (1) Load the delivery row. Absent → nothing to do (a hard delete cascaded
    // it away between enqueue and run).
    const [row] = await db
      .select()
      .from(webhookDeliveries)
      .where(eq(webhookDeliveries.id, input.deliveryId))
      .limit(1);
    if (!row) {
      return { status: "skipped", reason: "not_found" as const };
    }
    // Already terminal — a duplicate/late enqueue (or a reaper re-drive that
    // raced a just-finished run) must not re-deliver.
    if ((TERMINAL_STATUSES as readonly string[]).includes(row.status)) {
      return { status: row.status, skipped: true };
    }

    // (2) Load the endpoint. Absent (cascade-deleted) OR disabled → `discarded`:
    // an operator action, NOT a delivery error, so it is NOT dead-lettered.
    const [endpoint] = await db
      .select()
      .from(webhookEndpoints)
      .where(eq(webhookEndpoints.id, row.endpointId))
      .limit(1);
    if (!endpoint || endpoint.disabled) {
      await db
        .update(webhookDeliveries)
        .set({
          status: "discarded",
          nextRetryAt: null,
          updatedAt: new Date(),
        })
        .where(eq(webhookDeliveries.id, row.id));
      return {
        status: "discarded" as const,
        reason: endpoint
          ? ("endpoint_disabled" as const)
          : ("endpoint_deleted" as const),
      };
    }

    // (3) CAS the row to `sending` so an overlapping reaper re-drive cannot
    // double-POST. The status guard (unchanged since the read above) makes a
    // concurrent claim affect zero rows; the loser of the race bails out here.
    //
    // A row that was ALREADY `sending` when read needs a stronger guard: the
    // status is the same before and after a claim, so `status = 'sending'`
    // alone would let every concurrent re-drive "win". Such a row may only be
    // re-claimed while it is still stale (`updatedAt` older than
    // `STUCK_AFTER_MS`); the first claimer bumps `updatedAt`, which locks out
    // every other claimer and also protects a still-live run from a duplicate.
    const claimGuards = [
      eq(webhookDeliveries.id, row.id),
      eq(webhookDeliveries.status, row.status),
    ];
    if (row.status === "sending") {
      claimGuards.push(
        lt(
          webhookDeliveries.updatedAt,
          new Date(Date.now() - STUCK_AFTER_MS),
        ),
      );
    }
    const claimed = await db
      .update(webhookDeliveries)
      .set({
        status: "sending",
        lastAttemptAt: new Date(),
        updatedAt: new Date(),
      })
      .where(and(...claimGuards))
      .returning({ id: webhookDeliveries.id });
    if (claimed.length === 0) {
      return { status: "skipped", reason: "lost_cas" as const };
    }

    // (4) Resolve the delivery-time DESTINATION TRANSFORM by the endpoint's
    // `kind` (default "webhook" — byte-identical to the pre-destination signed
    // POST) from the process destination registry. The transform turns the
    // FROZEN row envelope + LIVE endpoint into the concrete HTTP request. For
    // "webhook" it signs from the row payload + live secret; for a keyed
    // destination it rewrites url/headers/body. An UNKNOWN kind (no registered
    // transform) falls back to the always-on `webhook` preset, preserving the
    // pre-registry `ADAPTERS[kind] ?? webhookAdapter` behaviour. A `null` result
    // SKIPS delivery for this event (a successful no-op — marked delivered, no
    // POST). A THROW (bad/missing config) is NOT transient — it routes straight
    // to the failed+DLQ branch like a persistent 4xx (`adapterFailed`).
    const destination =
      getDestinationRegistry().get(endpoint.kind ?? "webhook") ??
      webhookDestination;
    let req: DestinationTransformResult | null = null;
    let transformSkipped = false;
    let adapterFailed = false;
    let adapterUrl = endpoint.url;
    let responseStatus: number | null = null;
    let responseBodySnippet: string | null = null;
    let lastError: string | null = null;
    try {
      req = destination.transform(
        row.payload as unknown as DestinationEnvelope,
        {
          endpoint,
          logger,
        },
      );
      if (req === null) {
        transformSkipped = true;
      } else {
        adapterUrl = req.url;
      }
    } catch (err) {
      adapterFailed = true;
      lastError = err instanceof Error ? err.message : String(err);
    }

    // (4a) Transform returned null → SKIP: mark the row delivered without a POST
    // (a successful no-op for an event this destination chose not to forward).
    // The `delivered` status keeps the row terminal so the reaper never
    // re-drives it. No endpoint `lastDeliveryAt` bump — nothing was sent.
    if (transformSkipped) {
      const skippedAt = new Date();
      await db
        .update(webhookDeliveries)
        .set({
          status: "delivered",
          attemptCount: row.attemptCount + 1,
          responseStatus: null,
          responseBodySnippet: null,
          deliveredAt: skippedAt,
          nextRetryAt: null,
          lastError: null,
          lastAttemptAt: skippedAt,
          updatedAt: skippedAt,
        })
        .where(eq(webhookDeliveries.id, row.id));
      logger.info("deliver-webhook: skipped by destination transform", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        kind: endpoint.kind ?? "webhook",
        eventType: row.eventType,
      });
      return { status: "delivered" as const, skipped: true };
    }

    // (5) POST with an AbortController timeout. A network error / timeout leaves
    // `responseStatus` null (a retryable failure, handled below). Skipped when
    // the transform threw (permanent config failure → straight to DLQ).
    //
    // The success verdict is decided exactly ONCE, inside the `try`: the
    // transform's `isSuccess` (or the default 2xx rule) is the authority. If
    // `isSuccess` itself throws, the attempt is recorded as a failure instead of
    // crashing the task and stranding the row in `sending`.
    let delivered = false;
    if (req) {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
      try {
        const res = await fetch(req.url, {
          method: req.method ?? "POST",
          headers: req.headers,
          body: req.body,
          signal: controller.signal,
        });
        responseStatus = res.status;
        // A body-read failure is not a delivery failure; keep the status.
        const text = await res.text().catch(() => "");
        responseBodySnippet = text ? text.slice(0, SNIPPET_MAX) : null;
        delivered =
          req.isSuccess?.(responseStatus, responseBodySnippet ?? "") ??
          (responseStatus >= 200 && responseStatus < 300);
        if (!delivered) {
          lastError = `HTTP ${responseStatus}`;
        }
      } catch (err) {
        lastError =
          err instanceof Error
            ? controller.signal.aborted
              ? `Timeout after ${TIMEOUT_MS}ms`
              : err.message
            : String(err);
      } finally {
        clearTimeout(timer);
      }
    }

    const now = new Date();

    // (6) success → delivered (TERMINAL). Also bump the endpoint's
    // lastDeliveryAt.
    if (delivered) {
      await db
        .update(webhookDeliveries)
        .set({
          status: "delivered",
          attemptCount: row.attemptCount + 1,
          responseStatus,
          responseBodySnippet,
          deliveredAt: now,
          nextRetryAt: null,
          lastError: null,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      await db
        .update(webhookEndpoints)
        .set({ lastDeliveryAt: now, updatedAt: now })
        .where(eq(webhookEndpoints.id, endpoint.id));
      logger.info("deliver-webhook: delivered", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        eventType: row.eventType,
        responseStatus,
      });
      return { status: "delivered" as const, responseStatus };
    }

    const attemptCount = row.attemptCount + 1;

    // (7) Permanent (non-retryable) failures fast-fail, skipping the remaining
    // retry budget:
    //   - a transform THROW (bad/missing destination config) — config is not
    //     transient, so a single attempt is enough to declare it dead.
    //   - a persistent-4xx: a non-retryable client error (anything 4xx except
    //     408/429) after attempt >= 2 — a `410 Gone` must not burn 8 attempts.
    //     The `>= 2` guard tolerates a single transient 4xx blip first.
    const httpFastFail =
      responseStatus !== null &&
      !isRetryableStatus(responseStatus) &&
      attemptCount >= 2;
    const permanentFail = adapterFailed || httpFastFail;

    // (8) Retryable failure with attempts remaining → back to `pending` with the
    // next backoff deadline; the reaper re-drives it once `nextRetryAt` passes.
    if (!permanentFail && attemptCount < MAX_ATTEMPTS) {
      const nextRetryAt = new Date(now.getTime() + backoffMs(attemptCount));
      await db
        .update(webhookDeliveries)
        .set({
          status: "pending",
          attemptCount,
          responseStatus,
          responseBodySnippet,
          nextRetryAt,
          lastError,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      logger.warn("deliver-webhook: retry scheduled", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        attemptCount,
        responseStatus,
        nextRetryAt: nextRetryAt.toISOString(),
        error: lastError,
      });
      return {
        status: "pending" as const,
        attemptCount,
        nextRetryAt: nextRetryAt.toISOString(),
      };
    }

    // (9) Exhausted (attempts >= MAX) OR a permanent fast-fail → `failed`
    // (TERMINAL) + a forensic `dead_letter_queue` mirror, in one transaction so
    // the terminal status and the DLQ row commit together. This is the DLQ's
    // first real producer (LOCKED decision 8).
    const exhaustError = `Exhausted ${attemptCount}: ${lastError ?? "unknown"}`;
    await db.transaction(async (tx) => {
      await tx
        .update(webhookDeliveries)
        .set({
          status: "failed",
          attemptCount,
          responseStatus,
          responseBodySnippet,
          nextRetryAt: null,
          lastError,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      await tx.insert(deadLetterQueue).values({
        source: "webhook-delivery",
        sourceId: row.id,
        payload: {
          endpointId: endpoint.id,
          // The adapter-RESOLVED url + the endpoint kind, so a failed keyed
          // delivery is debuggable (NOT the raw endpoint.url, which for a keyed
          // destination is not the URL actually POSTed to).
          url: adapterUrl,
          kind: endpoint.kind ?? "webhook",
          eventType: row.eventType,
          webhookId: row.webhookId,
          body: row.payload,
        },
        error: exhaustError,
        retryCount: attemptCount,
        status: "pending",
      });
    });
    logger.error("deliver-webhook: failed (dead-lettered)", {
      deliveryId: row.id,
      endpointId: endpoint.id,
      kind: endpoint.kind ?? "webhook",
      eventType: row.eventType,
      attemptCount,
      responseStatus,
      fastFail: permanentFail,
      adapterFailed,
      error: lastError,
    });
    return { status: "failed" as const, attemptCount, fastFail: permanentFail };
  },
});
408
+
409
/** Max rows a single reaper sweep re-drives (bounds the per-tick fan-out). */
const REAPER_BATCH = 500;

/**
 * How many re-drives a sweep keeps in flight at once. Each re-drive is awaited,
 * and a single delivery may take up to its own execution timeout, so running
 * them strictly one after another lets a few slow endpoints stall the whole
 * sweep past the reaper's `executionTimeout`.
 */
const REAPER_CONCURRENCY = 25;

/**
 * Engine-owned reaper cron for outbound webhook deliveries (Section 1.5, cloned
 * from `reapStuckCampaignsTask`). It is BOTH the retry scheduler AND the
 * orphan-`sending` recovery:
 *
 *  - A `pending` row whose `nextRetryAt` has passed (or is null — a freshly
 *    enqueued row whose `runNoWait` failed at emit time) is re-driven.
 *  - A `sending` row whose worker died mid-POST (OOM/SIGKILL/timeout, so the JS
 *    never reached a terminal write) is re-driven once it is older than
 *    `STUCK_AFTER_MS` (measured from `updatedAt`, which the delivery task's
 *    claim bumps).
 *
 * Recovery is `deliverWebhookTask.run({ deliveryId })`, issued in chunks of
 * `REAPER_CONCURRENCY` so one slow endpoint does not block the rest of the
 * sweep. The delivery task's own claim guard decides whether a re-drive
 * actually POSTs. A re-drive that rejects is logged and does not abort the
 * sweep. Self-bootstraps `db`/`logger` from `process.env` (cron runs have no
 * request container).
 *
 * Result: `{ candidates, reDriven }` — rows selected this sweep, and how many
 * re-drives completed without throwing.
 */
export const reapDueWebhookDeliveriesTask = hatchet.task({
  name: "reap-due-webhook-deliveries",
  onCrons: [process.env.OUTBOUND_WEBHOOK_REAPER_CRON ?? "*/1 * * * *"],
  retries: 1,
  executionTimeout: "120s",
  fn: async () => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");

    const now = new Date();
    const stuckBefore = new Date(now.getTime() - STUCK_AFTER_MS);

    // Due-pending (retry clock elapsed or never set) OR stale-sending (orphan).
    const due = await db
      .select({ id: webhookDeliveries.id })
      .from(webhookDeliveries)
      .where(
        or(
          and(
            eq(webhookDeliveries.status, "pending"),
            or(
              sql`${webhookDeliveries.nextRetryAt} is null`,
              lt(webhookDeliveries.nextRetryAt, now),
            ),
          ),
          and(
            eq(webhookDeliveries.status, "sending"),
            lt(webhookDeliveries.updatedAt, stuckBefore),
          ),
        ),
      )
      .orderBy(webhookDeliveries.nextRetryAt)
      .limit(REAPER_BATCH);

    let reDriven = 0;
    for (let start = 0; start < due.length; start += REAPER_CONCURRENCY) {
      const chunk = due.slice(start, start + REAPER_CONCURRENCY);
      // Each row handles its own failure so one rejected re-drive neither
      // aborts its chunk nor hides which delivery failed.
      const outcomes = await Promise.all(
        chunk.map(async (row) => {
          try {
            await deliverWebhookTask.run({ deliveryId: row.id });
            return true;
          } catch (err) {
            logger.warn("reap-due-webhook-deliveries: re-drive failed", {
              deliveryId: row.id,
              error: err instanceof Error ? err.message : String(err),
            });
            return false;
          }
        }),
      );
      reDriven += outcomes.filter(Boolean).length;
    }

    if (due.length > 0) {
      logger.info("reap-due-webhook-deliveries: swept", {
        candidates: due.length,
        reDriven,
      });
    }

    return { candidates: due.length, reDriven };
  },
});