@hogsend/engine 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -6
- package/src/app.ts +36 -1
- package/src/container.ts +80 -8
- package/src/destinations/define-destination.ts +104 -0
- package/src/destinations/presets/index.ts +94 -0
- package/src/destinations/presets/posthog.ts +71 -0
- package/src/destinations/presets/segment.ts +75 -0
- package/src/destinations/presets/slack.ts +66 -0
- package/src/destinations/presets/webhook.ts +37 -0
- package/src/destinations/registry-singleton.ts +78 -0
- package/src/env.ts +40 -0
- package/src/index.ts +59 -1
- package/src/journeys/define-journey.ts +26 -3
- package/src/journeys/journey-context.ts +1 -17
- package/src/lib/analytics-singleton.ts +7 -0
- package/src/lib/bucket-emit.ts +45 -0
- package/src/lib/contacts.ts +28 -6
- package/src/lib/mailer.ts +102 -0
- package/src/lib/outbound.ts +223 -0
- package/src/lib/preferences.ts +31 -0
- package/src/lib/seed-posthog-destination.ts +93 -0
- package/src/lib/tracked.ts +45 -3
- package/src/lib/tracking-events.ts +77 -10
- package/src/lib/webhook-signing.ts +152 -0
- package/src/routes/admin/contacts.ts +43 -3
- package/src/routes/admin/index.ts +2 -0
- package/src/routes/admin/webhooks.ts +557 -0
- package/src/routes/contacts/index.ts +48 -5
- package/src/routes/lists/index.ts +41 -5
- package/src/routes/tracking/click.ts +58 -22
- package/src/routes/tracking/open.ts +53 -22
- package/src/routes/webhooks/sources.ts +69 -10
- package/src/webhook-sources/define-webhook-source.ts +57 -5
- package/src/webhook-sources/presets/clerk.ts +185 -0
- package/src/webhook-sources/presets/index.ts +80 -0
- package/src/webhook-sources/presets/segment.ts +120 -0
- package/src/webhook-sources/presets/stripe.ts +147 -0
- package/src/webhook-sources/presets/supabase.ts +131 -0
- package/src/webhook-sources/verify.ts +172 -0
- package/src/worker.ts +6 -0
- package/src/workflows/deliver-webhook.ts +484 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
import {
|
|
2
|
+
deadLetterQueue,
|
|
3
|
+
webhookDeliveries,
|
|
4
|
+
webhookEndpoints,
|
|
5
|
+
} from "@hogsend/db";
|
|
6
|
+
import { and, eq, lt, or, sql } from "drizzle-orm";
|
|
7
|
+
import type {
|
|
8
|
+
DestinationEnvelope,
|
|
9
|
+
DestinationTransformResult,
|
|
10
|
+
} from "../destinations/define-destination.js";
|
|
11
|
+
import { webhookDestination } from "../destinations/presets/webhook.js";
|
|
12
|
+
import { getDestinationRegistry } from "../destinations/registry-singleton.js";
|
|
13
|
+
import { getDb } from "../lib/db.js";
|
|
14
|
+
import { hatchet } from "../lib/hatchet.js";
|
|
15
|
+
import { createLogger } from "../lib/logger.js";
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Outbound webhook delivery — the durable per-(event × endpoint) POST attempt
|
|
19
|
+
* plus the reaper cron that schedules retries and recovers orphaned `sending`
|
|
20
|
+
* rows.
|
|
21
|
+
*
|
|
22
|
+
* Delivery model (Section 1.5, LOCKED decision 5/6): one `webhook_deliveries`
|
|
23
|
+
* row + one `runNoWait` per endpoint (independent retry/backoff/dead-letter),
|
|
24
|
+
* with a 1-minute reaper cron as the retry scheduler AND the orphan-`sending`
|
|
25
|
+
* recovery — mirroring `reapStuckCampaignsTask`. Hatchet's own retry is OFF
|
|
26
|
+
* (`retries: 0`); `nextRetryAt` is the single retry clock.
|
|
27
|
+
*
|
|
28
|
+
* The task resolves a delivery-time DESTINATION TRANSFORM by `endpoint.kind`
|
|
29
|
+
* (default "webhook") from the process destination registry, applied to the
|
|
30
|
+
* FROZEN `payload` envelope on the row + the LIVE endpoint read at delivery
|
|
31
|
+
* time. For "webhook" the transform signs with the live secret, so a
|
|
32
|
+
* rotate-secret invalidates in-flight deliveries to a compromised secret
|
|
33
|
+
* (acceptable under at-least-once) and the `body` is the EXACT bytes POSTed —
|
|
34
|
+
* never re-serialized between sign and send (Open Risk 8). A keyed destination
|
|
35
|
+
* (e.g. "posthog") rewrites url/headers/body; a `null` result skips delivery
|
|
36
|
+
* (successful no-op); a transform throw (bad config) is a NON-retryable
|
|
37
|
+
* permanent failure (straight to DLQ, like a persistent 4xx). All
|
|
38
|
+
* retry/backoff/DLQ/reaper/CAS logic operates on the ROW, not the wire.
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
/** Statuses that are TERMINAL — a duplicate/late enqueue must not re-deliver. */
const TERMINAL_STATUSES = ["delivered", "failed", "discarded"] as const;

/**
 * Read a numeric tuning knob from `process.env`.
 *
 * Returns `fallback` when the variable is unset, blank, not a finite number, or
 * below `min`. A bare `Number(process.env.X ?? d)` turns a blank value into `0`
 * and a typo into `NaN`; both then leak into attempt comparisons, timer delays
 * and `Date` arithmetic, so a bad value is rejected here instead.
 *
 * @param name - Environment variable name.
 * @param fallback - Value used when the variable is missing or invalid.
 * @param min - Smallest accepted value (inclusive). Defaults to `0`.
 * @returns The parsed value, or `fallback`.
 */
function envNumber(name: string, fallback: number, min = 0): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= min ? parsed : fallback;
}

/** Max delivery attempts before the row is dead-lettered (env-tunable, >= 1). */
const MAX_ATTEMPTS = envNumber("OUTBOUND_WEBHOOK_MAX_ATTEMPTS", 8, 1);
/** Per-attempt POST timeout (AbortController), ms (>= 1). */
const TIMEOUT_MS = envNumber("OUTBOUND_WEBHOOK_TIMEOUT_MS", 15000, 1);
/** Exponential backoff base, ms. delay = BASE * 2^attempt + jitter(0..BASE). */
const BASE_DELAY_MS = envNumber("OUTBOUND_WEBHOOK_BASE_DELAY_MS", 5000);
/** Backoff ceiling, ms (default 6h). */
const MAX_DELAY_MS = envNumber(
  "OUTBOUND_WEBHOOK_MAX_DELAY_MS",
  6 * 60 * 60 * 1000,
);
/** A `sending` row older than this (no live run) is re-driven by the reaper. */
const STUCK_AFTER_MS = envNumber(
  "OUTBOUND_WEBHOOK_STUCK_AFTER_MS",
  5 * 60 * 1000,
);

/** Response-body snippet cap persisted for forensics (≤1KB). */
const SNIPPET_MAX = 1024;
|
|
63
|
+
|
|
64
|
+
/**
 * Delay before the next retry, in ms.
 *
 * The delay doubles with every attempt (`BASE_DELAY_MS * 2^attempt`), gets a
 * random additive jitter in `[0, BASE_DELAY_MS)` so retries that failed together
 * do not all come due at the same instant, and is clamped to `MAX_DELAY_MS`.
 *
 * `attempt` is the already-incremented attempt count, so the first retry after
 * one failed attempt waits roughly `BASE_DELAY_MS * 2`.
 *
 * @param attempt - Number of attempts made so far (>= 1 at the call site).
 * @returns Milliseconds to wait, never more than `MAX_DELAY_MS`.
 */
function backoffMs(attempt: number): number {
  const jitterMs = Math.floor(Math.random() * BASE_DELAY_MS);
  const uncappedMs = BASE_DELAY_MS * Math.pow(2, attempt) + jitterMs;
  return Math.min(uncappedMs, MAX_DELAY_MS);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Decide whether an HTTP response status is worth retrying.
 *
 * Retryable: `408 Request Timeout`, `429 Too Many Requests`, and every status
 * `>= 500`. Everything else (including other 4xx such as `400` or `410`) is not:
 * a misconfigured or decommissioned endpoint should fail fast rather than
 * consume the whole retry budget.
 *
 * Failures with no HTTP status at all (network error, timeout) never reach this
 * function; the caller treats them as retryable.
 *
 * @param status - HTTP status code of the response.
 * @returns `true` when a later attempt may succeed.
 */
function isRetryableStatus(status: number): boolean {
  return status === 408 || status === 429 || status >= 500;
}
|
|
91
|
+
|
|
92
|
+
/**
 * One durable delivery attempt for a single `webhook_deliveries` row.
 *
 * `retries: 0` — the reaper (driven off `nextRetryAt`) is the retry scheduler,
 * NOT Hatchet's own backoff (which would double up on the reaper's).
 *
 * The CAS to `sending` (step 3) is what prevents a double POST:
 *  - a `pending` row is claimed by flipping it to `sending`, so a second
 *    concurrent claimer matches zero rows;
 *  - a row already in `sending` can only be re-claimed once its `updatedAt` is
 *    older than `STUCK_AFTER_MS` (an orphan). The claim bumps `updatedAt`, so
 *    only one of several overlapping re-drives wins, and a duplicate enqueue
 *    that arrives while a run is still live loses.
 *
 * Outcomes written to the row: `delivered`, `pending` (retry scheduled),
 * `failed` (mirrored into `dead_letter_queue`), or `discarded` (endpoint
 * deleted/disabled).
 *
 * @param input.deliveryId - Primary key of the `webhook_deliveries` row.
 * @returns A small status object describing what this attempt did.
 */
export const deliverWebhookTask = hatchet.task({
  name: "deliver-webhook",
  retries: 0,
  executionTimeout: "30s",
  fn: async (input: { deliveryId: string }) => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");

    // (1) Load the delivery row. Absent → nothing to do (a hard delete cascaded
    // it away between enqueue and run).
    const [row] = await db
      .select()
      .from(webhookDeliveries)
      .where(eq(webhookDeliveries.id, input.deliveryId))
      .limit(1);
    if (!row) {
      return { status: "skipped", reason: "not_found" as const };
    }
    // Already terminal — a duplicate/late enqueue (or a reaper re-drive that
    // raced a just-finished run) must not re-deliver.
    if ((TERMINAL_STATUSES as readonly string[]).includes(row.status)) {
      return { status: row.status, skipped: true };
    }

    // (2) Load the endpoint. Absent (cascade-deleted) OR disabled → `discarded`:
    // an operator action, NOT a delivery error, so it is NOT dead-lettered.
    const [endpoint] = await db
      .select()
      .from(webhookEndpoints)
      .where(eq(webhookEndpoints.id, row.endpointId))
      .limit(1);
    if (!endpoint || endpoint.disabled) {
      await db
        .update(webhookDeliveries)
        .set({
          status: "discarded",
          nextRetryAt: null,
          updatedAt: new Date(),
        })
        .where(eq(webhookDeliveries.id, row.id));
      return {
        status: "discarded" as const,
        reason: endpoint
          ? ("endpoint_disabled" as const)
          : ("endpoint_deleted" as const),
      };
    }

    // (3) CAS the row to `sending` so an overlapping run cannot double-POST.
    // Matching on the status we read makes a concurrent claim of a `pending`
    // row affect zero rows. That alone is NOT enough for a row we read as
    // `sending`: `sending → sending` would match for every claimer. Such a row
    // is only claimable as a stale orphan, so the guard also requires its
    // `updatedAt` to be older than `STUCK_AFTER_MS`; the winning claim bumps
    // `updatedAt`, which makes every other claimer (and any duplicate enqueue
    // during a live run) match zero rows and bail out here.
    const claimedAt = new Date();
    const claimGuards = [
      eq(webhookDeliveries.id, row.id),
      eq(webhookDeliveries.status, row.status),
    ];
    if (row.status === "sending") {
      claimGuards.push(
        lt(
          webhookDeliveries.updatedAt,
          new Date(claimedAt.getTime() - STUCK_AFTER_MS),
        ),
      );
    }
    const claimed = await db
      .update(webhookDeliveries)
      .set({
        status: "sending",
        lastAttemptAt: claimedAt,
        updatedAt: claimedAt,
      })
      .where(and(...claimGuards))
      .returning({ id: webhookDeliveries.id });
    if (claimed.length === 0) {
      return { status: "skipped", reason: "lost_cas" as const };
    }

    // (4) Resolve the delivery-time DESTINATION TRANSFORM by the endpoint's
    // `kind` (default "webhook" — byte-identical to the pre-destination signed
    // POST) from the process destination registry. The transform turns the
    // FROZEN row envelope + LIVE endpoint into the concrete HTTP request. For
    // "webhook" it signs from the row payload + live secret; for a keyed
    // destination it rewrites url/headers/body. An UNKNOWN kind (no registered
    // transform) falls back to the always-on `webhook` preset, preserving the
    // pre-registry `ADAPTERS[kind] ?? webhookAdapter` behaviour. A `null` result
    // SKIPS delivery for this event (a successful no-op — marked delivered, no
    // POST). A THROW (bad/missing config) is NOT transient — it routes straight
    // to the failed+DLQ branch like a persistent 4xx (`adapterFailed`).
    const destination =
      getDestinationRegistry().get(endpoint.kind ?? "webhook") ??
      webhookDestination;
    let req: DestinationTransformResult | null = null;
    let transformSkipped = false;
    let adapterFailed = false;
    let adapterUrl = endpoint.url;
    let responseStatus: number | null = null;
    let responseBodySnippet: string | null = null;
    let lastError: string | null = null;
    try {
      req = destination.transform(
        row.payload as unknown as DestinationEnvelope,
        {
          endpoint,
          logger,
        },
      );
      if (req === null) {
        transformSkipped = true;
      } else {
        adapterUrl = req.url;
      }
    } catch (err) {
      adapterFailed = true;
      lastError = err instanceof Error ? err.message : String(err);
    }

    // (4a) Transform returned null → SKIP: mark the row delivered without a POST
    // (a successful no-op for an event this destination chose not to forward).
    // The `delivered` status keeps the row terminal so the reaper never
    // re-drives it. No endpoint `lastDeliveryAt` bump — nothing was sent.
    if (transformSkipped) {
      const skippedAt = new Date();
      await db
        .update(webhookDeliveries)
        .set({
          status: "delivered",
          attemptCount: row.attemptCount + 1,
          responseStatus: null,
          responseBodySnippet: null,
          deliveredAt: skippedAt,
          nextRetryAt: null,
          lastError: null,
          lastAttemptAt: skippedAt,
          updatedAt: skippedAt,
        })
        .where(eq(webhookDeliveries.id, row.id));
      logger.info("deliver-webhook: skipped by destination transform", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        kind: endpoint.kind ?? "webhook",
        eventType: row.eventType,
      });
      return { status: "delivered" as const, skipped: true };
    }

    // (5) POST with an AbortController timeout. A network error / timeout leaves
    // `responseStatus` null (a retryable failure, handled below). Skipped when
    // the transform threw (permanent config failure → straight to DLQ).
    //
    // Success is decided exactly ONCE, here, by the transform's `isSuccess` (or
    // the default 2xx rule). Keeping the call inside the try means a throwing
    // `isSuccess` is recorded as a failed attempt (counted, backed off, and
    // eventually dead-lettered) instead of crashing the task after the POST and
    // leaving the row in `sending` with its attempt count never advancing.
    let delivered = false;
    if (req) {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
      try {
        const res = await fetch(req.url, {
          method: req.method ?? "POST",
          headers: req.headers,
          body: req.body,
          signal: controller.signal,
        });
        responseStatus = res.status;
        // A body-read failure is not a delivery failure: keep the status and
        // store no snippet.
        const text = await res.text().catch(() => "");
        responseBodySnippet = text ? text.slice(0, SNIPPET_MAX) : null;
        delivered =
          req.isSuccess?.(responseStatus, responseBodySnippet ?? "") ??
          (responseStatus >= 200 && responseStatus < 300);
        if (!delivered) {
          lastError = `HTTP ${responseStatus}`;
        }
      } catch (err) {
        delivered = false;
        lastError =
          err instanceof Error
            ? controller.signal.aborted
              ? `Timeout after ${TIMEOUT_MS}ms`
              : err.message
            : String(err);
      } finally {
        clearTimeout(timer);
      }
    }

    const now = new Date();

    // (6) success → delivered (TERMINAL). Also bump the endpoint's
    // lastDeliveryAt.
    if (delivered) {
      await db
        .update(webhookDeliveries)
        .set({
          status: "delivered",
          attemptCount: row.attemptCount + 1,
          responseStatus,
          responseBodySnippet,
          deliveredAt: now,
          nextRetryAt: null,
          lastError: null,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      await db
        .update(webhookEndpoints)
        .set({ lastDeliveryAt: now, updatedAt: now })
        .where(eq(webhookEndpoints.id, endpoint.id));
      logger.info("deliver-webhook: delivered", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        eventType: row.eventType,
        responseStatus,
      });
      return { status: "delivered" as const, responseStatus };
    }

    const attemptCount = row.attemptCount + 1;

    // (7) Permanent (non-retryable) failures fast-fail, skipping the remaining
    // retry budget:
    //   - a transform THROW (bad/missing destination config) — config is not
    //     transient, so a single attempt is enough to declare it dead.
    //   - a persistent-4xx: a non-retryable client error (anything 4xx except
    //     408/429) after attempt >= 2 — a `410 Gone` must not burn 8 attempts.
    //     The `>= 2` guard tolerates a single transient 4xx blip first.
    const httpFastFail =
      responseStatus !== null &&
      !isRetryableStatus(responseStatus) &&
      attemptCount >= 2;
    const permanentFail = adapterFailed || httpFastFail;

    // (8) Retryable failure with attempts remaining → back to `pending` with the
    // next backoff deadline; the reaper re-drives it once `nextRetryAt` passes.
    if (!permanentFail && attemptCount < MAX_ATTEMPTS) {
      const nextRetryAt = new Date(now.getTime() + backoffMs(attemptCount));
      await db
        .update(webhookDeliveries)
        .set({
          status: "pending",
          attemptCount,
          responseStatus,
          responseBodySnippet,
          nextRetryAt,
          lastError,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      logger.warn("deliver-webhook: retry scheduled", {
        deliveryId: row.id,
        endpointId: endpoint.id,
        attemptCount,
        responseStatus,
        nextRetryAt: nextRetryAt.toISOString(),
        error: lastError,
      });
      return {
        status: "pending" as const,
        attemptCount,
        nextRetryAt: nextRetryAt.toISOString(),
      };
    }

    // (9) Exhausted (attempts >= MAX) OR a permanent fast-fail → `failed`
    // (TERMINAL) + a forensic `dead_letter_queue` mirror, in one transaction so
    // the terminal status and the DLQ row commit together. This is the DLQ's
    // first real producer (LOCKED decision 8).
    const exhaustError = `Exhausted ${attemptCount}: ${lastError ?? "unknown"}`;
    await db.transaction(async (tx) => {
      await tx
        .update(webhookDeliveries)
        .set({
          status: "failed",
          attemptCount,
          responseStatus,
          responseBodySnippet,
          nextRetryAt: null,
          lastError,
          lastAttemptAt: now,
          updatedAt: now,
        })
        .where(eq(webhookDeliveries.id, row.id));
      await tx.insert(deadLetterQueue).values({
        source: "webhook-delivery",
        sourceId: row.id,
        payload: {
          endpointId: endpoint.id,
          // The adapter-RESOLVED url + the endpoint kind, so a failed keyed
          // delivery is debuggable (NOT the raw endpoint.url, which for a keyed
          // destination is not the URL actually POSTed to).
          url: adapterUrl,
          kind: endpoint.kind ?? "webhook",
          eventType: row.eventType,
          webhookId: row.webhookId,
          body: row.payload,
        },
        error: exhaustError,
        retryCount: attemptCount,
        status: "pending",
      });
    });
    logger.error("deliver-webhook: failed (dead-lettered)", {
      deliveryId: row.id,
      endpointId: endpoint.id,
      kind: endpoint.kind ?? "webhook",
      eventType: row.eventType,
      attemptCount,
      responseStatus,
      fastFail: permanentFail,
      adapterFailed,
      error: lastError,
    });
    return { status: "failed" as const, attemptCount, fastFail: permanentFail };
  },
});
|
|
408
|
+
|
|
409
|
+
/** Max rows a single reaper sweep re-drives (bounds the per-tick fan-out). */
const REAPER_BATCH = 500;

/**
 * Max delivery runs a single sweep awaits at once. Each run can block for the
 * full per-attempt POST timeout, so awaiting them strictly one after another
 * lets a single slow endpoint stall every other due row in the batch.
 */
const REAPER_CONCURRENCY = 10;

/**
 * Engine-owned reaper cron for outbound webhook deliveries (Section 1.5, cloned
 * from `reapStuckCampaignsTask`). It is BOTH the retry scheduler AND the
 * orphan-`sending` recovery:
 *
 *  - A `pending` row whose `nextRetryAt` has passed (or is null — a freshly
 *    enqueued row whose `runNoWait` failed at emit time) is re-driven.
 *  - A `sending` row whose worker died mid-POST (OOM/SIGKILL/timeout, so the JS
 *    never reached a terminal write) is re-driven once it is older than
 *    `STUCK_AFTER_MS` (measured from `updatedAt`, which step 3's CAS bumped).
 *
 * Recovery is `deliverWebhookTask.run({ deliveryId })`, issued by up to
 * `REAPER_CONCURRENCY` workers that pull from the batch in query order. The
 * delivery task's own claim step decides whether a re-drive actually POSTs, so
 * a re-drive that loses the claim is a no-op. A failed re-drive is logged and
 * skipped; the row stays due and is picked up by a later sweep.
 * Self-bootstraps `db`/`logger` from `process.env` (cron runs have no request
 * container).
 *
 * @returns `candidates` (rows selected this sweep) and `reDriven` (runs that
 *   completed without throwing).
 */
export const reapDueWebhookDeliveriesTask = hatchet.task({
  name: "reap-due-webhook-deliveries",
  onCrons: [process.env.OUTBOUND_WEBHOOK_REAPER_CRON ?? "*/1 * * * *"],
  retries: 1,
  executionTimeout: "120s",
  fn: async () => {
    const db = getDb();
    const logger = createLogger(process.env.LOG_LEVEL ?? "info");

    const now = new Date();
    const stuckBefore = new Date(now.getTime() - STUCK_AFTER_MS);

    // Due-pending (retry clock elapsed or never set) OR stale-sending (orphan).
    const due = await db
      .select({ id: webhookDeliveries.id })
      .from(webhookDeliveries)
      .where(
        or(
          and(
            eq(webhookDeliveries.status, "pending"),
            or(
              sql`${webhookDeliveries.nextRetryAt} is null`,
              lt(webhookDeliveries.nextRetryAt, now),
            ),
          ),
          and(
            eq(webhookDeliveries.status, "sending"),
            lt(webhookDeliveries.updatedAt, stuckBefore),
          ),
        ),
      )
      .orderBy(webhookDeliveries.nextRetryAt)
      .limit(REAPER_BATCH);

    let reDriven = 0;
    // Shared cursor: each worker takes the next unclaimed row. The read and the
    // increment happen with no `await` in between, so no row is taken twice.
    let cursor = 0;
    const drain = async (): Promise<void> => {
      while (cursor < due.length) {
        const row = due[cursor];
        cursor += 1;
        if (!row) continue;
        try {
          await deliverWebhookTask.run({ deliveryId: row.id });
          reDriven += 1;
        } catch (err) {
          // Best-effort: one failed re-drive must not abort the sweep.
          logger.warn("reap-due-webhook-deliveries: re-drive failed", {
            deliveryId: row.id,
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }
    };
    await Promise.all(
      Array.from({ length: Math.min(REAPER_CONCURRENCY, due.length) }, () =>
        drain(),
      ),
    );

    if (due.length > 0) {
      logger.info("reap-due-webhook-deliveries: swept", {
        candidates: due.length,
        reDriven,
      });
    }

    return { candidates: due.length, reDriven };
  },
});
|