@hogsend/engine 0.13.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hogsend/engine",
3
- "version": "0.13.1",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -40,14 +40,14 @@
40
40
  "svix": "^1.95.1",
41
41
  "winston": "^3.19.0",
42
42
  "zod": "^4.4.3",
43
- "@hogsend/core": "^0.13.1",
44
- "@hogsend/db": "^0.13.1",
45
- "@hogsend/email": "^0.13.1",
46
- "@hogsend/plugin-posthog": "^0.13.1",
47
- "@hogsend/plugin-resend": "^0.13.1"
43
+ "@hogsend/core": "^0.14.0",
44
+ "@hogsend/db": "^0.14.0",
45
+ "@hogsend/email": "^0.14.0",
46
+ "@hogsend/plugin-posthog": "^0.14.0",
47
+ "@hogsend/plugin-resend": "^0.14.0"
48
48
  },
49
49
  "optionalDependencies": {
50
- "@hogsend/plugin-postmark": "^0.13.1"
50
+ "@hogsend/plugin-postmark": "^0.14.0"
51
51
  },
52
52
  "devDependencies": {
53
53
  "@types/node": "^22.15.3",
@@ -53,6 +53,41 @@ export const posthogDestination = defineDestination({
53
53
  userEmail?: string | null;
54
54
  };
55
55
  const distinctId = data.userId ?? data.to ?? data.userEmail ?? undefined;
56
+ // `email.action` is the semantic-link envelope: the CONSUMER's event name
57
+ // (data.event, e.g. "nps.submitted") is what PostHog should capture, with
58
+ // the author's properties flattened to the top level. Other catalog events
59
+ // capture under their spine name (with the optional remap).
60
+ if (envelope.type === "email.action") {
61
+ const action = envelope.data as {
62
+ event: string;
63
+ properties: Record<string, unknown> | null;
64
+ emailSendId: string;
65
+ templateKey: string | null;
66
+ linkId: string;
67
+ linkUrl: string;
68
+ to: string;
69
+ userId: string | null;
70
+ at: string;
71
+ };
72
+ return {
73
+ url: `${host}/capture/`,
74
+ method: "POST",
75
+ headers: { "Content-Type": "application/json" },
76
+ body: JSON.stringify({
77
+ api_key: config.apiKey,
78
+ event: action.event,
79
+ distinct_id: distinctId,
80
+ timestamp: envelope.timestamp,
81
+ properties: {
82
+ ...(action.properties ?? {}),
83
+ emailSendId: action.emailSendId,
84
+ templateKey: action.templateKey,
85
+ linkId: action.linkId,
86
+ $lib: "hogsend",
87
+ },
88
+ }),
89
+ };
90
+ }
56
91
  // Optional event-name remap (identity by default).
57
92
  const eventName = config.eventNames?.[envelope.type] ?? envelope.type;
58
93
  return {
package/src/index.ts CHANGED
@@ -229,6 +229,13 @@ export {
229
229
  } from "./lib/redis.js";
230
230
  // --- Self-service password reset (engine-owned, self-contained email) ---
231
231
  export { sendResetPasswordEmail } from "./lib/reset-email.js";
232
+ export {
233
+ type ConfirmSemanticClickInput,
234
+ type ConfirmSemanticClickResult,
235
+ confirmSemanticClick,
236
+ SEMANTIC_BURST_DISTINCT_LINKS,
237
+ SEMANTIC_BURST_WINDOW_MS,
238
+ } from "./lib/semantic-click.js";
232
239
  export { type MountStudioResult, mountStudio } from "./lib/studio.js";
233
240
  export {
234
241
  type ResolveTimezoneInput,
@@ -22,11 +22,17 @@ import type {
22
22
  IfPast,
23
23
  JourneyContext,
24
24
  TimeOfDayBuilder,
25
+ WaitForEventResult,
25
26
  Weekday,
26
27
  WhenBuilder,
27
28
  } from "@hogsend/core/types";
28
- import { type Database, emailSends, journeyStates } from "@hogsend/db";
29
- import { and, count, eq, max, notInArray } from "drizzle-orm";
29
+ import {
30
+ type Database,
31
+ emailSends,
32
+ journeyStates,
33
+ userEvents,
34
+ } from "@hogsend/db";
35
+ import { and, count, desc, eq, gte, max, notInArray } from "drizzle-orm";
30
36
  import { checkEmailPreferences } from "../lib/enrollment-guards.js";
31
37
  import { ingestEvent } from "../lib/ingestion.js";
32
38
  import type { Logger } from "../lib/logger.js";
@@ -206,7 +212,8 @@ export function createJourneyContext(
206
212
  event: string,
207
213
  timeout: DurationObject,
208
214
  nodeId: string,
209
- ): Promise<{ timedOut: boolean }> => {
215
+ lookback?: DurationObject,
216
+ ): Promise<WaitForEventResult> => {
210
217
  // Reject a timeout longer than the journey task's executionTimeout up front
211
218
  // so it fails fast at authoring time. (Eviction-capable engines may allow
212
219
  // longer wall-clock waits, but we cap to the configured ceiling — raise
@@ -217,6 +224,41 @@ export function createJourneyContext(
217
224
  );
218
225
  }
219
226
 
227
+ // Optional lookback: the durable wait only matches events pushed AFTER it
228
+ // is established, so an answer landing in the gap (between a send and its
229
+ // wait, or between two back-to-back waits) would otherwise be permanently
230
+ // unobservable — its first-answer idempotency key is already claimed and
231
+ // can never re-push. A recent matching user_events row resolves the wait
232
+ // immediately, payload included.
233
+ if (lookback) {
234
+ const since = new Date(Date.now() - durationToMs(lookback));
235
+ const recent = await db
236
+ .select({ properties: userEvents.properties })
237
+ .from(userEvents)
238
+ .where(
239
+ and(
240
+ eq(userEvents.userId, userId),
241
+ eq(userEvents.event, event),
242
+ gte(userEvents.occurredAt, since),
243
+ ),
244
+ )
245
+ .orderBy(desc(userEvents.occurredAt))
246
+ .limit(1);
247
+ const row = recent[0];
248
+ if (row) {
249
+ const scalars = Object.fromEntries(
250
+ Object.entries(row.properties ?? {}).filter(
251
+ ([, v]) =>
252
+ typeof v === "string" ||
253
+ typeof v === "number" ||
254
+ typeof v === "boolean" ||
255
+ v === null,
256
+ ),
257
+ ) as NonNullable<WaitForEventResult["properties"]>;
258
+ return { timedOut: false, properties: scalars };
259
+ }
260
+ }
261
+
220
262
  await enterWait(nodeId);
221
263
 
222
264
  // Wait for the user-scoped event or the timeout. The event branch filters on
@@ -241,9 +283,34 @@ export function createJourneyContext(
241
283
  {}) as Record<string, unknown>;
242
284
  const timedOut = !("event" in fired);
243
285
 
286
+ // Surface the matched event's payload (best-effort). The engine returns
287
+ // matches as `[{ id, data }]` where `data` is the pushed ingest payload
288
+ // ({ userId, userEmail, properties }); the pre-eviction path may hand the
289
+ // payload back un-wrapped — tolerate both, mirroring the CREATE-strip.
290
+ let properties: WaitForEventResult["properties"];
291
+ if (!timedOut) {
292
+ const matches = fired.event;
293
+ const first = Array.isArray(matches) ? matches[0] : matches;
294
+ const payload =
295
+ first && typeof first === "object" && "data" in first
296
+ ? (first as { data?: unknown }).data
297
+ : first;
298
+ const candidate =
299
+ payload && typeof payload === "object" && "properties" in payload
300
+ ? (payload as { properties?: unknown }).properties
301
+ : undefined;
302
+ if (
303
+ candidate &&
304
+ typeof candidate === "object" &&
305
+ !Array.isArray(candidate)
306
+ ) {
307
+ properties = candidate as NonNullable<WaitForEventResult["properties"]>;
308
+ }
309
+ }
310
+
244
311
  await resumeFromWait();
245
312
 
246
- return { timedOut };
313
+ return { timedOut, ...(properties ? { properties } : {}) };
247
314
  };
248
315
 
249
316
  return {
@@ -275,11 +342,12 @@ export function createJourneyContext(
275
342
  );
276
343
  },
277
344
 
278
- async waitForEvent({ event, timeout, label }) {
345
+ async waitForEvent({ event, timeout, label, lookback }) {
279
346
  return performWaitForEvent(
280
347
  event,
281
348
  timeout,
282
349
  label ?? `wait-event:${event}`,
350
+ lookback,
283
351
  );
284
352
  },
285
353
 
@@ -68,6 +68,7 @@ export async function ingestEvent(opts: {
68
68
 
69
69
  // (2) Idempotency dedup + `user_events` insert keyed on the resolved key, with
70
70
  // ONLY eventProperties in the properties bag (D2).
71
+ let idempotentInsertId: string | undefined;
71
72
  if (event.idempotencyKey) {
72
73
  const result = await db
73
74
  .insert(userEvents)
@@ -86,6 +87,7 @@ export async function ingestEvent(opts: {
86
87
  if (result.length === 0) {
87
88
  return { stored: false, exits: [] };
88
89
  }
90
+ idempotentInsertId = result[0]?.id;
89
91
  } else {
90
92
  await db.insert(userEvents).values({
91
93
  userId: resolvedKey,
@@ -109,7 +111,13 @@ export async function ingestEvent(opts: {
109
111
 
110
112
  // (4) Hatchet push + (5) checkExits, both keyed on the resolved key. The push
111
113
  // payload wire key STAYS `properties` (bucket tests assert on it — risk 9).
112
- const [, exits] = await Promise.all([
114
+ //
115
+ // An idempotency claim must not outlive a FAILED publish: journeys were never
116
+ // notified, and the consumed key would make every retry a silent no-op (the
117
+ // event becomes permanently invisible to journeys/destinations). So on a push
118
+ // failure the just-inserted row is compensating-deleted before rethrowing —
119
+ // the caller's retry (same key) can then re-claim and re-publish.
120
+ const [pushResult, exitsResult] = await Promise.allSettled([
113
121
  hatchet.events.push(event.event, {
114
122
  userId: resolvedKey,
115
123
  userEmail: event.userEmail ?? "",
@@ -121,6 +129,29 @@ export async function ingestEvent(opts: {
121
129
  properties: event.eventProperties,
122
130
  }),
123
131
  ]);
132
+ if (pushResult.status === "rejected") {
133
+ if (idempotentInsertId) {
134
+ try {
135
+ await db
136
+ .delete(userEvents)
137
+ .where(eq(userEvents.id, idempotentInsertId));
138
+ } catch (cleanupErr) {
139
+ logger.warn("ingestEvent: failed to roll back idempotency claim", {
140
+ event: event.event,
141
+ idempotencyKey: event.idempotencyKey,
142
+ error:
143
+ cleanupErr instanceof Error
144
+ ? cleanupErr.message
145
+ : String(cleanupErr),
146
+ });
147
+ }
148
+ }
149
+ throw pushResult.reason;
150
+ }
151
+ if (exitsResult.status === "rejected") {
152
+ throw exitsResult.reason;
153
+ }
154
+ const exits = exitsResult.value;
124
155
 
125
156
  // (6) Real-time bucket membership re-evaluation (Section 6.1). NOT part of the
126
157
  // Promise.allSettled above: its property eval reads contact state ⊕ this-ingest
@@ -90,6 +90,23 @@ export interface OutboundPayloads {
90
90
  "email.delivered": EmailEventPayload;
91
91
  "email.opened": EmailEventPayload;
92
92
  "email.clicked": EmailEventPayload & { linkUrl?: string; linkId?: string };
93
+ /**
94
+ * A SEMANTIC link answered — the in-email action event (consumer-named, e.g.
95
+ * "nps.submitted"). Emitted at most once per (send, event name): first
96
+ * answer wins, scanner bursts are suppressed. `event`/`properties` carry the
97
+ * consumer semantics; the rest is send context.
98
+ */
99
+ "email.action": {
100
+ event: string;
101
+ properties: Record<string, unknown> | null;
102
+ emailSendId: string;
103
+ templateKey: string | null;
104
+ userId: string | null;
105
+ to: string;
106
+ at: string;
107
+ linkId: string;
108
+ linkUrl: string;
109
+ };
93
110
  "email.bounced": EmailEventPayload & {
94
111
  bounceType?: string;
95
112
  bounceReason?: string;
@@ -18,6 +18,7 @@ const POSTHOG_FUNNEL_EVENTS = [
18
18
  "email.delivered",
19
19
  "email.opened",
20
20
  "email.clicked",
21
+ "email.action",
21
22
  "email.bounced",
22
23
  "email.complained",
23
24
  ] as const;
@@ -51,7 +52,11 @@ export async function seedPostHogDestination(opts: {
51
52
  );
52
53
 
53
54
  const existing = await tx
54
- .select({ id: webhookEndpoints.id })
55
+ .select({
56
+ id: webhookEndpoints.id,
57
+ url: webhookEndpoints.url,
58
+ eventTypes: webhookEndpoints.eventTypes,
59
+ })
55
60
  .from(webhookEndpoints)
56
61
  .where(
57
62
  and(
@@ -61,7 +66,34 @@ export async function seedPostHogDestination(opts: {
61
66
  )
62
67
  .limit(1);
63
68
 
64
- if (existing.length > 0) {
69
+ const found = existing[0];
70
+ if (found) {
71
+ // Reconcile the ENGINE-seeded row (identified by its sentinel URL) when
72
+ // the funnel list has grown since it was inserted — its stored
73
+ // eventTypes are a snapshot, and emitOutbound matches by jsonb
74
+ // containment, so a pre-upgrade row would silently never receive newer
75
+ // events (e.g. email.action). Operator-created endpoints are left
76
+ // untouched: subscriber-chooses-events is the contract there.
77
+ if (found.url === "posthog://capture") {
78
+ const current = Array.isArray(found.eventTypes)
79
+ ? (found.eventTypes as string[])
80
+ : [];
81
+ const missing = POSTHOG_FUNNEL_EVENTS.filter(
82
+ (e) => !current.includes(e),
83
+ );
84
+ if (missing.length > 0) {
85
+ await tx
86
+ .update(webhookEndpoints)
87
+ .set({
88
+ eventTypes: [...current, ...missing],
89
+ updatedAt: new Date(),
90
+ })
91
+ .where(eq(webhookEndpoints.id, found.id));
92
+ logger.info("Reconciled seeded PostHog destination event types", {
93
+ added: missing,
94
+ });
95
+ }
96
+ }
65
97
  return { seeded: false };
66
98
  }
67
99
 
@@ -0,0 +1,194 @@
1
+ import type { HatchetClient } from "@hatchet-dev/typescript-sdk/v1/index.js";
2
+ import type { JourneyRegistry } from "@hogsend/core/registry";
3
+ import {
4
+ type Database,
5
+ linkClicks,
6
+ trackedLinks,
7
+ userEvents,
8
+ } from "@hogsend/db";
9
+ import { and, countDistinct, eq, gte, isNull, lte } from "drizzle-orm";
10
+ import type { Logger } from "./logger.js";
11
+ import { emitOutbound } from "./outbound.js";
12
+ import {
13
+ pushTrackingEvent,
14
+ resolveEmailSendContext,
15
+ } from "./tracking-events.js";
16
+
17
+ /**
18
+ * Scanner-burst window: SafeLinks/Proofpoint-style scanners follow EVERY link
19
+ * in an email within seconds of delivery; humans don't. Confirmation of a
20
+ * semantic answer is DEFERRED until the window around the candidate click has
21
+ * fully elapsed, so the gate sees the WHOLE burst — including clicks that land
22
+ * AFTER the candidate. An inline check could never suppress a scanner's first
23
+ * click (the burst isn't visible yet); this one can.
24
+ */
25
+ export const SEMANTIC_BURST_WINDOW_MS = 30_000;
26
+ export const SEMANTIC_BURST_DISTINCT_LINKS = 3;
27
+
28
+ // Type alias (NOT interface) so it picks up an implicit index signature and
29
+ // satisfies Hatchet's JsonObject task-input constraint.
30
+ export type ConfirmSemanticClickInput = {
31
+ trackedLinkId: string;
32
+ /** ISO instant of the candidate click. */
33
+ clickedAt: string;
34
+ };
35
+
36
+ export type ConfirmSemanticClickResult =
37
+ | { status: "confirmed"; event: string }
38
+ | { status: "suppressed"; distinctLinks: number }
39
+ /** Another link's answer claimed this send's slot first. */
40
+ | { status: "lost" }
41
+ | { status: "skipped"; reason: string };
42
+
43
+ export interface ConfirmSemanticClickDeps {
44
+ db: Database;
45
+ hatchet: HatchetClient;
46
+ registry: JourneyRegistry;
47
+ logger: Logger;
48
+ }
49
+
50
+ /**
51
+ * Confirm (or suppress) one provisional semantic-link answer. Idempotent end
52
+ * to end, so the wrapping Hatchet task can retry safely:
53
+ *
54
+ * 1. Sleep out the remainder of the burst window past the candidate click.
55
+ * 2. Count DISTINCT links of the send clicked inside the window around the
56
+ * candidate — at/over the threshold the whole burst is scanner traffic
57
+ * and the answer is suppressed (the raw clicks stay recorded).
58
+ * 3. Claim the send's answer slot via `ingestEvent` with the
59
+ * `sem:<emailSendId>:<event>` idempotency key (first answer wins; a
60
+ * failed Hatchet publish rolls the claim back inside `ingestEvent`, so a
61
+ * retry re-claims).
62
+ * 4. If we claimed (or a crashed earlier attempt of THIS link did — detected
63
+ * by the stored row's `linkId`), stamp `semanticEmittedAt` and emit the
64
+ * `email.action` outbound envelope with the same key as `dedupeKey`, so
65
+ * re-runs are per-endpoint no-ops.
66
+ */
67
+ export async function confirmSemanticClick(
68
+ deps: ConfirmSemanticClickDeps,
69
+ input: ConfirmSemanticClickInput,
70
+ ): Promise<ConfirmSemanticClickResult> {
71
+ const { db, hatchet, registry, logger } = deps;
72
+
73
+ const clickedAtMs = Date.parse(input.clickedAt);
74
+ if (Number.isNaN(clickedAtMs)) {
75
+ return { status: "skipped", reason: "bad_clicked_at" };
76
+ }
77
+
78
+ const rows = await db
79
+ .select({
80
+ id: trackedLinks.id,
81
+ emailSendId: trackedLinks.emailSendId,
82
+ originalUrl: trackedLinks.originalUrl,
83
+ event: trackedLinks.event,
84
+ eventProperties: trackedLinks.eventProperties,
85
+ })
86
+ .from(trackedLinks)
87
+ .where(eq(trackedLinks.id, input.trackedLinkId))
88
+ .limit(1);
89
+ const link = rows[0];
90
+ if (!link?.event) {
91
+ return { status: "skipped", reason: "not_semantic" };
92
+ }
93
+ const semanticEvent = link.event;
94
+
95
+ // (1) Let the burst window close before judging the click.
96
+ const remainingMs = clickedAtMs + SEMANTIC_BURST_WINDOW_MS - Date.now();
97
+ if (remainingMs > 0) {
98
+ await new Promise((resolve) => setTimeout(resolve, remainingMs));
99
+ }
100
+
101
+ // (2) Whole-burst check: distinct links of this send clicked in the window
102
+ // AROUND the candidate (before AND after — the deferral is what makes the
103
+ // "after" half visible).
104
+ const windowStart = new Date(clickedAtMs - SEMANTIC_BURST_WINDOW_MS);
105
+ const windowEnd = new Date(clickedAtMs + SEMANTIC_BURST_WINDOW_MS);
106
+ const burst = await db
107
+ .select({ n: countDistinct(linkClicks.trackedLinkId) })
108
+ .from(linkClicks)
109
+ .innerJoin(trackedLinks, eq(linkClicks.trackedLinkId, trackedLinks.id))
110
+ .where(
111
+ and(
112
+ eq(trackedLinks.emailSendId, link.emailSendId),
113
+ gte(linkClicks.clickedAt, windowStart),
114
+ lte(linkClicks.clickedAt, windowEnd),
115
+ ),
116
+ );
117
+ const distinctLinks = burst[0]?.n ?? 0;
118
+ if (distinctLinks >= SEMANTIC_BURST_DISTINCT_LINKS) {
119
+ logger.warn("Semantic answer suppressed: scanner-like click burst", {
120
+ emailSendId: link.emailSendId,
121
+ linkId: link.id,
122
+ event: semanticEvent,
123
+ distinctLinks,
124
+ });
125
+ return { status: "suppressed", distinctLinks };
126
+ }
127
+
128
+ const ctx = await resolveEmailSendContext(db, link.emailSendId);
129
+ if (!ctx) {
130
+ return { status: "skipped", reason: "no_send_context" };
131
+ }
132
+
133
+ // (3) Claim the answer slot. Duplicate key → stored=false BEFORE the Hatchet
134
+ // push, so journeys/destinations see at most one answer per (send, event).
135
+ const semKey = `sem:${link.emailSendId}:${semanticEvent}`;
136
+ const result = await pushTrackingEvent({
137
+ db,
138
+ hatchet,
139
+ registry,
140
+ logger,
141
+ event: semanticEvent,
142
+ emailSendId: link.emailSendId,
143
+ properties: {
144
+ ...(link.eventProperties ?? {}),
145
+ linkId: link.id,
146
+ },
147
+ resolvedContext: ctx,
148
+ idempotencyKey: semKey,
149
+ });
150
+
151
+ // (4) Claimer determination. stored=false usually means another link won —
152
+ // but if the stored row carries THIS link's id, it is a crashed earlier
153
+ // attempt of this very confirmation, and the (idempotent) tail must re-run.
154
+ let isClaimer = result?.stored ?? false;
155
+ if (!isClaimer) {
156
+ const existing = await db
157
+ .select({ properties: userEvents.properties })
158
+ .from(userEvents)
159
+ .where(eq(userEvents.idempotencyKey, semKey))
160
+ .limit(1);
161
+ isClaimer = existing[0]?.properties?.linkId === link.id;
162
+ if (!isClaimer) {
163
+ return { status: "lost" };
164
+ }
165
+ }
166
+
167
+ await db
168
+ .update(trackedLinks)
169
+ .set({ semanticEmittedAt: new Date(), updatedAt: new Date() })
170
+ .where(
171
+ and(eq(trackedLinks.id, link.id), isNull(trackedLinks.semanticEmittedAt)),
172
+ );
173
+
174
+ await emitOutbound({
175
+ db,
176
+ hatchet,
177
+ logger,
178
+ event: "email.action",
179
+ dedupeKey: semKey,
180
+ payload: {
181
+ event: semanticEvent,
182
+ properties: link.eventProperties ?? null,
183
+ emailSendId: link.emailSendId,
184
+ templateKey: ctx.templateKey ?? null,
185
+ userId: ctx.userId ?? null,
186
+ to: ctx.to ?? ctx.userEmail ?? "",
187
+ at: new Date().toISOString(),
188
+ linkId: link.id,
189
+ linkUrl: link.originalUrl,
190
+ },
191
+ });
192
+
193
+ return { status: "confirmed", event: semanticEvent };
194
+ }
@@ -2,7 +2,7 @@ import type { HatchetClient } from "@hatchet-dev/typescript-sdk/v1/index.js";
2
2
  import type { JourneyRegistry } from "@hogsend/core/registry";
3
3
  import { type Database, emailSends, journeyStates } from "@hogsend/db";
4
4
  import { eq } from "drizzle-orm";
5
- import { ingestEvent } from "./ingestion.js";
5
+ import { type IngestResult, ingestEvent } from "./ingestion.js";
6
6
  import type { Logger } from "./logger.js";
7
7
 
8
8
  interface EmailSendContext {
@@ -122,6 +122,13 @@ export interface PushTrackingEventOpts {
122
122
  * lazily.
123
123
  */
124
124
  resolvedContext?: EmailSendContext | null;
125
+ /**
126
+ * Threaded straight into `ingestEvent` — a duplicate key returns
127
+ * `{ stored: false }` BEFORE the Hatchet push, so journeys never see the
128
+ * duplicate. Semantic link answers use `sem:<emailSendId>:<event>` for
129
+ * first-answer-per-send semantics.
130
+ */
131
+ idempotencyKey?: string;
125
132
  }
126
133
 
127
134
  /**
@@ -136,14 +143,14 @@ export interface PushTrackingEventOpts {
136
143
  */
137
144
  export async function pushTrackingEvent(
138
145
  opts: PushTrackingEventOpts,
139
- ): Promise<void> {
146
+ ): Promise<IngestResult | undefined> {
140
147
  const { db, hatchet, registry, logger, event, emailSendId } = opts;
141
148
 
142
149
  const ctx =
143
150
  opts.resolvedContext !== undefined
144
151
  ? opts.resolvedContext
145
152
  : await resolveEmailSendContext(db, emailSendId);
146
- if (!ctx) return;
153
+ if (!ctx) return undefined;
147
154
 
148
155
  const properties: Record<string, unknown> = {
149
156
  emailSendId,
@@ -151,7 +158,7 @@ export async function pushTrackingEvent(
151
158
  ...opts.properties,
152
159
  };
153
160
 
154
- await ingestEvent({
161
+ return await ingestEvent({
155
162
  db,
156
163
  registry,
157
164
  hatchet,
@@ -161,6 +168,7 @@ export async function pushTrackingEvent(
161
168
  userId: ctx.userId,
162
169
  userEmail: ctx.userEmail,
163
170
  eventProperties: properties,
171
+ idempotencyKey: opts.idempotencyKey,
164
172
  },
165
173
  });
166
174
  }
@@ -1,14 +1,127 @@
1
+ import { randomUUID } from "node:crypto";
1
2
  import type { Database } from "@hogsend/db";
2
3
  import { trackedLinks } from "@hogsend/db";
4
+ import {
5
+ EMAIL_ACTION_EVENT_ATTR,
6
+ EMAIL_ACTION_PROPS_ATTR,
7
+ } from "@hogsend/email";
3
8
 
4
- const HREF_RE = /href="(https?:\/\/[^"]+)"/gi;
9
+ const ANCHOR_RE = /<a\b[^>]*>/gi;
10
+ const HREF_RE = /\bhref="(https?:\/\/[^"]+)"/i;
11
+ const EVENT_ATTR_RE = new RegExp(
12
+ `\\b${EMAIL_ACTION_EVENT_ATTR}="([^"]*)"`,
13
+ "i",
14
+ );
15
+ const PROPS_ATTR_RE = new RegExp(
16
+ `\\b${EMAIL_ACTION_PROPS_ATTR}="([^"]*)"`,
17
+ "i",
18
+ );
19
+ const STRIP_SEMANTIC_ATTRS_RE = new RegExp(
20
+ `\\s*(?:${EMAIL_ACTION_EVENT_ATTR}|${EMAIL_ACTION_PROPS_ATTR})="[^"]*"`,
21
+ "gi",
22
+ );
5
23
 
6
24
  const SKIP_PATTERNS = ["/v1/email/unsubscribe", "/v1/email/preferences"];
7
25
 
26
+ // Engine-owned event vocabularies (both `email.opened` dot-style and
27
+ // `journey:completed` colon-style exist) — a consumer semantic event in these
28
+ // namespaces would corrupt insights or trigger engine-internal logic.
29
+ const RESERVED_EVENT_NAME_RE = /^(?:email|journey|bucket|contact)[.:]/;
30
+
31
+ // Semantic payloads re-emit on every answer and persist indefinitely — keep
32
+ // them small and scalar (non-scalars don't survive the Hatchet wire anyway).
33
+ const MAX_PROPS_JSON_LENGTH = 2048;
34
+
8
35
  function shouldSkipUrl(url: string): boolean {
9
36
  return SKIP_PATTERNS.some((pattern) => url.includes(pattern));
10
37
  }
11
38
 
39
+ // React entity-escapes attribute values at render time. Decode the five
40
+ // entities it emits; `&amp;` LAST so `&amp;quot;` round-trips to `&quot;`.
41
+ function decodeAttributeValue(value: string): string {
42
+ return value
43
+ .replace(/&quot;/g, '"')
44
+ .replace(/&#x27;/g, "'")
45
+ .replace(/&lt;/g, "<")
46
+ .replace(/&gt;/g, ">")
47
+ .replace(/&amp;/g, "&");
48
+ }
49
+
50
+ interface SemanticAttrs {
51
+ event: string;
52
+ /** Raw (encoded) props attribute — part of the dedupe key. */
53
+ propsRaw: string | null;
54
+ properties: Record<string, unknown> | null;
55
+ }
56
+
57
+ /**
58
+ * Extract + validate the semantic metadata off one `<a …>` tag. Returns null
59
+ * for a plain link. Throws on author error — a semantic link that can't be
60
+ * honored must fail the SEND loudly, not degrade into a silent plain link.
61
+ */
62
+ function parseSemanticAttrs(tag: string): SemanticAttrs | null {
63
+ const eventMatch = tag.match(EVENT_ATTR_RE);
64
+ if (!eventMatch) return null;
65
+
66
+ const event = decodeAttributeValue(eventMatch[1] ?? "").trim();
67
+ if (!event) {
68
+ throw new Error(`Semantic link has an empty ${EMAIL_ACTION_EVENT_ATTR}`);
69
+ }
70
+ if (RESERVED_EVENT_NAME_RE.test(event)) {
71
+ throw new Error(
72
+ `Semantic link event "${event}" uses a reserved namespace (email/journey/bucket/contact)`,
73
+ );
74
+ }
75
+
76
+ const propsMatch = tag.match(PROPS_ATTR_RE);
77
+ if (!propsMatch) return { event, propsRaw: null, properties: null };
78
+
79
+ const propsRaw = propsMatch[1] ?? "";
80
+ const decoded = decodeAttributeValue(propsRaw);
81
+ if (decoded.length > MAX_PROPS_JSON_LENGTH) {
82
+ throw new Error(
83
+ `Semantic link "${event}" properties exceed ${MAX_PROPS_JSON_LENGTH} chars`,
84
+ );
85
+ }
86
+
87
+ let parsed: unknown;
88
+ try {
89
+ parsed = JSON.parse(decoded);
90
+ } catch {
91
+ throw new Error(
92
+ `Semantic link "${event}" has unparseable ${EMAIL_ACTION_PROPS_ATTR}`,
93
+ );
94
+ }
95
+ if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
96
+ throw new Error(
97
+ `Semantic link "${event}" properties must be a JSON object`,
98
+ );
99
+ }
100
+ for (const [key, value] of Object.entries(parsed)) {
101
+ const t = typeof value;
102
+ if (value !== null && t !== "string" && t !== "number" && t !== "boolean") {
103
+ throw new Error(
104
+ `Semantic link "${event}" property "${key}" must be a scalar (string/number/boolean/null)`,
105
+ );
106
+ }
107
+ }
108
+
109
+ return {
110
+ event,
111
+ propsRaw,
112
+ properties: parsed as Record<string, unknown>,
113
+ };
114
+ }
115
+
116
+ // One tracked_links row per distinct (url, event, props) tuple: identical
117
+ // semantic links share a row; the same URL under DIFFERENT events/props must
118
+ // NOT collapse (the old URL-only dedupe would merge "yes" and "no" answers
119
+ // that point at the same thanks page).
120
+ function linkKey(url: string, semantic: SemanticAttrs | null): string {
121
+ const sep = String.fromCharCode(0);
122
+ return [url, semantic?.event ?? "", semantic?.propsRaw ?? ""].join(sep);
123
+ }
124
+
12
125
  export async function rewriteLinks(opts: {
13
126
  html: string;
14
127
  emailSendId: string;
@@ -17,32 +130,51 @@ export async function rewriteLinks(opts: {
17
130
  }): Promise<string> {
18
131
  const { html, emailSendId, baseUrl, db } = opts;
19
132
 
20
- const uniqueUrls = new Set<string>();
133
+ const pending = new Map<
134
+ string,
135
+ { id: string; url: string; semantic: SemanticAttrs | null }
136
+ >();
137
+
138
+ for (const match of html.matchAll(ANCHOR_RE)) {
139
+ const tag = match[0];
140
+ const url = tag.match(HREF_RE)?.[1];
141
+ const semantic = parseSemanticAttrs(tag);
21
142
 
22
- for (const match of html.matchAll(HREF_RE)) {
23
- const url = match[1];
24
- if (url && !shouldSkipUrl(url)) {
25
- uniqueUrls.add(url);
143
+ if (!url || shouldSkipUrl(url)) {
144
+ if (semantic) {
145
+ throw new Error(
146
+ `Semantic link "${semantic.event}" needs an absolute http(s) href outside unsubscribe/preference URLs`,
147
+ );
148
+ }
149
+ continue;
26
150
  }
27
- }
28
151
 
29
- if (uniqueUrls.size === 0) return html;
152
+ const key = linkKey(url, semantic);
153
+ if (!pending.has(key)) {
154
+ pending.set(key, { id: randomUUID(), url, semantic });
155
+ }
156
+ }
30
157
 
31
- const urlList = [...uniqueUrls];
32
- const rows = await db
33
- .insert(trackedLinks)
34
- .values(urlList.map((url) => ({ emailSendId, originalUrl: url })))
35
- .returning({ id: trackedLinks.id, originalUrl: trackedLinks.originalUrl });
158
+ if (pending.size === 0) return html;
36
159
 
37
- const urlToId = new Map<string, string>();
38
- for (const row of rows) {
39
- urlToId.set(row.originalUrl, row.id);
40
- }
160
+ await db.insert(trackedLinks).values(
161
+ [...pending.values()].map((link) => ({
162
+ id: link.id,
163
+ emailSendId,
164
+ originalUrl: link.url,
165
+ event: link.semantic?.event,
166
+ eventProperties: link.semantic?.properties ?? undefined,
167
+ })),
168
+ );
41
169
 
42
- return html.replace(HREF_RE, (full, url: string) => {
43
- if (shouldSkipUrl(url)) return full;
44
- const linkId = urlToId.get(url);
45
- return linkId ? `href="${baseUrl}/v1/t/c/${linkId}"` : full;
170
+ return html.replace(ANCHOR_RE, (tag) => {
171
+ const url = tag.match(HREF_RE)?.[1];
172
+ if (!url || shouldSkipUrl(url)) return tag;
173
+ const link = pending.get(linkKey(url, parseSemanticAttrs(tag)));
174
+ if (!link) return tag;
175
+ return tag
176
+ .replace(HREF_RE, `href="${baseUrl}/v1/t/c/${link.id}"`)
177
+ .replace(STRIP_SEMANTIC_ATTRS_RE, "");
46
178
  });
47
179
  }
48
180
 
@@ -25,7 +25,7 @@ import { Webhook } from "svix";
25
25
  */
26
26
 
27
27
  /**
28
- * The 13-event catalog — the SINGLE source of truth (schema, routes, client,
28
+ * The 14-event catalog — the SINGLE source of truth (schema, routes, client,
29
29
  * CLI all derive from this). The `webhook.test` sentinel is intentionally NOT a
30
30
  * member (it is delivered out-of-band regardless of an endpoint's `eventTypes`).
31
31
  */
@@ -38,6 +38,7 @@ export const WEBHOOK_EVENT_TYPES = [
38
38
  "email.delivered",
39
39
  "email.opened",
40
40
  "email.clicked",
41
+ "email.action",
41
42
  "email.bounced",
42
43
  "email.complained",
43
44
  "journey.completed",
@@ -8,6 +8,7 @@ import {
8
8
  pushTrackingEvent,
9
9
  resolveEmailSendContext,
10
10
  } from "../../lib/tracking-events.js";
11
+ import { confirmSemanticClickTask } from "../../workflows/confirm-semantic-click.js";
11
12
 
12
13
  const clickRoute = createRoute({
13
14
  method: "get",
@@ -36,6 +37,8 @@ export const clickRouter = new OpenAPIHono<AppEnv>().openapi(
36
37
  id: trackedLinks.id,
37
38
  originalUrl: trackedLinks.originalUrl,
38
39
  emailSendId: trackedLinks.emailSendId,
40
+ event: trackedLinks.event,
41
+ eventProperties: trackedLinks.eventProperties,
39
42
  })
40
43
  .from(trackedLinks)
41
44
  .where(eq(trackedLinks.id, id))
@@ -85,6 +88,26 @@ export const clickRouter = new OpenAPIHono<AppEnv>().openapi(
85
88
 
86
89
  const { hatchet, registry, logger } = c.get("container");
87
90
 
91
+ // SEMANTIC link: the click is a PROVISIONAL answer. Confirmation is
92
+ // deferred past the scanner-burst window (a Hatchet task) so the gate can
93
+ // see the WHOLE burst — an inline check could never suppress a scanner's
94
+ // first click. The task claims the send's answer slot (first answer wins)
95
+ // and emits the consumer event + email.action outbound.
96
+ if (link.event) {
97
+ void confirmSemanticClickTask
98
+ .runNoWait({
99
+ trackedLinkId: link.id,
100
+ clickedAt: new Date().toISOString(),
101
+ })
102
+ .catch((err: unknown) => {
103
+ logger.warn("Failed to enqueue semantic click confirmation", {
104
+ linkId: link.id,
105
+ event: link.event,
106
+ error: err instanceof Error ? err.message : String(err),
107
+ });
108
+ });
109
+ }
110
+
88
111
  // Resolve the send context ONCE (off the response path) and feed both the
89
112
  // re-ingest and the PER-HIT outbound emit — avoiding a duplicate
90
113
  // `resolveEmailSendContext` read on the click hot path. NO `dedupeKey`: a
package/src/worker.ts CHANGED
@@ -16,6 +16,7 @@ import {
16
16
  } from "./workflows/bucket-backfill.js";
17
17
  import { bucketReconcileTask } from "./workflows/bucket-reconcile.js";
18
18
  import { checkAlertsTask } from "./workflows/check-alerts.js";
19
+ import { confirmSemanticClickTask } from "./workflows/confirm-semantic-click.js";
19
20
  import {
20
21
  deliverWebhookTask,
21
22
  reapDueWebhookDeliveriesTask,
@@ -81,6 +82,7 @@ export function createWorker(opts: CreateWorkerOptions): Worker {
81
82
  reapStuckCampaignsTask,
82
83
  deliverWebhookTask,
83
84
  reapDueWebhookDeliveriesTask,
85
+ confirmSemanticClickTask,
84
86
  checkAlertsTask,
85
87
  bucketReconcileTask,
86
88
  bucketBackfillTask,
@@ -0,0 +1,37 @@
1
+ import { getJourneyRegistrySingleton } from "../journeys/registry-singleton.js";
2
+ import { getDb } from "../lib/db.js";
3
+ import { hatchet } from "../lib/hatchet.js";
4
+ import { createLogger } from "../lib/logger.js";
5
+ import {
6
+ type ConfirmSemanticClickInput,
7
+ confirmSemanticClick,
8
+ } from "../lib/semantic-click.js";
9
+
10
+ /**
11
+ * Deferred confirmation of a semantic-link answer, enqueued per candidate
12
+ * click by the click route. The deferral (≈ the burst window, 30s) is the
13
+ * point: an inline gate can never suppress a scanner's FIRST click because
14
+ * the rest of the burst hasn't happened yet — this task judges the click with
15
+ * the whole window visible on both sides.
16
+ *
17
+ * Retries are safe: the claim is an idempotency-keyed `user_events` insert
18
+ * whose failed-publish path rolls back inside `ingestEvent`, the stamp is
19
+ * `IS NULL`-guarded, and the outbound emit carries a `dedupeKey`. Self-
20
+ * bootstraps deps from the process (cron-style; no request container).
21
+ */
22
+ export const confirmSemanticClickTask = hatchet.task({
23
+ name: "confirm-semantic-click",
24
+ retries: 3,
25
+ executionTimeout: "90s",
26
+ fn: async (input: ConfirmSemanticClickInput) => {
27
+ return confirmSemanticClick(
28
+ {
29
+ db: getDb(),
30
+ hatchet,
31
+ registry: getJourneyRegistrySingleton(),
32
+ logger: createLogger(process.env.LOG_LEVEL ?? "info"),
33
+ },
34
+ input,
35
+ );
36
+ },
37
+ });