@hogsend/engine 0.22.0 → 0.23.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,14 @@
1
1
  import type { HatchetClient } from "@hatchet-dev/typescript-sdk/v1/index.js";
2
+ import type { AnalyticsProvider } from "@hogsend/core";
2
3
  import { evaluatePropertyConditions } from "@hogsend/core";
3
4
  import type { JourneyRegistry } from "@hogsend/core/registry";
4
5
  import { type Database, journeyStates, userEvents } from "@hogsend/db";
5
6
  import { and, eq, inArray, isNull } from "drizzle-orm";
6
7
  import { checkBucketMembership } from "../buckets/check-membership.js";
8
+ import {
9
+ logResidualTwins,
10
+ mergeAnalyticsIdentities,
11
+ } from "./analytics-identity.js";
7
12
  import { resolveOrCreateContact } from "./contacts.js";
8
13
  import type { Logger } from "./logger.js";
9
14
 
@@ -58,8 +63,17 @@ export async function ingestEvent(opts: {
58
63
  hatchet: HatchetClient;
59
64
  logger: Logger;
60
65
  event: IngestEvent;
66
+ /**
67
+ * The active analytics provider (`c.get("container").analytics`). When the
68
+ * identity resolve folds two keys into one (collide-MERGE or canonical-key
69
+ * flip), the engine fires the provider-neutral `mergeIdentities` primitive so
70
+ * the analytics person store stitches the same way the contact store did
71
+ * (§5.3). Optional: absent ⇒ DB-only resolve (no stitch), exactly as before; a
72
+ * provider without `identityMerge` no-ops cleanly.
73
+ */
74
+ analytics?: AnalyticsProvider;
61
75
  }): Promise<IngestResult> {
62
- const { db, registry, hatchet, logger, event } = opts;
76
+ const { db, registry, hatchet, logger, event, analytics } = opts;
63
77
 
64
78
  // (1) Resolve identity FIRST (awaited — no longer fire-and-forget). The
65
79
  // contact-referencing tables join on a NOT NULL text key, so an email-only /
@@ -68,7 +82,13 @@ export async function ingestEvent(opts: {
68
82
  // `contacts.properties` (D2 split) and returns BOTH the canonical contact id
69
83
  // AND its resolved string key (external_id ?? anonymous_id ?? contact.id —
70
84
  // risk 1/6), so no second read-back of the contact row is needed.
71
- const { resolvedKey } = await resolveOrCreateContact({
85
+ const {
86
+ id: contactId,
87
+ resolvedKey,
88
+ mergedKeys,
89
+ mergedIdentifiedKeys,
90
+ merged,
91
+ } = await resolveOrCreateContact({
72
92
  db,
73
93
  userId: event.userId,
74
94
  email: event.userEmail || undefined,
@@ -112,6 +132,36 @@ export async function ingestEvent(opts: {
112
132
  });
113
133
  }
114
134
 
135
+ // (2b) §5.3 — fire the provider-neutral identity merge at the two resolver
136
+ // outcomes where two keys fold into one (collide-MERGE or canonical-key flip).
137
+ // Placed INSIDE the idempotency-guarded block (after a FRESH insert; the
138
+ // duplicate path returned early above) so a Hatchet/client retry with the same
139
+ // idempotencyKey does NOT re-fire `alias` — honoring the "only at the moment
140
+ // two keys first become one" contract (PostHog `alias` is harmless on replay
141
+ // but firing per-retry adds queue noise). MF-2: `mergedKeys` already excludes
142
+ // identified `external_id`s (the resolver split them out); fire only the safe
143
+ // anon/uuid keys, and surface the excluded identified twins for observability.
144
+ if (mergedKeys?.length || mergedIdentifiedKeys?.length) {
145
+ if (mergedKeys?.length) {
146
+ mergeAnalyticsIdentities({
147
+ analytics,
148
+ survivorKey: resolvedKey,
149
+ loserKeys: mergedKeys,
150
+ reason: merged ? "collide_merge" : "key_flip",
151
+ contactId,
152
+ logger,
153
+ });
154
+ }
155
+ if (mergedIdentifiedKeys?.length) {
156
+ logResidualTwins({
157
+ survivorKey: resolvedKey,
158
+ identifiedLoserKeys: mergedIdentifiedKeys,
159
+ contactId,
160
+ logger,
161
+ });
162
+ }
163
+ }
164
+
115
165
  // (3) Build the JSON-serializable subset of eventProperties for the Hatchet
116
166
  // push payload (scalars only — the SDK serializes the envelope).
117
167
  const serializableProperties = Object.fromEntries(
@@ -90,6 +90,23 @@ export interface OutboundPayloads {
90
90
  "email.delivered": EmailEventPayload;
91
91
  "email.opened": EmailEventPayload;
92
92
  "email.clicked": EmailEventPayload & { linkUrl?: string; linkId?: string };
93
+ /**
94
+ * A NON-email tracked link was clicked (Discord/referral/ad-hoc
95
+ * `createTrackedLink`). The deliberate counterpart to `email.clicked` — a
96
+ * non-email click has no `email_sends` row, so it carries `emailSendId: null`
97
+ * and `messageId: null` and never masquerades as an email click
98
+ * (MF-missing #3). `userId` is the link's stitch subject (`distinct_id`) when
99
+ * the link is identity-bearing, else null for a broadcast link.
100
+ */
101
+ "link.clicked": {
102
+ linkId: string;
103
+ source: string | null;
104
+ userId: string | null;
105
+ emailSendId: null;
106
+ messageId: null;
107
+ linkUrl: string;
108
+ at: string;
109
+ };
93
110
  /**
94
111
  * A SEMANTIC link answered — the in-email action event (consumer-named, e.g.
95
112
  * "nps.submitted"). Emitted at most once per (send, event name): first
@@ -90,6 +90,15 @@ export async function confirmSemanticClick(
90
90
  if (!link?.event) {
91
91
  return { status: "skipped", reason: "not_semantic" };
92
92
  }
93
+ // The confirm path is EMAIL-semantic end to end (it claims a send's answer
94
+ // slot keyed on `emailSendId` and emits `email.action`). The click route only
95
+ // enqueues this task for links with a non-null `emailSendId`, but `emailSendId`
96
+ // is nullable since the identity-stitching minor — guard defensively and
97
+ // narrow the type for the rest of the function.
98
+ if (!link.emailSendId) {
99
+ return { status: "skipped", reason: "non_email_link" };
100
+ }
101
+ const emailSendId = link.emailSendId;
93
102
  const semanticEvent = link.event;
94
103
 
95
104
  // (1) Let the burst window close before judging the click.
@@ -109,7 +118,7 @@ export async function confirmSemanticClick(
109
118
  .innerJoin(trackedLinks, eq(linkClicks.trackedLinkId, trackedLinks.id))
110
119
  .where(
111
120
  and(
112
- eq(trackedLinks.emailSendId, link.emailSendId),
121
+ eq(trackedLinks.emailSendId, emailSendId),
113
122
  gte(linkClicks.clickedAt, windowStart),
114
123
  lte(linkClicks.clickedAt, windowEnd),
115
124
  ),
@@ -117,7 +126,7 @@ export async function confirmSemanticClick(
117
126
  const distinctLinks = burst[0]?.n ?? 0;
118
127
  if (distinctLinks >= SEMANTIC_BURST_DISTINCT_LINKS) {
119
128
  logger.warn("Semantic answer suppressed: scanner-like click burst", {
120
- emailSendId: link.emailSendId,
129
+ emailSendId,
121
130
  linkId: link.id,
122
131
  event: semanticEvent,
123
132
  distinctLinks,
@@ -125,21 +134,21 @@ export async function confirmSemanticClick(
125
134
  return { status: "suppressed", distinctLinks };
126
135
  }
127
136
 
128
- const ctx = await resolveEmailSendContext(db, link.emailSendId);
137
+ const ctx = await resolveEmailSendContext(db, emailSendId);
129
138
  if (!ctx) {
130
139
  return { status: "skipped", reason: "no_send_context" };
131
140
  }
132
141
 
133
142
  // (3) Claim the answer slot. Duplicate key → stored=false BEFORE the Hatchet
134
143
  // push, so journeys/destinations see at most one answer per (send, event).
135
- const semKey = `sem:${link.emailSendId}:${semanticEvent}`;
144
+ const semKey = `sem:${emailSendId}:${semanticEvent}`;
136
145
  const result = await pushTrackingEvent({
137
146
  db,
138
147
  hatchet,
139
148
  registry,
140
149
  logger,
141
150
  event: semanticEvent,
142
- emailSendId: link.emailSendId,
151
+ emailSendId,
143
152
  properties: {
144
153
  ...(link.eventProperties ?? {}),
145
154
  linkId: link.id,
@@ -180,7 +189,7 @@ export async function confirmSemanticClick(
180
189
  payload: {
181
190
  event: semanticEvent,
182
191
  properties: link.eventProperties ?? null,
183
- emailSendId: link.emailSendId,
192
+ emailSendId,
184
193
  templateKey: ctx.templateKey ?? null,
185
194
  userId: ctx.userId ?? null,
186
195
  to: ctx.to ?? ctx.userEmail ?? "",
@@ -15,8 +15,12 @@ interface EmailSendContext {
15
15
 
16
16
  export async function resolveEmailSendContext(
17
17
  db: Database,
18
- emailSendId: string,
18
+ emailSendId: string | null,
19
19
  ): Promise<EmailSendContext | null> {
20
+ // A non-email tracked link (Discord/referral/ad-hoc `createTrackedLink`) has
21
+ // a NULL `email_send_id` — there is no send row to resolve, so short-circuit
22
+ // to null rather than issue a `WHERE id = NULL` query that matches nothing.
23
+ if (!emailSendId) return null;
20
24
  const rows = await db
21
25
  .select({
22
26
  toEmail: emailSends.toEmail,
@@ -240,3 +240,40 @@ export async function prepareTrackedHtml(opts: {
240
240
  });
241
241
  return result;
242
242
  }
243
+
244
+ /**
245
+ * The mint surface for a NON-email tracked link (Discord, referral, ad-hoc).
246
+ * Inserts a `tracked_links` row with a NULL `emailSendId` and returns the
247
+ * `/v1/t/c/:id` redirect URL to use in place of the raw destination.
248
+ *
249
+ * This is the SINGLE chokepoint enforcing "broadcast links carry no subject":
250
+ * a link only becomes identity-bearing when the caller EXPLICITLY passes
251
+ * `distinctId` (the canonical contact key the click should stitch into). Per
252
+ * MF-4, the referral path does NOT pass `distinctId` by default (referral
253
+ * pages are shareable → broadcast), and the Discord destination passes
254
+ * `distinctId: undefined`. The `hs_t` mint at click time is still gated by
255
+ * `TRACKING_IDENTITY_TOKEN` (default false); a row with a NULL `distinctId`
256
+ * never mints a token regardless.
257
+ */
258
+ export async function createTrackedLink(opts: {
259
+ db: Database;
260
+ url: string;
261
+ /**
262
+ * The canonical contact key a click should fold the visitor's anon session
263
+ * into. OMIT for a broadcast link (the safe default) — only an explicit,
264
+ * single-subject, non-shareable link should pass this.
265
+ */
266
+ distinctId?: string;
267
+ source: "discord" | "referral" | "link";
268
+ baseUrl: string;
269
+ }): Promise<string> {
270
+ const id = randomUUID();
271
+ await opts.db.insert(trackedLinks).values({
272
+ id,
273
+ emailSendId: null,
274
+ distinctId: opts.distinctId ?? null,
275
+ source: opts.source,
276
+ originalUrl: opts.url,
277
+ });
278
+ return `${opts.baseUrl}/v1/t/c/${id}`;
279
+ }
@@ -25,9 +25,14 @@ import { Webhook } from "svix";
25
25
  */
26
26
 
27
27
  /**
28
- * The 14-event catalog — the SINGLE source of truth (schema, routes, client,
28
+ * The 15-event catalog — the SINGLE source of truth (schema, routes, client,
29
29
  * CLI all derive from this). The `webhook.test` sentinel is intentionally NOT a
30
30
  * member (it is delivered out-of-band regardless of an endpoint's `eventTypes`).
31
+ *
32
+ * `link.clicked` is the NON-email click event: a click on a tracked link that
33
+ * has no email send (Discord/referral/ad-hoc `createTrackedLink`). It is the
34
+ * deliberate counterpart to `email.clicked` so a non-email click never fires a
35
+ * malformed `email.clicked` (MF-missing #3).
31
36
  */
32
37
  export const WEBHOOK_EVENT_TYPES = [
33
38
  "contact.created",
@@ -44,6 +49,7 @@ export const WEBHOOK_EVENT_TYPES = [
44
49
  "journey.completed",
45
50
  "bucket.entered",
46
51
  "bucket.left",
52
+ "link.clicked",
47
53
  ] as const;
48
54
 
49
55
  export type WebhookEventType = (typeof WEBHOOK_EVENT_TYPES)[number];
@@ -144,6 +144,12 @@ const replayRoute = createRoute({
144
144
  },
145
145
  description: "Replay results",
146
146
  },
147
+ 400: {
148
+ content: {
149
+ "application/json": { schema: errorSchema },
150
+ },
151
+ description: "No replay selection (eventIds or filter) provided",
152
+ },
147
153
  },
148
154
  });
149
155
 
@@ -362,14 +368,26 @@ export const bulkRouter = new OpenAPIHono<AppEnv>()
362
368
  conditions.push(lte(userEvents.occurredAt, new Date(body.filter.to)));
363
369
  }
364
370
 
365
- const where = conditions.length > 0 ? and(...conditions) : undefined;
371
+ // Refuse an unscoped replay. With no `eventIds` and no filter the WHERE
372
+ // would collapse to `undefined`, silently re-pushing the most-recent
373
+ // `limit` events back through the full ingestion pipeline (re-triggering
374
+ // journeys, re-evaluating exits). Require an explicit selection.
375
+ if (conditions.length === 0) {
376
+ return c.json(
377
+ {
378
+ error:
379
+ "Replay requires `eventIds` or at least one `filter` field (event, userId, from, to).",
380
+ },
381
+ 400,
382
+ );
383
+ }
366
384
 
367
385
  events = await db
368
386
  .select()
369
387
  .from(userEvents)
370
- .where(where)
388
+ .where(and(...conditions))
371
389
  .orderBy(desc(userEvents.occurredAt))
372
- .limit(body.limit ?? 100);
390
+ .limit(body.limit);
373
391
  }
374
392
 
375
393
  let replayed = 0;
@@ -147,6 +147,14 @@ export const preferencesRouter = new OpenAPIHono<AppEnv>()
147
147
  ? {
148
148
  suppressed: body.suppressed,
149
149
  suppressedAt: body.suppressed ? new Date() : null,
150
+ // Un-suppressing clears the bounce slate. `bounceCount` only
151
+ // drives the auto-suppress threshold (the send-gate keys off
152
+ // `suppressed`/`unsubscribedAll`), so a leftover count would
153
+ // otherwise keep a bounced recipient pinned to the suppression
154
+ // list with no way to remove them.
155
+ ...(body.suppressed
156
+ ? {}
157
+ : { bounceCount: 0, lastBounceAt: null }),
150
158
  }
151
159
  : {}),
152
160
  ...(body.categories !== undefined
@@ -362,8 +362,19 @@ reportingRouter.get("/sends/export", async (c) => {
362
362
  );
363
363
  }
364
364
 
365
+ // The export intentionally returns all matching sends, but is hard-capped at
366
+ // MAX_EXPORT_ROWS. Signal when the result was truncated so a caller never
367
+ // mistakes a partial CSV for the complete history.
368
+ const truncated = rows.length >= MAX_EXPORT_ROWS;
369
+
365
370
  return c.body(lines.join("\n"), 200, {
366
371
  "Content-Type": "text/csv; charset=utf-8",
367
372
  "Content-Disposition": 'attachment; filename="email-sends.csv"',
373
+ ...(truncated
374
+ ? {
375
+ "X-Hogsend-Export-Truncated": "true",
376
+ "X-Hogsend-Export-Limit": String(MAX_EXPORT_ROWS),
377
+ }
378
+ : {}),
368
379
  });
369
380
  });
@@ -1,6 +1,6 @@
1
1
  import { emailPreferences } from "@hogsend/db";
2
2
  import { createRoute, OpenAPIHono, z } from "@hono/zod-openapi";
3
- import { and, count, desc, eq, gt, type SQL } from "drizzle-orm";
3
+ import { and, count, desc, eq, gt, or, type SQL } from "drizzle-orm";
4
4
  import type { AppEnv } from "../../app.js";
5
5
  import { serializePrefs } from "../../lib/contacts.js";
6
6
 
@@ -8,6 +8,11 @@ import { serializePrefs } from "../../lib/contacts.js";
8
8
  // `complained` has no dedicated column — a complaint sets `suppressed` without
9
9
  // incrementing `bounceCount` (see mailer `handleComplaint`), so we identify it
10
10
  // as suppressed-but-not-bounced.
11
+ //
12
+ // IMPORTANT: the `email_preferences` table holds a row for (nearly) every
13
+ // contact, most of whom are NOT suppressed. The "All" view must therefore
14
+ // restrict to recipients suppressed in *some* way — returning `undefined`
15
+ // here would drop the WHERE clause entirely and list every contact.
11
16
  function typeFilter(
12
17
  type: "bounced" | "unsubscribed" | "complained" | undefined,
13
18
  ): SQL | undefined {
@@ -22,7 +27,12 @@ function typeFilter(
22
27
  eq(emailPreferences.bounceCount, 0),
23
28
  );
24
29
  default:
25
- return undefined;
30
+ // "All" = the union of every suppression reason.
31
+ return or(
32
+ eq(emailPreferences.suppressed, true),
33
+ eq(emailPreferences.unsubscribedAll, true),
34
+ gt(emailPreferences.bounceCount, 0),
35
+ );
26
36
  }
27
37
  }
28
38
 
@@ -39,6 +39,10 @@ const upsertRoute = createRoute({
39
39
  schema: z.object({
40
40
  email: z.string().email().optional(),
41
41
  userId: z.string().min(1).optional(),
42
+ // §4: caller's analytics anon id — the resolver's 2nd-precedence
43
+ // key. An EXTRA, never a third identity arm: `requireIdentity`
44
+ // still requires email or userId below.
45
+ anonymousId: z.string().min(1).max(200).optional(),
42
46
  properties: z.record(z.string(), z.unknown()).optional(),
43
47
  lists: z.record(z.string(), z.boolean()).optional(),
44
48
  }),
@@ -142,6 +146,9 @@ export const contactsRouter = new OpenAPIHono<AppEnv>()
142
146
  db,
143
147
  userId: body.userId,
144
148
  email: body.email,
149
+ // §4: 2nd-precedence resolver key (zero-merge stitch). Identity is still
150
+ // enforced via `requireIdentity` (email/userId) above.
151
+ anonymousId: body.anonymousId,
145
152
  contactProperties: body.properties,
146
153
  });
147
154
 
@@ -9,6 +9,14 @@ const eventRequestSchema = z.object({
9
9
  name: z.string().min(1),
10
10
  email: z.string().email().optional(),
11
11
  userId: z.string().min(1).optional(),
12
+ // §4: the caller's analytics anon id (e.g. posthog-js `get_distinct_id()`).
13
+ // 2nd in the resolver's key precedence (`external → email → anonymous →
14
+ // discord`), so when no `external_id` is attached the contact's canonical key
15
+ // BECOMES this value — the browser's own anon events and the server's captures
16
+ // then land on ONE analytics person with zero merge calls. An EXTRA, never a
17
+ // third identity arm: `requireIdentity` still requires email or userId
18
+ // (anon-only public ingest is an abuse vector).
19
+ anonymousId: z.string().min(1).max(200).optional(),
12
20
  eventProperties: z.record(z.string(), z.unknown()).optional(),
13
21
  contactProperties: z.record(z.string(), z.unknown()).optional(),
14
22
  lists: z.record(z.string(), z.boolean()).optional(),
@@ -68,7 +76,7 @@ const eventRoute = createRoute({
68
76
  export const eventsRouter = new OpenAPIHono<AppEnv>().openapi(
69
77
  eventRoute,
70
78
  async (c) => {
71
- const { db, registry, hatchet, logger } = c.get("container");
79
+ const { db, registry, hatchet, logger, analytics } = c.get("container");
72
80
  const body = c.req.valid("json");
73
81
 
74
82
  const guard = requireIdentity(c, body);
@@ -83,10 +91,17 @@ export const eventsRouter = new OpenAPIHono<AppEnv>().openapi(
83
91
  registry,
84
92
  hatchet,
85
93
  logger,
94
+ // §5.3: thread the active analytics provider so a collide-MERGE / key-flip
95
+ // fires the provider-neutral `mergeIdentities` stitch. Absent ⇒ no-op.
96
+ analytics,
86
97
  event: {
87
98
  event: body.name,
88
99
  userId: body.userId,
89
100
  userEmail: body.email,
101
+ // §4: 2nd-precedence resolver key — lets the contact's canonical key
102
+ // equal the browser anon id (zero-merge stitch). Identity is still
103
+ // enforced via `requireIdentity` (email/userId) above.
104
+ anonymousId: body.anonymousId,
90
105
  eventProperties: body.eventProperties ?? {},
91
106
  contactProperties: body.contactProperties,
92
107
  idempotencyKey,
@@ -150,8 +150,15 @@ export const answerRouter = new OpenAPIHono<AppEnv>()
150
150
  );
151
151
  }
152
152
 
153
- const ctx = await resolveEmailSendContext(db, link.emailSendId);
154
- if (ctx) {
153
+ // The answer/comment flow is EMAIL-semantic (it re-ingests a
154
+ // `<event>.comment` keyed on the send). A non-email semantic link has no
155
+ // send to attribute the comment to — `emailSendId` is nullable since the
156
+ // identity-stitching minor, so narrow it here.
157
+ const emailSendId = link.emailSendId;
158
+ const ctx = emailSendId
159
+ ? await resolveEmailSendContext(db, emailSendId)
160
+ : null;
161
+ if (ctx && emailSendId) {
155
162
  // `<event>.comment` is a consumer-namespace event — journeys can wait
156
163
  // on it and destinations receive it like any other. First comment per
157
164
  // (send, event) wins; repeats are no-ops.
@@ -161,7 +168,7 @@ export const answerRouter = new OpenAPIHono<AppEnv>()
161
168
  registry,
162
169
  logger,
163
170
  event: `${link.event}.comment`,
164
- emailSendId: link.emailSendId,
171
+ emailSendId,
165
172
  properties: {
166
173
  comment,
167
174
  parentEvent: link.event,
@@ -169,7 +176,7 @@ export const answerRouter = new OpenAPIHono<AppEnv>()
169
176
  linkId: link.id,
170
177
  },
171
178
  resolvedContext: ctx,
172
- idempotencyKey: `semc:${link.emailSendId}:${link.event}`,
179
+ idempotencyKey: `semc:${emailSendId}:${link.event}`,
173
180
  }).catch((err) => {
174
181
  logger.warn("Failed to ingest answer comment", {
175
182
  linkId: link.id,