@hogsend/engine 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hogsend/engine",
3
- "version": "0.22.0",
3
+ "version": "0.23.1",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -40,14 +40,14 @@
40
40
  "svix": "^1.95.1",
41
41
  "winston": "^3.19.0",
42
42
  "zod": "^4.4.3",
43
- "@hogsend/core": "^0.22.0",
44
- "@hogsend/db": "^0.22.0",
45
- "@hogsend/email": "^0.22.0",
46
- "@hogsend/plugin-posthog": "^0.22.0",
47
- "@hogsend/plugin-resend": "^0.22.0"
43
+ "@hogsend/core": "^0.23.1",
44
+ "@hogsend/db": "^0.23.1",
45
+ "@hogsend/email": "^0.23.1",
46
+ "@hogsend/plugin-posthog": "^0.23.1",
47
+ "@hogsend/plugin-resend": "^0.23.1"
48
48
  },
49
49
  "optionalDependencies": {
50
- "@hogsend/plugin-postmark": "^0.22.0"
50
+ "@hogsend/plugin-postmark": "^0.23.1"
51
51
  },
52
52
  "devDependencies": {
53
53
  "@types/node": "^22.15.3",
package/src/container.ts CHANGED
@@ -56,6 +56,10 @@ import type {
56
56
  FrequencyCapConfig,
57
57
  } from "./lib/email-service-types.js";
58
58
  import { hatchet } from "./lib/hatchet.js";
59
+ import {
60
+ createIdentityService,
61
+ type IdentityService,
62
+ } from "./lib/identity-service.js";
59
63
  import { createLogger, type Logger } from "./lib/logger.js";
60
64
  import { createTrackedMailer } from "./lib/mailer.js";
61
65
  import { createRedisSecondaryStorage, getRedis } from "./lib/redis.js";
@@ -124,6 +128,15 @@ export interface HogsendClient {
124
128
  * treats that as a silent no-op.
125
129
  */
126
130
  analytics?: AnalyticsProvider;
131
+ /**
132
+ * Identity-attach helper that resolves/merges a contact AND propagates the
133
+ * analytics merge (§5.3) in one call, for identity-attach OUTSIDE the
134
+ * `/v1/events` ingest path. Discord `/link` (§7) wires its `resolveContact`
135
+ * callback to `client.identity.linkContact` so a successful contact-merge
136
+ * folds the discord-keyed person into the canonical one through the SAME
137
+ * engine emission ingest uses — never bespoke per-consumer plumbing.
138
+ */
139
+ identity: IdentityService;
127
140
  registry: JourneyRegistry;
128
141
  /**
129
142
  * The bucket registry (id map + event/property inverted indexes for candidate
@@ -614,6 +627,13 @@ export function createHogsendClient(
614
627
  // undefined (no provider configured) — the reads stay no-ops.
615
628
  setAnalytics(analytics);
616
629
 
630
+ // Identity-attach helper (§7): bound to THIS container's db + resolved
631
+ // analytics provider so a contact-merge outside the `/v1/events` ingest path
632
+ // (Discord `/link`) propagates the analytics merge through the same engine
633
+ // emission ingest uses. Closes over `analytics` (may be undefined → the merge
634
+ // emission no-ops; the resolve still happens).
635
+ const identity = createIdentityService({ db, analytics, logger });
636
+
617
637
  // Build + install the outbound DESTINATION registry (Phase 3) the
618
638
  // self-booting delivery task resolves by `webhook_endpoints.kind`. Order is
619
639
  // load-bearing: the env-enabled presets come FIRST and the consumer's
@@ -700,6 +720,7 @@ export function createHogsendClient(
700
720
  templates,
701
721
  analyticsProviders,
702
722
  analytics,
723
+ identity,
703
724
  registry,
704
725
  bucketRegistry,
705
726
  listRegistry,
package/src/index.ts CHANGED
@@ -165,6 +165,11 @@ export {
165
165
  setJourneyRegistry,
166
166
  } from "./journeys/registry-singleton.js";
167
167
  // --- Analytics provider registry (the analytics sibling) ---
168
+ export {
169
+ type IdentityMergeReason,
170
+ logResidualTwins,
171
+ mergeAnalyticsIdentities,
172
+ } from "./lib/analytics-identity.js";
168
173
  export { AnalyticsProviderRegistry } from "./lib/analytics-provider-registry.js";
169
174
  export { analyticsProvidersFromEnv } from "./lib/analytics-providers-from-env.js";
170
175
  // --- Auth ---
@@ -265,9 +270,16 @@ export { checkEmailPreferences } from "./lib/enrollment-guards.js";
265
270
  export { isFrequencyCapped } from "./lib/frequency-cap.js";
266
271
  export { addrSpecOf, hostOfFromAddress } from "./lib/from-address.js";
267
272
  export { hatchet } from "./lib/hatchet.js";
273
+ // --- Identity service (resolve/merge + analytics merge propagation, §7) ---
274
+ export {
275
+ createIdentityService,
276
+ type IdentityService,
277
+ type LinkContactArgs,
278
+ } from "./lib/identity-service.js";
268
279
  export {
269
280
  generateIdentityToken,
270
281
  type IdentityTokenPayload,
282
+ type IdentityTokenScope,
271
283
  InvalidIdentityTokenError,
272
284
  validateIdentityToken,
273
285
  } from "./lib/identity-token.js";
@@ -350,6 +362,7 @@ export {
350
362
  } from "./lib/tracked.js";
351
363
  // --- Tracking ---
352
364
  export {
365
+ createTrackedLink,
353
366
  injectOpenPixel,
354
367
  prepareTrackedHtml,
355
368
  rewriteLinks,
@@ -0,0 +1,112 @@
1
+ import type { AnalyticsProvider } from "@hogsend/core";
2
+ import type { Logger } from "./logger.js";
3
+
4
+ /**
5
+ * The reason a merge was emitted — surfaced on the `identity.merge.emitted`
6
+ * structured log so an operator can see WHICH resolver path stitched (§10.5). A
7
+ * declining `collide_merge` / `key_flip` volume after anon threading lands
8
+ * (Stage 1) is the empirical "forks prevented" signal.
9
+ */
10
+ export type IdentityMergeReason =
11
+ | "collide_merge"
12
+ | "key_flip"
13
+ | "click_identify"
14
+ | "discord_link";
15
+
16
+ /**
17
+ * Fan out the provider-neutral `mergeIdentities` primitive (§5.3) once per
18
+ * loser key, folding each absorbed (anonymous/uuid) key INTO the surviving
19
+ * canonical contact key. Fire-and-forget and never throws: analytics is
20
+ * non-load-bearing, so a provider error must not fail the ingest that triggered
21
+ * the merge. The provider call is not awaited, so only a synchronous throw is caught (and logged) here.
22
+ *
23
+ * Direction is load-bearing (MF-1): `survivorKey` is the SURVIVING/canonical
24
+ * (identified) id and each `loserKey` is the ABSORBED (anonymous) one — mapped
25
+ * straight to `mergeIdentities({ distinctId: survivorKey, alias: loserKey })`.
26
+ *
27
+ * No-ops cleanly when:
28
+ * - no provider is injected (`!analytics`),
29
+ * - the active provider can't merge (`!capabilities.identityMerge` — a legacy
30
+ * adapter or a provider without an `alias` wire),
31
+ * - or it carries no `mergeIdentities` method.
32
+ *
33
+ * MF-2: callers MUST pass only the SAFE-to-absorb loser keys (anonymous/uuid,
34
+ * never an `external_id` that already identified a PostHog person). Aliasing an
35
+ * already-identified key is the identified→identified merge PostHog refuses
36
+ * (R2/R4) — it silently no-ops AND spams "Refused to merge" warnings on the
37
+ * normal merge path. The filtering happens at the emission point (the resolver
38
+ * splits its loser keys into safe vs. identified); this helper only fans out
39
+ * what it is given, skipping an empty key or a self-alias (`loserKey === survivorKey`).
40
+ */
41
+ export function mergeAnalyticsIdentities(opts: {
42
+ analytics?: AnalyticsProvider;
43
+ survivorKey: string;
44
+ loserKeys: string[];
45
+ /** Stitching path, for the `identity.merge.emitted` observability log. */
46
+ reason: IdentityMergeReason;
47
+ /** The contact id, for correlating the merge log to a contact row. */
48
+ contactId?: string;
49
+ logger?: Logger;
50
+ }): void {
51
+ const { analytics, survivorKey, loserKeys, reason, contactId, logger } = opts;
52
+
53
+ if (!analytics?.capabilities.identityMerge) {
54
+ if (loserKeys.length > 0) {
55
+ logger?.debug("identity.merge.skipped", {
56
+ reason: analytics ? "no_capability" : "no_provider",
57
+ });
58
+ }
59
+ return;
60
+ }
61
+ if (!analytics.mergeIdentities) return;
62
+
63
+ for (const loserKey of loserKeys) {
64
+ if (!loserKey || loserKey === survivorKey) {
65
+ logger?.debug("identity.merge.skipped", { reason: "self_alias" });
66
+ continue;
67
+ }
68
+ try {
69
+ analytics.mergeIdentities({ distinctId: survivorKey, alias: loserKey });
70
+ logger?.info("identity.merge.emitted", {
71
+ provider: analytics.meta.id,
72
+ survivorKey,
73
+ alias: loserKey,
74
+ reason,
75
+ ...(contactId ? { contactId } : {}),
76
+ });
77
+ } catch (err) {
78
+ // Best-effort: analytics is non-load-bearing — never throw.
79
+ logger?.warn("identity.merge.failed", {
80
+ provider: analytics.meta.id,
81
+ reason,
82
+ error: err instanceof Error ? err.message : String(err),
83
+ });
84
+ }
85
+ }
86
+ }
87
+
88
+ /**
89
+ * Emit the `identity.merge.residual_twin` observability log (§10.5) for each
90
+ * loser key MF-2 excluded from the safe fan-out: a loser carrying an
91
+ * `external_id` is, by the engine's own model, an already-identified PostHog
92
+ * person, and PostHog refuses to merge two identified persons on the safe path.
93
+ * These twins are the known steady-state residual (OQ-1) made visible — NOT an
94
+ * error, just the honest "one email → one person, except across two prior
95
+ * identified persons" outcome surfaced for monitoring.
96
+ */
97
+ export function logResidualTwins(opts: {
98
+ survivorKey: string;
99
+ identifiedLoserKeys: string[];
100
+ contactId?: string;
101
+ logger?: Logger;
102
+ }): void {
103
+ const { survivorKey, identifiedLoserKeys, contactId, logger } = opts;
104
+ for (const loserExternalId of identifiedLoserKeys) {
105
+ if (!loserExternalId || loserExternalId === survivorKey) continue;
106
+ logger?.info("identity.merge.residual_twin", {
107
+ survivorKey,
108
+ loserExternalId,
109
+ ...(contactId ? { contactId } : {}),
110
+ });
111
+ }
112
+ }
@@ -353,6 +353,23 @@ export async function resolveOrCreateContact(opts: {
353
353
  created: boolean;
354
354
  linked: boolean;
355
355
  merged: boolean;
356
+ /**
357
+ * SAFE-to-absorb loser keys (§5.3 MF-2): the anonymous/uuid keys the resolver
358
+ * folded INTO `resolvedKey` this call — populated only on a collide-MERGE or a
359
+ * canonical-key flip that absorbed an anon/uuid key. Callers fan these out via
360
+ * `mergeAnalyticsIdentities({ survivorKey: resolvedKey, loserKeys: mergedKeys })`. An
361
+ * `external_id` is NEVER listed here (it carried an identified PostHog person;
362
+ * aliasing it is the merge PostHog refuses — R2/R4); it surfaces in
363
+ * {@link mergedIdentifiedKeys} instead. Empty/absent ⇒ nothing to stitch.
364
+ */
365
+ mergedKeys?: string[];
366
+ /**
367
+ * Loser keys MF-2 could NOT safely absorb — already-identified `external_id`s
368
+ * (and the superseded `external_id` on a key flip). These are the known
369
+ * steady-state twin residual (§10, OQ-1); callers log them as
370
+ * `identity.merge.residual_twin` for observability. Never aliased.
371
+ */
372
+ mergedIdentifiedKeys?: string[];
356
373
  }> {
357
374
  const { db, contactProperties } = opts;
358
375
  const userId = opts.userId?.trim() || undefined;
@@ -423,7 +440,29 @@ export async function resolveOrCreateContact(opts: {
423
440
  // --- CASE: fill-in-link (single existing row) ---
424
441
  const single = candidates[0];
425
442
  if (candidates.length === 1 && single) {
426
- const { id, resolvedKey } = await fillInLink(tx, single, {
443
+ const { id, resolvedKey, mergedKeys, mergedIdentifiedKeys } =
444
+ await fillInLink(tx, single, {
445
+ userId,
446
+ email,
447
+ anonymousId,
448
+ discordId,
449
+ patch,
450
+ hasPatch,
451
+ });
452
+ return {
453
+ id,
454
+ resolvedKey,
455
+ created: false,
456
+ linked: true,
457
+ merged: false,
458
+ mergedKeys,
459
+ mergedIdentifiedKeys,
460
+ };
461
+ }
462
+
463
+ // --- CASE: collide-MERGE (2-3 distinct rows) ---
464
+ const { id, resolvedKey, mergedKeys, mergedIdentifiedKeys } =
465
+ await mergeContacts(tx, candidates, {
427
466
  userId,
428
467
  email,
429
468
  anonymousId,
@@ -431,19 +470,15 @@ export async function resolveOrCreateContact(opts: {
431
470
  patch,
432
471
  hasPatch,
433
472
  });
434
- return { id, resolvedKey, created: false, linked: true, merged: false };
435
- }
436
-
437
- // --- CASE: collide-MERGE (2-3 distinct rows) ---
438
- const { id, resolvedKey } = await mergeContacts(tx, candidates, {
439
- userId,
440
- email,
441
- anonymousId,
442
- discordId,
443
- patch,
444
- hasPatch,
445
- });
446
- return { id, resolvedKey, created: false, linked: true, merged: true };
473
+ return {
474
+ id,
475
+ resolvedKey,
476
+ created: false,
477
+ linked: true,
478
+ merged: true,
479
+ mergedKeys,
480
+ mergedIdentifiedKeys,
481
+ };
447
482
  });
448
483
  }
449
484
 
@@ -465,7 +500,12 @@ async function fillInLink(
465
500
  tx: Tx,
466
501
  row: ContactRow,
467
502
  ctx: ResolveCtx,
468
- ): Promise<{ id: string; resolvedKey: string }> {
503
+ ): Promise<{
504
+ id: string;
505
+ resolvedKey: string;
506
+ mergedKeys?: string[];
507
+ mergedIdentifiedKeys?: string[];
508
+ }> {
469
509
  const set: Record<string, unknown> = {
470
510
  lastSeenAt: new Date(),
471
511
  updatedAt: new Date(),
@@ -513,6 +553,15 @@ async function fillInLink(
513
553
  // updated row (with its new email/keys) is what foldJourneyStates/email_sends
514
554
  // denormalize into.
515
555
  const newKey = nextExternalId ?? nextAnonymousId ?? row.id;
556
+ // §5.3 emission point 2 (canonical-key flip): when the key flips, the OLD key
557
+ // is folded into the NEW one. MF-3 gate — only emit a merge when `oldKey` was
558
+ // an anonymous/uuid key (never an `external_id` being superseded; that is the
559
+ // twin case, OQ-1). In practice a flip in fillInLink only fires when the row
560
+ // had NO external_id (attaching one never happens to an already-external row),
561
+ // so `oldKey` is structurally always anon/uuid here — the explicit gate guards
562
+ // the invariant regardless.
563
+ let mergedKeys: string[] | undefined;
564
+ let mergedIdentifiedKeys: string[] | undefined;
516
565
  if (newKey !== oldKey) {
517
566
  const updatedRow: ContactRow = {
518
567
  ...row,
@@ -521,6 +570,14 @@ async function fillInLink(
521
570
  email: (set.email as string | undefined) ?? row.email,
522
571
  };
523
572
  await repointOwnHistory(tx, oldKey, newKey, updatedRow);
573
+
574
+ const oldKeyWasExternalId =
575
+ row.externalId != null && oldKey === row.externalId;
576
+ if (oldKeyWasExternalId) {
577
+ mergedIdentifiedKeys = [oldKey];
578
+ } else {
579
+ mergedKeys = [oldKey];
580
+ }
524
581
  }
525
582
 
526
583
  for (const key of promoted) {
@@ -540,7 +597,7 @@ async function fillInLink(
540
597
 
541
598
  // `newKey` IS the post-fill canonical key (external_id ?? anonymous_id ?? id) —
542
599
  // the same value the old read-back derived.
543
- return { id: row.id, resolvedKey: newKey };
600
+ return { id: row.id, resolvedKey: newKey, mergedKeys, mergedIdentifiedKeys };
544
601
  }
545
602
 
546
603
  /**
@@ -551,10 +608,22 @@ async function mergeContacts(
551
608
  tx: Tx,
552
609
  candidates: ContactRow[],
553
610
  ctx: ResolveCtx,
554
- ): Promise<{ id: string; resolvedKey: string }> {
611
+ ): Promise<{
612
+ id: string;
613
+ resolvedKey: string;
614
+ mergedKeys?: string[];
615
+ mergedIdentifiedKeys?: string[];
616
+ }> {
555
617
  const { survivor, losers } = pickSurvivor(candidates);
556
618
  const survivorKey = contactKey(survivor);
557
619
 
620
+ // §5.3 emission point 1 (collide-MERGE) accumulators. MF-2: a loser's
621
+ // anonymous/uuid key is SAFE to absorb (it never identified a PostHog person);
622
+ // a loser's `external_id` is an already-identified person PostHog refuses to
623
+ // merge on the safe path — it is recorded as the twin residual, NEVER aliased.
624
+ const safeLoserKeys: string[] = [];
625
+ const identifiedLoserKeys: string[] = [];
626
+
558
627
  for (const loser of losers) {
559
628
  const loserStrKeys = [loser.externalId, loser.anonymousId, loser.id].filter(
560
629
  (k): k is string => Boolean(k),
@@ -563,6 +632,19 @@ async function mergeContacts(
563
632
  // anonymous id (its user_id rows were keyed on contacts.id).
564
633
  const loserKeysToRewrite = loserStrKeys;
565
634
 
635
+ // MF-2 split: the SAFE-to-absorb key is the loser's anonymous/uuid key —
636
+ // `loser.anonymousId`, or `loser.id` ONLY when the loser was never
637
+ // identified (no external_id) and has no anonymousId either. When the loser HAS an external_id, that
638
+ // external_id was its canonical key, so its events were captured under it
639
+ // (identified) → residual; `loser.id` never carried events in that case, so
640
+ // there is no safe key to alias from it.
641
+ if (loser.externalId) {
642
+ identifiedLoserKeys.push(loser.externalId);
643
+ if (loser.anonymousId) safeLoserKeys.push(loser.anonymousId);
644
+ } else {
645
+ safeLoserKeys.push(loser.anonymousId ?? loser.id);
646
+ }
647
+
566
648
  // (ii) user_events.user_id rewrite.
567
649
  await tx
568
650
  .update(userEvents)
@@ -703,8 +785,16 @@ async function mergeContacts(
703
785
  }
704
786
 
705
787
  // `newSurvivorKey` IS the post-merge canonical key of the survivor — the same
706
- // value the old read-back derived for the merged row.
707
- return { id: survivor.id, resolvedKey: newSurvivorKey };
788
+ // value the old read-back derived for the merged row. The merge folds every
789
+ // loser key into it, so callers fan out `mergeAnalyticsIdentities` aliasing
790
+ // each SAFE loser key into `newSurvivorKey` (§5.3 emission point 1).
791
+ return {
792
+ id: survivor.id,
793
+ resolvedKey: newSurvivorKey,
794
+ mergedKeys: safeLoserKeys.length > 0 ? safeLoserKeys : undefined,
795
+ mergedIdentifiedKeys:
796
+ identifiedLoserKeys.length > 0 ? identifiedLoserKeys : undefined,
797
+ };
708
798
  }
709
799
 
710
800
  /**
@@ -1121,6 +1211,10 @@ export async function upsertContact(opts: {
1121
1211
  created: boolean;
1122
1212
  linked: boolean;
1123
1213
  merged: boolean;
1214
+ /** §5.3 MF-2: safe-to-absorb loser keys folded this call (anon/uuid). */
1215
+ mergedKeys?: string[];
1216
+ /** §5.3 MF-2: already-identified loser keys (twin residual); never aliased. */
1217
+ mergedIdentifiedKeys?: string[];
1124
1218
  }> {
1125
1219
  return resolveOrCreateContact({
1126
1220
  db: opts.db,
@@ -0,0 +1,107 @@
1
+ import type { AnalyticsProvider } from "@hogsend/core";
2
+ import type { Database } from "@hogsend/db";
3
+ import {
4
+ logResidualTwins,
5
+ mergeAnalyticsIdentities,
6
+ } from "./analytics-identity.js";
7
+ import { resolveOrCreateContact } from "./contacts.js";
8
+ import type { Logger } from "./logger.js";
9
+
10
+ /**
11
+ * Args for {@link IdentityService.linkContact} — the same identity-attach inputs
12
+ * `resolveOrCreateContact` accepts (at least one of `userId`/`email`/
13
+ * `anonymousId`/`discordId` is required by the resolver), minus the `db` (the
14
+ * service closes over the container's db).
15
+ */
16
+ export interface LinkContactArgs {
17
+ userId?: string;
18
+ email?: string;
19
+ anonymousId?: string;
20
+ discordId?: string;
21
+ contactProperties?: Record<string, unknown>;
22
+ }
23
+
24
+ /**
25
+ * The container-held identity helper (`client.identity`). It exists so any
26
+ * identity-attach OUTSIDE the `/v1/events` ingest path — most notably Discord
27
+ * `/link` (§7), but also any consumer wiring — folds two keys into one analytics
28
+ * person through the SAME engine emission used by `ingestEvent` (§5.3), rather
29
+ * than each consumer hand-rolling its own `resolveOrCreateContact` +
30
+ * `mergeIdentities` plumbing (the bespoke path the spec calls out as the bug).
31
+ */
32
+ export interface IdentityService {
33
+ /**
34
+ * Resolve / merge a contact AND propagate the analytics merge in one call.
35
+ *
36
+ * Wraps `resolveOrCreateContact` (the resolver stays analytics-free — it takes
37
+ * only `db`) then, on a collide-MERGE or canonical-key flip that absorbed an
38
+ * anonymous/uuid key, fans out the provider-neutral `mergeIdentities` primitive
39
+ * via {@link mergeAnalyticsIdentities} with `reason: "discord_link"`. MF-2:
40
+ * `mergedKeys` already excludes identified `external_id`s (the resolver split
41
+ * them out) — only the safe anon/uuid keys are aliased; the excluded
42
+ * identified twins surface as `identity.merge.residual_twin` for observability.
43
+ *
44
+ * The SURVIVOR RULE makes `resolvedKey` the survivor (`distinctId`) and each
45
+ * loser its absorbed `alias` — e.g. on a Discord `/link` that merges the
46
+ * discord-keyed contact into the email contact, `distinctId = resolvedKey`
47
+ * (survivor, email/external) and `alias = <discord-contact uuid>` (the
48
+ * loser's anon/uuid key the Discord-platform events were captured under).
49
+ *
50
+ * Best-effort and analytics-non-load-bearing: the merge emission never throws
51
+ * (the helper swallows provider errors), so a missing/incapable provider
52
+ * no-ops cleanly — the contact resolve still happened and is returned.
53
+ */
54
+ linkContact(args: LinkContactArgs): ReturnType<typeof resolveOrCreateContact>;
55
+ }
56
+
57
+ /**
58
+ * Build the {@link IdentityService} bound to a container's db + active analytics
59
+ * provider. `analytics` is undefined when nothing is configured (the merge
60
+ * emission no-ops); the resolver itself is unaffected.
61
+ */
62
+ export function createIdentityService(deps: {
63
+ db: Database;
64
+ analytics?: AnalyticsProvider;
65
+ logger?: Logger;
66
+ }): IdentityService {
67
+ const { db, analytics, logger } = deps;
68
+
69
+ return {
70
+ async linkContact(args) {
71
+ const result = await resolveOrCreateContact({ db, ...args });
72
+
73
+ const {
74
+ id: contactId,
75
+ resolvedKey,
76
+ mergedKeys,
77
+ mergedIdentifiedKeys,
78
+ } = result;
79
+
80
+ // §5.3 emission point 1, reused (§7): fire the analytics merge ONLY when
81
+ // the resolver actually folded keys this call. MF-2: `mergedKeys` carries
82
+ // the safe anon/uuid losers (the discord-contact uuid on a `/link` merge);
83
+ // identified `external_id`s are excluded by the resolver and surfaced as
84
+ // residual twins below — never aliased (the merge PostHog refuses, R2/R4).
85
+ if (mergedKeys?.length) {
86
+ mergeAnalyticsIdentities({
87
+ analytics,
88
+ survivorKey: resolvedKey,
89
+ loserKeys: mergedKeys,
90
+ reason: "discord_link",
91
+ contactId,
92
+ logger,
93
+ });
94
+ }
95
+ if (mergedIdentifiedKeys?.length) {
96
+ logResidualTwins({
97
+ survivorKey: resolvedKey,
98
+ identifiedLoserKeys: mergedIdentifiedKeys,
99
+ contactId,
100
+ logger,
101
+ });
102
+ }
103
+
104
+ return result;
105
+ },
106
+ };
107
+ }
@@ -8,8 +8,12 @@ import {
8
8
  /**
9
9
  * Short-lived identity token appended to tracked-link redirects as `hs_t`
10
10
  * (opt-in via TRACKING_IDENTITY_TOKEN). The landing site exchanges it at
11
- * `POST /v1/t/identify` for the distinct id and calls `posthog.identify`
12
- * stitching the email click to the web session.
11
+ * `POST /v1/t/identify`, where the engine fires a SERVER-SIDE `alias` folding
12
+ * the caller's own anon session into the token's canonical id — stitching the
13
+ * click to the web session. Minted for EMAIL links by default; non-email
14
+ * (Discord/referral) links carry a token only when explicitly stitch-bearing
15
+ * (`tracked_links.distinct_id` set) — referral links are token-less by default
16
+ * (MF-4 anti-hijack).
13
17
  *
14
18
  * ENCRYPTED (AES-256-GCM keyed off BETTER_AUTH_SECRET), not merely signed:
15
19
  * the distinct id can fall back to an email address, and a signed-but-
@@ -18,11 +22,38 @@ import {
18
22
  * auth tag also covers integrity, so tampering fails decryption.
19
23
  */
20
24
 
25
+ /**
26
+ * The only merge mode a token may authorize: fold the CALLER's own anonymous
27
+ * session INTO the token's canonical `distinctId`. There is deliberately no
28
+ * "become the subject" / overwrite mode — that is the anti-hijack invariant.
29
+ */
30
+ export type IdentityTokenScope = "anon-absorb";
31
+
21
32
  export interface IdentityTokenPayload {
22
- /** The distinct id the landing site should identify as. */
33
+ /**
34
+ * The canonical contact key the landing site should fold INTO — the ONLY
35
+ * ever-identified id. NEVER a per-link or anonymous id.
36
+ */
23
37
  distinctId: string;
24
- emailSendId: string;
38
+ /**
39
+ * Where the token was minted: `"email:<sendId>"` | `"link:<linkId>"`.
40
+ * Referral links are excluded by default (they carry no identity token).
41
+ */
42
+ src: string;
43
+ /**
44
+ * The authorized merge mode. Only `"anon-absorb"` is ever minted. OPTIONAL on
45
+ * the wire for the rolling-deploy window (MF-7): a token minted by the still-old
46
+ * click route carries no `scope`, so `validateIdentityToken` treats a MISSING
47
+ * scope as `"anon-absorb"` (allow) and rejects only a PRESENT-and-wrong value.
48
+ */
49
+ scope?: IdentityTokenScope;
25
50
  exp: number;
51
+ /**
52
+ * @deprecated Alias of `src` for ONE minor (mirrors the `resendId` → `messageId`
53
+ * deprecation window). Old tokens carry only `emailSendId`; new email tokens
54
+ * carry both. Reads should prefer `src`.
55
+ */
56
+ emailSendId?: string;
26
57
  }
27
58
 
28
59
  export class InvalidIdentityTokenError extends Error {
@@ -43,11 +74,27 @@ function deriveKey(secret: string): Buffer {
43
74
  export function generateIdentityToken(opts: {
44
75
  secret: string;
45
76
  distinctId: string;
46
- emailSendId: string;
77
+ /**
78
+ * Mint provenance: `"email:<sendId>"` | `"link:<linkId>"`. When omitted, falls
79
+ * back to `email:<emailSendId>` for the legacy email-link caller, or to `""` when neither is given.
80
+ */
81
+ src?: string;
82
+ /** Defaults to `"anon-absorb"` — the only mode a token may authorize. */
83
+ scope?: IdentityTokenScope;
84
+ /**
85
+ * @deprecated Pass `src` instead. Kept for the one-minor deprecation window so
86
+ * existing email-link callers compile unchanged; mirrored into the payload's
87
+ * deprecated `emailSendId` field and used to synthesize `src` when `src` is
88
+ * absent.
89
+ */
90
+ emailSendId?: string;
47
91
  expiresInSeconds?: number;
48
92
  }): string {
93
+ const src = opts.src ?? (opts.emailSendId ? `email:${opts.emailSendId}` : "");
49
94
  const payload: IdentityTokenPayload = {
50
95
  distinctId: opts.distinctId,
96
+ src,
97
+ scope: opts.scope ?? "anon-absorb",
51
98
  emailSendId: opts.emailSendId,
52
99
  exp:
53
100
  Math.floor(Date.now() / 1000) +
@@ -108,5 +155,18 @@ export function validateIdentityToken(opts: {
108
155
  if (payload.exp < Math.floor(Date.now() / 1000)) {
109
156
  throw new InvalidIdentityTokenError("Token expired");
110
157
  }
158
+ // MF-7 — missing-scope-ALLOW. The API and worker deploy independently from
159
+ // the same image, so a token minted by the still-old click route carries no
160
+ // `scope`. Treat a MISSING scope as the only legal mode (`"anon-absorb"`);
161
+ // reject ONLY a present-and-wrong value. Old tokens (no `scope`, no `src`)
162
+ // still validate — this check never widened the required-shape gate above.
163
+ if (payload.scope !== undefined && payload.scope !== "anon-absorb") {
164
+ throw new InvalidIdentityTokenError("Unsupported token scope");
165
+ }
166
+ // Backfill `src` from the deprecated `emailSendId` for old tokens, so the one
167
+ // response schema (`{ distinctId, src, emailSendId? }`) is always populated.
168
+ if (typeof payload.src !== "string" || payload.src.length === 0) {
169
+ payload.src = payload.emailSendId ? `email:${payload.emailSendId}` : "";
170
+ }
111
171
  return payload;
112
172
  }