@hogsend/engine 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/container.ts +21 -0
- package/src/index.ts +13 -0
- package/src/lib/analytics-identity.ts +112 -0
- package/src/lib/contacts.ts +113 -19
- package/src/lib/identity-service.ts +107 -0
- package/src/lib/identity-token.ts +65 -5
- package/src/lib/ingestion.ts +52 -2
- package/src/lib/outbound.ts +17 -0
- package/src/lib/semantic-click.ts +15 -6
- package/src/lib/tracking-events.ts +5 -1
- package/src/lib/tracking.ts +37 -0
- package/src/lib/webhook-signing.ts +7 -1
- package/src/routes/contacts/index.ts +7 -0
- package/src/routes/events/index.ts +16 -1
- package/src/routes/tracking/answer.ts +11 -4
- package/src/routes/tracking/click.ts +130 -71
- package/src/routes/tracking/identify.ts +62 -15
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hogsend/engine",
|
|
3
|
-
"version": "0.22.0",
|
|
3
|
+
"version": "0.23.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -40,14 +40,14 @@
|
|
|
40
40
|
"svix": "^1.95.1",
|
|
41
41
|
"winston": "^3.19.0",
|
|
42
42
|
"zod": "^4.4.3",
|
|
43
|
-
"@hogsend/core": "^0.
|
|
44
|
-
"@hogsend/db": "^0.
|
|
45
|
-
"@hogsend/email": "^0.
|
|
46
|
-
"@hogsend/plugin-posthog": "^0.
|
|
47
|
-
"@hogsend/plugin-resend": "^0.
|
|
43
|
+
"@hogsend/core": "^0.23.0",
|
|
44
|
+
"@hogsend/db": "^0.23.0",
|
|
45
|
+
"@hogsend/email": "^0.23.0",
|
|
46
|
+
"@hogsend/plugin-posthog": "^0.23.0",
|
|
47
|
+
"@hogsend/plugin-resend": "^0.23.0"
|
|
48
48
|
},
|
|
49
49
|
"optionalDependencies": {
|
|
50
|
-
"@hogsend/plugin-postmark": "^0.
|
|
50
|
+
"@hogsend/plugin-postmark": "^0.23.0"
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
|
53
53
|
"@types/node": "^22.15.3",
|
package/src/container.ts
CHANGED
|
@@ -56,6 +56,10 @@ import type {
|
|
|
56
56
|
FrequencyCapConfig,
|
|
57
57
|
} from "./lib/email-service-types.js";
|
|
58
58
|
import { hatchet } from "./lib/hatchet.js";
|
|
59
|
+
import {
|
|
60
|
+
createIdentityService,
|
|
61
|
+
type IdentityService,
|
|
62
|
+
} from "./lib/identity-service.js";
|
|
59
63
|
import { createLogger, type Logger } from "./lib/logger.js";
|
|
60
64
|
import { createTrackedMailer } from "./lib/mailer.js";
|
|
61
65
|
import { createRedisSecondaryStorage, getRedis } from "./lib/redis.js";
|
|
@@ -124,6 +128,15 @@ export interface HogsendClient {
|
|
|
124
128
|
* treats that as a silent no-op.
|
|
125
129
|
*/
|
|
126
130
|
analytics?: AnalyticsProvider;
|
|
131
|
+
/**
|
|
132
|
+
* Identity-attach helper that resolves/merges a contact AND propagates the
|
|
133
|
+
* analytics merge (§5.3) in one call, for identity-attach OUTSIDE the
|
|
134
|
+
* `/v1/events` ingest path. Discord `/link` (§7) wires its `resolveContact`
|
|
135
|
+
* callback to `client.identity.linkContact` so a successful contact-merge
|
|
136
|
+
* folds the discord-keyed person into the canonical one through the SAME
|
|
137
|
+
* engine emission ingest uses — never bespoke per-consumer plumbing.
|
|
138
|
+
*/
|
|
139
|
+
identity: IdentityService;
|
|
127
140
|
registry: JourneyRegistry;
|
|
128
141
|
/**
|
|
129
142
|
* The bucket registry (id map + event/property inverted indexes for candidate
|
|
@@ -614,6 +627,13 @@ export function createHogsendClient(
|
|
|
614
627
|
// undefined (no provider configured) — the reads stay no-ops.
|
|
615
628
|
setAnalytics(analytics);
|
|
616
629
|
|
|
630
|
+
// Identity-attach helper (§7): bound to THIS container's db + resolved
|
|
631
|
+
// analytics provider so a contact-merge outside the `/v1/events` ingest path
|
|
632
|
+
// (Discord `/link`) propagates the analytics merge through the same engine
|
|
633
|
+
// emission ingest uses. Closes over `analytics` (may be undefined → the merge
|
|
634
|
+
// emission no-ops; the resolve still happens).
|
|
635
|
+
const identity = createIdentityService({ db, analytics, logger });
|
|
636
|
+
|
|
617
637
|
// Build + install the outbound DESTINATION registry (Phase 3) the
|
|
618
638
|
// self-booting delivery task resolves by `webhook_endpoints.kind`. Order is
|
|
619
639
|
// load-bearing: the env-enabled presets come FIRST and the consumer's
|
|
@@ -700,6 +720,7 @@ export function createHogsendClient(
|
|
|
700
720
|
templates,
|
|
701
721
|
analyticsProviders,
|
|
702
722
|
analytics,
|
|
723
|
+
identity,
|
|
703
724
|
registry,
|
|
704
725
|
bucketRegistry,
|
|
705
726
|
listRegistry,
|
package/src/index.ts
CHANGED
|
@@ -165,6 +165,11 @@ export {
|
|
|
165
165
|
setJourneyRegistry,
|
|
166
166
|
} from "./journeys/registry-singleton.js";
|
|
167
167
|
// --- Analytics provider registry (the analytics sibling) ---
|
|
168
|
+
export {
|
|
169
|
+
type IdentityMergeReason,
|
|
170
|
+
logResidualTwins,
|
|
171
|
+
mergeAnalyticsIdentities,
|
|
172
|
+
} from "./lib/analytics-identity.js";
|
|
168
173
|
export { AnalyticsProviderRegistry } from "./lib/analytics-provider-registry.js";
|
|
169
174
|
export { analyticsProvidersFromEnv } from "./lib/analytics-providers-from-env.js";
|
|
170
175
|
// --- Auth ---
|
|
@@ -265,9 +270,16 @@ export { checkEmailPreferences } from "./lib/enrollment-guards.js";
|
|
|
265
270
|
export { isFrequencyCapped } from "./lib/frequency-cap.js";
|
|
266
271
|
export { addrSpecOf, hostOfFromAddress } from "./lib/from-address.js";
|
|
267
272
|
export { hatchet } from "./lib/hatchet.js";
|
|
273
|
+
// --- Identity service (resolve/merge + analytics merge propagation, §7) ---
|
|
274
|
+
export {
|
|
275
|
+
createIdentityService,
|
|
276
|
+
type IdentityService,
|
|
277
|
+
type LinkContactArgs,
|
|
278
|
+
} from "./lib/identity-service.js";
|
|
268
279
|
export {
|
|
269
280
|
generateIdentityToken,
|
|
270
281
|
type IdentityTokenPayload,
|
|
282
|
+
type IdentityTokenScope,
|
|
271
283
|
InvalidIdentityTokenError,
|
|
272
284
|
validateIdentityToken,
|
|
273
285
|
} from "./lib/identity-token.js";
|
|
@@ -350,6 +362,7 @@ export {
|
|
|
350
362
|
} from "./lib/tracked.js";
|
|
351
363
|
// --- Tracking ---
|
|
352
364
|
export {
|
|
365
|
+
createTrackedLink,
|
|
353
366
|
injectOpenPixel,
|
|
354
367
|
prepareTrackedHtml,
|
|
355
368
|
rewriteLinks,
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { AnalyticsProvider } from "@hogsend/core";
|
|
2
|
+
import type { Logger } from "./logger.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Identifies which stitching path emitted an analytics merge. The value is
 * attached to the `identity.merge.emitted` structured log (§10.5) so an
 * operator can tell the resolver paths apart; a falling `collide_merge` /
 * `key_flip` volume once anon threading lands (Stage 1) is the empirical
 * "forks prevented" signal.
 */
export type IdentityMergeReason =
  // Collide-MERGE: several contact rows were folded into one survivor.
  | "collide_merge"
  // Canonical-key flip: a single row's old key was folded into its new key.
  | "key_flip"
  // NOTE(review): presumably the tracked-link identify exchange — confirm at the call site.
  | "click_identify"
  // Emitted by the container's identity service (`linkContact`).
  | "discord_link";
|
|
15
|
+
|
|
16
|
+
/**
 * Propagate a contact merge to the analytics provider by emitting the
 * provider-neutral `mergeIdentities` primitive (§5.3) once per absorbed key.
 *
 * Direction is load-bearing (MF-1): `survivorKey` is the SURVIVING/canonical
 * (identified) id and every entry of `loserKeys` is ABSORBED into it — each
 * emission is `mergeIdentities({ distinctId: survivorKey, alias: loserKey })`.
 *
 * Best-effort, never throws: analytics is non-load-bearing, so a provider
 * failure must not fail the ingest that triggered the merge. Synchronous
 * provider errors are caught, and when the provider hands back a
 * promise/thenable its rejection is observed too, so it is logged as
 * `identity.merge.failed` instead of surfacing as an unhandled rejection.
 *
 * Returns without emitting when:
 *  - no provider is injected (`!analytics`),
 *  - the provider reports no `capabilities.identityMerge`,
 *  - or the provider carries no `mergeIdentities` method.
 *
 * MF-2: callers MUST pass only the SAFE-to-absorb loser keys (anonymous/uuid,
 * never an `external_id` that already identified a person). That filtering
 * happens at the emission point; this helper only fans out what it is given,
 * skipping empty keys, a self-alias (`loserKey === survivorKey`) and keys it
 * has already emitted during this call.
 *
 * @param opts.analytics   Active analytics provider, if any.
 * @param opts.survivorKey Canonical key every loser is folded into.
 * @param opts.loserKeys   Safe-to-absorb keys to alias into `survivorKey`.
 * @param opts.reason      Stitching path, for the observability logs.
 * @param opts.contactId   Contact id, for correlating logs to a contact row.
 * @param opts.logger      Optional structured logger.
 */
export function mergeAnalyticsIdentities(opts: {
  analytics?: AnalyticsProvider;
  survivorKey: string;
  loserKeys: string[];
  /** Stitching path, for the `identity.merge.emitted` observability log. */
  reason: IdentityMergeReason;
  /** The contact id, for correlating the merge log to a contact row. */
  contactId?: string;
  logger?: Logger;
}): void {
  const { analytics, survivorKey, loserKeys, reason, contactId, logger } = opts;

  if (!analytics?.capabilities.identityMerge) {
    // Only worth a log line when there was actually something to stitch.
    if (loserKeys.length > 0) {
      logger?.debug("identity.merge.skipped", {
        reason: analytics ? "no_capability" : "no_provider",
      });
    }
    return;
  }
  if (!analytics.mergeIdentities) return;

  // One failure reporter for both the sync (`catch`) and async (rejection)
  // paths, carrying enough context to tie the failure to a key and a contact.
  const reportFailure = (alias: string, err: unknown): void => {
    logger?.warn("identity.merge.failed", {
      provider: analytics.meta.id,
      survivorKey,
      alias,
      reason,
      ...(contactId ? { contactId } : {}),
      error: err instanceof Error ? err.message : String(err),
    });
  };

  const alreadyEmitted = new Set<string>();
  for (const loserKey of loserKeys) {
    if (!loserKey || loserKey === survivorKey) {
      logger?.debug("identity.merge.skipped", { reason: "self_alias" });
      continue;
    }
    // Aliasing the same key twice in one call is a wasted emission.
    if (alreadyEmitted.has(loserKey)) continue;
    alreadyEmitted.add(loserKey);

    try {
      const pending: unknown = analytics.mergeIdentities({
        distinctId: survivorKey,
        alias: loserKey,
      });
      // An async provider rejects outside this try/catch — attach a handler so
      // the rejection is logged rather than left unhandled.
      if (isThenable(pending)) {
        pending.then(undefined, (err: unknown) => reportFailure(loserKey, err));
      }
      logger?.info("identity.merge.emitted", {
        provider: analytics.meta.id,
        survivorKey,
        alias: loserKey,
        reason,
        ...(contactId ? { contactId } : {}),
      });
    } catch (err) {
      // Best-effort: analytics is non-load-bearing — never throw.
      reportFailure(loserKey, err);
    }
  }
}

/** True when `value` exposes a callable `then`, i.e. can be awaited/rejected. */
function isThenable(value: unknown): value is PromiseLike<unknown> {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { then?: unknown }).then === "function"
  );
}
|
|
87
|
+
|
|
88
|
+
/**
 * Write one `identity.merge.residual_twin` info log (§10.5) per identified
 * loser key that MF-2 kept out of the safe alias fan-out.
 *
 * Such a key is an `external_id`, i.e. by the engine's own model an
 * already-identified analytics person that cannot be merged on the safe path.
 * These twins are the known steady-state residual (OQ-1): not an error, just
 * made visible for monitoring. Nothing is aliased here — this only logs.
 *
 * Empty keys and a key equal to `survivorKey` are ignored. Without a `logger`
 * the call has no effect.
 *
 * @param opts.survivorKey         Canonical key of the surviving contact.
 * @param opts.identifiedLoserKeys Identified keys that were not aliased.
 * @param opts.contactId           Contact id, added to the log when truthy.
 * @param opts.logger              Optional structured logger.
 */
export function logResidualTwins(opts: {
  survivorKey: string;
  identifiedLoserKeys: string[];
  contactId?: string;
  logger?: Logger;
}): void {
  const { survivorKey, identifiedLoserKeys, contactId, logger } = opts;

  const twins = identifiedLoserKeys.filter(
    (key) => Boolean(key) && key !== survivorKey,
  );

  twins.forEach((loserExternalId) => {
    const fields = contactId
      ? { survivorKey, loserExternalId, contactId }
      : { survivorKey, loserExternalId };
    logger?.info("identity.merge.residual_twin", fields);
  });
}
|
package/src/lib/contacts.ts
CHANGED
|
@@ -353,6 +353,23 @@ export async function resolveOrCreateContact(opts: {
|
|
|
353
353
|
created: boolean;
|
|
354
354
|
linked: boolean;
|
|
355
355
|
merged: boolean;
|
|
356
|
+
/**
|
|
357
|
+
* SAFE-to-absorb loser keys (§5.3 MF-2): the anonymous/uuid keys the resolver
|
|
358
|
+
* folded INTO `resolvedKey` this call — populated only on a collide-MERGE or a
|
|
359
|
+
* canonical-key flip that absorbed an anon/uuid key. Callers fan these out via
|
|
360
|
+
* `mergeAnalyticsIdentities({ distinctId: resolvedKey, alias: <key> })`. An
|
|
361
|
+
* `external_id` is NEVER listed here (it carried an identified PostHog person;
|
|
362
|
+
* aliasing it is the merge PostHog refuses — R2/R4); it surfaces in
|
|
363
|
+
* {@link mergedIdentifiedKeys} instead. Empty/absent ⇒ nothing to stitch.
|
|
364
|
+
*/
|
|
365
|
+
mergedKeys?: string[];
|
|
366
|
+
/**
|
|
367
|
+
* Loser keys MF-2 could NOT safely absorb — already-identified `external_id`s
|
|
368
|
+
* (and the superseded `external_id` on a key flip). These are the known
|
|
369
|
+
* steady-state twin residual (§10, OQ-1); callers log them as
|
|
370
|
+
* `identity.merge.residual_twin` for observability. Never aliased.
|
|
371
|
+
*/
|
|
372
|
+
mergedIdentifiedKeys?: string[];
|
|
356
373
|
}> {
|
|
357
374
|
const { db, contactProperties } = opts;
|
|
358
375
|
const userId = opts.userId?.trim() || undefined;
|
|
@@ -423,7 +440,29 @@ export async function resolveOrCreateContact(opts: {
|
|
|
423
440
|
// --- CASE: fill-in-link (single existing row) ---
|
|
424
441
|
const single = candidates[0];
|
|
425
442
|
if (candidates.length === 1 && single) {
|
|
426
|
-
const { id, resolvedKey } =
|
|
443
|
+
const { id, resolvedKey, mergedKeys, mergedIdentifiedKeys } =
|
|
444
|
+
await fillInLink(tx, single, {
|
|
445
|
+
userId,
|
|
446
|
+
email,
|
|
447
|
+
anonymousId,
|
|
448
|
+
discordId,
|
|
449
|
+
patch,
|
|
450
|
+
hasPatch,
|
|
451
|
+
});
|
|
452
|
+
return {
|
|
453
|
+
id,
|
|
454
|
+
resolvedKey,
|
|
455
|
+
created: false,
|
|
456
|
+
linked: true,
|
|
457
|
+
merged: false,
|
|
458
|
+
mergedKeys,
|
|
459
|
+
mergedIdentifiedKeys,
|
|
460
|
+
};
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// --- CASE: collide-MERGE (2-3 distinct rows) ---
|
|
464
|
+
const { id, resolvedKey, mergedKeys, mergedIdentifiedKeys } =
|
|
465
|
+
await mergeContacts(tx, candidates, {
|
|
427
466
|
userId,
|
|
428
467
|
email,
|
|
429
468
|
anonymousId,
|
|
@@ -431,19 +470,15 @@ export async function resolveOrCreateContact(opts: {
|
|
|
431
470
|
patch,
|
|
432
471
|
hasPatch,
|
|
433
472
|
});
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
patch,
|
|
444
|
-
hasPatch,
|
|
445
|
-
});
|
|
446
|
-
return { id, resolvedKey, created: false, linked: true, merged: true };
|
|
473
|
+
return {
|
|
474
|
+
id,
|
|
475
|
+
resolvedKey,
|
|
476
|
+
created: false,
|
|
477
|
+
linked: true,
|
|
478
|
+
merged: true,
|
|
479
|
+
mergedKeys,
|
|
480
|
+
mergedIdentifiedKeys,
|
|
481
|
+
};
|
|
447
482
|
});
|
|
448
483
|
}
|
|
449
484
|
|
|
@@ -465,7 +500,12 @@ async function fillInLink(
|
|
|
465
500
|
tx: Tx,
|
|
466
501
|
row: ContactRow,
|
|
467
502
|
ctx: ResolveCtx,
|
|
468
|
-
): Promise<{
|
|
503
|
+
): Promise<{
|
|
504
|
+
id: string;
|
|
505
|
+
resolvedKey: string;
|
|
506
|
+
mergedKeys?: string[];
|
|
507
|
+
mergedIdentifiedKeys?: string[];
|
|
508
|
+
}> {
|
|
469
509
|
const set: Record<string, unknown> = {
|
|
470
510
|
lastSeenAt: new Date(),
|
|
471
511
|
updatedAt: new Date(),
|
|
@@ -513,6 +553,15 @@ async function fillInLink(
|
|
|
513
553
|
// updated row (with its new email/keys) is what foldJourneyStates/email_sends
|
|
514
554
|
// denormalize into.
|
|
515
555
|
const newKey = nextExternalId ?? nextAnonymousId ?? row.id;
|
|
556
|
+
// §5.3 emission point 2 (canonical-key flip): when the key flips, the OLD key
|
|
557
|
+
// is folded into the NEW one. MF-3 gate — only emit a merge when `oldKey` was
|
|
558
|
+
// an anonymous/uuid key (never an `external_id` being superseded; that is the
|
|
559
|
+
// twin case, OQ-1). In practice a flip in fillInLink only fires when the row
|
|
560
|
+
// had NO external_id (attaching one never happens to an already-external row),
|
|
561
|
+
// so `oldKey` is structurally always anon/uuid here — the explicit gate guards
|
|
562
|
+
// the invariant regardless.
|
|
563
|
+
let mergedKeys: string[] | undefined;
|
|
564
|
+
let mergedIdentifiedKeys: string[] | undefined;
|
|
516
565
|
if (newKey !== oldKey) {
|
|
517
566
|
const updatedRow: ContactRow = {
|
|
518
567
|
...row,
|
|
@@ -521,6 +570,14 @@ async function fillInLink(
|
|
|
521
570
|
email: (set.email as string | undefined) ?? row.email,
|
|
522
571
|
};
|
|
523
572
|
await repointOwnHistory(tx, oldKey, newKey, updatedRow);
|
|
573
|
+
|
|
574
|
+
const oldKeyWasExternalId =
|
|
575
|
+
row.externalId != null && oldKey === row.externalId;
|
|
576
|
+
if (oldKeyWasExternalId) {
|
|
577
|
+
mergedIdentifiedKeys = [oldKey];
|
|
578
|
+
} else {
|
|
579
|
+
mergedKeys = [oldKey];
|
|
580
|
+
}
|
|
524
581
|
}
|
|
525
582
|
|
|
526
583
|
for (const key of promoted) {
|
|
@@ -540,7 +597,7 @@ async function fillInLink(
|
|
|
540
597
|
|
|
541
598
|
// `newKey` IS the post-fill canonical key (external_id ?? anonymous_id ?? id) —
|
|
542
599
|
// the same value the old read-back derived.
|
|
543
|
-
return { id: row.id, resolvedKey: newKey };
|
|
600
|
+
return { id: row.id, resolvedKey: newKey, mergedKeys, mergedIdentifiedKeys };
|
|
544
601
|
}
|
|
545
602
|
|
|
546
603
|
/**
|
|
@@ -551,10 +608,22 @@ async function mergeContacts(
|
|
|
551
608
|
tx: Tx,
|
|
552
609
|
candidates: ContactRow[],
|
|
553
610
|
ctx: ResolveCtx,
|
|
554
|
-
): Promise<{
|
|
611
|
+
): Promise<{
|
|
612
|
+
id: string;
|
|
613
|
+
resolvedKey: string;
|
|
614
|
+
mergedKeys?: string[];
|
|
615
|
+
mergedIdentifiedKeys?: string[];
|
|
616
|
+
}> {
|
|
555
617
|
const { survivor, losers } = pickSurvivor(candidates);
|
|
556
618
|
const survivorKey = contactKey(survivor);
|
|
557
619
|
|
|
620
|
+
// §5.3 emission point 1 (collide-MERGE) accumulators. MF-2: a loser's
|
|
621
|
+
// anonymous/uuid key is SAFE to absorb (it never identified a PostHog person);
|
|
622
|
+
// a loser's `external_id` is an already-identified person PostHog refuses to
|
|
623
|
+
// merge on the safe path — it is recorded as the twin residual, NEVER aliased.
|
|
624
|
+
const safeLoserKeys: string[] = [];
|
|
625
|
+
const identifiedLoserKeys: string[] = [];
|
|
626
|
+
|
|
558
627
|
for (const loser of losers) {
|
|
559
628
|
const loserStrKeys = [loser.externalId, loser.anonymousId, loser.id].filter(
|
|
560
629
|
(k): k is string => Boolean(k),
|
|
@@ -563,6 +632,19 @@ async function mergeContacts(
|
|
|
563
632
|
// anonymous id (its user_id rows were keyed on contacts.id).
|
|
564
633
|
const loserKeysToRewrite = loserStrKeys;
|
|
565
634
|
|
|
635
|
+
// MF-2 split: the SAFE-to-absorb key is the loser's anonymous/uuid key —
|
|
636
|
+
// `loser.anonymousId`, or `loser.id` ONLY when the loser was never
|
|
637
|
+
// identified (no external_id). When the loser HAS an external_id, that
|
|
638
|
+
// external_id was its canonical key, so its events were captured under it
|
|
639
|
+
// (identified) → residual; `loser.id` never carried events in that case, so
|
|
640
|
+
// there is no safe key to alias from it.
|
|
641
|
+
if (loser.externalId) {
|
|
642
|
+
identifiedLoserKeys.push(loser.externalId);
|
|
643
|
+
if (loser.anonymousId) safeLoserKeys.push(loser.anonymousId);
|
|
644
|
+
} else {
|
|
645
|
+
safeLoserKeys.push(loser.anonymousId ?? loser.id);
|
|
646
|
+
}
|
|
647
|
+
|
|
566
648
|
// (ii) user_events.user_id rewrite.
|
|
567
649
|
await tx
|
|
568
650
|
.update(userEvents)
|
|
@@ -703,8 +785,16 @@ async function mergeContacts(
|
|
|
703
785
|
}
|
|
704
786
|
|
|
705
787
|
// `newSurvivorKey` IS the post-merge canonical key of the survivor — the same
|
|
706
|
-
// value the old read-back derived for the merged row.
|
|
707
|
-
|
|
788
|
+
// value the old read-back derived for the merged row. The merge folds every
|
|
789
|
+
// loser key into it, so callers fan out `mergeAnalyticsIdentities` aliasing
|
|
790
|
+
// each SAFE loser key into `newSurvivorKey` (§5.3 emission point 1).
|
|
791
|
+
return {
|
|
792
|
+
id: survivor.id,
|
|
793
|
+
resolvedKey: newSurvivorKey,
|
|
794
|
+
mergedKeys: safeLoserKeys.length > 0 ? safeLoserKeys : undefined,
|
|
795
|
+
mergedIdentifiedKeys:
|
|
796
|
+
identifiedLoserKeys.length > 0 ? identifiedLoserKeys : undefined,
|
|
797
|
+
};
|
|
708
798
|
}
|
|
709
799
|
|
|
710
800
|
/**
|
|
@@ -1121,6 +1211,10 @@ export async function upsertContact(opts: {
|
|
|
1121
1211
|
created: boolean;
|
|
1122
1212
|
linked: boolean;
|
|
1123
1213
|
merged: boolean;
|
|
1214
|
+
/** §5.3 MF-2: safe-to-absorb loser keys folded this call (anon/uuid). */
|
|
1215
|
+
mergedKeys?: string[];
|
|
1216
|
+
/** §5.3 MF-2: already-identified loser keys (twin residual); never aliased. */
|
|
1217
|
+
mergedIdentifiedKeys?: string[];
|
|
1124
1218
|
}> {
|
|
1125
1219
|
return resolveOrCreateContact({
|
|
1126
1220
|
db: opts.db,
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import type { AnalyticsProvider } from "@hogsend/core";
|
|
2
|
+
import type { Database } from "@hogsend/db";
|
|
3
|
+
import {
|
|
4
|
+
logResidualTwins,
|
|
5
|
+
mergeAnalyticsIdentities,
|
|
6
|
+
} from "./analytics-identity.js";
|
|
7
|
+
import { resolveOrCreateContact } from "./contacts.js";
|
|
8
|
+
import type { Logger } from "./logger.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Input for {@link IdentityService.linkContact}. It mirrors the identity-attach
 * fields `resolveOrCreateContact` takes, without `db` — the service supplies
 * the container's database itself. The resolver requires at least one of
 * `userId` / `email` / `anonymousId` / `discordId`.
 */
export interface LinkContactArgs {
  /** Identity key forwarded to the resolver as `userId`. */
  userId?: string;
  /** Identity key forwarded to the resolver as `email`. */
  email?: string;
  /** Identity key forwarded to the resolver as `anonymousId`. */
  anonymousId?: string;
  /** Identity key forwarded to the resolver as `discordId`. */
  discordId?: string;
  /** Forwarded unchanged to the resolver as `contactProperties`. */
  contactProperties?: Record<string, unknown>;
}
|
|
23
|
+
|
|
24
|
+
/**
 * The identity helper held on the container (`client.identity`).
 *
 * It gives any identity-attach that happens OUTSIDE the `/v1/events` ingest
 * path — Discord `/link` (§7) first of all, but any consumer wiring too — one
 * shared way to fold two keys into a single analytics person, using the SAME
 * engine emission `ingestEvent` uses (§5.3). The alternative, each consumer
 * hand-rolling `resolveOrCreateContact` + `mergeIdentities`, is the bespoke
 * plumbing the spec calls out as the bug.
 */
export interface IdentityService {
  /**
   * Resolve or merge a contact and propagate the analytics merge in one call.
   *
   * Runs `resolveOrCreateContact` (which stays analytics-free and only needs
   * `db`). When that call reports safe-to-absorb keys — a collide-MERGE or a
   * canonical-key flip that absorbed an anonymous/uuid key — they are fanned
   * out through {@link mergeAnalyticsIdentities} with
   * `reason: "discord_link"`.
   *
   * MF-2: `mergedKeys` never contains an identified `external_id`; the
   * resolver splits those out, and they are reported as
   * `identity.merge.residual_twin` logs instead of being aliased.
   *
   * SURVIVOR RULE: `resolvedKey` is the survivor (`distinctId`) and each loser
   * is its absorbed `alias`. Example: a Discord `/link` that merges the
   * discord-keyed contact into the email contact emits
   * `distinctId = resolvedKey` (email/external survivor) and
   * `alias = <discord-contact uuid>` (the key the Discord-platform events were
   * captured under).
   *
   * The merge emission is best-effort: the helper swallows provider errors and
   * no-ops for a missing or incapable provider, so the resolver's result is
   * still returned.
   *
   * @param args Identity keys and optional contact properties to attach.
   * @returns The resolver's result, unchanged.
   */
  linkContact(args: LinkContactArgs): ReturnType<typeof resolveOrCreateContact>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Create the {@link IdentityService} for one container, bound to its database
 * and its active analytics provider.
 *
 * `analytics` is undefined when no provider is configured; the merge emission
 * then no-ops and only the contact resolve takes place.
 *
 * @param deps.db        Database handed to `resolveOrCreateContact`.
 * @param deps.analytics Provider handed to `mergeAnalyticsIdentities`, if any.
 * @param deps.logger    Optional logger handed to both logging helpers.
 * @returns A service exposing `linkContact`.
 */
export function createIdentityService(deps: {
  db: Database;
  analytics?: AnalyticsProvider;
  logger?: Logger;
}): IdentityService {
  // Captured once at construction; later changes to `deps` are not observed.
  const { db, analytics, logger } = deps;

  async function linkContact(args: LinkContactArgs) {
    const outcome = await resolveOrCreateContact({ db, ...args });

    const contactId = outcome.id;
    const survivorKey = outcome.resolvedKey;
    const safeLosers = outcome.mergedKeys ?? [];
    const identifiedLosers = outcome.mergedIdentifiedKeys ?? [];

    // §5.3 emission point 1, reused (§7). Emit only when the resolver actually
    // folded keys this call; these are the safe anon/uuid losers (MF-2).
    if (safeLosers.length > 0) {
      mergeAnalyticsIdentities({
        analytics,
        survivorKey,
        loserKeys: safeLosers,
        reason: "discord_link",
        contactId,
        logger,
      });
    }

    // Identified `external_id`s are never aliased — they are only reported as
    // residual twins for observability.
    if (identifiedLosers.length > 0) {
      logResidualTwins({
        survivorKey,
        identifiedLoserKeys: identifiedLosers,
        contactId,
        logger,
      });
    }

    return outcome;
  }

  return { linkContact };
}
|
|
@@ -8,8 +8,12 @@ import {
|
|
|
8
8
|
/**
|
|
9
9
|
* Short-lived identity token appended to tracked-link redirects as `hs_t`
|
|
10
10
|
* (opt-in via TRACKING_IDENTITY_TOKEN). The landing site exchanges it at
|
|
11
|
-
* `POST /v1/t/identify
|
|
12
|
-
*
|
|
11
|
+
* `POST /v1/t/identify`, where the engine fires a SERVER-SIDE `alias` folding
|
|
12
|
+
* the caller's own anon session into the token's canonical id — stitching the
|
|
13
|
+
* click to the web session. Minted for EMAIL links by default; non-email
|
|
14
|
+
* (Discord/referral) links carry a token only when explicitly stitch-bearing
|
|
15
|
+
* (`tracked_links.distinct_id` set) — referral links are token-less by default
|
|
16
|
+
* (MF-4 anti-hijack).
|
|
13
17
|
*
|
|
14
18
|
* ENCRYPTED (AES-256-GCM keyed off BETTER_AUTH_SECRET), not merely signed:
|
|
15
19
|
* the distinct id can fall back to an email address, and a signed-but-
|
|
@@ -18,11 +22,38 @@ import {
|
|
|
18
22
|
* auth tag also covers integrity, so tampering fails decryption.
|
|
19
23
|
*/
|
|
20
24
|
|
|
25
|
+
/**
 * The single merge mode an identity token is allowed to authorize: absorb the
 * CALLER's own anonymous session INTO the token's canonical `distinctId`.
 *
 * No "become the subject" / overwrite mode exists, on purpose — leaving it out
 * is the anti-hijack invariant.
 */
export type IdentityTokenScope = "anon-absorb";
|
|
31
|
+
|
|
21
32
|
export interface IdentityTokenPayload {
  /**
   * Canonical contact key the landing site folds INTO. It is the ONLY
   * ever-identified id — NEVER a per-link or anonymous id.
   */
  distinctId: string;
  /**
   * Mint provenance: `"email:<sendId>"` | `"link:<linkId>"`. Referral links
   * are excluded by default because they carry no identity token.
   */
  src: string;
  /**
   * Authorized merge mode; `"anon-absorb"` is the only value ever minted.
   * OPTIONAL on the wire for the rolling-deploy window (MF-7): a token minted
   * by the still-old click route has no `scope`, so `validateIdentityToken`
   * treats a MISSING scope as `"anon-absorb"` (allow) and rejects only a
   * PRESENT-and-wrong value.
   */
  scope?: IdentityTokenScope;
  /** Expiry, compared against `Math.floor(Date.now() / 1000)` on validation. */
  exp: number;
  /**
   * @deprecated Alias of `src` kept for ONE minor (same window as the
   * `resendId` → `messageId` deprecation). Old tokens carry only
   * `emailSendId`; new email tokens carry both. Prefer reading `src`.
   */
  emailSendId?: string;
}
|
|
27
58
|
|
|
28
59
|
export class InvalidIdentityTokenError extends Error {
|
|
@@ -43,11 +74,27 @@ function deriveKey(secret: string): Buffer {
|
|
|
43
74
|
export function generateIdentityToken(opts: {
|
|
44
75
|
secret: string;
|
|
45
76
|
distinctId: string;
|
|
46
|
-
|
|
77
|
+
/**
|
|
78
|
+
* Mint provenance: `"email:<sendId>"` | `"link:<linkId>"`. When omitted, falls
|
|
79
|
+
* back to `email:<emailSendId>` for the legacy email-link caller.
|
|
80
|
+
*/
|
|
81
|
+
src?: string;
|
|
82
|
+
/** Defaults to `"anon-absorb"` — the only mode a token may authorize. */
|
|
83
|
+
scope?: IdentityTokenScope;
|
|
84
|
+
/**
|
|
85
|
+
* @deprecated Pass `src` instead. Kept for the one-minor deprecation window so
|
|
86
|
+
* existing email-link callers compile unchanged; mirrored into the payload's
|
|
87
|
+
* deprecated `emailSendId` field and used to synthesize `src` when `src` is
|
|
88
|
+
* absent.
|
|
89
|
+
*/
|
|
90
|
+
emailSendId?: string;
|
|
47
91
|
expiresInSeconds?: number;
|
|
48
92
|
}): string {
|
|
93
|
+
const src = opts.src ?? (opts.emailSendId ? `email:${opts.emailSendId}` : "");
|
|
49
94
|
const payload: IdentityTokenPayload = {
|
|
50
95
|
distinctId: opts.distinctId,
|
|
96
|
+
src,
|
|
97
|
+
scope: opts.scope ?? "anon-absorb",
|
|
51
98
|
emailSendId: opts.emailSendId,
|
|
52
99
|
exp:
|
|
53
100
|
Math.floor(Date.now() / 1000) +
|
|
@@ -108,5 +155,18 @@ export function validateIdentityToken(opts: {
|
|
|
108
155
|
if (payload.exp < Math.floor(Date.now() / 1000)) {
|
|
109
156
|
throw new InvalidIdentityTokenError("Token expired");
|
|
110
157
|
}
|
|
158
|
+
// MF-7 — missing-scope-ALLOW. The API and worker deploy independently from
|
|
159
|
+
// the same image, so a token minted by the still-old click route carries no
|
|
160
|
+
// `scope`. Treat a MISSING scope as the only legal mode (`"anon-absorb"`);
|
|
161
|
+
// reject ONLY a present-and-wrong value. Old tokens (no `scope`, no `src`)
|
|
162
|
+
// still validate — this check never widened the required-shape gate above.
|
|
163
|
+
if (payload.scope !== undefined && payload.scope !== "anon-absorb") {
|
|
164
|
+
throw new InvalidIdentityTokenError("Unsupported token scope");
|
|
165
|
+
}
|
|
166
|
+
// Backfill `src` from the deprecated `emailSendId` for old tokens, so the one
|
|
167
|
+
// response schema (`{ distinctId, src, emailSendId? }`) is always populated.
|
|
168
|
+
if (typeof payload.src !== "string" || payload.src.length === 0) {
|
|
169
|
+
payload.src = payload.emailSendId ? `email:${payload.emailSendId}` : "";
|
|
170
|
+
}
|
|
111
171
|
return payload;
|
|
112
172
|
}
|