fullstackgtm 0.21.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +15 -0
- package/dist/bulkUpdate.d.ts +16 -1
- package/dist/bulkUpdate.js +88 -5
- package/dist/cli.js +670 -8
- package/dist/dedupe.d.ts +14 -0
- package/dist/dedupe.js +140 -0
- package/dist/enrich.d.ts +220 -0
- package/dist/enrich.js +724 -0
- package/dist/enrichApollo.d.ts +59 -0
- package/dist/enrichApollo.js +192 -0
- package/dist/index.d.ts +5 -1
- package/dist/index.js +5 -1
- package/dist/marketReport.js +97 -44
- package/dist/reassign.d.ts +19 -0
- package/dist/reassign.js +87 -0
- package/dist/suggest.js +67 -5
- package/llms.txt +15 -0
- package/package.json +1 -1
- package/src/bulkUpdate.ts +109 -6
- package/src/cli.ts +756 -8
- package/src/dedupe.ts +182 -0
- package/src/enrich.ts +1016 -0
- package/src/enrichApollo.ts +250 -0
- package/src/index.ts +48 -1
- package/src/marketReport.ts +116 -62
- package/src/reassign.ts +117 -0
- package/src/suggest.ts +69 -5
package/src/dedupe.ts
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Governed duplicate cleanup: `dedupe` groups records by a normalized
|
|
3
|
+
* identity key (account domain, contact email, or name) and builds a
|
|
4
|
+
* dry-run PatchPlan of merge_records operations — one per duplicate group,
|
|
5
|
+
* with a DETERMINISTIC survivor. It NEVER writes — the plan flows through
|
|
6
|
+
* the same plans-approve → apply gate as every other plan.
|
|
7
|
+
*
|
|
8
|
+
* The merge contract matches the connectors (see mergeRecords in
|
|
9
|
+
* connectors/hubspot.ts): afterValue = the survivor id, beforeValue = the
|
|
10
|
+
* ids of EVERY record in the group (survivor included). Merges are
|
|
11
|
+
* IRREVERSIBLE on every provider that supports them, so every operation is
|
|
12
|
+
* riskLevel high and approvalRequired.
|
|
13
|
+
*
|
|
14
|
+
* Survivor selection ("--keep"):
|
|
15
|
+
* richest (default) the record with the most non-empty canonical data
|
|
16
|
+
* fields (bookkeeping fields like id/crmId/identities
|
|
17
|
+
* don't count); ties break to the lowest numeric id
|
|
18
|
+
* oldest the lowest numeric id (CRMs assign ids in creation
|
|
19
|
+
* order)
|
|
20
|
+
*/
|
|
21
|
+
import { normalizeDomain } from "./merge.ts";
|
|
22
|
+
import { stableHash } from "./rules.ts";
|
|
23
|
+
import type {
|
|
24
|
+
CanonicalGtmSnapshot,
|
|
25
|
+
GtmObjectType,
|
|
26
|
+
PatchOperation,
|
|
27
|
+
PatchPlan,
|
|
28
|
+
} from "./types.ts";
|
|
29
|
+
|
|
30
|
+
/** Options accepted by `buildDedupePlan`. */
export type DedupeOptions = {
  /** which record collection of the snapshot is scanned for duplicates */
  objectType: "account" | "contact" | "deal";
  /** identity field the records are grouped by; values are normalized first */
  key: "domain" | "email" | "name";
  /** how the surviving record of a group is picked (defaults to "richest") */
  keep?: "richest" | "oldest";
  /** when set, replaces the generated reason text on every operation */
  reason?: string;
  /** upper bound on duplicate groups before the builder refuses (defaults to 500) */
  maxOperations?: number;
};

type DedupeObjectType = DedupeOptions["objectType"];
type DedupeIdentityKey = DedupeOptions["key"];

/** Snapshot collection name that holds the records of each object type. */
const COLLECTIONS: Record<DedupeObjectType, "accounts" | "contacts" | "deals"> = {
  account: "accounts",
  contact: "contacts",
  deal: "deals",
};

/** Identity keys that are accepted for each object type. */
const VALID_KEYS: Record<DedupeObjectType, DedupeIdentityKey[]> = {
  account: ["domain", "name"],
  contact: ["email", "name"],
  deal: ["name"],
};
|
|
53
|
+
|
|
54
|
+
/**
 * Fields that never contribute to a record's richness score. The sync
 * machinery fills them in (or leaves them out) regardless of how good the
 * record's data is, so counting them would let plumbing pick the survivor
 * of a merge.
 */
const NON_DATA_FIELDS = new Set(["id", "provider", "crmId", "identities", "raw", "provenance"]);

/**
 * Count the data fields of `record` that carry a value.
 *
 * A field counts when its name is not in NON_DATA_FIELDS and its value is
 * neither undefined, null, nor the empty string. Values such as 0, false,
 * empty arrays and empty objects therefore count as populated.
 */
function populatedDataFields(record: Record<string, unknown>): number {
  let populated = 0;
  for (const field of Object.keys(record)) {
    if (NON_DATA_FIELDS.has(field)) continue;
    const value = record[field];
    if (value === undefined || value === null || value === "") continue;
    populated += 1;
  }
  return populated;
}
|
|
67
|
+
|
|
68
|
+
/** Matches ids made of decimal digits only. */
const ALL_DIGITS = /^\d+$/;
/** Matches leading zeros, always leaving at least one digit behind. */
const LEADING_ZEROS = /^0+(?=\d)/;

/**
 * True when id `a` sorts strictly before id `b`.
 *
 * The order is total, so it is safe to use inside a sort comparator:
 *   1. ids consisting only of digits sort before every other id;
 *   2. two all-digit ids compare by numeric value. The comparison works on
 *      the digit strings (length first, then digits), so ids larger than
 *      Number.MAX_SAFE_INTEGER are compared exactly;
 *   3. two non-numeric ids, or numerically equal ids such as "7" and "007",
 *      compare as plain strings.
 *
 * Mixing numeric and lexicographic comparison without rule 1 produces
 * cycles ("9" < "10" < "1a" < "9"), which makes the sort result depend on
 * the input order.
 */
function idBefore(a: string, b: string): boolean {
  const aNumeric = ALL_DIGITS.test(a);
  const bNumeric = ALL_DIGITS.test(b);
  // rule 1: a numeric id always precedes a non-numeric one
  if (aNumeric !== bNumeric) return aNumeric;
  if (aNumeric) {
    // rule 2: with leading zeros removed, a shorter digit string is a smaller
    // number, and equal-length digit strings order like their values
    const aDigits = a.replace(LEADING_ZEROS, "");
    const bDigits = b.replace(LEADING_ZEROS, "");
    if (aDigits.length !== bDigits.length) return aDigits.length < bDigits.length;
    if (aDigits !== bDigits) return aDigits < bDigits;
  }
  // rule 3: plain string order, also the tie-break for equal numeric values
  return a < b;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Normalize a record's identity key.
 *
 * For the "domain" key the record's `domain` is handed to `normalizeDomain`.
 * For every other key the field value is converted to text, trimmed and
 * lower-cased.
 *
 * @param record the record to read the identity field from
 * @param key which identity field to read
 * @returns the normalized key, or undefined when the field is missing,
 *   blank, or not a primitive value
 */
export function dedupeKey(
  record: Record<string, unknown>,
  key: DedupeOptions["key"],
): string | undefined {
  if (key === "domain") return normalizeDomain(record.domain as string | undefined);
  const raw = record[key];
  // Only primitives have a meaningful text form. Objects would all stringify
  // to "[object Object]" and be grouped together as false duplicates.
  switch (typeof raw) {
    case "string":
    case "number":
    case "bigint":
    case "boolean":
      break;
    default:
      return undefined;
  }
  const normalized = String(raw).trim().toLowerCase();
  return normalized || undefined;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build a dry-run PatchPlan that proposes one `merge_records` operation per
 * group of records sharing the same normalized identity key.
 *
 * Nothing is written: the returned plan has `dryRun: true`, and every
 * operation is `riskLevel: "high"` with `approvalRequired: true`.
 *
 * Records are skipped when they have no identity key value or no id (a
 * record without an id cannot be named in a merge). A record that appears
 * more than once under the same id is counted once, so a group never
 * proposes merging a record into itself.
 *
 * @param snapshot the snapshot whose records are scanned
 * @param options object type, identity key, survivor strategy and limits
 * @returns the plan; status is "needs_approval" when it holds at least one
 *   operation and "draft" otherwise
 * @throws Error when the object type is unknown, the key is not valid for
 *   the object type, `keep` is not "richest"/"oldest", `maxOperations` is
 *   NaN or negative, or more duplicate groups are found than allowed
 */
export function buildDedupePlan(
  snapshot: CanonicalGtmSnapshot,
  options: DedupeOptions,
): PatchPlan {
  const keep = options.keep ?? "richest";
  const maxOperations = options.maxOperations ?? 500;

  // Options may come from untyped input, so verify the object type at runtime
  // instead of failing later with an opaque TypeError.
  if (!Object.prototype.hasOwnProperty.call(COLLECTIONS, options.objectType)) {
    throw new Error(
      `Cannot dedupe "${String(options.objectType)}". Valid object types: ${Object.keys(COLLECTIONS).join(", ")}.`,
    );
  }
  const collection = COLLECTIONS[options.objectType];
  const validKeys = VALID_KEYS[options.objectType];
  if (!validKeys.includes(options.key)) {
    throw new Error(
      `Cannot dedupe ${collection} by "${options.key}". Valid keys for ${options.objectType}s: ${validKeys.join(", ")}.`,
    );
  }
  if (keep !== "richest" && keep !== "oldest") {
    throw new Error(`--keep must be richest or oldest, got "${keep}".`);
  }
  // `size > NaN` is always false, so a NaN cap would silently disable the
  // safety limit; reject it (and negative caps) up front.
  if (Number.isNaN(maxOperations) || maxOperations < 0) {
    throw new Error(`--max-operations must be a non-negative number, got "${maxOperations}".`);
  }

  // A snapshot without the collection is treated as having no records.
  const collectionValue: unknown = snapshot[collection];
  const records = Array.isArray(collectionValue)
    ? (collectionValue as Array<Record<string, unknown>>)
    : [];

  // identity key -> (record id -> record); the inner map collapses repeated ids
  const groups = new Map<string, Map<string, Record<string, unknown>>>();
  for (const record of records) {
    if (record.id === undefined || record.id === null || record.id === "") continue;
    const key = dedupeKey(record, options.key);
    if (!key) continue; // records without the identity key cannot be duplicates by it
    let members = groups.get(key);
    if (!members) {
      members = new Map();
      groups.set(key, members);
    }
    const id = String(record.id);
    if (!members.has(id)) members.set(id, record);
  }
  for (const [key, members] of Array.from(groups.entries())) {
    if (members.size < 2) groups.delete(key);
  }

  if (groups.size > maxOperations) {
    throw new Error(
      `Found ${groups.size} duplicate groups — above the ${maxOperations}-group safety cap. Raise --max-operations explicitly after reviewing the volume.`,
    );
  }

  const operations: PatchOperation[] = [];
  let duplicateRecordCount = 0;
  for (const [key, members] of groups) {
    duplicateRecordCount += members.size;

    // Rank once by id (lowest first); richness is computed a single time per
    // record instead of on every comparator call.
    const byId = Array.from(members, ([id, record]) => ({
      id,
      record,
      richness: populatedDataFields(record),
    })).sort((a, b) => (idBefore(a.id, b.id) ? -1 : 1));
    const groupIds = byId.map((member) => member.id);

    // deterministic survivor: the lowest id, unless keeping the richest, in
    // which case a strictly richer record wins (ties stay with the lowest id)
    const chosen = byId.reduce((best, candidate) =>
      keep === "richest" && candidate.richness > best.richness ? candidate : best,
    );
    const survivor = chosen.record;

    const survivorName =
      typeof survivor.name === "string" && survivor.name
        ? survivor.name
        : typeof survivor.email === "string" && survivor.email
          ? survivor.email
          : chosen.id;
    const keepDetail =
      keep === "richest"
        ? `${chosen.richness} populated data fields, the most in the group (ties break to the lowest id)`
        : "the lowest id in the group (oldest record)";

    operations.push({
      id: `op_${stableHash(`dedupe:${options.objectType}:${options.key}:${groupIds.join(",")}`)}`,
      objectType: options.objectType as GtmObjectType,
      objectId: chosen.id,
      operation: "merge_records",
      field: "merge",
      beforeValue: groupIds,
      afterValue: chosen.id,
      reason:
        options.reason ??
        `${members.size} ${collection} share ${options.key} "${key}". Merge into "${survivorName}" (${chosen.id}) — survivor has ${keepDetail}.`,
      riskLevel: "high",
      approvalRequired: true,
      sourceRuleOrPolicy: "dedupe",
      groupId: `grp_${options.objectType}_${chosen.id}`,
      rollback:
        "IRREVERSIBLE: provider merges cannot be unmerged. The pre-apply snapshot retains every record's field values; recreate a record manually from it if a merge was wrong.",
    });
  }

  return {
    id: `patch_plan_${stableHash(
      `dedupe:${snapshot.provider}:${snapshot.generatedAt}:${options.objectType}:${options.key}:${keep}:${operations.length}`,
    )}`,
    title: `Dedupe: ${collection} sharing the same ${options.key}`,
    createdAt: snapshot.generatedAt,
    status: operations.length > 0 ? "needs_approval" : "draft",
    dryRun: true,
    summary: `${groups.size} duplicate group(s) across ${duplicateRecordCount} ${collection} (key: ${options.key}, keep: ${keep}); ${operations.length} proposed dry-run merge_records operation(s). Merges are IRREVERSIBLE — review each survivor before approving.`,
    findings: [],
    operations,
  };
}
|