fullstackgtm 0.21.2 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/dist/bulkUpdate.d.ts +16 -1
- package/dist/bulkUpdate.js +88 -5
- package/dist/cli.js +214 -4
- package/dist/dedupe.d.ts +14 -0
- package/dist/dedupe.js +140 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -1
- package/dist/marketReport.js +95 -42
- package/dist/reassign.d.ts +19 -0
- package/dist/reassign.js +87 -0
- package/dist/suggest.js +67 -5
- package/package.json +1 -1
- package/src/bulkUpdate.ts +109 -6
- package/src/cli.ts +231 -4
- package/src/dedupe.ts +182 -0
- package/src/index.ts +3 -1
- package/src/marketReport.ts +110 -56
- package/src/reassign.ts +117 -0
- package/src/suggest.ts +69 -5
package/src/cli.ts
CHANGED
|
@@ -75,6 +75,8 @@ import {
|
|
|
75
75
|
} from "./llm.ts";
|
|
76
76
|
import { resolveRecord, type ResolveCandidate } from "./resolve.ts";
|
|
77
77
|
import { buildBulkUpdatePlan } from "./bulkUpdate.ts";
|
|
78
|
+
import { buildDedupePlan, type DedupeOptions } from "./dedupe.ts";
|
|
79
|
+
import { buildReassignPlans, type ReassignObjectType } from "./reassign.ts";
|
|
78
80
|
import { suggestValues, type ValueSuggestion } from "./suggest.ts";
|
|
79
81
|
import type { FieldMappings } from "./mappings.ts";
|
|
80
82
|
import type {
|
|
@@ -135,7 +137,7 @@ Usage:
|
|
|
135
137
|
against the stored capture it cites before it's accepted — then
|
|
136
138
|
compute deterministic front states and drift, render the field
|
|
137
139
|
report. refresh = capture → classify → drift → report in one step
|
|
138
|
-
fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where …] (--set <field>=<value> [--set …] | --archive | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
|
|
140
|
+
fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where …] (--set <field>=<value> [--set …] | --archive [--force-archive-duplicates] | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
|
|
139
141
|
governed generic writes: filter the snapshot
|
|
140
142
|
(field=value, field!=value, field~substr, field!~substr,
|
|
141
143
|
field:empty, field:notempty, '|' = any-of; canonical fields
|
|
@@ -147,6 +149,35 @@ Usage:
|
|
|
147
149
|
apply time (incl. mid-apply rechecks); equality filters
|
|
148
150
|
double as preconditions; per-record ops apply
|
|
149
151
|
all-or-nothing; guards assert cross-record conditions.
|
|
152
|
+
--set <field>=from:<sourceField> derives the value PER
|
|
153
|
+
RECORD from the snapshot (relational sources like
|
|
154
|
+
account.ownerId included); records whose source is empty
|
|
155
|
+
are skipped and counted, never guessed. --archive refuses
|
|
156
|
+
records that share their identity key (account domain /
|
|
157
|
+
contact email) with another record — merge those with
|
|
158
|
+
\`dedupe\` instead, or --force-archive-duplicates.
|
|
159
|
+
fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--json] [--out <path>]
|
|
160
|
+
find duplicate groups by normalized identity key and build
|
|
161
|
+
a dry-run plan of merge_records operations — one per group,
|
|
162
|
+
deterministic survivor (richest = most populated data
|
|
163
|
+
fields, ties to lowest id; oldest = lowest id). Approve and
|
|
164
|
+
apply like any plan; merges are IRREVERSIBLE on apply.
|
|
165
|
+
fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--save] [--json] [--out <path>]
|
|
166
|
+
ownership handoff playbook: one bulk-update-style plan per
|
|
167
|
+
object type (ownerId=<from> → <to>). Extra --where scoping
|
|
168
|
+
is account-lifted for deals/contacts (domain~.de becomes
|
|
169
|
+
account.domain~.de); --except-deal-stage <stage> excludes
|
|
170
|
+
deals in that stage AND every record whose account has an
|
|
171
|
+
open deal in it, re-verified per record at apply time.
|
|
172
|
+
Deal plans cover open deals only unless
|
|
173
|
+
--include-closed-deals.
|
|
174
|
+
fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]
|
|
175
|
+
one-shot composite: audit ONE rule → save the plan →
|
|
176
|
+
suggest values → approve only suggestion-backed operations
|
|
177
|
+
meeting the confidence bar (plus operations that need no
|
|
178
|
+
value) → with --yes, apply through the provider and print a
|
|
179
|
+
stage-by-stage summary. Without --yes it stops after
|
|
180
|
+
approval and prints the apply command.
|
|
150
181
|
fullstackgtm suggest --plan-id <id> | --plan <path> [source options] [--json] [--out <path>]
|
|
151
182
|
derive values for requires_human_* placeholders
|
|
152
183
|
from snapshot evidence, with confidence + reasons
|
|
@@ -1257,29 +1288,213 @@ async function bulkUpdateCommand(args: string[]) {
|
|
|
1257
1288
|
where,
|
|
1258
1289
|
set: Object.keys(set).length > 0 ? set : undefined,
|
|
1259
1290
|
archive: rest.includes("--archive"),
|
|
1291
|
+
forceArchiveDuplicates: rest.includes("--force-archive-duplicates"),
|
|
1260
1292
|
createTask: option(rest, "--create-task") ?? undefined,
|
|
1261
1293
|
require: repeatedOption(rest, "--require"),
|
|
1262
1294
|
guard: repeatedOption(rest, "--guard"),
|
|
1263
1295
|
reason: option(rest, "--reason") ?? undefined,
|
|
1264
1296
|
maxOperations: numericOption(rest, "--max-operations"),
|
|
1265
1297
|
});
|
|
1266
|
-
|
|
1298
|
+
await emitPlan(plan, rest);
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
/**
 * Common output path for commands that produce a single plan.
 *
 * - `--out <path>`: writes the plan as pretty-printed JSON (trailing newline)
 *   to the path, resolved against the current working directory.
 * - `--save`: stores the plan in the file plan store and prints the
 *   review → approve → apply hint on stderr.
 * - `--json`: prints the plan as JSON on stdout; otherwise the markdown
 *   rendering is printed.
 */
async function emitPlan(plan: PatchPlan, args: string[]) {
  const outPath = option(args, "--out");
  if (outPath) {
    const serialized = `${JSON.stringify(plan, null, 2)}\n`;
    writeFileSync(resolve(process.cwd(), outPath), serialized);
  }

  const shouldSave = args.includes("--save");
  if (shouldSave) {
    await createFilePlanStore().save(plan);
    // The hint goes to stderr, so stdout carries only the rendered plan.
    console.error(
      `Saved plan ${plan.id} (${plan.operations.length} operations). Review with \`fullstackgtm plans show ${plan.id}\`, approve with \`fullstackgtm plans approve ${plan.id} --operations <ids|all>\`, then \`fullstackgtm apply --plan-id ${plan.id} --provider <name>\`.`,
    );
  }

  const rendered = args.includes("--json")
    ? JSON.stringify(plan, null, 2)
    : patchPlanToMarkdown(plan);
  console.log(rendered);
}
|
|
1282
1319
|
|
|
1320
|
+
/**
 * CLI entry for `dedupe`: validates the object type and `--key`, reads the
 * snapshot, asks `buildDedupePlan` for a plan of merge_records operations and
 * hands it to `emitPlan`. Nothing is written to a provider here — the plan
 * still has to be approved and applied like any other plan.
 *
 * `--keep` is passed through as given; `buildDedupePlan` rejects unknown values.
 */
async function dedupeCommand(args: string[]) {
  const objectType = args[0];
  const rest = args.slice(1);

  const objectTypes = ["account", "contact", "deal"];
  if (!objectType || !objectTypes.includes(objectType)) {
    throw new Error(
      "Usage: fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }

  const identityKeys = ["domain", "email", "name"];
  const key = option(rest, "--key");
  if (!key || !identityKeys.includes(key)) {
    throw new Error("dedupe requires --key <domain|email|name> (the identity field duplicates share).");
  }

  const keepChoice = option(rest, "--keep") ?? undefined;
  const snapshot = await readSnapshot(rest);

  const dedupeOptions: DedupeOptions = {
    objectType: objectType as DedupeOptions["objectType"],
    key: key as DedupeOptions["key"],
    keep: keepChoice as DedupeOptions["keep"],
    reason: option(rest, "--reason") ?? undefined,
    maxOperations: numericOption(rest, "--max-operations"),
  };
  const plan = buildDedupePlan(snapshot, dedupeOptions);
  await emitPlan(plan, rest);
}
|
|
1347
|
+
|
|
1348
|
+
/**
 * Ownership handoff playbook: compile one bulk-update-style plan per object
 * type via `buildReassignPlans`, then emit them.
 *
 * Output handling:
 * - `--out <path>`: writes ALL plans as one pretty-printed JSON array.
 * - `--save`: stores every plan in the file plan store. This is honored in
 *   both output modes; previously `--json` returned before saving, so
 *   `--save --json` silently stored nothing.
 * - `--json`: prints the plan array on stdout. When combined with `--save`
 *   the saved hint goes to stderr so stdout stays parseable JSON.
 * - otherwise: prints one summary entry per plan plus a saved / dry-run note.
 *
 * @throws Error with the usage string when `--from` or `--to` is missing.
 */
async function reassignCommand(args: string[]) {
  const from = option(args, "--from");
  const to = option(args, "--to");
  if (!from || !to) {
    throw new Error(
      "Usage: fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }
  // Comma-separated list; blank entries are dropped. Left undefined when the
  // flag is absent so buildReassignPlans can apply its own default.
  const objects = option(args, "--objects")
    ?.split(",")
    .map((value) => value.trim())
    .filter(Boolean) as ReassignObjectType[] | undefined;
  const snapshot = await readSnapshot(args);
  const plans = buildReassignPlans(snapshot, {
    fromOwnerId: from,
    toOwnerId: to,
    objects,
    where: repeatedOption(args, "--where"),
    exceptDealStage: option(args, "--except-deal-stage") ?? undefined,
    includeClosedDeals: args.includes("--include-closed-deals"),
    reason: option(args, "--reason") ?? undefined,
    maxOperations: numericOption(args, "--max-operations"),
  });

  const out = option(args, "--out");
  if (out) {
    writeFileSync(resolve(process.cwd(), out), `${JSON.stringify(plans, null, 2)}\n`);
  }

  // Persist BEFORE choosing the output format so --save works with --json too.
  const store = args.includes("--save") ? createFilePlanStore() : null;
  if (store) {
    for (const plan of plans) {
      await store.save(plan);
    }
  }
  const savedHint = `Saved ${plans.length} plan(s). For each: \`fullstackgtm plans show <id>\`, \`fullstackgtm plans approve <id> --operations <ids|all>\`, then \`fullstackgtm apply --plan-id <id> --provider <name>\`.`;

  if (args.includes("--json")) {
    console.log(JSON.stringify(plans, null, 2));
    // stderr keeps stdout machine-readable (same convention as emitPlan).
    if (store) console.error(savedHint);
    return;
  }

  for (const plan of plans) {
    console.log(`${plan.id} ${String(plan.operations.length).padStart(3)} operation(s) ${plan.title}`);
    console.log(` ${plan.summary}`);
  }
  if (store) {
    console.log(`\n${savedHint}`);
  } else {
    console.log("\nDry run only — re-run with --save to store the plans for approval.");
  }
}
|
|
1398
|
+
|
|
1399
|
+
/**
 * One-shot composite for a single audit rule:
 * audit → save → suggest → approve → (with --yes) apply.
 *
 * Approval covers two kinds of operations: those whose suggestion has a value
 * at an accepted confidence (`high`, plus `low` with `--min-confidence low`,
 * plus `create` with `--include-creates`), and those for which `suggestValues`
 * returned no suggestion at all. Every other operation stays unapproved.
 *
 * Without `--yes` the command stops after approval and prints the apply
 * command. With `--yes` it applies through the provider connector, records
 * the run, prints per-status counts and sets exit code 1 when the run failed.
 *
 * @throws Error on missing --rule/--provider, an invalid --min-confidence,
 *   or a rule id that is not among the configured rules.
 */
async function fixCommand(args: string[]) {
  const ruleId = option(args, "--rule");
  const provider = option(args, "--provider");
  if (!ruleId || !provider) {
    throw new Error(
      "Usage: fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]",
    );
  }
  const minConfidence = option(args, "--min-confidence") ?? "high";
  if (minConfidence !== "high" && minConfidence !== "low") {
    throw new Error("--min-confidence must be high or low");
  }
  const includeCreates = args.includes("--include-creates");

  // Resolve the single rule to audit from the configured rule set.
  const loaded = loadConfig(option(args, "--config") ?? undefined);
  const configured = await resolveConfiguredRules(loaded);
  const rule = configured.find((candidate) => candidate.id === ruleId);
  if (!rule) {
    throw new Error(`Unknown rule: ${ruleId}. Available rules: ${configured.map((r) => r.id).join(", ")}`);
  }
  const policy = mergePolicy(defaultPolicy(), loaded?.config);
  const today = option(args, "--today");
  if (today) {
    policy.today = today;
  }

  // Stage 1: audit the snapshot with only that rule, and save the plan.
  const snapshot = await readSnapshot(args);
  const plan = auditSnapshot(snapshot, policy, [rule]);
  if (plan.operations.length === 0) {
    console.log(`fix ${ruleId}: audit proposed 0 operations — nothing to fix.`);
    return;
  }
  const store = createFilePlanStore();
  await store.save(plan);

  // Stage 2: collect suggested values that clear the confidence bar.
  const suggestions = suggestValues(plan, snapshot);
  const accepted = new Set(minConfidence === "low" ? ["high", "low"] : ["high"]);
  const overrides: Record<string, string> = {};
  let belowBar = 0;
  for (const suggestion of suggestions) {
    const meetsBar =
      accepted.has(suggestion.confidence) || (includeCreates && suggestion.confidence === "create");
    if (!suggestion.suggestedValue || !meetsBar) {
      belowBar += 1;
      continue;
    }
    overrides[suggestion.operationId] = suggestion.suggestedValue;
  }

  // Stage 3: approve operations with a qualifying override, plus operations
  // that produced no suggestion entry (nothing to fill in). The rest stay
  // unapproved.
  const placeholderIds = new Set(suggestions.map((suggestion) => suggestion.operationId));
  const approvedIds = plan.operations
    .filter((operation) => overrides[operation.id] !== undefined || !placeholderIds.has(operation.id))
    .map((operation) => operation.id);

  const createsNote = includeCreates ? " (creates included)" : "";
  const belowBarNote = belowBar > 0 ? `; ${belowBar} below the bar (left unapproved)` : "";
  const lines = [
    `fix ${ruleId} via ${provider}:`,
    ` proposed: ${plan.operations.length} operation(s) — plan ${plan.id} (saved)`,
    ` suggested: ${Object.keys(overrides).length} value(s) at ${minConfidence}+ confidence${createsNote}${belowBarNote}`,
    ` approved: ${approvedIds.length} of ${plan.operations.length}`,
  ];

  if (approvedIds.length === 0) {
    lines.push(" applied: 0 — no operation met the confidence bar");
    console.log(lines.join("\n"));
    console.log(
      `\nWiden with --min-confidence low / --include-creates, or approve manually: \`fullstackgtm plans approve ${plan.id} --operations <ids> --value <opId>=<value>\`.`,
    );
    return;
  }
  await store.approveOperations(plan.id, approvedIds, overrides);

  // Stage 4: apply only on explicit opt-in.
  const confirmed = args.includes("--yes");
  if (!confirmed) {
    lines.push(" applied: 0 (stopped before apply — pass --yes to write)");
    console.log(lines.join("\n"));
    console.log(`\nApply with:\n fullstackgtm apply --plan-id ${plan.id} --provider ${provider}`);
    return;
  }

  const connector = await connectorFor(provider, args);
  const run = await applyPatchPlan(connector, plan, {
    approvedOperationIds: approvedIds,
    valueOverrides: overrides,
  });
  await store.recordRun(plan.id, run);

  // Tally results per status; statuses outside the four seeded ones are
  // counted too but not printed.
  const counts: Record<string, number> = { applied: 0, conflict: 0, skipped: 0, failed: 0 };
  for (const { status } of run.results) {
    const seen = counts[status] ?? 0;
    counts[status] = seen + 1;
  }
  lines.push(
    ` applied: ${counts.applied} · conflicts: ${counts.conflict} · skipped: ${counts.skipped} · failed: ${counts.failed}`,
  );
  console.log(lines.join("\n"));
  if (run.status === "failed") {
    process.exitCode = 1;
  }
}
|
|
1497
|
+
|
|
1283
1498
|
async function suggest(args: string[]) {
|
|
1284
1499
|
const planId = option(args, "--plan-id");
|
|
1285
1500
|
const planPath = option(args, "--plan");
|
|
@@ -2124,6 +2339,18 @@ export async function runCli(argv: string[]) {
|
|
|
2124
2339
|
await bulkUpdateCommand(args);
|
|
2125
2340
|
return;
|
|
2126
2341
|
}
|
|
2342
|
+
if (command === "dedupe") {
|
|
2343
|
+
await dedupeCommand(args);
|
|
2344
|
+
return;
|
|
2345
|
+
}
|
|
2346
|
+
if (command === "reassign") {
|
|
2347
|
+
await reassignCommand(args);
|
|
2348
|
+
return;
|
|
2349
|
+
}
|
|
2350
|
+
if (command === "fix") {
|
|
2351
|
+
await fixCommand(args);
|
|
2352
|
+
return;
|
|
2353
|
+
}
|
|
2127
2354
|
if (command === "market") {
|
|
2128
2355
|
await marketCommand(args);
|
|
2129
2356
|
return;
|
package/src/dedupe.ts
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Governed duplicate cleanup: `dedupe` groups records by a normalized
|
|
3
|
+
* identity key (account domain, contact email, or name) and builds a
|
|
4
|
+
* dry-run PatchPlan of merge_records operations — one per duplicate group,
|
|
5
|
+
* with a DETERMINISTIC survivor. It NEVER writes — the plan flows through
|
|
6
|
+
* the same plans-approve → apply gate as every other plan.
|
|
7
|
+
*
|
|
8
|
+
* The merge contract matches the connectors (see mergeRecords in
|
|
9
|
+
* connectors/hubspot.ts): afterValue = the survivor id, beforeValue = the
|
|
10
|
+
* ids of EVERY record in the group (survivor included). Merges are
|
|
11
|
+
* IRREVERSIBLE on every provider that supports them, so every operation is
|
|
12
|
+
* riskLevel high and approvalRequired.
|
|
13
|
+
*
|
|
14
|
+
* Survivor selection ("--keep"):
|
|
15
|
+
* richest (default) the record with the most non-empty canonical data
|
|
16
|
+
* fields (bookkeeping fields like id/crmId/identities
|
|
17
|
+
* don't count); ties break to the lowest numeric id
|
|
18
|
+
* oldest the lowest numeric id (CRMs assign ids in creation
|
|
19
|
+
* order)
|
|
20
|
+
*/
|
|
21
|
+
import { normalizeDomain } from "./merge.ts";
|
|
22
|
+
import { stableHash } from "./rules.ts";
|
|
23
|
+
import type {
|
|
24
|
+
CanonicalGtmSnapshot,
|
|
25
|
+
GtmObjectType,
|
|
26
|
+
PatchOperation,
|
|
27
|
+
PatchPlan,
|
|
28
|
+
} from "./types.ts";
|
|
29
|
+
|
|
30
|
+
/** Inputs accepted by `buildDedupePlan`. */
export type DedupeOptions = {
  /** which snapshot collection to scan for duplicates */
  objectType: "account" | "contact" | "deal";
  /** identity field whose normalized value defines a duplicate group */
  key: "domain" | "email" | "name";
  /** how the surviving record is picked; "richest" when omitted */
  keep?: "richest" | "oldest";
  /** optional text used as every operation's reason instead of the generated one */
  reason?: string;
  /** upper bound on duplicate groups before plan building refuses (500 when omitted) */
  maxOperations?: number;
};

/** Snapshot collection name for each object type. */
const COLLECTIONS: Record<DedupeOptions["objectType"], "accounts" | "contacts" | "deals"> = {
  account: "accounts",
  contact: "contacts",
  deal: "deals",
};

/** Identity keys permitted for each object type. */
const VALID_KEYS: Record<DedupeOptions["objectType"], DedupeOptions["key"][]> = {
  account: ["domain", "name"],
  contact: ["email", "name"],
  deal: ["name"],
};
|
|
53
|
+
|
|
54
|
+
/**
 * Field names that never count toward a record's "richness". They are
 * bookkeeping rather than record data, so they must not influence which
 * record survives a merge.
 */
const NON_DATA_FIELDS = new Set(["id", "provider", "crmId", "identities", "raw", "provenance"]);

/**
 * Count the data fields of `record` that hold a value.
 *
 * A field counts when its name is not in NON_DATA_FIELDS and its value is
 * none of `undefined`, `null` or the empty string. Values such as `0`,
 * `false`, empty arrays and empty objects DO count.
 */
function populatedDataFields(record: Record<string, unknown>): number {
  let populated = 0;
  for (const [field, value] of Object.entries(record)) {
    if (NON_DATA_FIELDS.has(field)) continue;
    if (value === undefined || value === null || value === "") continue;
    populated += 1;
  }
  return populated;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Ordering predicate for record ids: true when `a` sorts before `b`.
 *
 * When both ids convert to finite numbers that differ, they are compared
 * numerically (so "9" comes before "10"). In every other case — a
 * non-numeric id, or two ids with the same numeric value — plain string
 * comparison decides.
 */
function idBefore(a: string, b: string): boolean {
  const left = Number(a);
  const right = Number(b);
  const comparableAsNumbers =
    Number.isFinite(left) && Number.isFinite(right) && left !== right;
  return comparableAsNumbers ? left < right : a < b;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Compute the grouping key for `record` under the chosen identity field.
 *
 * - "domain": delegates to `normalizeDomain` on `record.domain`.
 * - "email" / "name": the field value stringified, trimmed and lower-cased.
 *
 * @returns the normalized key, or `undefined` when the email/name field is
 *   missing, null, or blank after trimming.
 */
export function dedupeKey(
  record: Record<string, unknown>,
  key: DedupeOptions["key"],
): string | undefined {
  if (key === "domain") {
    return normalizeDomain(record.domain as string | undefined);
  }
  const value = record[key];
  if (value == null) {
    return undefined;
  }
  const folded = String(value).trim().toLowerCase();
  return folded === "" ? undefined : folded;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build a dry-run PatchPlan with one `merge_records` operation per group of
 * records that share the same normalized identity key.
 *
 * Records whose key normalizes to nothing are ignored, and only groups of two
 * or more members produce an operation. For each group, `beforeValue` holds
 * every member id (sorted with `idBefore`) and `afterValue` / `objectId` hold
 * the survivor id. Every operation is `riskLevel: "high"` with
 * `approvalRequired: true`.
 *
 * Survivor choice: with keep = "richest" (the default) the member with the
 * most populated data fields wins, ties going to the id that sorts first;
 * with keep = "oldest" the id that sorts first wins outright.
 *
 * @throws Error when the key is not valid for the object type, when `keep`
 *   is neither "richest" nor "oldest", or when the number of duplicate
 *   groups exceeds `maxOperations` (default 500).
 */
export function buildDedupePlan(
  snapshot: CanonicalGtmSnapshot,
  options: DedupeOptions,
): PatchPlan {
  const { objectType, key: identityKey } = options;
  const keep = options.keep ?? "richest";
  const maxOperations = options.maxOperations ?? 500;
  const collection = COLLECTIONS[objectType];
  const allowedKeys = VALID_KEYS[objectType];

  if (!allowedKeys.includes(identityKey)) {
    throw new Error(
      `Cannot dedupe ${collection} by "${identityKey}". Valid keys for ${objectType}s: ${allowedKeys.join(", ")}.`,
    );
  }
  if (keep !== "richest" && keep !== "oldest") {
    throw new Error(`--keep must be richest or oldest, got "${keep}".`);
  }

  // Bucket records by normalized key, in first-seen order.
  const records = snapshot[collection] as Array<Record<string, unknown>>;
  const buckets = new Map<string, Array<Record<string, unknown>>>();
  for (const record of records) {
    const normalized = dedupeKey(record, identityKey);
    if (!normalized) continue; // no identity key → cannot be a duplicate by it
    const bucket = buckets.get(normalized);
    if (bucket) {
      bucket.push(record);
    } else {
      buckets.set(normalized, [record]);
    }
  }
  // Only buckets with at least two members are duplicate groups.
  const duplicateGroups = Array.from(buckets).filter(([, members]) => members.length >= 2);

  if (duplicateGroups.length > maxOperations) {
    throw new Error(
      `Found ${duplicateGroups.length} duplicate groups — above the ${maxOperations}-group safety cap. Raise --max-operations explicitly after reviewing the volume.`,
    );
  }

  const byId = (a: string, b: string): number => (idBefore(a, b) ? -1 : 1);
  // Richest first when requested (ties fall through to id order); otherwise
  // id order alone decides.
  const bySurvivorPriority = (
    a: Record<string, unknown>,
    b: Record<string, unknown>,
  ): number => {
    if (keep === "richest") {
      const richness = populatedDataFields(b) - populatedDataFields(a);
      if (richness !== 0) return richness;
    }
    return byId(String(a.id), String(b.id));
  };

  const operations: PatchOperation[] = duplicateGroups.map(([groupKey, members]) => {
    const survivor = [...members].sort(bySurvivorPriority)[0];
    const groupIds = members.map((member) => String(member.id)).sort(byId);

    // Human-readable label for the survivor: name, else email, else id.
    let survivorName = String(survivor.id);
    if (typeof survivor.name === "string" && survivor.name) {
      survivorName = survivor.name;
    } else if (typeof survivor.email === "string" && survivor.email) {
      survivorName = survivor.email;
    }

    const keepDetail =
      keep === "richest"
        ? `${populatedDataFields(survivor)} populated data fields, the most in the group (ties break to the lowest id)`
        : "the lowest id in the group (oldest record)";

    return {
      id: `op_${stableHash(`dedupe:${objectType}:${identityKey}:${groupIds.join(",")}`)}`,
      objectType: objectType as GtmObjectType,
      objectId: String(survivor.id),
      operation: "merge_records",
      field: "merge",
      beforeValue: groupIds,
      afterValue: String(survivor.id),
      reason:
        options.reason ??
        `${members.length} ${collection} share ${identityKey} "${groupKey}". Merge into "${survivorName}" (${survivor.id}) — survivor has ${keepDetail}.`,
      riskLevel: "high",
      approvalRequired: true,
      sourceRuleOrPolicy: "dedupe",
      groupId: `grp_${objectType}_${String(survivor.id)}`,
      rollback:
        "IRREVERSIBLE: provider merges cannot be unmerged. The pre-apply snapshot retains every record's field values; recreate a record manually from it if a merge was wrong.",
    };
  });

  const duplicateRecordCount = duplicateGroups.reduce(
    (total, [, members]) => total + members.length,
    0,
  );

  return {
    id: `patch_plan_${stableHash(
      `dedupe:${snapshot.provider}:${snapshot.generatedAt}:${objectType}:${identityKey}:${keep}:${operations.length}`,
    )}`,
    title: `Dedupe: ${collection} sharing the same ${identityKey}`,
    createdAt: snapshot.generatedAt,
    status: operations.length > 0 ? "needs_approval" : "draft",
    dryRun: true,
    summary: `${duplicateGroups.length} duplicate group(s) across ${duplicateRecordCount} ${collection} (key: ${identityKey}, keep: ${keep}); ${operations.length} proposed dry-run merge_records operation(s). Merges are IRREVERSIBLE — review each survivor before approving.`,
    findings: [],
    operations,
  };
}
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
export { auditSnapshot, defaultPolicy } from "./audit.ts";
|
|
2
|
-
export { buildBulkUpdatePlan, parseWhere, type BulkUpdateOptions } from "./bulkUpdate.ts";
|
|
2
|
+
export { buildBulkUpdatePlan, isFilterableField, parseWhere, type BulkUpdateOptions } from "./bulkUpdate.ts";
|
|
3
|
+
export { buildDedupePlan, dedupeKey, type DedupeOptions } from "./dedupe.ts";
|
|
4
|
+
export { buildReassignPlans, type ReassignObjectType, type ReassignOptions } from "./reassign.ts";
|
|
3
5
|
export {
|
|
4
6
|
CONFIG_FILE_NAME,
|
|
5
7
|
loadConfig,
|