fullstackgtm 0.21.1 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/bulkUpdate.ts CHANGED
@@ -27,6 +27,7 @@
27
27
  * `account.ownerId`, `account.contactCount`; accounts get `contactCount`
28
28
  * and `openDealCount`.
29
29
  */
30
+ import { normalizeDomain } from "./merge.ts";
30
31
  import { stableHash } from "./rules.ts";
31
32
  import type {
32
33
  CanonicalGtmSnapshot,
@@ -40,10 +41,23 @@ export type BulkUpdateOptions = {
40
41
  objectType: "account" | "contact" | "deal";
41
42
  /** raw --where expressions, AND-ed together; at least one is required */
42
43
  where: string[];
43
- /** canonical field → new value; one action only */
44
+ /**
45
+ * canonical field → new value; one action only. A value of the form
46
+ * `from:<sourceField>` is resolved PER RECORD from the filter view at
47
+ * plan time (relational pseudo-fields like account.ownerId included);
48
+ * records whose source value is empty are skipped, not failed, and
49
+ * counted in the plan summary.
50
+ */
44
51
  set?: Record<string, string>;
45
52
  /** propose archive_record instead of field writes */
46
53
  archive?: boolean;
54
+ /**
55
+ * bypass the archive duplicate guard: by default --archive refuses when a
56
+ * matched account/contact shares its identity key (normalized domain /
57
+ * lowercased email) with another record — those are duplicates, and
58
+ * archiving a duplicate discards its data where merging preserves it
59
+ */
60
+ forceArchiveDuplicates?: boolean;
47
61
  /** propose create_task on each matched record with this subject/body text */
48
62
  createTask?: string;
49
63
  /** explicit preconditions (field=value), re-verified at apply time */
@@ -131,6 +145,14 @@ const VALID_FIELDS: Record<BulkUpdateOptions["objectType"], Set<string>> = {
131
145
  deal: new Set(["id", "crmId", "accountId", "ownerId", "name", "amount", "currency", "stage", "closeDate", "dealType", "forecastCategory", "nextStep", "probability", "isClosed", "isWon", "lastActivityAt", "lastSyncAt", ...RELATIONAL_FIELDS]),
132
146
  };
133
147
 
148
/**
 * Reports whether `field` may appear in a filter for `objectType`.
 *
 * Membership is checked against this module's VALID_FIELDS table, so the
 * relational pseudo-fields registered there are accepted as well.
 *
 * @param objectType - object type whose field set is consulted
 * @param field - field name to look up
 * @returns true when the field is in the object type's valid-field set
 */
export function isFilterableField(
  objectType: BulkUpdateOptions["objectType"],
  field: string,
): boolean {
  const filterableFields = VALID_FIELDS[objectType];
  return filterableFields.has(field);
}
155
+
134
156
  function assertValidFields(objectType: BulkUpdateOptions["objectType"], clauses: WhereClause[], context: string): void {
135
157
  for (const clause of clauses) {
136
158
  if (!VALID_FIELDS[objectType].has(clause.field)) {
@@ -251,10 +273,25 @@ export function buildBulkUpdatePlan(
251
273
  const clauses = options.where.map(parseWhere);
252
274
  assertValidFields(options.objectType, clauses, "--where");
253
275
  const WRITABLE_BLOCKLIST = new Set(["id", "crmId", "contactCount", "openDealCount", "openDealStages"]);
254
- for (const field of Object.keys(options.set ?? {})) {
276
+ // `from:<sourceField>` values resolve per record from the filter view —
277
+ // the source is validated with the same strictness as filters (relational
278
+ // pseudo-fields allowed; the WRITTEN field still must be canonical).
279
+ const assignments: Array<{ field: string; literal?: string; fromField?: string }> = [];
280
+ for (const [field, value] of Object.entries(options.set ?? {})) {
255
281
  if (!VALID_FIELDS[options.objectType].has(field) || WRITABLE_BLOCKLIST.has(field) || field.includes(".")) {
256
282
  throw new Error(`Cannot --set "${field}" on ${options.objectType}s — not a writable canonical field.`);
257
283
  }
284
+ if (value.startsWith("from:")) {
285
+ const fromField = value.slice("from:".length);
286
+ if (!VALID_FIELDS[options.objectType].has(fromField)) {
287
+ throw new Error(
288
+ `Cannot --set ${field}=from:${fromField} on ${options.objectType}s — unknown source field "${fromField}". Valid fields: ${[...VALID_FIELDS[options.objectType]].join(", ")}.`,
289
+ );
290
+ }
291
+ assignments.push({ field, fromField });
292
+ } else {
293
+ assignments.push({ field, literal: value });
294
+ }
258
295
  }
259
296
  const views = buildViews(snapshot, options.objectType);
260
297
  const matched = views.filter(({ view }) => clauses.every((c) => matches(view, c)));
@@ -264,6 +301,45 @@ export function buildBulkUpdatePlan(
264
301
  );
265
302
  }
266
303
 
304
+ // Archive duplicate guard: archiving a record that shares its identity key
305
+ // with another active record discards data a merge would preserve. Refuse
306
+ // and point at `dedupe` unless explicitly overridden. Deals are exempt —
307
+ // they carry no identity key.
308
+ if (options.archive && options.objectType !== "deal" && !options.forceArchiveDuplicates) {
309
+ const keyName = options.objectType === "account" ? "domain" : "email";
310
+ const keyOf = (record: Record<string, unknown>): string | undefined =>
311
+ options.objectType === "account"
312
+ ? normalizeDomain(record.domain as string | undefined)
313
+ : ((record.email as string | undefined)?.trim().toLowerCase() || undefined);
314
+ const allRecords = snapshot[COLLECTIONS[options.objectType]] as Array<Record<string, unknown>>;
315
+ const byKey = new Map<string, Array<Record<string, unknown>>>();
316
+ for (const record of allRecords) {
317
+ const key = keyOf(record);
318
+ if (!key) continue;
319
+ const existing = byKey.get(key) ?? [];
320
+ existing.push(record);
321
+ byKey.set(key, existing);
322
+ }
323
+ const collisions: string[] = [];
324
+ for (const { record } of matched) {
325
+ const key = keyOf(record);
326
+ if (!key) continue;
327
+ const others = (byKey.get(key) ?? []).filter((other) => other.id !== record.id);
328
+ if (others.length === 0) continue;
329
+ const label = (record.name as string | undefined) ?? (record.email as string | undefined) ?? "";
330
+ collisions.push(
331
+ `${options.objectType} ${record.id}${label ? ` "${label}"` : ""} shares ${keyName} "${key}" with ${others
332
+ .map((other) => `${other.id}${other.name ? ` "${other.name}"` : ""}`)
333
+ .join(", ")}`,
334
+ );
335
+ }
336
+ if (collisions.length > 0) {
337
+ throw new Error(
338
+ `Refusing to archive: ${collisions.length} matched record(s) look like duplicates of other records — archiving a duplicate DISCARDS its data, merging preserves it. Use \`fullstackgtm dedupe ${options.objectType} --key ${keyName}\` (merge_records) instead, or pass --force-archive-duplicates to archive anyway.\n - ${collisions.join("\n - ")}`,
339
+ );
340
+ }
341
+ }
342
+
267
343
  // Preconditions: explicit --require, plus every equality filter on a real
268
344
  // (re-readable, non-relational) field. The premise the plan was built on
269
345
  // is re-verified per record at apply time.
@@ -295,7 +371,9 @@ export function buildBulkUpdatePlan(
295
371
  const reason = options.reason ?? `bulk-update: ${action} where ${whereText}`;
296
372
 
297
373
  const operations: PatchOperation[] = [];
298
- for (const { record } of matched) {
374
+ // records skipped because a from:<sourceField> value was empty, per source
375
+ const skippedBySource = new Map<string, number>();
376
+ for (const { record, view } of matched) {
299
377
  const objectId = String(record.id);
300
378
  const groupId = `grp_${options.objectType}_${objectId}`;
301
379
  const preconditions = preconditionSpecs.map((p) => ({
@@ -338,7 +416,29 @@ export function buildBulkUpdatePlan(
338
416
  });
339
417
  continue;
340
418
  }
341
- for (const [field, value] of Object.entries(options.set!)) {
419
+ // Resolve every assignment for this record BEFORE emitting any of its
420
+ // operations: a record whose from:<sourceField> resolves empty is
421
+ // skipped whole (its operations share a groupId — half a record's
422
+ // updates is exactly what grouping exists to prevent).
423
+ const resolved: Array<{ field: string; value: string }> = [];
424
+ let emptySource: string | null = null;
425
+ for (const assignment of assignments) {
426
+ if (assignment.fromField !== undefined) {
427
+ const value = fieldValue(view, assignment.fromField);
428
+ if (value === "") {
429
+ emptySource = assignment.fromField;
430
+ break;
431
+ }
432
+ resolved.push({ field: assignment.field, value });
433
+ } else {
434
+ resolved.push({ field: assignment.field, value: assignment.literal! });
435
+ }
436
+ }
437
+ if (emptySource !== null) {
438
+ skippedBySource.set(emptySource, (skippedBySource.get(emptySource) ?? 0) + 1);
439
+ continue;
440
+ }
441
+ for (const { field, value } of resolved) {
342
442
  operations.push({
343
443
  ...shared,
344
444
  id: `op_${stableHash(`bulk-set:${options.objectType}:${objectId}:${field}:${value}`)}`,
@@ -360,13 +460,16 @@ export function buildBulkUpdatePlan(
360
460
  if (failure) throw new Error(`${failure} The guard already fails against the current snapshot — the plan would never apply.`);
361
461
  }
362
462
 
463
+ const skippedText = [...skippedBySource.entries()]
464
+ .map(([sourceField, count]) => ` ${count} skipped: empty ${sourceField}.`)
465
+ .join("");
363
466
  return {
364
- id: `patch_plan_${stableHash(`bulk:${snapshot.provider}:${snapshot.generatedAt}:${whereText}:${action}:${operations.length}`)}`,
467
+ id: `patch_plan_${stableHash(`bulk:${snapshot.provider}:${snapshot.generatedAt}:${options.objectType}:${whereText}:${action}:${operations.length}`)}`,
365
468
  title: `Bulk update: ${options.objectType}s where ${whereText}`,
366
469
  createdAt: snapshot.generatedAt,
367
470
  status: operations.length > 0 ? "needs_approval" : "draft",
368
471
  dryRun: true,
369
- summary: `${matched.length} ${COLLECTIONS[options.objectType]} matched (${whereText}); ${operations.length} proposed dry-run operations (${action}).${guards.length > 0 ? ` ${guards.length} apply-time guard(s).` : ""}`,
472
+ summary: `${matched.length} ${COLLECTIONS[options.objectType]} matched (${whereText}); ${operations.length} proposed dry-run operations (${action}).${skippedText}${guards.length > 0 ? ` ${guards.length} apply-time guard(s).` : ""}`,
370
473
  findings: [],
371
474
  operations,
372
475
  filter: { objectType: options.objectType, where: options.where },
package/src/cli.ts CHANGED
@@ -75,6 +75,8 @@ import {
75
75
  } from "./llm.ts";
76
76
  import { resolveRecord, type ResolveCandidate } from "./resolve.ts";
77
77
  import { buildBulkUpdatePlan } from "./bulkUpdate.ts";
78
+ import { buildDedupePlan, type DedupeOptions } from "./dedupe.ts";
79
+ import { buildReassignPlans, type ReassignObjectType } from "./reassign.ts";
78
80
  import { suggestValues, type ValueSuggestion } from "./suggest.ts";
79
81
  import type { FieldMappings } from "./mappings.ts";
80
82
  import type {
@@ -135,7 +137,7 @@ Usage:
135
137
  against the stored capture it cites before it's accepted — then
136
138
  compute deterministic front states and drift, render the field
137
139
  report. refresh = capture → classify → drift → report in one step
138
- fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where …] (--set <field>=<value> [--set …] | --archive | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
140
+ fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where …] (--set <field>=<value> [--set …] | --archive [--force-archive-duplicates] | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
139
141
  governed generic writes: filter the snapshot
140
142
  (field=value, field!=value, field~substr, field!~substr,
141
143
  field:empty, field:notempty, '|' = any-of; canonical fields
@@ -147,6 +149,35 @@ Usage:
147
149
  apply time (incl. mid-apply rechecks); equality filters
148
150
  double as preconditions; per-record ops apply
149
151
  all-or-nothing; guards assert cross-record conditions.
152
+ --set <field>=from:<sourceField> derives the value PER
153
+ RECORD from the snapshot (relational sources like
154
+ account.ownerId included); records whose source is empty
155
+ are skipped and counted, never guessed. --archive refuses
156
+ records that share their identity key (account domain /
157
+ contact email) with another record — merge those with
158
+ \`dedupe\` instead, or --force-archive-duplicates.
159
+ fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--json] [--out <path>]
160
+ find duplicate groups by normalized identity key and build
161
+ a dry-run plan of merge_records operations — one per group,
162
+ deterministic survivor (richest = most populated data
163
+ fields, ties to lowest id; oldest = lowest id). Approve and
164
+ apply like any plan; merges are IRREVERSIBLE on apply.
165
+ fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--save] [--json] [--out <path>]
166
+ ownership handoff playbook: one bulk-update-style plan per
167
+ object type (ownerId=<from> → <to>). Extra --where scoping
168
+ is account-lifted for deals/contacts (domain~.de becomes
169
+ account.domain~.de); --except-deal-stage <stage> excludes
170
+ deals in that stage AND every record whose account has an
171
+ open deal in it, re-verified per record at apply time.
172
+ Deal plans cover open deals only unless
173
+ --include-closed-deals.
174
+ fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]
175
+ one-shot composite: audit ONE rule → save the plan →
176
+ suggest values → approve only suggestion-backed operations
177
+ meeting the confidence bar (plus operations that need no
178
+ value) → with --yes, apply through the provider and print a
179
+ stage-by-stage summary. Without --yes it stops after
180
+ approval and prints the apply command.
150
181
  fullstackgtm suggest --plan-id <id> | --plan <path> [source options] [--json] [--out <path>]
151
182
  derive values for requires_human_* placeholders
152
183
  from snapshot evidence, with confidence + reasons
@@ -1257,29 +1288,213 @@ async function bulkUpdateCommand(args: string[]) {
1257
1288
  where,
1258
1289
  set: Object.keys(set).length > 0 ? set : undefined,
1259
1290
  archive: rest.includes("--archive"),
1291
+ forceArchiveDuplicates: rest.includes("--force-archive-duplicates"),
1260
1292
  createTask: option(rest, "--create-task") ?? undefined,
1261
1293
  require: repeatedOption(rest, "--require"),
1262
1294
  guard: repeatedOption(rest, "--guard"),
1263
1295
  reason: option(rest, "--reason") ?? undefined,
1264
1296
  maxOperations: numericOption(rest, "--max-operations"),
1265
1297
  });
1266
- const out = option(rest, "--out");
1298
+ await emitPlan(plan, rest);
1299
+ }
1300
+
1301
/**
 * Common output tail shared by the plan-building commands.
 *
 * In order: writes the plan as pretty-printed JSON to the `--out` path
 * (resolved against the current working directory) when one is given; stores
 * the plan and prints the review/approve/apply hint on stderr when `--save`
 * is present; finally prints the plan on stdout — JSON with `--json`,
 * markdown otherwise.
 */
async function emitPlan(plan: PatchPlan, args: string[]) {
  const outPath = option(args, "--out");
  if (outPath) {
    const target = resolve(process.cwd(), outPath);
    const serialized = `${JSON.stringify(plan, null, 2)}\n`;
    writeFileSync(target, serialized);
  }

  const shouldSave = args.includes("--save");
  if (shouldSave) {
    const planStore = createFilePlanStore();
    await planStore.save(plan);
    // the hint goes to stderr so stdout carries only the rendered plan
    console.error(
      `Saved plan ${plan.id} (${plan.operations.length} operations). Review with \`fullstackgtm plans show ${plan.id}\`, approve with \`fullstackgtm plans approve ${plan.id} --operations <ids|all>\`, then \`fullstackgtm apply --plan-id ${plan.id} --provider <name>\`.`,
    );
  }

  const rendered = args.includes("--json")
    ? JSON.stringify(plan, null, 2)
    : patchPlanToMarkdown(plan);
  console.log(rendered);
}
1282
1319
 
1320
/**
 * CLI entry for `dedupe`: validates the object type and `--key`, reads the
 * snapshot, builds a dry-run plan of merge_records operations (one per
 * duplicate group) and hands it to the shared plan output. Nothing is written
 * to a provider here — the plan is approved and applied like any other plan.
 */
async function dedupeCommand(args: string[]) {
  const objectType = args[0];
  const rest = args.slice(1);

  const knownObjectTypes = ["account", "contact", "deal"];
  if (!objectType || !knownObjectTypes.includes(objectType)) {
    throw new Error(
      "Usage: fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }

  const knownKeys = ["domain", "email", "name"];
  const key = option(rest, "--key");
  if (!key || !knownKeys.includes(key)) {
    throw new Error("dedupe requires --key <domain|email|name> (the identity field duplicates share).");
  }

  // --keep is passed through as given; buildDedupePlan rejects unknown values
  const keep = option(rest, "--keep") ?? undefined;
  const snapshot = await readSnapshot(rest);
  const dedupeOptions: DedupeOptions = {
    objectType: objectType as DedupeOptions["objectType"],
    key: key as DedupeOptions["key"],
    keep: keep as DedupeOptions["keep"],
    reason: option(rest, "--reason") ?? undefined,
    maxOperations: numericOption(rest, "--max-operations"),
  };
  const plan = buildDedupePlan(snapshot, dedupeOptions);
  await emitPlan(plan, rest);
}
1347
+
1348
/**
 * Ownership handoff playbook: compile one bulk-update-style plan per object
 * type. Each plan carries its full filter, so eligibility (including the
 * --except-deal-stage exclusion) is re-verified per record at apply time.
 *
 * Output handling:
 *  - `--out <path>` writes the array of plans as pretty-printed JSON.
 *  - `--save` stores every plan. This happens BEFORE the output format is
 *    chosen, so `--save --json` really saves (previously the `--json` branch
 *    returned early and the plans were silently never stored).
 *  - `--json` prints the array of plans on stdout; when combined with
 *    `--save` the saved hint goes to stderr so stdout stays parseable JSON.
 *  - otherwise a one-line listing per plan is printed, followed by either the
 *    saved hint or the dry-run reminder.
 *
 * @throws Error with the usage text when `--from` or `--to` is missing.
 */
async function reassignCommand(args: string[]) {
  const from = option(args, "--from");
  const to = option(args, "--to");
  if (!from || !to) {
    throw new Error(
      "Usage: fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }
  // NOTE(review): --objects entries are passed through unvalidated; presumably
  // buildReassignPlans rejects unknown object types — confirm.
  const objects = option(args, "--objects")
    ?.split(",")
    .map((value) => value.trim())
    .filter(Boolean) as ReassignObjectType[] | undefined;
  const snapshot = await readSnapshot(args);
  const plans = buildReassignPlans(snapshot, {
    fromOwnerId: from,
    toOwnerId: to,
    objects,
    where: repeatedOption(args, "--where"),
    exceptDealStage: option(args, "--except-deal-stage") ?? undefined,
    includeClosedDeals: args.includes("--include-closed-deals"),
    reason: option(args, "--reason") ?? undefined,
    maxOperations: numericOption(args, "--max-operations"),
  });
  const out = option(args, "--out");
  if (out) {
    writeFileSync(resolve(process.cwd(), out), `${JSON.stringify(plans, null, 2)}\n`);
  }

  // Persist first: saving must not depend on which output format was requested.
  const store = args.includes("--save") ? createFilePlanStore() : null;
  if (store) {
    for (const plan of plans) await store.save(plan);
  }
  const savedHint = `Saved ${plans.length} plan(s). For each: \`fullstackgtm plans show <id>\`, \`fullstackgtm plans approve <id> --operations <ids|all>\`, then \`fullstackgtm apply --plan-id <id> --provider <name>\`.`;

  if (args.includes("--json")) {
    console.log(JSON.stringify(plans, null, 2));
    // stderr keeps stdout machine-readable, matching the single-plan commands
    if (store) console.error(savedHint);
    return;
  }
  for (const plan of plans) {
    console.log(`${plan.id} ${String(plan.operations.length).padStart(3)} operation(s) ${plan.title}`);
    console.log(` ${plan.summary}`);
  }
  if (store) {
    console.log(`\n${savedHint}`);
  } else {
    console.log("\nDry run only — re-run with --save to store the plans for approval.");
  }
}
1398
+
1399
/**
 * One-shot composite for a single audit rule. Stages, in order:
 *
 *  1. audit the snapshot with only the requested rule;
 *  2. save the resulting plan (returns early when it has no operations);
 *  3. ask for suggested values and keep those with a non-empty value whose
 *     confidence meets `--min-confidence` (default "high"), plus
 *     "create"-confidence suggestions when `--include-creates` is given;
 *  4. approve the operations that received a kept value, together with every
 *     operation that has no suggestion entry at all;
 *  5. apply through the provider connector — only when `--yes` is passed;
 *     otherwise stop after approval and print the apply command.
 *
 * A stage-by-stage summary is printed at whichever point the command stops.
 * The process exit code is set to 1 when the apply run reports "failed".
 *
 * @throws Error on missing `--rule`/`--provider`, an invalid
 *   `--min-confidence`, or a rule id that is not among the configured rules.
 */
async function fixCommand(args: string[]) {
  const ruleId = option(args, "--rule");
  const provider = option(args, "--provider");
  if (!ruleId || !provider) {
    throw new Error(
      "Usage: fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]",
    );
  }
  const minConfidence = option(args, "--min-confidence") ?? "high";
  if (minConfidence !== "high" && minConfidence !== "low") {
    throw new Error("--min-confidence must be high or low");
  }
  const includeCreates = args.includes("--include-creates");

  // Resolve the single rule and the effective policy.
  const loaded = loadConfig(option(args, "--config") ?? undefined);
  const configured = await resolveConfiguredRules(loaded);
  const rule = configured.find((candidate) => candidate.id === ruleId);
  if (!rule) {
    const available = configured.map((r) => r.id).join(", ");
    throw new Error(`Unknown rule: ${ruleId}. Available rules: ${available}`);
  }
  const policy = mergePolicy(defaultPolicy(), loaded?.config);
  const today = option(args, "--today");
  if (today) policy.today = today;

  // Audit, then persist the plan before anything is approved.
  const snapshot = await readSnapshot(args);
  const plan = auditSnapshot(snapshot, policy, [rule]);
  const proposedCount = plan.operations.length;
  if (proposedCount === 0) {
    console.log(`fix ${ruleId}: audit proposed 0 operations — nothing to fix.`);
    return;
  }
  const store = createFilePlanStore();
  await store.save(plan);

  // Keep only suggestions that carry a value and clear the confidence bar.
  const suggestions = suggestValues(plan, snapshot);
  const accepted = new Set(minConfidence === "low" ? ["high", "low"] : ["high"]);
  const meetsBar = (confidence: string): boolean =>
    accepted.has(confidence) || (includeCreates && confidence === "create");
  const overrides: Record<string, string> = {};
  let belowBar = 0;
  for (const suggestion of suggestions) {
    const value = suggestion.suggestedValue;
    if (!value || !meetsBar(suggestion.confidence)) {
      belowBar += 1;
      continue;
    }
    overrides[suggestion.operationId] = value;
  }

  // Approved = operations with a kept value, plus operations that have no
  // suggestion entry (they already carry a concrete value — nothing to guess).
  // Operations whose suggestion fell below the bar stay unapproved.
  const placeholderIds = new Set(suggestions.map((suggestion) => suggestion.operationId));
  const approvedIds = plan.operations
    .filter((operation) => overrides[operation.id] !== undefined || !placeholderIds.has(operation.id))
    .map((operation) => operation.id);

  const createsNote = includeCreates ? " (creates included)" : "";
  const belowBarNote = belowBar > 0 ? `; ${belowBar} below the bar (left unapproved)` : "";
  const lines = [
    `fix ${ruleId} via ${provider}:`,
    ` proposed: ${proposedCount} operation(s) — plan ${plan.id} (saved)`,
    ` suggested: ${Object.keys(overrides).length} value(s) at ${minConfidence}+ confidence${createsNote}${belowBarNote}`,
    ` approved: ${approvedIds.length} of ${proposedCount}`,
  ];
  if (approvedIds.length === 0) {
    lines.push(" applied: 0 — no operation met the confidence bar");
    console.log(lines.join("\n"));
    console.log(
      `\nWiden with --min-confidence low / --include-creates, or approve manually: \`fullstackgtm plans approve ${plan.id} --operations <ids> --value <opId>=<value>\`.`,
    );
    return;
  }
  await store.approveOperations(plan.id, approvedIds, overrides);

  const confirmed = args.includes("--yes");
  if (!confirmed) {
    lines.push(" applied: 0 (stopped before apply — pass --yes to write)");
    console.log(lines.join("\n"));
    console.log(`\nApply with:\n fullstackgtm apply --plan-id ${plan.id} --provider ${provider}`);
    return;
  }

  // Apply through the provider and record the run against the stored plan.
  const connector = await connectorFor(provider, args);
  const run = await applyPatchPlan(connector, plan, {
    approvedOperationIds: approvedIds,
    valueOverrides: overrides,
  });
  await store.recordRun(plan.id, run);

  const tally: Record<string, number> = { applied: 0, conflict: 0, skipped: 0, failed: 0 };
  for (const outcome of run.results) {
    tally[outcome.status] = (tally[outcome.status] ?? 0) + 1;
  }
  lines.push(
    ` applied: ${tally.applied} · conflicts: ${tally.conflict} · skipped: ${tally.skipped} · failed: ${tally.failed}`,
  );
  console.log(lines.join("\n"));
  if (run.status === "failed") process.exitCode = 1;
}
1497
+
1283
1498
  async function suggest(args: string[]) {
1284
1499
  const planId = option(args, "--plan-id");
1285
1500
  const planPath = option(args, "--plan");
@@ -2124,6 +2339,18 @@ export async function runCli(argv: string[]) {
2124
2339
  await bulkUpdateCommand(args);
2125
2340
  return;
2126
2341
  }
2342
+ if (command === "dedupe") {
2343
+ await dedupeCommand(args);
2344
+ return;
2345
+ }
2346
+ if (command === "reassign") {
2347
+ await reassignCommand(args);
2348
+ return;
2349
+ }
2350
+ if (command === "fix") {
2351
+ await fixCommand(args);
2352
+ return;
2353
+ }
2127
2354
  if (command === "market") {
2128
2355
  await marketCommand(args);
2129
2356
  return;
package/src/dedupe.ts ADDED
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Governed duplicate cleanup: `dedupe` groups records by a normalized
3
+ * identity key (account domain, contact email, or name) and builds a
4
+ * dry-run PatchPlan of merge_records operations — one per duplicate group,
5
+ * with a DETERMINISTIC survivor. It NEVER writes — the plan flows through
6
+ * the same plans-approve → apply gate as every other plan.
7
+ *
8
+ * The merge contract matches the connectors (see mergeRecords in
9
+ * connectors/hubspot.ts): afterValue = the survivor id, beforeValue = the
10
+ * ids of EVERY record in the group (survivor included). Merges are
11
+ * IRREVERSIBLE on every provider that supports them, so every operation is
12
+ * riskLevel high and approvalRequired.
13
+ *
14
+ * Survivor selection ("--keep"):
15
+ * richest (default) the record with the most non-empty canonical data
16
+ * fields (bookkeeping fields like id/crmId/identities
17
+ * don't count); ties break to the lowest numeric id
18
+ * oldest the lowest numeric id (CRMs assign ids in creation
19
+ * order)
20
+ */
21
+ import { normalizeDomain } from "./merge.ts";
22
+ import { stableHash } from "./rules.ts";
23
+ import type {
24
+ CanonicalGtmSnapshot,
25
+ GtmObjectType,
26
+ PatchOperation,
27
+ PatchPlan,
28
+ } from "./types.ts";
29
+
30
/** Options accepted by buildDedupePlan. */
+ export type DedupeOptions = {
31
/** which snapshot collection to scan for duplicates */
+ objectType: "account" | "contact" | "deal";
32
+ /** identity key records are grouped by (normalized before grouping) */
33
+ key: "domain" | "email" | "name";
34
+ /** survivor selection — deterministic either way (default "richest") */
35
+ keep?: "richest" | "oldest";
36
/** when set, replaces the generated reason text on every operation */
+ reason?: string;
37
+ /** refuse to build plans larger than this (default 500 operations) */
38
+ maxOperations?: number;
39
+ };
40
+
41
/**
 * Snapshot collection name for each object type. Also used as the plural
 * noun in error messages, operation reasons and the plan title/summary.
 */
+ const COLLECTIONS: Record<DedupeOptions["objectType"], "accounts" | "contacts" | "deals"> = {
42
+ account: "accounts",
43
+ contact: "contacts",
44
+ deal: "deals",
45
+ };
46
+
47
+ /** Which identity keys make sense per object type. */
48
+ const VALID_KEYS: Record<DedupeOptions["objectType"], Array<DedupeOptions["key"]>> = {
49
+ account: ["domain", "name"],
50
+ contact: ["email", "name"],
51
+ deal: ["name"],
52
+ };
53
+
54
/**
 * Fields that never count toward a record's richness. The sync machinery
 * fills these in regardless of how good the record's data is, so counting
 * them would let plumbing pick the survivor of a merge.
 */
const NON_DATA_FIELDS = new Set(["id", "provider", "crmId", "identities", "raw", "provenance"]);

/**
 * Counts the record's populated data fields.
 *
 * A field counts when it is not a bookkeeping field and its value is neither
 * `undefined`, `null` nor the empty string. Every other value counts —
 * including `0`, `false` and empty arrays/objects.
 *
 * @param record - record whose own enumerable fields are inspected
 * @returns number of populated non-bookkeeping fields
 */
function populatedDataFields(record: Record<string, unknown>): number {
  let populated = 0;
  for (const [name, value] of Object.entries(record)) {
    if (NON_DATA_FIELDS.has(name)) continue;
    if (value === undefined || value === null || value === "") continue;
    populated += 1;
  }
  return populated;
}
67
+
68
/** Matches ids made of decimal digits only — the only ids compared numerically. */
const DECIMAL_ID = /^\d+$/;

/**
 * True when id `a` sorts before id `b`.
 *
 * The ordering is a strict total order, so "lowest id" is well defined no
 * matter what order the records arrive in:
 *  - two all-digit ids compare by numeric value, exactly (digit strings are
 *    compared by length and then lexicographically, so ids beyond 2^53 lose
 *    no precision); ids with the same value but different zero padding fall
 *    back to plain string order;
 *  - an all-digit id sorts before any id that is not all digits;
 *  - two non-numeric ids compare as plain strings.
 *
 * Only all-digit ids are treated as numbers. `Number()` would also accept
 * "", hex and exponent forms (an alphanumeric id such as "0015e0000000002"
 * parses as 1500), and mixing numeric with string comparison produced cycles
 * such as "9" < "10" < "1a" < "9".
 *
 * @param a - first id
 * @param b - second id
 * @returns true when `a` orders strictly before `b`; false when equal
 */
function idBefore(a: string, b: string): boolean {
  const aNumeric = DECIMAL_ID.test(a);
  const bNumeric = DECIMAL_ID.test(b);
  if (aNumeric && bNumeric) {
    // strip zero padding (keeping a lone "0") so length reflects magnitude
    const aDigits = a.replace(/^0+(?=\d)/, "");
    const bDigits = b.replace(/^0+(?=\d)/, "");
    if (aDigits.length !== bDigits.length) return aDigits.length < bDigits.length;
    if (aDigits !== bDigits) return aDigits < bDigits;
    return a < b;
  }
  // numeric ids form their own leading band, which keeps the order transitive
  if (aNumeric !== bNumeric) return aNumeric;
  return a < b;
}
77
+
78
/**
 * Computes the normalized identity key of a record.
 *
 * For `"domain"` the record's `domain` is passed through `normalizeDomain`.
 * For every other key the field value is stringified, trimmed and
 * lowercased.
 *
 * @param record - record to read the identity field from
 * @param key - which identity field to normalize
 * @returns the normalized key, or `undefined` when the field is missing,
 *   `null`, or empty after trimming (for `"domain"`: whatever
 *   `normalizeDomain` returns)
 */
export function dedupeKey(
  record: Record<string, unknown>,
  key: DedupeOptions["key"],
): string | undefined {
  if (key === "domain") {
    return normalizeDomain(record.domain as string | undefined);
  }
  const fieldValue = record[key];
  if (fieldValue == null) {
    return undefined;
  }
  const cleaned = String(fieldValue).trim().toLowerCase();
  return cleaned === "" ? undefined : cleaned;
}
89
+
90
/**
 * Builds a dry-run plan that merges duplicate records.
 *
 * Records of `options.objectType` are grouped by their normalized identity
 * key; records without a key are ignored and groups of one are dropped. Each
 * remaining group yields one `merge_records` operation whose `afterValue` is
 * the survivor id and whose `beforeValue` lists every id in the group
 * (survivor included), sorted by id.
 *
 * Survivor selection: with keep = "richest" (the default) the record with
 * the most populated data fields wins, ties going to the id that sorts
 * first; with keep = "oldest" the id that sorts first wins outright.
 *
 * @param snapshot - snapshot whose collection is scanned
 * @param options - object type, identity key, survivor policy, optional
 *   reason override and operation cap (default 500)
 * @returns the plan — status "needs_approval" when it holds operations,
 *   "draft" otherwise
 * @throws Error when the key is not valid for the object type, when `keep`
 *   is neither "richest" nor "oldest", or when the number of duplicate groups
 *   exceeds the cap
 */
export function buildDedupePlan(
  snapshot: CanonicalGtmSnapshot,
  options: DedupeOptions,
): PatchPlan {
  const { objectType, key: identityKey } = options;
  const keep = options.keep ?? "richest";
  const maxOperations = options.maxOperations ?? 500;
  const collection = COLLECTIONS[objectType];
  const allowedKeys = VALID_KEYS[objectType];

  if (!allowedKeys.includes(identityKey)) {
    throw new Error(
      `Cannot dedupe ${collection} by "${identityKey}". Valid keys for ${objectType}s: ${allowedKeys.join(", ")}.`,
    );
  }
  if (keep !== "richest" && keep !== "oldest") {
    throw new Error(`--keep must be richest or oldest, got "${keep}".`);
  }

  // Bucket records by normalized key, in snapshot order.
  const records = snapshot[collection] as Array<Record<string, unknown>>;
  const buckets = new Map<string, Array<Record<string, unknown>>>();
  for (const record of records) {
    const normalizedKey = dedupeKey(record, identityKey);
    if (!normalizedKey) continue; // no identity key — cannot be a duplicate by it
    const bucket = buckets.get(normalizedKey);
    if (bucket) {
      bucket.push(record);
    } else {
      buckets.set(normalizedKey, [record]);
    }
  }
  // Only buckets holding two or more records are duplicate groups.
  const duplicateGroups = Array.from(buckets.entries()).filter(([, members]) => members.length >= 2);

  if (duplicateGroups.length > maxOperations) {
    throw new Error(
      `Found ${duplicateGroups.length} duplicate groups — above the ${maxOperations}-group safety cap. Raise --max-operations explicitly after reviewing the volume.`,
    );
  }

  const byId = (a: string, b: string): number => (idBefore(a, b) ? -1 : 1);
  // richest first when requested (ties fall through to id order), else id order
  const bySurvivorPriority = (a: Record<string, unknown>, b: Record<string, unknown>): number => {
    if (keep === "richest") {
      const richnessGap = populatedDataFields(b) - populatedDataFields(a);
      if (richnessGap !== 0) return richnessGap;
    }
    return byId(String(a.id), String(b.id));
  };

  const operations: PatchOperation[] = [];
  let duplicateRecordCount = 0;
  for (const [groupKey, members] of duplicateGroups) {
    duplicateRecordCount += members.length;

    const survivor = members.slice().sort(bySurvivorPriority)[0];
    const survivorId = String(survivor.id);
    const groupIds = members.map((member) => String(member.id)).sort(byId);

    // human-readable label for the survivor: name, else email, else id
    let survivorName = survivorId;
    if (typeof survivor.name === "string" && survivor.name) {
      survivorName = survivor.name;
    } else if (typeof survivor.email === "string" && survivor.email) {
      survivorName = survivor.email;
    }

    let keepDetail = "the lowest id in the group (oldest record)";
    if (keep === "richest") {
      keepDetail = `${populatedDataFields(survivor)} populated data fields, the most in the group (ties break to the lowest id)`;
    }

    const generatedReason = `${members.length} ${collection} share ${identityKey} "${groupKey}". Merge into "${survivorName}" (${survivor.id}) — survivor has ${keepDetail}.`;

    operations.push({
      id: `op_${stableHash(`dedupe:${objectType}:${identityKey}:${groupIds.join(",")}`)}`,
      objectType: objectType as GtmObjectType,
      objectId: survivorId,
      operation: "merge_records",
      field: "merge",
      beforeValue: groupIds,
      afterValue: survivorId,
      reason: options.reason ?? generatedReason,
      riskLevel: "high",
      approvalRequired: true,
      sourceRuleOrPolicy: "dedupe",
      groupId: `grp_${objectType}_${survivorId}`,
      rollback:
        "IRREVERSIBLE: provider merges cannot be unmerged. The pre-apply snapshot retains every record's field values; recreate a record manually from it if a merge was wrong.",
    });
  }

  const planSeed = `dedupe:${snapshot.provider}:${snapshot.generatedAt}:${objectType}:${identityKey}:${keep}:${operations.length}`;
  return {
    id: `patch_plan_${stableHash(planSeed)}`,
    title: `Dedupe: ${collection} sharing the same ${identityKey}`,
    createdAt: snapshot.generatedAt,
    status: operations.length > 0 ? "needs_approval" : "draft",
    dryRun: true,
    summary: `${duplicateGroups.length} duplicate group(s) across ${duplicateRecordCount} ${collection} (key: ${identityKey}, keep: ${keep}); ${operations.length} proposed dry-run merge_records operation(s). Merges are IRREVERSIBLE — review each survivor before approving.`,
    findings: [],
    operations,
  };
}
package/src/index.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  export { auditSnapshot, defaultPolicy } from "./audit.ts";
2
- export { buildBulkUpdatePlan, parseWhere, type BulkUpdateOptions } from "./bulkUpdate.ts";
2
+ export { buildBulkUpdatePlan, isFilterableField, parseWhere, type BulkUpdateOptions } from "./bulkUpdate.ts";
3
+ export { buildDedupePlan, dedupeKey, type DedupeOptions } from "./dedupe.ts";
4
+ export { buildReassignPlans, type ReassignObjectType, type ReassignOptions } from "./reassign.ts";
3
5
  export {
4
6
  CONFIG_FILE_NAME,
5
7
  loadConfig,