fullstackgtm 0.21.2 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.ts CHANGED
@@ -73,8 +73,38 @@ import {
73
73
  type CallScorecard,
74
74
  type LlmProvider,
75
75
  } from "./llm.ts";
76
+ import {
77
+ buildEnrichPlan,
78
+ createFileEnrichRunStore,
79
+ DEFAULT_STALE_DAYS,
80
+ ENRICH_CONFIG_FILE_NAME,
81
+ enrichRunId,
82
+ inferIngestObjectType,
83
+ latestStamps,
84
+ loadEnrichConfig,
85
+ parseCsv,
86
+ resolveCrmField,
87
+ selectStaleWork,
88
+ stagedSourceRecords,
89
+ staleDaysFor,
90
+ type EnrichConfig,
91
+ type EnrichCounts,
92
+ type EnrichObjectType,
93
+ type EnrichRun,
94
+ type EnrichRunStore,
95
+ type EnrichSourceRecord,
96
+ } from "./enrich.ts";
97
+ import {
98
+ apolloPullKeysForAppend,
99
+ apolloPullKeysForRefresh,
100
+ createApolloClient,
101
+ pullApolloRecords,
102
+ type ApolloPullKey,
103
+ } from "./enrichApollo.ts";
76
104
  import { resolveRecord, type ResolveCandidate } from "./resolve.ts";
77
105
  import { buildBulkUpdatePlan } from "./bulkUpdate.ts";
106
+ import { buildDedupePlan, type DedupeOptions } from "./dedupe.ts";
107
+ import { buildReassignPlans, type ReassignObjectType } from "./reassign.ts";
78
108
  import { suggestValues, type ValueSuggestion } from "./suggest.ts";
79
109
  import type { FieldMappings } from "./mappings.ts";
80
110
  import type {
@@ -95,7 +125,8 @@ Usage:
95
125
  fullstackgtm login salesforce --device --client-id <consumer key> [--login-url <url>]
96
126
  fullstackgtm login salesforce --instance-url <url> [--no-validate]
97
127
  fullstackgtm login stripe [--no-validate]
98
- fullstackgtm login anthropic | openai store an LLM API key for call parse/score\n fullstackgtm logout <hubspot|salesforce|stripe|anthropic|openai|broker>
128
+ fullstackgtm login anthropic | openai store an LLM API key for call parse/score
129
+ fullstackgtm login apollo store an Apollo API key for enrich pulls\n fullstackgtm logout <hubspot|salesforce|stripe|anthropic|openai|apollo|broker>
99
130
 
100
131
  Secrets (tokens, client secrets) are NEVER passed as flags — they leak via
101
132
  the process list and shell history. Pipe them on stdin or enter them at the
@@ -135,7 +166,16 @@ Usage:
135
166
  against the stored capture it cites before it's accepted — then
136
167
  compute deterministic front states and drift, render the field
137
168
  report. refresh = capture → classify → drift → report in one step
138
- fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where ] (--set <field>=<value> [--set ] | --archive | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
169
+ fullstackgtm enrich append [--source apollo] [--objects companies,contacts] [--save] [--config <path>] [source options]
170
+ fullstackgtm enrich refresh [--source apollo] [--stale-days <n>] [--save] [--config <path>] [source options]
171
+ fullstackgtm enrich ingest <file.csv|payload.json> --source clay [--run-label <label>]
172
+ fullstackgtm enrich status [--runs] [--source <id>] [--json]
173
+ governed enrichment: pull (Apollo) or stage (Clay) third-party
174
+ data, match it to CRM records deterministically, and emit a
175
+ fill-blanks-only patch plan through the normal dry-run →
176
+ approve → apply gate. refresh re-checks stale stamped fields
177
+ and proposes updates only where the source value changed.
178
+ fullstackgtm bulk-update <account|contact|deal> --where <expr> [--where …] (--set <field>=<value> [--set …] | --archive [--force-archive-duplicates] | --create-task <text>) [--require <field>=<value> …] [--guard <object>:<where>[;<where>]:<none|some> …] [source options] [--save] [--json] [--out <path>]
139
179
  governed generic writes: filter the snapshot
140
180
  (field=value, field!=value, field~substr, field!~substr,
141
181
  field:empty, field:notempty, '|' = any-of; canonical fields
@@ -147,6 +187,35 @@ Usage:
147
187
  apply time (incl. mid-apply rechecks); equality filters
148
188
  double as preconditions; per-record ops apply
149
189
  all-or-nothing; guards assert cross-record conditions.
190
+ --set <field>=from:<sourceField> derives the value PER
191
+ RECORD from the snapshot (relational sources like
192
+ account.ownerId included); records whose source is empty
193
+ are skipped and counted, never guessed. --archive refuses
194
+ records that share their identity key (account domain /
195
+ contact email) with another record — merge those with
196
+ \`dedupe\` instead, or --force-archive-duplicates.
197
+ fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--json] [--out <path>]
198
+ find duplicate groups by normalized identity key and build
199
+ a dry-run plan of merge_records operations — one per group,
200
+ deterministic survivor (richest = most populated data
201
+ fields, ties to lowest id; oldest = lowest id). Approve and
202
+ apply like any plan; merges are IRREVERSIBLE on apply.
203
+ fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--save] [--json] [--out <path>]
204
+ ownership handoff playbook: one bulk-update-style plan per
205
+ object type (ownerId=<from> → <to>). Extra --where scoping
206
+ is account-lifted for deals/contacts (domain~.de becomes
207
+ account.domain~.de); --except-deal-stage <stage> excludes
208
+ deals in that stage AND every record whose account has an
209
+ open deal in it, re-verified per record at apply time.
210
+ Deal plans cover open deals only unless
211
+ --include-closed-deals.
212
+ fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]
213
+ one-shot composite: audit ONE rule → save the plan →
214
+ suggest values → approve only suggestion-backed operations
215
+ meeting the confidence bar (plus operations that need no
216
+ value) → with --yes, apply through the provider and print a
217
+ stage-by-stage summary. Without --yes it stops after
218
+ approval and prints the apply command.
150
219
  fullstackgtm suggest --plan-id <id> | --plan <path> [source options] [--json] [--out <path>]
151
220
  derive values for requires_human_* placeholders
152
221
  from snapshot evidence, with confidence + reasons
@@ -1201,6 +1270,467 @@ recomputed deterministically on every invocation — never stored.`);
1201
1270
  );
1202
1271
  }
1203
1272
 
1273
/**
 * The enrich layer: governed append/refresh of third-party data (Apollo pull,
 * Clay ingest) into the CRM through the normal dry-run → approval → apply
 * contract. State lives in the profile-scoped run store (checkpoint,
 * staleness ledger, observability in one); scheduling belongs to the
 * horizontal scheduler — enrich owns no cron logic.
 *
 * Dispatches on the first argument: `status` and `ingest` are delegated to
 * their own handlers; `append` and `refresh` assemble source records, build
 * a patch plan, and either print it (dry run) or persist plan + run (--save).
 *
 * @param args argv after `enrich`: the subcommand followed by its flags.
 * @throws Error on an unknown subcommand, on missing staged data for a
 *   non-api source, or when `--staged-run` names a run staged for a
 *   different source than the one being appended.
 */
async function enrichCommand(args: string[]) {
  const [subcommand, ...rest] = args;

  // Catch --help BEFORE config load, credential resolution, or any network
  // call (the 0.14.1/0.18 bug class — `enrich append --help` executing a
  // paid Apollo pull would be its worst recurrence).
  if (!subcommand || subcommand === "--help" || subcommand === "-h" || rest.includes("--help") || rest.includes("-h")) {
    console.log(`Usage:
enrich append [--source apollo] [--objects companies,contacts] [--save] [--config <path>]
[source options] [--run-label <label>] [--json]
enrich refresh [--source apollo] [--stale-days <n>] [--save] [--config <path>]
[source options] [--run-label <label>] [--json]
enrich ingest <file.csv|payload.json> --source clay [--run-label <label>] [--objects companies|contacts] [--config <path>]
enrich status [--runs] [--source <id>] [--config <path>] [--json]

append pulls from an api source (Apollo — BYO key via \`login apollo\` or
APOLLO_API_KEY) or reads data staged by \`enrich ingest\` (Clay CSV exports,
webhook payload JSON), matches source records to CRM records via the ordered
match keys in enrich.config.json (unique hit wins; zero hits falls through to
the next key; multiple hits skip or flow into the suggest chain, per
onAmbiguous), and emits a fill-blanks-only patch plan. Without --save it
prints the dry-run diff and writes NOTHING; with --save the plan lands in the
plan store as needs_approval and the run (counts, per-field enrichedAt stamps,
resume cursor) lands in the profile's enrich run store. From there the normal
chain takes over: plans approve → apply.

refresh computes its work set from the run-store stamps — fields enrich
itself wrote, opted in with "refresh": true, older than the staleness window
(--stale-days overrides per-field staleDays and policy.defaultStaleDays) —
re-fetches the source, and proposes updates only where the source value
actually changed. Every operation carries beforeValue, so apply-time
compare-and-set rejects writes over a CRM that moved underneath the plan.

Conflict policy (MVP): "never" — enrich only fills blank fields and only
re-touches fields its own ledger proves it stamped. system-only and always
are phase 2. Recurring execution is the scheduler's job; enrich has no cron.`);
    return;
  }

  if (!["append", "refresh", "ingest", "status"].includes(subcommand)) {
    throw new Error(`Unknown enrich subcommand: ${subcommand} (try: append, refresh, ingest, status)`);
  }

  // Lazy so the path is only resolved by the branch that actually needs it.
  const configPath = () => resolve(process.cwd(), option(rest, "--config") ?? ENRICH_CONFIG_FILE_NAME);
  const store = createFileEnrichRunStore();

  // status is dispatched before loadEnrichConfig: it reads the config itself, optionally.
  if (subcommand === "status") {
    await enrichStatus(store, rest, configPath());
    return;
  }

  const config = loadEnrichConfig(configPath());

  if (subcommand === "ingest") {
    await enrichIngest(store, config, rest);
    return;
  }

  const mode = subcommand as "append" | "refresh";
  const source = resolveEnrichSource(config, rest);
  const sourceConfig = config.sources[source];
  const save = rest.includes("--save");
  const today = new Date().toISOString().slice(0, 10);

  // Refresh work set comes from the staleness ledger, before any fetch.
  const allRuns = await store.list();
  let workSet: ReturnType<typeof selectStaleWork> = [];
  if (mode === "refresh") {
    const staleDaysOverride = numericOption(rest, "--stale-days");
    workSet = selectStaleWork(config, allRuns, source, { staleDaysOverride });
    if (workSet.length === 0) {
      const stamped = latestStamps(allRuns, source).size;
      console.log(
        stamped === 0
          ? `Nothing to refresh: no ${source} enrichment stamps yet. Run \`enrich append --source ${source} --save\` first.`
          : `Nothing to refresh: all ${stamped} stamped field(s) from ${source} are within their staleness window.`,
      );
      return;
    }
  }

  const snapshot = await readSnapshot(rest);

  // Assemble source records: api pull (checkpointed when --save) or staged ingest data.
  let run: EnrichRun | null = null;
  let records: EnrichSourceRecord[];
  let missCount = 0;
  if (sourceConfig.kind === "api") {
    const objectTypes = parseEnrichObjects(rest, config, source);
    const fieldsFor = (objectType: EnrichObjectType) =>
      (config.fields[objectType] ?? []).filter((field) => field.from[source] !== undefined);
    const pullKeys: ApolloPullKey[] =
      mode === "append"
        ? apolloPullKeysForAppend(snapshot, objectTypes, (objectType, record) =>
            // A record qualifies when at least one mapped CRM field is blank.
            fieldsFor(objectType).some((field) => {
              const value = record[resolveCrmField(objectType, field.crm)];
              return value === undefined || value === null || String(value).trim() === "";
            }),
          )
        : apolloPullKeysForRefresh(snapshot, workSet);
    if (pullKeys.length === 0) {
      console.log(
        mode === "append"
          ? "Nothing to enrich: no records with a blank mapped field and a pull key (companies need a domain, contacts an email)."
          : "Nothing to refresh: no stale records carry a pull key (companies need a domain, contacts an email).",
      );
      return;
    }
    const client = createApolloClient({
      getApiKey: () => apolloApiKey(),
      apiBaseUrl: process.env.APOLLO_API_BASE_URL,
    });
    if (save) {
      run = await openEnrichRun(store, source, mode, option(rest, "--run-label"), today);
      if (run.cursor) {
        console.error(
          `Resuming interrupted run ${run.runLabel} from cursor ${run.cursor} (${run.pulled?.length ?? 0} record(s) already pulled).`,
        );
      }
    }
    // Same object as `run`, held in a const so the progress callback can
    // mutate it without non-null assertions.
    const checkpoint = run;
    const pull = await pullApolloRecords(client, pullKeys, {
      resumeAfter: checkpoint?.cursor ?? null,
      onProgress: checkpoint
        ? async (progress) => {
            // Persist after every key so an interrupted pull resumes from here.
            checkpoint.cursor = progress.lastKeyValue;
            if (progress.record) checkpoint.pulled = [...(checkpoint.pulled ?? []), progress.record];
            if (progress.miss) checkpoint.missedKeys = [...(checkpoint.missedKeys ?? []), progress.miss.value];
            await store.update(checkpoint);
          }
        : undefined,
    });
    // With a checkpointed run, its accumulated lists are used instead of this call's result.
    records = checkpoint ? [...(checkpoint.pulled ?? [])] : pull.records;
    missCount = checkpoint ? (checkpoint.missedKeys?.length ?? 0) : pull.misses.length;
  } else {
    const stagedLabel = option(rest, "--staged-run");
    const stagedRun = stagedLabel
      ? await store.get(stagedLabel)
      : await store.latest({ source, mode: "ingest" });
    if (!stagedRun || stagedRun.mode !== "ingest") {
      throw new Error(
        `No staged data for source "${source}". Stage it first: fullstackgtm enrich ingest <file.csv|payload.json> --source ${source}`,
      );
    }
    // An explicit --staged-run label is looked up without a source filter, so
    // reject rows staged for another source instead of mapping them with this
    // source's field definitions.
    if (stagedRun.source !== source) {
      throw new Error(
        `Staged run "${stagedRun.runLabel}" belongs to source "${stagedRun.source}", not "${source}" — pass the matching --source or a different --staged-run.`,
      );
    }
    records = stagedSourceRecords(config, source, stagedRun);
    if (save) run = await openEnrichRun(store, source, mode, option(rest, "--run-label"), today);
  }

  const result = buildEnrichPlan({
    config,
    source,
    mode,
    snapshot,
    records,
    workSet: mode === "refresh" ? workSet : undefined,
    runLabel: run?.runLabel ?? `${mode}-${source}-${today}`,
  });
  // Pull keys the source had no data for count as fetched-but-unmatched.
  result.counts.fetched += missCount;
  result.counts.unmatched += missCount;

  if (!save) {
    if (rest.includes("--json")) {
      console.log(JSON.stringify(result.plan, null, 2));
    } else {
      console.log(patchPlanToMarkdown(result.plan));
      console.log(formatEnrichCounts(result.counts, result.ambiguities.length));
      console.log("\nDry run — nothing written. Re-run with --save to persist the plan and the run record.");
    }
    return;
  }

  // Both branches above open a run whenever --save is set; guard explicitly
  // rather than casting so a future refactor cannot finalize a null run.
  if (!run) {
    throw new Error("enrich: --save reached finalization without an open run record");
  }

  // --save: persist the plan (when it proposes anything) and finalize the run.
  const planIds: string[] = [];
  if (result.plan.operations.length > 0) {
    await createFilePlanStore().save(result.plan);
    planIds.push(result.plan.id);
  }
  const finalized: EnrichRun = {
    ...run,
    completedAt: new Date().toISOString(),
    cursor: null,
    counts: result.counts,
    planIds: [...(run.planIds ?? []), ...planIds],
    stamps: [...(run.stamps ?? []), ...result.stamps],
    ambiguities: result.ambiguities,
  };
  await store.update(finalized);
  console.log(formatEnrichCounts(result.counts, result.ambiguities.length));
  if (planIds.length > 0) {
    console.log(
      `Saved plan ${result.plan.id} (run ${finalized.runLabel}). Review with \`fullstackgtm plans show ${result.plan.id}\`, ` +
        `approve with \`fullstackgtm plans approve ${result.plan.id} --operations <ids|all>\`, then ` +
        `\`fullstackgtm apply --plan-id ${result.plan.id} --provider <name>\`.`,
    );
  } else {
    console.log(`Run ${finalized.runLabel} recorded; no operations to propose.`);
  }
}
1478
+
1479
/**
 * Render the one-line run summary printed after an enrich dry run or save.
 *
 * @param counts      per-run tallies (fetched / matched / unmatched / ambiguous / opsEmitted).
 * @param ambiguities number of recorded collisions, shown next to the ambiguous tally.
 * @returns a single " · "-separated summary line.
 */
function formatEnrichCounts(counts: EnrichCounts, ambiguities: number) {
  const segments = [
    `${counts.fetched} fetched`,
    `${counts.matched} matched`,
    `${counts.unmatched} unmatched`,
    `${counts.ambiguous} ambiguous (${ambiguities} collision(s) recorded)`,
    `${counts.opsEmitted} operation(s) proposed`,
  ];
  return `Source records: ${segments.join(" · ")}`;
}
1486
+
1487
/**
 * Decide which declared enrich source a command operates on.
 *
 * Resolution order: an explicit `--source <id>` (must be declared in the
 * config), else the only declared source, else "apollo" when it is declared.
 *
 * @param config loaded enrich config; only `config.sources` is read.
 * @param rest   argv after the subcommand.
 * @returns the id of the selected source.
 * @throws Error when `--source` names an undeclared source, when no sources
 *   are declared, or when several are declared and none can be chosen.
 */
function resolveEnrichSource(config: EnrichConfig, rest: string[]): string {
  const requested = option(rest, "--source");
  const declared = Object.keys(config.sources);
  if (requested) {
    // Own-property check: a plain index lookup is truthy for inherited names
    // such as "constructor" or "toString" and would accept them as sources.
    if (!Object.prototype.hasOwnProperty.call(config.sources, requested) || !config.sources[requested]) {
      throw new Error(`Unknown enrich source "${requested}" (declared: ${declared.join(", ")})`);
    }
    return requested;
  }
  if (declared.length === 0) {
    // Previously fell through to the "Multiple sources declared ()" message.
    throw new Error("No enrich sources declared in the config — declare at least one under \"sources\".");
  }
  if (declared.length === 1) return declared[0];
  if (config.sources.apollo) return "apollo";
  throw new Error(`Multiple sources declared (${declared.join(", ")}) — pass --source <id>`);
}
1500
+
1501
/**
 * Work out which object types an api pull should cover.
 *
 * Only object types with at least one field mapped from `source` are
 * eligible. Without `--objects` every eligible type is returned; with it,
 * the comma-separated list is parsed, de-duplicated, and each entry must be
 * eligible.
 *
 * @param rest   argv after the subcommand.
 * @param config loaded enrich config; `config.fields` is read.
 * @param source id of the source being pulled.
 * @throws Error when nothing maps from `source`, or a requested type has no mapped fields.
 */
function parseEnrichObjects(rest: string[], config: EnrichConfig, source: string): EnrichObjectType[] {
  const candidates: EnrichObjectType[] = ["company", "contact"];
  const mapped = candidates.filter((objectType) => {
    const fields = config.fields[objectType] ?? [];
    return fields.some((field) => field.from[source] !== undefined);
  });

  const flag = option(rest, "--objects");
  if (flag) {
    const requested = Array.from(new Set(flag.split(",").map((part) => parseSingleObjectType(part))));
    // Report the first requested type that the config cannot serve.
    const unmapped = requested.find((objectType) => !mapped.includes(objectType));
    if (unmapped !== undefined) {
      throw new Error(`--objects ${flag}: no ${unmapped} fields map from source "${source}" in the config.`);
    }
    return requested;
  }

  if (mapped.length === 0) {
    throw new Error(`No fields map from source "${source}" — add "from": { "${source}": ... } entries to the config.`);
  }
  return mapped;
}
1520
+
1521
/**
 * Resolve the Apollo API key: the APOLLO_API_KEY environment variable wins,
 * otherwise the credential stored under "apollo" is used.
 *
 * @returns the API key string.
 * @throws Error when neither the environment variable nor a stored credential is available.
 */
function apolloApiKey(): string {
  const fromEnv = process.env.APOLLO_API_KEY;
  if (fromEnv) return fromEnv;

  const stored = getCredential("apollo");
  if (!stored) {
    throw new Error(
      'No Apollo credentials. Run `echo "$APOLLO_API_KEY" | fullstackgtm login apollo` once, or set APOLLO_API_KEY.',
    );
  }
  return stored.accessToken;
}
1529
+
1530
/**
 * Open (or resume) a saved run. An interrupted run — same label, same source
 * and mode, never completed — is resumed from its cursor; a completed run
 * with the default label gets a -2/-3 suffix (runs are append-only).
 *
 * @param store          run store used for lookup and for appending the new run.
 * @param source         enrich source id the run belongs to.
 * @param mode           "append" or "refresh".
 * @param requestedLabel explicit --run-label, or null to derive `<mode>-<source>-<today>`.
 * @param today          date string used in the default label.
 * @returns the resumed run, or the newly appended run with zeroed counts.
 * @throws Error when an explicit label is already taken by a run that cannot be resumed.
 */
async function openEnrichRun(
  store: EnrichRunStore,
  source: string,
  mode: "append" | "refresh",
  requestedLabel: string | null,
  today: string,
): Promise<EnrichRun> {
  const baseLabel = requestedLabel ?? `${mode}-${source}-${today}`;
  let label = baseLabel;
  for (let suffix = 2; ; suffix += 1) {
    const existing = await store.get(label);
    if (!existing) break;
    if (existing.source === source && existing.mode === mode && existing.completedAt === null) {
      return existing; // resume the interrupted run
    }
    if (requestedLabel) {
      // The label can also be held by an unfinished run of another source or
      // mode; do not claim it is completed in that case.
      if (existing.completedAt === null) {
        throw new Error(
          `Run "${requestedLabel}" already exists as an interrupted ${existing.mode} run for source "${existing.source}" — enrich runs are append-only; pick a new --run-label.`,
        );
      }
      throw new Error(`Run "${requestedLabel}" already exists and is completed — enrich runs are append-only.`);
    }
    // Default labels never fail: probe <base>-2, <base>-3, … until one is free.
    label = `${baseLabel}-${suffix}`;
  }
  return store.append({
    id: enrichRunId(source, label),
    runLabel: label,
    source,
    mode,
    startedAt: new Date().toISOString(),
    completedAt: null,
    cursor: null,
    counts: { fetched: 0, matched: 0, unmatched: 0, ambiguous: 0, opsEmitted: 0 },
    planIds: [],
    stamps: [],
  });
}
1568
+
1569
/**
 * Stage rows from a CSV or JSON file as a completed "ingest" run so a later
 * `enrich append --source <id>` can read them.
 *
 * The file is treated as CSV when its name ends in .csv, or when the source's
 * configured format is "csv" and the name does not end in .json; otherwise it
 * is parsed as JSON (an array of rows, `{ "rows": [...] }`, or a single
 * object treated as one row).
 *
 * @param store  run store the staged run is appended to.
 * @param config loaded enrich config; the source must be declared with kind "ingest".
 * @param rest   argv after `ingest`: the file path plus flags.
 * @throws Error on a missing file argument or --source, an unknown or
 *   non-ingest source, unparsable or wrongly shaped JSON, an empty file, or
 *   an explicit --run-label that already exists.
 */
async function enrichIngest(store: EnrichRunStore, config: EnrichConfig, rest: string[]) {
  // First argument that is neither a flag nor the value of a flag.
  const file = rest.find((arg) => !arg.startsWith("--") && !isOptionValue(rest, arg));
  if (!file) throw new Error("Usage: fullstackgtm enrich ingest <file.csv|payload.json> --source <id> [--run-label <label>]");
  const source = option(rest, "--source");
  if (!source) throw new Error("enrich ingest requires --source <id> (the ingest source the data belongs to)");
  const sourceConfig = config.sources[source];
  if (!sourceConfig) {
    throw new Error(`Unknown enrich source "${source}" (declared: ${Object.keys(config.sources).join(", ")})`);
  }
  if (sourceConfig.kind !== "ingest") {
    throw new Error(`Source "${source}" is kind "${sourceConfig.kind}" — only ingest sources accept staged data.`);
  }

  const raw = readFileSync(resolve(process.cwd(), file), "utf8");
  const lowerName = file.toLowerCase();
  let rows: Array<Record<string, unknown>>;
  const isCsv = lowerName.endsWith(".csv") || sourceConfig.format === "csv";
  if (isCsv && !lowerName.endsWith(".json")) {
    rows = parseCsv(raw);
  } else {
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch (error) {
      // Name the file: a bare SyntaxError gives no hint which input was bad.
      const detail = error instanceof Error ? error.message : String(error);
      throw new Error(`${file}: invalid JSON — ${detail}`);
    }
    if (Array.isArray(parsed)) rows = parsed as Array<Record<string, unknown>>;
    else if (parsed && typeof parsed === "object" && Array.isArray((parsed as { rows?: unknown }).rows)) {
      rows = (parsed as { rows: Array<Record<string, unknown>> }).rows;
    } else if (parsed && typeof parsed === "object") {
      rows = [parsed as Record<string, unknown>];
    } else {
      throw new Error(`${file}: expected a JSON array, an object, or { "rows": [...] }`);
    }
    // The casts above are unchecked; refuse primitives, nulls and nested
    // arrays rather than staging rows that carry no fields.
    const badIndex = rows.findIndex((row) => row === null || typeof row !== "object" || Array.isArray(row));
    if (badIndex !== -1) {
      throw new Error(`${file}: row ${badIndex + 1} is not a JSON object`);
    }
  }
  if (rows.length === 0) throw new Error(`${file}: no rows to stage`);

  const objectsFlag = option(rest, "--objects");
  const objectType: EnrichObjectType = objectsFlag
    ? parseSingleObjectType(objectsFlag)
    : inferIngestObjectType(config, source, rows);

  const today = new Date().toISOString().slice(0, 10);
  const requestedLabel = option(rest, "--run-label");
  const baseLabel = requestedLabel ?? `ingest-${source}-${today}`;
  let label = baseLabel;
  // Explicit labels must be unique; default labels probe -2, -3, … until free.
  for (let suffix = 2; await store.get(label); suffix += 1) {
    if (requestedLabel) {
      throw new Error(`Run "${baseLabel}" already exists — enrich runs are append-only; pick a new --run-label.`);
    }
    label = `${baseLabel}-${suffix}`;
  }
  // Staging is instantaneous, so the run is recorded as already completed.
  const now = new Date().toISOString();
  await store.append({
    id: enrichRunId(source, label),
    runLabel: label,
    source,
    mode: "ingest",
    startedAt: now,
    completedAt: now,
    cursor: null,
    counts: { fetched: rows.length, matched: 0, unmatched: 0, ambiguous: 0, opsEmitted: 0 },
    planIds: [],
    stamps: [],
    staged: rows,
    stagedObjectType: objectType,
  });
  console.log(
    `Staged ${rows.length} ${objectType} row(s) from ${file} as run ${label}. ` +
      `Next: fullstackgtm enrich append --source ${source} [source options] [--save]`,
  );
}
1634
+
1635
/**
 * Map one `--objects` token to an enrich object type. Singular and plural
 * spellings are accepted, case-insensitively and ignoring surrounding whitespace.
 *
 * @param value raw token as typed by the user.
 * @returns "company" or "contact".
 * @throws Error for any other token; the message echoes the raw value.
 */
function parseSingleObjectType(value: string): EnrichObjectType {
  switch (value.trim().toLowerCase()) {
    case "company":
    case "companies":
      return "company";
    case "contact":
    case "contacts":
      return "contact";
    default:
      throw new Error(`--objects must be companies or contacts (got "${value}")`);
  }
}
1641
+
1642
/**
 * Report on the enrich run store: per-source run counts, the last run,
 * interrupted runs, and staleness of the latest per-field stamps.
 *
 * Output is JSON with `--json` (including the raw runs when `--runs` is also
 * given), otherwise human-readable lines; `--runs` appends one line per run.
 *
 * @param store      run store to read from.
 * @param rest       argv after `status` (`--source`, `--json`, `--runs`).
 * @param configFile path to the enrich config; read only if it exists and loads.
 */
async function enrichStatus(store: EnrichRunStore, rest: string[], configFile: string) {
  const sourceFilter = option(rest, "--source");
  const allRuns = (await store.list()).filter((run) => !sourceFilter || run.source === sourceFilter);
  if (allRuns.length === 0) {
    console.log(
      sourceFilter
        ? `No enrich runs for source "${sourceFilter}".`
        : "No enrich runs yet. Start with `fullstackgtm enrich append --save` or stage data with `enrich ingest`.",
    );
    return;
  }

  // Staleness windows come from the config when one is readable; status must
  // not REQUIRE a config (the run store alone is enough to report on).
  let config: EnrichConfig | null = null;
  if (existsSync(configFile)) {
    try {
      config = loadEnrichConfig(configFile);
    } catch {
      // Deliberate best effort: an unreadable config falls back to the default window.
      config = null;
    }
  }

  const now = Date.now();
  const sources = Array.from(new Set(allRuns.map((run) => run.source)));
  const report = sources.map((source) => {
    const runs = allRuns.filter((run) => run.source === source);
    // NOTE(review): treats the last listed run as the most recent — assumes
    // store.list() returns runs in append order; confirm against the store.
    const last = runs[runs.length - 1];
    const interrupted = runs.filter((run) => run.completedAt === null);
    const stamps = Array.from(latestStamps(runs, source).values());

    // Single pass with running min/max. Spreading the whole ledger into
    // Math.max(...)/Math.min(...) passes one call argument per stamp and
    // throws RangeError once the ledger outgrows the engine's argument limit.
    let staleCount = 0;
    let oldestAge = -Infinity;
    let newestAge = Infinity;
    for (const stamp of stamps) {
      const ageDays = (now - Date.parse(stamp.enrichedAt)) / 86_400_000;
      const windowDays = config
        ? staleDaysFor(config, stamp.objectType, stamp.field)
        : DEFAULT_STALE_DAYS;
      if (ageDays > windowDays) staleCount += 1;
      // Math.max/min keep the NaN propagation of the spread form.
      oldestAge = Math.max(oldestAge, ageDays);
      newestAge = Math.min(newestAge, ageDays);
    }

    return {
      source,
      runs: runs.length,
      lastRun: {
        runLabel: last.runLabel,
        mode: last.mode,
        startedAt: last.startedAt,
        completedAt: last.completedAt,
        counts: last.counts,
        planIds: last.planIds,
      },
      interrupted: interrupted.map((run) => ({ runLabel: run.runLabel, cursor: run.cursor })),
      stamps: {
        total: stamps.length,
        stale: staleCount,
        oldestDays: stamps.length ? Math.round(oldestAge) : null,
        newestDays: stamps.length ? Math.round(newestAge) : null,
        windowSource: config ? "enrich.config.json" : `default ${DEFAULT_STALE_DAYS}d`,
      },
    };
  });

  if (rest.includes("--json")) {
    console.log(JSON.stringify({ sources: report, runs: rest.includes("--runs") ? allRuns : undefined }, null, 2));
    return;
  }

  for (const entry of report) {
    const last = entry.lastRun;
    console.log(`${entry.source} — ${entry.runs} run(s)`);
    console.log(
      `  last: ${last.runLabel} (${last.mode}) ${last.completedAt ? `completed ${last.completedAt}` : "INTERRUPTED"}` +
        ` · ${last.counts.fetched} fetched, ${last.counts.matched} matched, ${last.counts.unmatched} unmatched,` +
        ` ${last.counts.ambiguous} ambiguous, ${last.counts.opsEmitted} ops` +
        (last.planIds.length ? ` · plans: ${last.planIds.join(", ")}` : ""),
    );
    for (const run of entry.interrupted) {
      console.log(`  interrupted: ${run.runLabel} at cursor ${run.cursor ?? "(start)"} — re-run with --save to resume`);
    }
    console.log(
      `  stamps: ${entry.stamps.total} field(s) enriched · ${entry.stamps.stale} stale (window: ${entry.stamps.windowSource})` +
        (entry.stamps.total ? ` · age ${entry.stamps.newestDays}–${entry.stamps.oldestDays}d` : ""),
    );
  }
  if (rest.includes("--runs")) {
    console.log("");
    for (const run of allRuns) {
      console.log(
        `${run.runLabel}  ${run.source.padEnd(8)}  ${run.mode.padEnd(8)}  ${run.completedAt ? "done" : "interrupted"}` +
          `  ${run.counts.opsEmitted} ops  ${run.stamps.length} stamps${run.staged ? `  ${run.staged.length} staged` : ""}`,
      );
    }
  }
}
1733
+
1204
1734
  /**
1205
1735
  * The resolve gate: exit 0 = safe to create, exit 2 = match found (exists or
1206
1736
  * ambiguous — do NOT blind-create), exit 1 = error. Built for sync jobs and
@@ -1257,29 +1787,213 @@ async function bulkUpdateCommand(args: string[]) {
1257
1787
  where,
1258
1788
  set: Object.keys(set).length > 0 ? set : undefined,
1259
1789
  archive: rest.includes("--archive"),
1790
+ forceArchiveDuplicates: rest.includes("--force-archive-duplicates"),
1260
1791
  createTask: option(rest, "--create-task") ?? undefined,
1261
1792
  require: repeatedOption(rest, "--require"),
1262
1793
  guard: repeatedOption(rest, "--guard"),
1263
1794
  reason: option(rest, "--reason") ?? undefined,
1264
1795
  maxOperations: numericOption(rest, "--max-operations"),
1265
1796
  });
1266
- const out = option(rest, "--out");
1797
+ await emitPlan(plan, rest);
1798
+ }
1799
+
1800
/**
 * Common tail for plan-producing commands: persists and prints one plan
 * according to the caller's flags.
 *
 * - `--out <path>` writes the plan as pretty-printed JSON (with a trailing
 *   newline) to the path, resolved against the current working directory.
 * - `--save` stores the plan in the file plan store and prints the
 *   review / approve / apply hint on stderr.
 * - `--json` prints the plan as JSON on stdout; without it the plan's
 *   markdown rendering is printed instead.
 *
 * @param plan The plan to emit.
 * @param args Raw CLI arguments the flags are read from.
 */
async function emitPlan(plan: PatchPlan, args: string[]) {
  const hasFlag = (flag: string) => args.includes(flag);

  const outPath = option(args, "--out");
  if (outPath) {
    const target = resolve(process.cwd(), outPath);
    writeFileSync(target, `${JSON.stringify(plan, null, 2)}\n`);
  }

  if (hasFlag("--save")) {
    const planStore = createFilePlanStore();
    await planStore.save(plan);
    // The hint goes to stderr, separate from the plan printed on stdout below.
    const hint = `Saved plan ${plan.id} (${plan.operations.length} operations). Review with \`fullstackgtm plans show ${plan.id}\`, approve with \`fullstackgtm plans approve ${plan.id} --operations <ids|all>\`, then \`fullstackgtm apply --plan-id ${plan.id} --provider <name>\`.`;
    console.error(hint);
  }

  if (!hasFlag("--json")) {
    console.log(patchPlanToMarkdown(plan));
    return;
  }
  console.log(JSON.stringify(plan, null, 2));
}
1282
1818
 
1819
/**
 * Governed duplicate cleanup: group by a normalized identity key, propose one
 * merge_records per duplicate group with a deterministic survivor. Never
 * writes — approve and apply the plan like any audit plan.
 *
 * The first positional argument is the object type; the remaining arguments
 * carry `--key`, the optional `--keep`, `--reason`, `--max-operations`, the
 * snapshot source options and the shared plan output flags handled by
 * `emitPlan`.
 *
 * All three enumerated inputs (object type, `--key`, `--keep`) are validated
 * here before the snapshot is read: the casts below are erased at compile
 * time, so without these checks a typo would reach the planner unchecked.
 *
 * @param args CLI arguments following the `dedupe` command name.
 * @throws Error when the object type, `--key` or `--keep` is missing or not
 *   one of the supported values.
 */
async function dedupeCommand(args: string[]) {
  const [objectType, ...rest] = args;
  if (!objectType || !["account", "contact", "deal"].includes(objectType)) {
    throw new Error(
      "Usage: fullstackgtm dedupe <account|contact|deal> --key <domain|email|name> [--keep richest|oldest] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }
  const key = option(rest, "--key");
  if (!key || !["domain", "email", "name"].includes(key)) {
    throw new Error("dedupe requires --key <domain|email|name> (the identity field duplicates share).");
  }
  // --keep is optional, but when given it must be one of the documented
  // strategies; reject anything else instead of casting it through.
  const keep = option(rest, "--keep") ?? undefined;
  if (keep !== undefined && !["richest", "oldest"].includes(keep)) {
    throw new Error("--keep must be richest or oldest");
  }
  const snapshot = await readSnapshot(rest);
  const plan = buildDedupePlan(snapshot, {
    objectType: objectType as DedupeOptions["objectType"],
    key: key as DedupeOptions["key"],
    keep: keep as DedupeOptions["keep"],
    reason: option(rest, "--reason") ?? undefined,
    maxOperations: numericOption(rest, "--max-operations"),
  });
  await emitPlan(plan, rest);
}
1846
+
1847
/**
 * Ownership handoff playbook: compile one bulk-update-style plan per object
 * type. Each plan carries its full filter, so eligibility (including the
 * --except-deal-stage exclusion) is re-verified per record at apply time.
 *
 * Output handling:
 * - `--out <path>` writes the array of plans as pretty-printed JSON.
 * - `--save` stores every plan in the file plan store. This happens in both
 *   output modes; previously `--json` returned before the save step, so
 *   `--save --json` printed the plans but stored nothing.
 * - `--json` prints the array of plans on stdout. When combined with `--save`
 *   the "Saved …" hint goes to stderr so stdout stays parseable JSON.
 * - Otherwise one summary entry per plan is printed, followed by either the
 *   saved hint or a dry-run notice.
 *
 * @param args CLI arguments following the `reassign` command name.
 * @throws Error with the usage text when `--from` or `--to` is missing.
 */
async function reassignCommand(args: string[]) {
  const from = option(args, "--from");
  const to = option(args, "--to");
  if (!from || !to) {
    throw new Error(
      "Usage: fullstackgtm reassign --from <ownerId> --to <ownerId> [--objects account,contact,deal] [--where <expr> …] [--except-deal-stage <stage>] [--include-closed-deals] [source options] [--reason <text>] [--max-operations <n>] [--save] [--out <path>] [--json]",
    );
  }
  // NOTE(review): the comma-separated values are cast, not validated, here —
  // confirm that buildReassignPlans rejects unknown object types.
  const objects = option(args, "--objects")
    ?.split(",")
    .map((value) => value.trim())
    .filter(Boolean) as ReassignObjectType[] | undefined;
  const snapshot = await readSnapshot(args);
  const plans = buildReassignPlans(snapshot, {
    fromOwnerId: from,
    toOwnerId: to,
    objects,
    where: repeatedOption(args, "--where"),
    exceptDealStage: option(args, "--except-deal-stage") ?? undefined,
    includeClosedDeals: args.includes("--include-closed-deals"),
    reason: option(args, "--reason") ?? undefined,
    maxOperations: numericOption(args, "--max-operations"),
  });
  const out = option(args, "--out");
  if (out) {
    writeFileSync(resolve(process.cwd(), out), `${JSON.stringify(plans, null, 2)}\n`);
  }
  const store = args.includes("--save") ? createFilePlanStore() : null;
  const savedHint = `Saved ${plans.length} plan(s). For each: \`fullstackgtm plans show <id>\`, \`fullstackgtm plans approve <id> --operations <ids|all>\`, then \`fullstackgtm apply --plan-id <id> --provider <name>\`.`;
  if (args.includes("--json")) {
    // Honor --save in JSON mode too; keep stdout limited to the JSON document.
    if (store) {
      for (const plan of plans) await store.save(plan);
      console.error(savedHint);
    }
    console.log(JSON.stringify(plans, null, 2));
    return;
  }
  for (const plan of plans) {
    if (store) await store.save(plan);
    console.log(`${plan.id} ${String(plan.operations.length).padStart(3)} operation(s) ${plan.title}`);
    console.log(` ${plan.summary}`);
  }
  if (store) {
    console.log(`\n${savedHint}`);
  } else {
    console.log("\nDry run only — re-run with --save to store the plans for approval.");
  }
}
1897
+
1898
/**
 * One-shot composite for a single audit rule. Runs the same stages as the
 * manual chain, in order:
 *
 *   1. audit the snapshot with only the requested rule;
 *   2. save the resulting plan;
 *   3. ask for value suggestions;
 *   4. approve operations whose suggestion meets the confidence bar, plus
 *      operations that have no suggestion entry at all (they already carry a
 *      concrete value, so there is nothing to guess);
 *   5. apply the approved operations — only when `--yes` is passed.
 *
 * Suggestions below the bar are counted and their operations stay
 * unapproved. The process exit code is set to 1 when the apply run reports
 * status "failed".
 *
 * @param args CLI arguments following the `fix` command name.
 * @throws Error when `--rule` or `--provider` is missing, when
 *   `--min-confidence` is neither `high` nor `low`, or when the rule id is
 *   not among the configured rules.
 */
async function fixCommand(args: string[]) {
  const ruleId = option(args, "--rule");
  const provider = option(args, "--provider");
  if (!ruleId || !provider) {
    throw new Error(
      "Usage: fullstackgtm fix --rule <ruleId> --provider <name> [--min-confidence high|low] [--include-creates] [--today <iso>] [--yes]",
    );
  }
  const minConfidence = option(args, "--min-confidence") ?? "high";
  if (minConfidence !== "high" && minConfidence !== "low") {
    throw new Error("--min-confidence must be high or low");
  }
  const includeCreates = args.includes("--include-creates");

  // Resolve the single rule to run from the configured rule set.
  const loaded = loadConfig(option(args, "--config") ?? undefined);
  const configured = await resolveConfiguredRules(loaded);
  const rule = configured.find(({ id }) => id === ruleId);
  if (!rule) {
    const known = configured.map(({ id }) => id).join(", ");
    throw new Error(`Unknown rule: ${ruleId}. Available rules: ${known}`);
  }
  const policy = mergePolicy(defaultPolicy(), loaded?.config);
  const today = option(args, "--today");
  if (today) policy.today = today;

  // Audit with just that rule; an empty plan ends the command early.
  const snapshot = await readSnapshot(args);
  const plan = auditSnapshot(snapshot, policy, [rule]);
  const proposedCount = plan.operations.length;
  if (proposedCount === 0) {
    console.log(`fix ${ruleId}: audit proposed 0 operations — nothing to fix.`);
    return;
  }
  const store = createFilePlanStore();
  await store.save(plan);

  // Keep only suggestions that have a value and meet the confidence bar.
  const suggestions = suggestValues(plan, snapshot);
  const meetsBar = (confidence: string) =>
    confidence === "high" ||
    (minConfidence === "low" && confidence === "low") ||
    (includeCreates && confidence === "create");
  const overrides: Record<string, string> = {};
  let belowBar = 0;
  for (const { operationId, suggestedValue, confidence } of suggestions) {
    if (!suggestedValue || !meetsBar(confidence)) {
      belowBar += 1;
      continue;
    }
    overrides[operationId] = suggestedValue;
  }

  // Approved = operations with a qualifying override, plus operations that
  // never appeared in the suggestion list. Everything else stays unapproved.
  const needsValue = new Set(suggestions.map(({ operationId }) => operationId));
  const approvedIds = plan.operations
    .filter(({ id }) => overrides[id] !== undefined || !needsValue.has(id))
    .map(({ id }) => id);

  const createsNote = includeCreates ? " (creates included)" : "";
  const belowBarNote = belowBar > 0 ? `; ${belowBar} below the bar (left unapproved)` : "";
  const report = [
    `fix ${ruleId} via ${provider}:`,
    ` proposed: ${proposedCount} operation(s) — plan ${plan.id} (saved)`,
    ` suggested: ${Object.keys(overrides).length} value(s) at ${minConfidence}+ confidence${createsNote}${belowBarNote}`,
    ` approved: ${approvedIds.length} of ${proposedCount}`,
  ];

  if (approvedIds.length === 0) {
    report.push(" applied: 0 — no operation met the confidence bar");
    console.log(report.join("\n"));
    console.log(
      `\nWiden with --min-confidence low / --include-creates, or approve manually: \`fullstackgtm plans approve ${plan.id} --operations <ids> --value <opId>=<value>\`.`,
    );
    return;
  }
  await store.approveOperations(plan.id, approvedIds, overrides);

  // Without --yes, stop after approval and tell the user how to apply.
  const confirmed = args.includes("--yes");
  if (!confirmed) {
    report.push(" applied: 0 (stopped before apply — pass --yes to write)");
    console.log(report.join("\n"));
    console.log(`\nApply with:\n fullstackgtm apply --plan-id ${plan.id} --provider ${provider}`);
    return;
  }

  const connector = await connectorFor(provider, args);
  const run = await applyPatchPlan(connector, plan, {
    approvedOperationIds: approvedIds,
    valueOverrides: overrides,
  });
  await store.recordRun(plan.id, run);

  // Tally per-operation outcomes; the four reported statuses start at zero.
  const tally: Record<string, number> = { applied: 0, conflict: 0, skipped: 0, failed: 0 };
  for (const { status } of run.results) {
    tally[status] = (tally[status] ?? 0) + 1;
  }
  report.push(
    ` applied: ${tally.applied} · conflicts: ${tally.conflict} · skipped: ${tally.skipped} · failed: ${tally.failed}`,
  );
  console.log(report.join("\n"));
  if (run.status === "failed") process.exitCode = 1;
}
1996
+
1283
1997
  async function suggest(args: string[]) {
1284
1998
  const planId = option(args, "--plan-id");
1285
1999
  const planPath = option(args, "--plan");
@@ -1845,9 +2559,27 @@ async function login(args: string[]) {
1845
2559
  console.log(`Stored ${provider} API key in ${credentialsPath()}. \`fullstackgtm call parse\` and \`call score\` use it automatically.`);
1846
2560
  return;
1847
2561
  }
2562
+ if (provider === "apollo") {
2563
+ rejectArgvSecret(args, "--token", "--key", "--api-key");
2564
+ const key = await readSecret("Apollo API key");
2565
+ if (!key) throw new Error("No Apollo key provided.");
2566
+ if (!args.includes("--no-validate")) {
2567
+ const response = await fetch("https://api.apollo.io/api/v1/auth/health", {
2568
+ headers: { "X-Api-Key": key, Accept: "application/json" },
2569
+ });
2570
+ if (!response.ok) {
2571
+ throw new Error(`Apollo rejected the key: ${safeStatus(response)}`);
2572
+ }
2573
+ console.log("Key accepted by the Apollo API.");
2574
+ }
2575
+ const stamp = new Date().toISOString();
2576
+ storeCredential("apollo", { kind: "api_key", accessToken: key, createdAt: stamp, updatedAt: stamp });
2577
+ console.log(`Stored Apollo API key in ${credentialsPath()}. \`fullstackgtm enrich append|refresh\` use it automatically.`);
2578
+ return;
2579
+ }
1848
2580
  if (provider !== "hubspot") {
1849
2581
  throw new Error(
1850
- "login supports: hubspot, salesforce, stripe, anthropic, openai, or --via <hosted url>. Usage: fullstackgtm login <provider> | fullstackgtm login --via https://gtm.example.com",
2582
+ "login supports: hubspot, salesforce, stripe, anthropic, openai, apollo, or --via <hosted url>. Usage: fullstackgtm login <provider> | fullstackgtm login --via https://gtm.example.com",
1851
2583
  );
1852
2584
  }
1853
2585
  const now = new Date().toISOString();
@@ -2074,8 +2806,8 @@ export async function runCli(argv: string[]) {
2074
2806
  }
2075
2807
  // Commands without bespoke help fall back to the top-level usage on --help
2076
2808
  // instead of executing (audit used to silently run the sample audit).
2077
- // call/market/bulk-update print their own richer help.
2078
- if (!["call", "market", "bulk-update"].includes(command) && (args.includes("--help") || args.includes("-h"))) {
2809
+ // call/market/enrich/bulk-update print their own richer help.
2810
+ if (!["call", "market", "enrich", "bulk-update"].includes(command) && (args.includes("--help") || args.includes("-h"))) {
2079
2811
  console.log(usage());
2080
2812
  return;
2081
2813
  }
@@ -2124,10 +2856,26 @@ export async function runCli(argv: string[]) {
2124
2856
  await bulkUpdateCommand(args);
2125
2857
  return;
2126
2858
  }
2859
+ if (command === "dedupe") {
2860
+ await dedupeCommand(args);
2861
+ return;
2862
+ }
2863
+ if (command === "reassign") {
2864
+ await reassignCommand(args);
2865
+ return;
2866
+ }
2867
+ if (command === "fix") {
2868
+ await fixCommand(args);
2869
+ return;
2870
+ }
2127
2871
  if (command === "market") {
2128
2872
  await marketCommand(args);
2129
2873
  return;
2130
2874
  }
2875
+ if (command === "enrich") {
2876
+ await enrichCommand(args);
2877
+ return;
2878
+ }
2131
2879
  if (command === "profiles") {
2132
2880
  profilesCommand(args);
2133
2881
  return;