fullstackgtm 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/enrich.js ADDED
@@ -0,0 +1,724 @@
1
+ import { mkdirSync, readFileSync, readdirSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.js";
4
+ import { HUBSPOT_DEFAULT_FIELD_MAPPINGS } from "./mappings.js";
5
/** File name of the enrich config (quoted in the "no config" error message). */
export const ENRICH_CONFIG_FILE_NAME = "enrich.config.json";
/** Staleness window, in days, used when neither a field nor the policy sets one. */
export const DEFAULT_STALE_DAYS = 90;
/** Object types an enrich config may declare under `match` and `fields`. */
const OBJECT_TYPES = [
    "company",
    "contact",
];
/** Match keys the matcher knows how to read off canonical snapshot records. */
const MATCH_KEYS = {
    company: [
        "domain",
        "name",
    ],
    contact: [
        "email",
        "name",
    ],
};
/** API source ids the MVP can pull from. */
export const SUPPORTED_API_SOURCES = [
    "apollo",
];
15
/**
 * Canonical snapshot fields that enrich is allowed to target, per object type.
 *
 * A config may name a field either by its canonical spelling or by the
 * HubSpot property spelling (so `"crm": "numberofemployees"` and
 * `"crm": "employeeCount"` both resolve). The fill-blanks check reads the
 * current value from the canonical snapshot, so only fields with a canonical
 * home are accepted; anything else is rejected with the accepted names listed
 * in the error.
 */
const CANONICAL_FIELDS = {
    company: ["name", "domain", "industry", "employeeCount", "annualRevenue"],
    contact: ["firstName", "lastName", "email", "phone", "title"],
};
/**
 * Provider property name → canonical field name, obtained by inverting the
 * default HubSpot field mappings for accounts and contacts.
 */
const PROVIDER_FIELD_ALIASES = {
    company: invertMapping(HUBSPOT_DEFAULT_FIELD_MAPPINGS.accounts),
    contact: invertMapping(HUBSPOT_DEFAULT_FIELD_MAPPINGS.contacts),
};
31
/**
 * Swap keys and values of a flat mapping object.
 *
 * @param {Record<string, string>} mapping - canonical → provider names.
 * @returns {Record<string, string>} provider → canonical names; when two
 *   canonical names share a provider name, the later entry wins.
 */
function invertMapping(mapping) {
    return Object.entries(mapping).reduce((byProvider, [canonicalName, providerName]) => {
        byProvider[providerName] = canonicalName;
        return byProvider;
    }, {});
}
37
/**
 * Resolve a config `crm` field name to the canonical snapshot field.
 *
 * @param {string} objectType - "company" or "contact".
 * @param {string} name - canonical field name or provider property spelling.
 * @returns {string} the canonical field name.
 * @throws {Error} when the name is neither a canonical field nor an alias of one.
 */
export function resolveCrmField(objectType, name) {
    const accepted = CANONICAL_FIELDS[objectType];
    if (accepted.includes(name)) {
        return name;
    }
    const aliases = PROVIDER_FIELD_ALIASES[objectType];
    const viaAlias = aliases[name];
    // An alias only counts when it lands on a field enrich may target.
    if (viaAlias && accepted.includes(viaAlias)) {
        return viaAlias;
    }
    const canonicalList = accepted.join(", ");
    const aliasList = Object.keys(aliases).join(", ");
    throw new Error(`enrich config: unknown ${objectType} field "${name}". Accepted canonical fields: ` +
        `${canonicalList} (HubSpot property spellings like ` +
        `${aliasList} also resolve).`);
}
48
/**
 * Abort config validation with a uniformly prefixed error.
 *
 * @param {string} message - description of the problem.
 * @throws {Error} always, with the message prefixed by "enrich config: ".
 */
function fail(message) {
    const prefixed = `enrich config: ${message}`;
    throw new Error(prefixed);
}
51
/** True when `value` is a JSON object: non-null, typeof "object", and not an array. */
function isPlainJsonObject(value) {
    return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
/**
 * Validate `config.sources` and return the declared source ids.
 * Every entry must be an object with a supported kind and, if given, a known format.
 */
function validateEnrichSources(config) {
    if (!isPlainJsonObject(config.sources)) {
        fail('missing "sources" — declare at least one, e.g. { "apollo": { "kind": "api" } }');
    }
    const sourceIds = Object.keys(config.sources);
    if (sourceIds.length === 0) {
        fail('"sources" is empty — declare at least one source');
    }
    for (const [id, source] of Object.entries(config.sources)) {
        if (!isPlainJsonObject(source)) {
            fail(`source "${id}" must be an object`);
        }
        if (source.kind !== "api" && source.kind !== "ingest") {
            fail(`source "${id}": kind must be "api" or "ingest" (got ${JSON.stringify(source.kind)})`);
        }
        if (source.kind === "api" && !SUPPORTED_API_SOURCES.includes(id)) {
            fail(`api source "${id}" is not supported yet — MVP pulls from: ${SUPPORTED_API_SOURCES.join(", ")}. ` +
                'Push-style sources stage data via `enrich ingest` with kind "ingest".');
        }
        if (source.format !== undefined && source.format !== "csv" && source.format !== "json") {
            fail(`source "${id}": format must be "csv" or "json" (got ${JSON.stringify(source.format)})`);
        }
    }
    return sourceIds;
}
/** Validate `config.policy`: only overwrite "never" and a positive defaultStaleDays are accepted. */
function validateEnrichPolicy(config) {
    if (!isPlainJsonObject(config.policy)) {
        fail('missing "policy" — e.g. { "overwrite": "never", "defaultStaleDays": 90 }');
    }
    const overwrite = config.policy.overwrite;
    // Known-but-unimplemented values get their own message before the generic one.
    if (overwrite === "system-only" || overwrite === "always") {
        fail(`policy.overwrite "${overwrite}" is not yet implemented (phase 2 of the conflict ladder — ` +
            'it needs per-field property history). MVP supports only "never" (fill blanks).');
    }
    if (overwrite !== "never") {
        fail(`policy.overwrite must be "never" (got ${JSON.stringify(overwrite)})`);
    }
    const defaultStaleDays = config.policy.defaultStaleDays;
    if (defaultStaleDays !== undefined && (!Number.isFinite(defaultStaleDays) || defaultStaleDays <= 0)) {
        fail(`policy.defaultStaleDays must be a positive number (got ${JSON.stringify(defaultStaleDays)})`);
    }
}
/** Validate `config.match`: known object types, non-empty supported key lists, valid onAmbiguous. */
function validateEnrichMatch(config) {
    if (!isPlainJsonObject(config.match)) {
        fail('missing "match" — e.g. { "company": { "keys": ["domain", "name"] } }');
    }
    for (const [objectType, match] of Object.entries(config.match)) {
        if (!OBJECT_TYPES.includes(objectType)) {
            fail(`match has unknown object type "${objectType}" (use: ${OBJECT_TYPES.join(", ")})`);
        }
        if (!match || !Array.isArray(match.keys) || match.keys.length === 0) {
            fail(`match.${objectType}: "keys" must be a non-empty ordered array`);
        }
        const known = MATCH_KEYS[objectType];
        for (const key of match.keys) {
            if (!known.includes(key)) {
                fail(`match.${objectType}: unknown key "${key}" (supported: ${known.join(", ")})`);
            }
        }
        if (match.onAmbiguous !== undefined && match.onAmbiguous !== "skip" && match.onAmbiguous !== "suggest") {
            fail(`match.${objectType}: onAmbiguous must be "skip" or "suggest" (got ${JSON.stringify(match.onAmbiguous)})`);
        }
    }
}
/**
 * Validate one entry of `config.fields[objectType]`.
 * `seen` collects the canonical fields already mapped for this object type so
 * that two spellings of the same field are reported as a duplicate.
 */
function validateEnrichFieldEntry(config, sourceIds, objectType, field, seen) {
    if (!field || typeof field.crm !== "string" || field.crm.length === 0) {
        fail(`fields.${objectType}: every entry needs a "crm" property name`);
    }
    const canonical = resolveCrmField(objectType, field.crm);
    if (seen.has(canonical)) {
        fail(`fields.${objectType}: duplicate mapping for "${field.crm}"`);
    }
    seen.add(canonical);
    if (!isPlainJsonObject(field.from) || Object.keys(field.from).length === 0) {
        fail(`fields.${objectType}.${field.crm}: "from" must map at least one source to a path`);
    }
    for (const [sourceId, path] of Object.entries(field.from)) {
        // Own-property check: a plain `config.sources[sourceId]` lookup would
        // accept inherited names such as "toString" or "constructor".
        if (!Object.prototype.hasOwnProperty.call(config.sources, sourceId)) {
            fail(`fields.${objectType}.${field.crm}: "from" references undeclared source "${sourceId}" ` +
                `(declared: ${sourceIds.join(", ")})`);
        }
        if (typeof path !== "string" || path.length === 0) {
            fail(`fields.${objectType}.${field.crm}: path for source "${sourceId}" must be a non-empty string`);
        }
    }
    if (field.staleDays !== undefined && (!Number.isFinite(field.staleDays) || field.staleDays <= 0)) {
        fail(`fields.${objectType}.${field.crm}: staleDays must be a positive number`);
    }
    if (field.refresh !== undefined && typeof field.refresh !== "boolean") {
        fail(`fields.${objectType}.${field.crm}: refresh must be true or false`);
    }
    if (field.policy !== undefined && field.policy !== "never") {
        fail(`fields.${objectType}.${field.crm}: per-field policy "${String(field.policy)}" is not yet ` +
            'implemented (phase 2 of the conflict ladder). MVP supports only "never".');
    }
}
/** Validate `config.fields`: known object types, a matching `match` entry, and at least one field overall. */
function validateEnrichFields(config, sourceIds) {
    if (!isPlainJsonObject(config.fields)) {
        fail('missing "fields" — map CRM properties to source paths per object type');
    }
    let anyField = false;
    for (const [objectType, fields] of Object.entries(config.fields)) {
        if (!OBJECT_TYPES.includes(objectType)) {
            fail(`fields has unknown object type "${objectType}" (use: ${OBJECT_TYPES.join(", ")})`);
        }
        if (!Array.isArray(fields)) {
            fail(`fields.${objectType} must be an array`);
        }
        if (!config.match[objectType]) {
            fail(`fields.${objectType} is configured but match.${objectType} is missing — the matcher needs ordered keys`);
        }
        const seen = new Set();
        for (const field of fields) {
            anyField = true;
            validateEnrichFieldEntry(config, sourceIds, objectType, field, seen);
        }
    }
    if (!anyField) {
        fail('"fields" maps nothing — add at least one field entry');
    }
}
/**
 * Strict, up-front validation (the 0.18 lesson: a config crash mid-run is
 * worse than a refused config). Every problem names the offending entry and
 * the accepted values.
 *
 * Sections are checked in the order sources → policy → match → fields, and
 * the first problem found aborts validation.
 *
 * @param {string} raw - JSON text of the enrich config.
 * @returns {object} the parsed config object (the same object JSON.parse produced).
 * @throws {Error} with an "enrich config: " prefix on the first problem found.
 */
export function parseEnrichConfig(raw) {
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        fail(`not valid JSON (${error instanceof Error ? error.message : String(error)})`);
    }
    if (!isPlainJsonObject(parsed)) {
        fail("expected a JSON object with sources, match, fields, and policy");
    }
    const config = parsed;
    const sourceIds = validateEnrichSources(config);
    validateEnrichPolicy(config);
    validateEnrichMatch(config);
    validateEnrichFields(config, sourceIds);
    return config;
}
178
/**
 * Read and validate the enrich config at `path`.
 *
 * @param {string} path - location of the config file.
 * @returns {object} the validated config, as returned by parseEnrichConfig.
 * @throws {Error} when the file cannot be read (the underlying filesystem
 *   error is attached as `cause`, so a permission problem is distinguishable
 *   from a missing file), or when parseEnrichConfig rejects the content.
 */
export function loadEnrichConfig(path) {
    let raw;
    try {
        raw = readFileSync(path, "utf8");
    }
    catch (error) {
        // Message is unchanged for callers; `cause` keeps the real reason
        // (ENOENT, EACCES, EISDIR, …) instead of discarding it.
        throw new Error(`No enrich config at ${path}. Create ${ENRICH_CONFIG_FILE_NAME} (sources/match/fields/policy — ` +
            "see docs/enrich.md) or pass --config <path>.", { cause: error });
    }
    return parseEnrichConfig(raw);
}
189
// ---------------------------------------------------------------------------
// CSV: minimal dependency-free RFC-4180-ish parser (quoted fields, embedded
// commas/newlines, "" escapes, CRLF). Header row maps columns to names.
/**
 * Parse CSV text into one object per data row, keyed by trimmed header name.
 *
 * Lines with no content are skipped, columns whose header is blank are
 * dropped, and cells missing from a short row become "".
 *
 * @param {string} text - CSV text; the first non-empty line is the header.
 * @returns {Array<Record<string, string>>} data rows ([] when there is no header row).
 * @throws {Error} when the text ends inside a quoted field.
 */
export function parseCsv(text) {
    const table = [];
    let cells = [];
    let buffer = "";
    let quoted = false;
    const endCell = () => {
        cells.push(buffer);
        buffer = "";
    };
    // Close the current line unless it is empty (no cells and no pending text).
    const endLine = () => {
        if (buffer.length === 0 && cells.length === 0) {
            return;
        }
        endCell();
        table.push(cells);
        cells = [];
    };
    let pos = 0;
    while (pos < text.length) {
        const ch = text[pos];
        pos += 1;
        if (quoted) {
            if (ch !== '"') {
                buffer += ch;
            }
            else if (text[pos] === '"') {
                // Doubled quote inside a quoted field is a literal quote.
                buffer += '"';
                pos += 1;
            }
            else {
                quoted = false;
            }
            continue;
        }
        switch (ch) {
            case '"':
                quoted = true;
                break;
            case ",":
                endCell();
                break;
            case "\r":
                // Treat CRLF as a single line break.
                if (text[pos] === "\n") {
                    pos += 1;
                }
                endLine();
                break;
            case "\n":
                endLine();
                break;
            default:
                buffer += ch;
        }
    }
    if (quoted) {
        throw new Error("CSV parse error: unterminated quoted field");
    }
    endLine();
    if (table.length === 0) {
        return [];
    }
    const headers = table[0].map((header) => header.trim());
    const records = [];
    for (const row of table.slice(1)) {
        const record = {};
        headers.forEach((header, column) => {
            if (header) {
                record[header] = row[column] ?? "";
            }
        });
        records.push(record);
    }
    return records;
}
261
/**
 * Read a value from a payload: exact key first (CSV headers), then dotted path.
 *
 * Only own properties are consulted, so names inherited from Object.prototype
 * ("toString", "constructor", …) never resolve to a value, and a payload that
 * is not an object yields undefined instead of throwing.
 *
 * @param {unknown} payload - source record payload.
 * @param {string} path - exact key, or dot-separated path into nested objects.
 * @returns {unknown} the value found, or undefined when the path does not
 *   resolve (including when it would have to pass through an array).
 */
export function sourceValueAt(payload, path) {
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
    if (payload === null || typeof payload !== "object") {
        return undefined;
    }
    // A header that itself contains dots ("company.name") wins over path traversal.
    if (hasOwn(payload, path)) {
        return payload[path];
    }
    let current = payload;
    for (const segment of path.split(".")) {
        if (!current || typeof current !== "object" || Array.isArray(current)) {
            return undefined;
        }
        if (!hasOwn(current, segment)) {
            return undefined;
        }
        current = current[segment];
    }
    return current;
}
273
/**
 * Case-insensitive header lookup for ingest rows ("Email" matches key "email").
 *
 * The first header that equals `key` after trimming and lower-casing decides
 * the result, even when its value is blank; only when no header matches is
 * `key` tried as an exact key / dotted path.
 *
 * @param {Record<string, unknown>} row - one staged ingest row.
 * @param {string} key - match key to look up.
 * @returns {string | undefined} the trimmed text value, or undefined when blank or absent.
 */
export function ingestKeyValue(row, key) {
    const headerHit = Object.entries(row).find(([header]) => header.trim().toLowerCase() === key.toLowerCase());
    const rawValue = headerHit ? headerHit[1] : sourceValueAt(row, key);
    const text = valueToString(rawValue);
    return text || undefined;
}
285
/**
 * Render a scalar as text.
 *
 * @param {unknown} value - any value.
 * @returns {string} trimmed text for strings, String(value) for numbers and
 *   booleans, and "" for everything else (null, undefined, objects, arrays, …).
 */
function valueToString(value) {
    switch (typeof value) {
        case "string":
            return value.trim();
        case "number":
        case "boolean":
            return String(value);
        default:
            return "";
    }
}
294
/**
 * Normalise a match-key value so source and CRM values compare equal.
 *
 * @param {string} key - match key name; "domain" gets URL-style cleanup.
 * @param {unknown} value - raw value.
 * @returns {string} lower-cased text. For "domain", a leading http(s)://, a
 *   leading "www." and everything from the first "/" are removed; for any
 *   other key, runs of whitespace collapse to one space. "" when blank.
 */
function normalizeKeyValue(key, value) {
    const lowered = valueToString(value).toLowerCase();
    if (lowered === "") {
        return "";
    }
    if (key !== "domain") {
        return lowered.replace(/\s+/g, " ");
    }
    const withoutScheme = lowered.replace(/^https?:\/\//, "");
    const withoutWww = withoutScheme.replace(/^www\./, "");
    return withoutWww.replace(/\/.*$/, "");
}
306
/**
 * Read the normalised value of a match key off a CRM snapshot record.
 *
 * @param {string} objectType - "company" selects company keys; anything else is treated as a contact.
 * @param {object} record - snapshot record.
 * @param {string} key - match key ("domain"/"name" for companies, "email"/"name" for contacts).
 * @returns {string} normalised value, or "" when the key does not apply to the object type.
 */
function crmKeyValue(objectType, record, key) {
    const isCompany = objectType === "company";
    switch (key) {
        case "domain":
            return isCompany ? normalizeKeyValue("domain", record.domain) : "";
        case "email":
            return isCompany ? "" : normalizeKeyValue("email", record.email);
        case "name": {
            if (isCompany) {
                return normalizeKeyValue("name", record.name);
            }
            // Contacts have no single name field: join first and last.
            const fullName = `${record.firstName ?? ""} ${record.lastName ?? ""}`.trim();
            return normalizeKeyValue("name", fullName);
        }
        default:
            return "";
    }
}
321
/**
 * Find the CRM record a source record refers to, trying match keys in order.
 *
 * The first key that yields at least one hit decides the outcome; keys with
 * no source value or no hits fall through to the next key.
 *
 * @param {object} snapshot - snapshot with `accounts` and `contacts` arrays.
 * @param {string} objectType - "company" searches accounts; otherwise contacts.
 * @param {string[]} keys - ordered match keys.
 * @param {Record<string, unknown>} sourceKeys - the source record's key values.
 * @returns {{status: "matched", recordId: *, matchedKey: string}
 *   | {status: "ambiguous", key: string, candidateIds: Array}
 *   | {status: "unmatched"}}
 */
export function matchSourceRecord(snapshot, objectType, keys, sourceKeys) {
    const pool = objectType === "company" ? snapshot.accounts : snapshot.contacts;
    for (const key of keys) {
        const target = normalizeKeyValue(key, sourceKeys[key]);
        if (target === "") {
            continue;
        }
        const candidateIds = [];
        for (const candidate of pool) {
            if (crmKeyValue(objectType, candidate, key) === target) {
                candidateIds.push(candidate.id);
            }
        }
        if (candidateIds.length > 1) {
            return { status: "ambiguous", key, candidateIds };
        }
        if (candidateIds.length === 1) {
            return { status: "matched", recordId: candidateIds[0], matchedKey: key };
        }
    }
    return { status: "unmatched" };
}
337
// ---------------------------------------------------------------------------
// Plan building
// Mirrors stableHash in rules.ts (FNV-1a); duplicated to keep enrich.ts
// importable without pulling the audit engine (the market.ts precedent).
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of a string.
 *
 * @param {string} value - text to hash.
 * @returns {string} the hash as 8 lower-case hex digits.
 */
function fnv1a(value) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let digest = FNV_OFFSET_BASIS;
    let index = 0;
    while (index < value.length) {
        // Math.imul keeps the multiplication in 32-bit integer space.
        digest = Math.imul(digest ^ value.charCodeAt(index), FNV_PRIME);
        index += 1;
    }
    const unsigned = digest >>> 0;
    return unsigned.toString(16).padStart(8, "0");
}
349
/** After-value written on operations that still need a human to pick the record. */
const PLACEHOLDER_RECORD_SELECTION = "requires_human_record_selection";
/**
 * Map an enrich object type to the object type used on plan operations.
 *
 * @param {string} objectType - enrich object type.
 * @returns {"account" | "contact"} "account" for "company", "contact" for anything else.
 */
function canonicalObjectType(objectType) {
    if (objectType === "company") {
        return "account";
    }
    return "contact";
}
353
/**
 * Read the current value of a field on a snapshot record.
 *
 * @param {object} snapshot - snapshot with `accounts` and `contacts` arrays.
 * @param {string} objectType - "company" searches accounts; otherwise contacts.
 * @param {*} objectId - id of the record to read.
 * @param {string} field - field name to read off the record.
 * @returns {*} the field value, or undefined when no record has that id.
 */
function crmFieldValue(snapshot, objectType, objectId, field) {
    const pool = objectType === "company" ? snapshot.accounts : snapshot.contacts;
    for (const entry of pool) {
        if (entry.id === objectId) {
            return entry[field];
        }
    }
    return undefined;
}
358
/**
 * True for undefined, null, and strings that are empty or whitespace-only.
 * Numbers (including 0) and booleans are never empty.
 */
function isEmptyValue(value) {
    if (value === undefined || value === null) {
        return true;
    }
    return typeof value === "string" && value.trim() === "";
}
/**
 * Values compare as trimmed strings; numbers compare numerically.
 *
 * Two empty values are equal. When either side is a number, the comparison is
 * numeric — but a number never equals a blank: without that guard,
 * Number(null) and Number("") coerce to 0, so a source value of 0 would be
 * reported as "unchanged" against an empty CRM field.
 *
 * @param {unknown} a - first value.
 * @param {unknown} b - second value.
 * @returns {boolean} whether the two values are considered the same.
 */
function sameValue(a, b) {
    const aEmpty = isEmptyValue(a);
    const bEmpty = isEmptyValue(b);
    if (aEmpty && bEmpty) {
        return true;
    }
    if (typeof a === "number" || typeof b === "number") {
        if (aEmpty || bEmpty) {
            return false;
        }
        return Number(a) === Number(b);
    }
    return valueToString(a) === valueToString(b);
}
370
/**
 * Human-readable label for a source record, used in operation reasons and evidence titles.
 *
 * @param {{keys: object, id: *}} record - source record.
 * @returns {string} the first of keys.name, keys.domain, keys.email that is
 *   not null/undefined, falling back to the record id; always a string.
 */
function describeSourceRecord(record) {
    const { name, domain, email } = record.keys;
    const label = name ?? domain ?? email ?? record.id;
    return String(label);
}
374
/**
 * Build the evidence entry that backs the operations emitted for one source record.
 *
 * @param {string} source - source id.
 * @param {string} sourceKind - "api" maps to source system "web".
 * @param {string | undefined} format - for non-api sources, "csv" maps to "csv"; anything else to "manual".
 * @param {object} record - source record (id, objectType, keys, payload).
 * @param {string | undefined} matchedKey - key the record matched on; stored as null when absent.
 * @param {string} capturedAt - timestamp stored on the evidence.
 * @returns {object} evidence entry; `text` is the JSON payload, cut to 1200
 *   characters plus "…" when longer.
 */
function evidenceFor(source, sourceKind, format, record, matchedKey, capturedAt) {
    const MAX_EXCERPT_LENGTH = 1200;
    const serialized = JSON.stringify(record.payload);
    let sourceSystem = "manual";
    if (sourceKind === "api") {
        sourceSystem = "web";
    }
    else if (format === "csv") {
        sourceSystem = "csv";
    }
    const id = `ev_enr_${fnv1a(`${source}:${record.id}`)}`;
    const title = `${source} payload for ${describeSourceRecord(record)}`;
    const text = serialized.length > MAX_EXCERPT_LENGTH
        ? `${serialized.slice(0, MAX_EXCERPT_LENGTH)}…`
        : serialized;
    return {
        id,
        sourceSystem,
        sourceObjectType: record.objectType,
        sourceObjectId: record.id,
        title,
        text,
        capturedAt,
        metadata: { source, sourceRecordId: record.id, matchedKey: matchedKey ?? null },
    };
}
387
/**
 * Normalise a source value to something a set_field operation may write.
 * Numbers pass through; strings and booleans become trimmed text. Blank
 * values and non-scalars (objects, arrays) yield undefined — they have no
 * text form, so writing them would store an empty string.
 */
function enrichWritableValue(sourceValue) {
    if (typeof sourceValue === "number") {
        return sourceValue;
    }
    const text = valueToString(sourceValue);
    return text === "" ? undefined : text;
}
/** Assemble one approval-required set_field operation for a (record, field) cell. */
function enrichSetFieldOperation({ source, objectType, objectId, field, beforeValue, afterValue, reason, riskLevel, rollback, evidenceId }) {
    return {
        id: `op_enr_${fnv1a(`${source}:${objectType}:${objectId}:${field}`)}`,
        objectType: canonicalObjectType(objectType),
        objectId,
        operation: "set_field",
        field,
        beforeValue: beforeValue ?? null,
        afterValue,
        reason,
        sourceRuleOrPolicy: `enrich:${source}:${field}`,
        riskLevel,
        approvalRequired: true,
        rollback,
        evidenceIds: [evidenceId],
    };
}
/**
 * Match source records against the snapshot and emit a patch plan under the
 * conflict policy. Append fills blanks only; refresh proposes updates for
 * stale stamped fields whose source value actually changed (beforeValue =
 * current CRM value → apply-time compare-and-set rejects drifted records).
 *
 * Source values with no writable form (blank, or an object/array) never
 * produce an operation, so a refresh cannot propose replacing a populated
 * field with an empty string.
 *
 * @param {object} options
 * @param {object} options.config - validated enrich config.
 * @param {string} options.source - source id; must be declared in config.sources.
 * @param {string} options.mode - "refresh" restricts work to `workSet`; any other value fills blanks.
 * @param {object} options.snapshot - snapshot with `accounts` and `contacts`.
 * @param {Array<object>} options.records - source records (id, objectType, keys, payload).
 * @param {string} options.runLabel - run label, folded into the plan id.
 * @param {() => Date} [options.now] - clock; defaults to the current time.
 * @param {Array<{objectType: string, objectId: *, field: string}>} [options.workSet] - cells a refresh may touch.
 * @returns {{plan: object, counts: object, stamps: Array, ambiguities: Array, unmatchedSourceIds: Array}}
 * @throws {Error} when `source` is not declared in the config.
 */
export function buildEnrichPlan(options) {
    const { config, source, mode, snapshot, records, runLabel } = options;
    const nowIso = (options.now ?? (() => new Date()))().toISOString();
    const sourceConfig = config.sources[source];
    if (!sourceConfig) {
        throw new Error(`enrich: source "${source}" is not declared in the config`);
    }
    const workSet = options.workSet ?? [];
    const workKeys = new Set(workSet.map((item) => `${item.objectType}|${item.objectId}|${item.field}`));
    const operations = [];
    const evidence = [];
    const stamps = [];
    const ambiguities = [];
    const unmatchedSourceIds = [];
    const counts = { fetched: records.length, matched: 0, unmatched: 0, ambiguous: 0, opsEmitted: 0 };
    const emit = (operation) => {
        operations.push(operation);
        counts.opsEmitted += 1;
    };
    for (const record of records) {
        const match = config.match[record.objectType];
        const fields = (config.fields[record.objectType] ?? []).filter((field) => field.from[source] !== undefined);
        // Nothing to match on, or no field reads from this source: count as unmatched.
        if (!match || fields.length === 0) {
            counts.unmatched += 1;
            unmatchedSourceIds.push(record.id);
            continue;
        }
        const outcome = matchSourceRecord(snapshot, record.objectType, match.keys, record.keys);
        if (outcome.status === "unmatched") {
            counts.unmatched += 1;
            unmatchedSourceIds.push(record.id);
            continue;
        }
        if (outcome.status === "ambiguous") {
            counts.ambiguous += 1;
            ambiguities.push({ sourceRecordId: record.id, key: outcome.key, candidateIds: outcome.candidateIds });
            if ((match.onAmbiguous ?? "skip") === "skip") {
                continue;
            }
            // onAmbiguous: suggest — emit placeholder operations (one per candidate
            // per field) so the existing suggest → plans approve --values-from /
            // --value chain resolves the record selection. Apply refuses to write
            // requires_human_* placeholders without an explicit value.
            const recordEvidence = evidenceFor(source, sourceConfig.kind, sourceConfig.format, record, undefined, nowIso);
            let emittedForRecord = false;
            for (const field of fields) {
                const sourceValue = sourceValueAt(record.payload, field.from[source]);
                if (enrichWritableValue(sourceValue) === undefined) {
                    continue; // blank or non-scalar: nothing a human could approve
                }
                const canonicalField = resolveCrmField(record.objectType, field.crm);
                for (const candidateId of outcome.candidateIds) {
                    const currentValue = crmFieldValue(snapshot, record.objectType, candidateId, canonicalField);
                    if (!isEmptyValue(currentValue)) {
                        continue; // policy never: blanks only
                    }
                    emittedForRecord = true;
                    emit(enrichSetFieldOperation({
                        source,
                        objectType: record.objectType,
                        objectId: candidateId,
                        field: canonicalField,
                        beforeValue: currentValue,
                        afterValue: PLACEHOLDER_RECORD_SELECTION,
                        reason: `${source} record "${describeSourceRecord(record)}" matched ${outcome.candidateIds.length} CRM ` +
                            `records on ${outcome.key} (${outcome.candidateIds.join(", ")}). If ${candidateId} is the right ` +
                            `record, approve with --value <opId>=${JSON.stringify(valueToString(sourceValue))}.`,
                        riskLevel: "medium",
                        rollback: "Clear the field (the before value was empty) if the selection was wrong.",
                        evidenceId: recordEvidence.id,
                    }));
                }
            }
            if (emittedForRecord) {
                evidence.push(recordEvidence);
            }
            continue;
        }
        // Matched.
        counts.matched += 1;
        const recordEvidence = evidenceFor(source, sourceConfig.kind, sourceConfig.format, record, outcome.matchedKey, nowIso);
        let emittedForRecord = false;
        for (const field of fields) {
            const canonicalField = resolveCrmField(record.objectType, field.crm);
            const sourceValue = sourceValueAt(record.payload, field.from[source]);
            const currentValue = crmFieldValue(snapshot, record.objectType, outcome.recordId, canonicalField);
            const cellKey = `${record.objectType}|${outcome.recordId}|${canonicalField}`;
            const afterValue = enrichWritableValue(sourceValue);
            if (mode === "refresh") {
                // Refresh touches ONLY stamped, stale cells from the work set.
                if (!workKeys.has(cellKey)) {
                    continue;
                }
                // Re-stamp every checked cell (changed or not): the staleness clock
                // resets because the source was actually consulted.
                stamps.push({
                    objectType: record.objectType,
                    objectId: outcome.recordId,
                    field: canonicalField,
                    enrichedAt: nowIso,
                    sourceRecordId: record.id,
                    value: sourceValue,
                });
                if (afterValue === undefined) {
                    continue; // source went blank (or non-scalar): never propose clearing
                }
                if (sameValue(sourceValue, currentValue)) {
                    continue; // unchanged: no op
                }
                emittedForRecord = true;
                emit(enrichSetFieldOperation({
                    source,
                    objectType: record.objectType,
                    objectId: outcome.recordId,
                    field: canonicalField,
                    beforeValue: currentValue,
                    afterValue,
                    reason: `${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
                        `${outcome.matchedKey}) reports a changed value for ${canonicalField}.`,
                    riskLevel: isEmptyValue(currentValue) ? "low" : "medium",
                    rollback: "Restore the before value if the refreshed value is wrong.",
                    evidenceId: recordEvidence.id,
                }));
                continue;
            }
            // Append: fill blanks only (policy "never").
            if (afterValue === undefined) {
                continue;
            }
            if (!isEmptyValue(currentValue)) {
                continue;
            }
            emittedForRecord = true;
            emit(enrichSetFieldOperation({
                source,
                objectType: record.objectType,
                objectId: outcome.recordId,
                field: canonicalField,
                beforeValue: currentValue,
                afterValue,
                reason: `${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
                    `${outcome.matchedKey}) fills the blank ${canonicalField}.`,
                riskLevel: "low",
                rollback: "Clear the field (the before value was empty) if the enrichment is wrong.",
                evidenceId: recordEvidence.id,
            }));
            stamps.push({
                objectType: record.objectType,
                objectId: outcome.recordId,
                field: canonicalField,
                enrichedAt: nowIso,
                sourceRecordId: record.id,
                value: afterValue,
            });
        }
        // Evidence is attached only for records that produced an operation.
        if (emittedForRecord) {
            evidence.push(recordEvidence);
        }
    }
    const plan = {
        id: `patch_plan_enr_${fnv1a(`${source}:${mode}:${runLabel}:${nowIso}`)}`,
        title: `Enrichment ${mode} — ${source}`,
        createdAt: nowIso,
        status: operations.length > 0 ? "needs_approval" : "draft",
        dryRun: true,
        summary: `${counts.opsEmitted} proposed operation(s) from ${source} ${mode} (${counts.fetched} source ` +
            `record(s): ${counts.matched} matched, ${counts.unmatched} unmatched, ${counts.ambiguous} ambiguous). ` +
            `Conflict policy: ${config.policy.overwrite}.`,
        findings: [],
        evidence,
        operations,
    };
    return { plan, counts, stamps, ambiguities, unmatchedSourceIds };
}
564
// ---------------------------------------------------------------------------
// Staleness: compute the refresh work set from run-store stamps.
/**
 * Latest stamp per (objectType, objectId, field) across a source's runs.
 *
 * @param {Iterable<{source: string, stamps: Iterable<object>}>} runs - stored runs.
 * @param {string} source - only runs of this source are considered.
 * @returns {Map<string, object>} stamps keyed by "objectType|objectId|field";
 *   a later stamp replaces an earlier one only when its `enrichedAt` is
 *   strictly greater, so the first seen wins on ties.
 */
export function latestStamps(runs, source) {
    const newestByCell = new Map();
    for (const run of runs) {
        if (run.source === source) {
            for (const stamp of run.stamps) {
                const cell = [stamp.objectType, stamp.objectId, stamp.field].map(String).join("|");
                const previous = newestByCell.get(cell);
                if (previous && !(previous.enrichedAt < stamp.enrichedAt)) {
                    continue;
                }
                newestByCell.set(cell, stamp);
            }
        }
    }
    return newestByCell;
}
581
/**
 * Staleness window, in days, that applies to one field.
 *
 * @param {object} config - enrich config.
 * @param {string} objectType - "company" or "contact".
 * @param {string} field - canonical field name.
 * @returns {number} the field entry's staleDays, else policy.defaultStaleDays,
 *   else DEFAULT_STALE_DAYS.
 */
export function staleDaysFor(config, objectType, field) {
    const candidates = config.fields[objectType] ?? [];
    const entry = candidates.find((candidate) => resolveCrmField(objectType, candidate.crm) === field);
    const perField = entry?.staleDays;
    if (perField !== undefined && perField !== null) {
        return perField;
    }
    return config.policy.defaultStaleDays ?? DEFAULT_STALE_DAYS;
}
585
/**
 * Stale (record, field) cells: stamped by this source, refresh-eligible in
 * the config, and older than the staleness window (per-field staleDays →
 * policy.defaultStaleDays → 90; --stale-days overrides all).
 *
 * @param {object} config - enrich config.
 * @param {Iterable<object>} runs - stored runs to read stamps from.
 * @param {string} source - source id whose stamps are considered.
 * @param {{now?: () => Date, staleDaysOverride?: number}} [options]
 * @returns {Array<{objectType: string, objectId: *, field: string}>} cells to refresh.
 */
export function selectStaleWork(config, runs, source, options = {}) {
    const MS_PER_DAY = 86_400_000;
    const clock = options.now ?? (() => new Date());
    const nowMs = clock().getTime();
    const staleCells = [];
    latestStamps(runs, source).forEach((stamp) => {
        const { objectType, objectId, field } = stamp;
        const candidates = config.fields[objectType] ?? [];
        // The field must still be mapped from this source and opted in to refresh.
        const entry = candidates.find((candidate) => resolveCrmField(objectType, candidate.crm) === field &&
            candidate.from[source] !== undefined);
        if (!entry?.refresh) {
            return;
        }
        const windowDays = options.staleDaysOverride ?? entry.staleDays ?? config.policy.defaultStaleDays ?? DEFAULT_STALE_DAYS;
        const ageDays = (nowMs - Date.parse(stamp.enrichedAt)) / MS_PER_DAY;
        if (ageDays > windowDays) {
            staleCells.push({ objectType, objectId, field });
        }
    });
    return staleCells;
}
606
/**
 * Deterministic run id derived from the source and run label.
 *
 * @param {string} source - source id.
 * @param {string} runLabel - run label.
 * @returns {string} "enr_" followed by the FNV-1a hash of "source|runLabel".
 */
export function enrichRunId(source, runLabel) {
    const digest = fnv1a(`${source}|${runLabel}`);
    return `enr_${digest}`;
}
609
/**
 * Directory that holds enrich run files.
 *
 * @param {string} [baseDir] - base directory; when null/undefined, credentialsDir() is used.
 * @returns {string} `<base>/enrich/runs`.
 */
export function enrichRunsDir(baseDir) {
    const root = baseDir ?? credentialsDir();
    return join(root, "enrich", "runs");
}
612
/**
 * File-backed store for enrich runs: one `<runLabel>.json` file per run.
 *
 * @param {string} [directory] - directory for run files; defaults to enrichRunsDir().
 * @returns {{append: Function, update: Function, get: Function, list: Function, latest: Function}}
 *   async methods. `append` refuses an existing label, `update` requires an
 *   existing run with the same id, `get` returns the run or null, `list`
 *   returns readable runs ordered by `startedAt`, and `latest` returns the
 *   last run matching an optional { source, mode } filter, or null.
 */
export function createFileEnrichRunStore(directory) {
    const RUN_FILE_SUFFIX = ".json";
    const storeDir = directory ?? enrichRunsDir();
    // Labels become file names, so only word characters, dots and dashes are allowed.
    const pathFor = (runLabel) => {
        if (!/^[\w.-]+$/.test(runLabel)) {
            throw new Error(`Invalid run label: ${runLabel}`);
        }
        return join(storeDir, `${runLabel}${RUN_FILE_SUFFIX}`);
    };
    // Any failure (invalid label, missing file, unparseable JSON) reads as "no such run".
    const load = (runLabel) => {
        try {
            const text = readFileSync(pathFor(runLabel), "utf8");
            return JSON.parse(text);
        }
        catch {
            return null;
        }
    };
    const persist = (run) => {
        // Run files carry CRM record ids and source values; keep them owner-only
        // like plan files (and lock the home down even before any login).
        if (!directory) {
            ensureSecureHomeDir();
        }
        mkdirSync(storeDir, { recursive: true, mode: 0o700 });
        const serialized = `${JSON.stringify(run, null, 2)}\n`;
        writeSecureFile(pathFor(run.runLabel), serialized);
        return run;
    };
    const loadAll = () => {
        let fileNames;
        try {
            fileNames = readdirSync(storeDir);
        }
        catch {
            return [];
        }
        const runs = [];
        for (const fileName of fileNames) {
            if (!fileName.endsWith(RUN_FILE_SUFFIX)) {
                continue;
            }
            const run = load(fileName.slice(0, -RUN_FILE_SUFFIX.length));
            if (run !== null) {
                runs.push(run);
            }
        }
        return runs.sort((a, b) => a.startedAt.localeCompare(b.startedAt));
    };
    const append = async (run) => {
        if (load(run.runLabel)) {
            throw new Error(`Run "${run.runLabel}" already exists — enrich runs are append-only; use a new run label`);
        }
        return persist(run);
    };
    const update = async (run) => {
        const existing = load(run.runLabel);
        if (!existing) {
            throw new Error(`No enrich run "${run.runLabel}" to update`);
        }
        if (existing.id !== run.id) {
            throw new Error(`Run "${run.runLabel}" belongs to a different run id (${existing.id})`);
        }
        return persist(run);
    };
    const get = async (runLabel) => load(runLabel);
    const list = async () => loadAll();
    const latest = async (filter = {}) => {
        const matchesFilter = (run) => (filter.source === undefined || run.source === filter.source) &&
            (filter.mode === undefined || run.mode === filter.mode);
        const matching = loadAll().filter(matchesFilter);
        return matching.length ? matching[matching.length - 1] : null;
    };
    return { append, update, get, list, latest };
}
678
// ---------------------------------------------------------------------------
// Ingest staging helpers
/**
 * Infer the object type of staged rows from the configured match keys: the
 * type whose key columns actually appear on the rows. Exactly one hit wins;
 * zero or two is an error asking for --objects.
 *
 * Only the first row is inspected, and an object type is a candidate only
 * when it has a match config and at least one field mapped from `source`.
 *
 * @param {object} config - enrich config.
 * @param {string} source - source id the rows were staged for.
 * @param {Array<object>} rows - staged rows.
 * @returns {string} the inferred object type.
 * @throws {Error} when no object type, or more than one, fits the first row.
 */
export function inferIngestObjectType(config, source, rows) {
    const sample = rows[0] ?? {};
    const candidates = [];
    for (const objectType of OBJECT_TYPES) {
        const match = config.match[objectType];
        const mapsThisSource = (config.fields[objectType] ?? []).some((field) => field.from[source] !== undefined);
        if (!match || !mapsThisSource) {
            continue;
        }
        const hasKeyColumn = match.keys.some((key) => ingestKeyValue(sample, key) !== undefined);
        if (hasKeyColumn) {
            candidates.push(objectType);
        }
    }
    switch (candidates.length) {
        case 1:
            return candidates[0];
        case 0:
            throw new Error(`enrich ingest: cannot tell whether these rows are companies or contacts — no configured match key ` +
                `column found. Pass --objects companies|contacts, or add the key column to the export.`);
        default:
            throw new Error(`enrich ingest: rows carry match keys for ${candidates.join(" and ")} — pass --objects companies|contacts to disambiguate.`);
    }
}
702
/**
 * Turn staged ingest rows into source records for the matcher.
 *
 * @param {object} config - enrich config; `config.match[objectType]` supplies the keys to extract.
 * @param {string} source - source id, used in record ids and the error hint.
 * @param {{runLabel: string, stagedObjectType?: string, staged?: Array<object>}} run - stored run.
 * @returns {Array<{id: string, objectType: string, keys: object, payload: object}>}
 *   one record per row, with id "<source>:<runLabel>:row-<n>" (n starting at 1).
 * @throws {Error} when the run has no staged rows or object type, or when the
 *   config has no match entry for that object type.
 */
export function stagedSourceRecords(config, source, run) {
    const objectType = run.stagedObjectType;
    const rows = run.staged ?? [];
    const hasStagedData = Boolean(objectType) && rows.length > 0;
    if (!hasStagedData) {
        throw new Error(`enrich: run "${run.runLabel}" has no staged data — stage a Clay export first: ` +
            `fullstackgtm enrich ingest <file.csv|payload.json> --source ${source}`);
    }
    const match = config.match[objectType];
    if (!match) {
        throw new Error(`enrich: no match config for ${objectType}`);
    }
    const toSourceRecord = (row, index) => {
        const keys = match.keys.reduce((collected, key) => {
            collected[key] = ingestKeyValue(row, key);
            return collected;
        }, {});
        const rowNumber = index + 1;
        return {
            id: `${source}:${run.runLabel}:row-${rowNumber}`,
            objectType,
            keys,
            payload: row,
        };
    };
    return rows.map(toSourceRecord);
}