fullstackgtm 0.21.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +15 -0
- package/dist/bulkUpdate.d.ts +16 -1
- package/dist/bulkUpdate.js +88 -5
- package/dist/cli.js +670 -8
- package/dist/dedupe.d.ts +14 -0
- package/dist/dedupe.js +140 -0
- package/dist/enrich.d.ts +220 -0
- package/dist/enrich.js +724 -0
- package/dist/enrichApollo.d.ts +59 -0
- package/dist/enrichApollo.js +192 -0
- package/dist/index.d.ts +5 -1
- package/dist/index.js +5 -1
- package/dist/marketReport.js +97 -44
- package/dist/reassign.d.ts +19 -0
- package/dist/reassign.js +87 -0
- package/dist/suggest.js +67 -5
- package/llms.txt +15 -0
- package/package.json +1 -1
- package/src/bulkUpdate.ts +109 -6
- package/src/cli.ts +756 -8
- package/src/dedupe.ts +182 -0
- package/src/enrich.ts +1016 -0
- package/src/enrichApollo.ts +250 -0
- package/src/index.ts +48 -1
- package/src/marketReport.ts +116 -62
- package/src/reassign.ts +117 -0
- package/src/suggest.ts +69 -5
package/src/enrich.ts
ADDED
|
@@ -0,0 +1,1016 @@
|
|
|
1
|
+
import { mkdirSync, readFileSync, readdirSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { credentialsDir, ensureSecureHomeDir, writeSecureFile } from "./credentials.ts";
|
|
4
|
+
import { HUBSPOT_DEFAULT_FIELD_MAPPINGS } from "./mappings.ts";
|
|
5
|
+
import type {
|
|
6
|
+
CanonicalGtmSnapshot,
|
|
7
|
+
GtmEvidence,
|
|
8
|
+
PatchOperation,
|
|
9
|
+
PatchPlan,
|
|
10
|
+
} from "./types.ts";
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* The enrich layer: governed append/refresh of third-party data into the CRM.
|
|
14
|
+
*
|
|
15
|
+
* Every enrichment vendor ships fire-and-forget writeback — data lands without
|
|
16
|
+
* a diff, without approval, over whatever a human typed. This layer inverts
|
|
17
|
+
* that: a source (Apollo pull, Clay ingest) feeds a deterministic matcher,
|
|
18
|
+
* the matcher feeds a fill-blanks-only patch plan, and the plan goes through
|
|
19
|
+
* the existing dry-run → approval → apply contract. Every proposed value is
|
|
20
|
+
* traceable to the source payload that produced it (`GtmEvidence` on the
|
|
21
|
+
* plan), and every write carries a `beforeValue` for apply-time
|
|
22
|
+
* compare-and-set.
|
|
23
|
+
*
|
|
24
|
+
* State lives in a profile-scoped, append-only run store
|
|
25
|
+
* (`~/.fullstackgtm/profiles/<profile>/enrich/runs/`) that is checkpoint,
|
|
26
|
+
* staleness ledger, and observability surface in one. The CLI never writes
|
|
27
|
+
* `fsgtm_enriched_at`-style custom properties into the customer's portal.
|
|
28
|
+
*
|
|
29
|
+
* Recurring execution belongs to the horizontal scheduler (docs/schedule.md);
|
|
30
|
+
* enrich owns no cron logic.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// Config: enrich.config.json
|
|
35
|
+
|
|
36
|
+
/** CRM object kinds the enrich layer can target. */
export type EnrichObjectType = "company" | "contact";

/** How a source delivers data: pulled from an API ("api") or staged via ingest ("ingest"). */
export type EnrichSourceKind = "api" | "ingest";

/** One entry under `sources` in the config. */
export type EnrichSourceConfig = {
  kind: EnrichSourceKind;
  /** Ingest staging format: "csv" (column headers) or "json" (dotted paths). */
  format?: "csv" | "json";
};

/** What to do when a match key hits more than one CRM record. */
export type EnrichAmbiguousPolicy = "skip" | "suggest";

/** Matcher settings for one object type. */
export type EnrichMatchConfig = {
  /** Match keys in priority order, evaluated against the snapshot. */
  keys: string[];
  /** Multi-hit behavior; treated as "skip" when omitted. */
  onAmbiguous?: EnrichAmbiguousPolicy;
};

/** Mapping of one CRM property to the source locations that can supply it. */
export type EnrichFieldConfig = {
  /** CRM property: a canonical field name, or a default HubSpot property name. */
  crm: string;
  /** sourceId → dotted JSON path (api/json) or column header (ingest csv). */
  from: Record<string, string>;
  /** Opt into `enrich refresh`; fields without it are set once, never revisited. */
  refresh?: boolean;
  /** Staleness window for refresh; falls back to policy.defaultStaleDays. */
  staleDays?: number;
  /** Per-field conflict policy override. MVP: only "never". */
  policy?: "never";
};

/** Global conflict policy. */
export type EnrichPolicyConfig = {
  /** Conflict policy ladder. MVP ships "never" (fill blanks only). */
  overwrite: "never";
  defaultStaleDays?: number;
};

/** Shape of a validated enrich.config.json. */
export type EnrichConfig = {
  sources: Record<string, EnrichSourceConfig>;
  match: Partial<Record<EnrichObjectType, EnrichMatchConfig>>;
  fields: Partial<Record<EnrichObjectType, EnrichFieldConfig[]>>;
  policy: EnrichPolicyConfig;
};
|
|
80
|
+
|
|
81
|
+
/** File name of the enrich config; quoted in the "No enrich config" hint raised by loadEnrichConfig. */
export const ENRICH_CONFIG_FILE_NAME = "enrich.config.json";
|
|
82
|
+
|
|
83
|
+
// Staleness window in days.
// NOTE(review): presumably the fallback when neither a field's staleDays nor
// policy.defaultStaleDays is set — the consumer is outside this view; confirm.
export const DEFAULT_STALE_DAYS = 90;
|
|
84
|
+
|
|
85
|
+
/** Every object type accepted as a key under `match` and `fields` during config validation. */
const OBJECT_TYPES: EnrichObjectType[] = ["company", "contact"];
|
|
86
|
+
|
|
87
|
+
/**
 * Per object type, the match keys the matcher can read off canonical snapshot
 * records. Config validation rejects any `match.<type>.keys` entry not listed here.
 */
const MATCH_KEYS: Record<EnrichObjectType, string[]> = {
  company: ["domain", "name"],
  contact: ["email", "name"],
};
|
|
92
|
+
|
|
93
|
+
/** Source ids that may be declared with kind "api" in the MVP; any other api id is refused at config validation. */
export const SUPPORTED_API_SOURCES: string[] = ["apollo"];
|
|
95
|
+
|
|
96
|
+
/**
 * Canonical snapshot fields enrich is allowed to target, per object type.
 *
 * A config `crm` value may be one of these names or a HubSpot property
 * spelling that aliases to one (so `"crm": "numberofemployees"` and
 * `"crm": "employeeCount"` both resolve). The fill-blanks check reads the
 * current value from the canonical snapshot, so only fields with a canonical
 * home are accepted; resolveCrmField lists the accepted names in its error.
 */
const CANONICAL_FIELDS: Record<EnrichObjectType, string[]> = {
  company: ["name", "domain", "industry", "employeeCount", "annualRevenue"],
  contact: ["firstName", "lastName", "email", "phone", "title"],
};
|
|
108
|
+
|
|
109
|
+
/**
 * Provider property name → canonical field name, per object type, derived by
 * inverting the default HubSpot field mappings. "company" uses the `accounts`
 * mapping and "contact" uses the `contacts` mapping.
 */
const PROVIDER_FIELD_ALIASES: Record<EnrichObjectType, Record<string, string>> = {
  company: invertMapping(HUBSPOT_DEFAULT_FIELD_MAPPINGS.accounts),
  contact: invertMapping(HUBSPOT_DEFAULT_FIELD_MAPPINGS.contacts),
};
|
|
113
|
+
|
|
114
|
+
/**
 * Flip a canonical → provider mapping into provider → canonical.
 * When two canonical names share a provider name, the later entry wins.
 *
 * Kept as a hoisted function declaration: PROVIDER_FIELD_ALIASES calls it
 * during module initialization, above this definition.
 */
function invertMapping(mapping: Record<string, string>): Record<string, string> {
  const byProvider: Record<string, string> = {};
  Object.keys(mapping).forEach((canonicalName) => {
    byProvider[mapping[canonicalName]] = canonicalName;
  });
  return byProvider;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Map a config `crm` name onto the canonical snapshot field it refers to.
 *
 * Canonical names pass through unchanged; provider spellings are translated
 * through the alias table and accepted only when they land on a canonical field.
 *
 * @throws Error naming the accepted canonical fields and alias spellings when
 *   `name` resolves to neither.
 */
export function resolveCrmField(objectType: EnrichObjectType, name: string): string {
  const accepted = CANONICAL_FIELDS[objectType];
  const aliases = PROVIDER_FIELD_ALIASES[objectType];
  const resolved = accepted.includes(name) ? name : aliases[name];
  if (resolved && accepted.includes(resolved)) return resolved;
  throw new Error(
    `enrich config: unknown ${objectType} field "${name}". Accepted canonical fields: ` +
      `${accepted.join(", ")} (HubSpot property spellings like ` +
      `${Object.keys(aliases).join(", ")} also resolve).`,
  );
}
|
|
131
|
+
|
|
132
|
+
/**
 * Abort config validation. Always throws an Error whose message is `message`
 * prefixed with "enrich config: "; the `never` return type lets callers rely
 * on it for control-flow narrowing.
 */
function fail(message: string): never {
  const problem = new Error("enrich config: " + message);
  throw problem;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Parse and strictly validate the text of an enrich config.
 *
 * Validation is up-front (the 0.18 lesson: a config crash mid-run is worse
 * than a refused config). Sections are checked in a fixed order — sources,
 * policy, match, fields — and the first problem found is thrown; every
 * message names the offending entry and the accepted values.
 *
 * @param raw JSON text of the config.
 * @returns The object produced by JSON.parse, typed as EnrichConfig. No
 *   defaults are filled in.
 * @throws Error prefixed "enrich config: " for the first violation found.
 */
export function parseEnrichConfig(raw: string): EnrichConfig {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    fail(`not valid JSON (${error instanceof Error ? error.message : String(error)})`);
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    fail("expected a JSON object with sources, match, fields, and policy");
  }
  const config = parsed as Partial<EnrichConfig>;

  const sources = validateEnrichSources(config.sources);
  validateEnrichPolicy(config.policy);
  const match = validateEnrichMatch(config.match);
  validateEnrichFields(config.fields, sources, match);

  return config as EnrichConfig;
}

/** Check the `sources` section: non-empty object, known kinds, supported api ids, valid formats. */
function validateEnrichSources(sources: EnrichConfig["sources"] | undefined): EnrichConfig["sources"] {
  if (!sources || typeof sources !== "object" || Array.isArray(sources)) {
    fail('missing "sources" — declare at least one, e.g. { "apollo": { "kind": "api" } }');
  }
  if (Object.keys(sources).length === 0) fail('"sources" is empty — declare at least one source');
  for (const [id, source] of Object.entries(sources)) {
    if (!source || typeof source !== "object") fail(`source "${id}" must be an object`);
    if (source.kind !== "api" && source.kind !== "ingest") {
      fail(`source "${id}": kind must be "api" or "ingest" (got ${JSON.stringify(source.kind)})`);
    }
    if (source.kind === "api" && !SUPPORTED_API_SOURCES.includes(id)) {
      fail(
        `api source "${id}" is not supported yet — MVP pulls from: ${SUPPORTED_API_SOURCES.join(", ")}. ` +
          'Push-style sources stage data via `enrich ingest` with kind "ingest".',
      );
    }
    if (source.format !== undefined && source.format !== "csv" && source.format !== "json") {
      fail(`source "${id}": format must be "csv" or "json" (got ${JSON.stringify(source.format)})`);
    }
  }
  return sources;
}

/** Check the `policy` section: overwrite must be "never"; defaultStaleDays, if present, a positive number. */
function validateEnrichPolicy(policy: EnrichPolicyConfig | undefined): void {
  if (!policy || typeof policy !== "object") {
    fail('missing "policy" — e.g. { "overwrite": "never", "defaultStaleDays": 90 }');
  }
  // Widened to unknown: the JSON may hold values the static type does not admit.
  const overwrite: unknown = policy.overwrite;
  if (overwrite === "system-only" || overwrite === "always") {
    fail(
      `policy.overwrite "${overwrite}" is not yet implemented (phase 2 of the conflict ladder — ` +
        'it needs per-field property history). MVP supports only "never" (fill blanks).',
    );
  }
  if (overwrite !== "never") {
    fail(`policy.overwrite must be "never" (got ${JSON.stringify(overwrite)})`);
  }
  const { defaultStaleDays } = policy;
  if (defaultStaleDays !== undefined && (!Number.isFinite(defaultStaleDays) || defaultStaleDays <= 0)) {
    fail(`policy.defaultStaleDays must be a positive number (got ${JSON.stringify(defaultStaleDays)})`);
  }
}

/** Check the `match` section: known object types, non-empty supported keys, valid onAmbiguous. */
function validateEnrichMatch(match: EnrichConfig["match"] | undefined): EnrichConfig["match"] {
  if (!match || typeof match !== "object" || Array.isArray(match)) {
    fail('missing "match" — e.g. { "company": { "keys": ["domain", "name"] } }');
  }
  for (const [objectType, entry] of Object.entries(match)) {
    if (!OBJECT_TYPES.includes(objectType as EnrichObjectType)) {
      fail(`match has unknown object type "${objectType}" (use: ${OBJECT_TYPES.join(", ")})`);
    }
    if (!entry || !Array.isArray(entry.keys) || entry.keys.length === 0) {
      fail(`match.${objectType}: "keys" must be a non-empty ordered array`);
    }
    const known = MATCH_KEYS[objectType as EnrichObjectType];
    for (const key of entry.keys) {
      if (!known.includes(key)) {
        fail(`match.${objectType}: unknown key "${key}" (supported: ${known.join(", ")})`);
      }
    }
    if (entry.onAmbiguous !== undefined && entry.onAmbiguous !== "skip" && entry.onAmbiguous !== "suggest") {
      fail(`match.${objectType}: onAmbiguous must be "skip" or "suggest" (got ${JSON.stringify(entry.onAmbiguous)})`);
    }
  }
  return match;
}

/** Check the `fields` section against the already-validated sources and match sections. */
function validateEnrichFields(
  fields: EnrichConfig["fields"] | undefined,
  sources: EnrichConfig["sources"],
  match: EnrichConfig["match"],
): void {
  if (!fields || typeof fields !== "object" || Array.isArray(fields)) {
    fail('missing "fields" — map CRM properties to source paths per object type');
  }
  const declaredSources = Object.keys(sources);
  let anyField = false;
  for (const [rawType, entries] of Object.entries(fields)) {
    if (!OBJECT_TYPES.includes(rawType as EnrichObjectType)) {
      fail(`fields has unknown object type "${rawType}" (use: ${OBJECT_TYPES.join(", ")})`);
    }
    const objectType = rawType as EnrichObjectType;
    if (!Array.isArray(entries)) fail(`fields.${objectType} must be an array`);
    if (!match[objectType]) {
      fail(`fields.${objectType} is configured but match.${objectType} is missing — the matcher needs ordered keys`);
    }
    // Canonical names already mapped for this object type (duplicate detection).
    const seen = new Set<string>();
    for (const field of entries) {
      anyField = true;
      if (!field || typeof field.crm !== "string" || field.crm.length === 0) {
        fail(`fields.${objectType}: every entry needs a "crm" property name`);
      }
      const canonical = resolveCrmField(objectType, field.crm);
      if (seen.has(canonical)) fail(`fields.${objectType}: duplicate mapping for "${field.crm}"`);
      seen.add(canonical);
      if (!field.from || typeof field.from !== "object" || Object.keys(field.from).length === 0) {
        fail(`fields.${objectType}.${field.crm}: "from" must map at least one source to a path`);
      }
      for (const [sourceId, path] of Object.entries(field.from)) {
        // Own-property check: a plain lookup would accept inherited names such
        // as "toString" or "constructor" as if they were declared sources.
        if (!Object.prototype.hasOwnProperty.call(sources, sourceId)) {
          fail(
            `fields.${objectType}.${field.crm}: "from" references undeclared source "${sourceId}" ` +
              `(declared: ${declaredSources.join(", ")})`,
          );
        }
        if (typeof path !== "string" || path.length === 0) {
          fail(`fields.${objectType}.${field.crm}: path for source "${sourceId}" must be a non-empty string`);
        }
      }
      if (field.staleDays !== undefined && (!Number.isFinite(field.staleDays) || field.staleDays <= 0)) {
        fail(`fields.${objectType}.${field.crm}: staleDays must be a positive number`);
      }
      if (field.refresh !== undefined && typeof field.refresh !== "boolean") {
        fail(`fields.${objectType}.${field.crm}: refresh must be true or false`);
      }
      if (field.policy !== undefined && field.policy !== "never") {
        fail(
          `fields.${objectType}.${field.crm}: per-field policy "${String(field.policy)}" is not yet ` +
            'implemented (phase 2 of the conflict ladder). MVP supports only "never".',
        );
      }
    }
  }
  if (!anyField) fail('"fields" maps nothing — add at least one field entry');
}
|
|
270
|
+
|
|
271
|
+
/**
 * Read an enrich config file from disk and validate it with parseEnrichConfig.
 *
 * @param path Location of the config file.
 * @returns The validated config.
 * @throws Error "No enrich config at …" when nothing exists at `path`;
 *   "Cannot read enrich config at …" for any other read failure (permissions,
 *   path is a directory, …); parseEnrichConfig's errors for invalid content.
 */
export function loadEnrichConfig(path: string): EnrichConfig {
  let raw: string;
  try {
    raw = readFileSync(path, "utf8");
  } catch (error) {
    const code =
      typeof error === "object" && error !== null ? (error as { code?: unknown }).code : undefined;
    // Only a genuinely absent file gets the "create one" hint; other failures
    // surface their real cause instead of being reported as a missing config.
    if (code !== undefined && code !== "ENOENT" && code !== "ENOTDIR") {
      throw new Error(
        `Cannot read enrich config at ${path}: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
    throw new Error(
      `No enrich config at ${path}. Create ${ENRICH_CONFIG_FILE_NAME} (sources/match/fields/policy — ` +
        "see docs/enrich.md) or pass --config <path>.",
    );
  }
  return parseEnrichConfig(raw);
}
|
|
283
|
+
|
|
284
|
+
// ---------------------------------------------------------------------------
|
|
285
|
+
// CSV: minimal dependency-free RFC-4180-ish parser (quoted fields, embedded
|
|
286
|
+
// commas/newlines, "" escapes, CRLF). Header row maps columns to names.
|
|
287
|
+
|
|
288
|
+
/**
 * Parse CSV text into one record per data row, keyed by the header row.
 *
 * Handles quoted cells (embedded commas and newlines, `""` as an escaped
 * quote) and LF, CR, or CRLF line endings. Blank lines are skipped. Header
 * names are trimmed; columns with an empty header are dropped; cells missing
 * from a short row become "".
 *
 * @returns An empty array when there is no header row or no content at all.
 * @throws Error when the text ends inside a quoted cell.
 */
export function parseCsv(text: string): Array<Record<string, string>> {
  const table: string[][] = [];
  let cells: string[] = [];
  let buffer = "";
  let quoted = false;
  let hasContent = false;

  const endCell = (): void => {
    cells.push(buffer);
    buffer = "";
  };
  const endLine = (): void => {
    // A line with no cell text and no separators never becomes a row.
    if (buffer.length === 0 && cells.length === 0) return;
    endCell();
    table.push(cells);
    cells = [];
  };

  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];
    pos += 1; // from here on, text[pos] is the character after `ch`
    if (quoted) {
      if (ch !== '"') {
        buffer += ch;
      } else if (text[pos] === '"') {
        // Doubled quote inside a quoted cell is a literal quote.
        buffer += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }
    switch (ch) {
      case '"':
        quoted = true;
        hasContent = true;
        break;
      case ",":
        endCell();
        hasContent = true;
        break;
      case "\r":
        if (text[pos] === "\n") pos += 1; // treat CRLF as one line break
        endLine();
        break;
      case "\n":
        endLine();
        break;
      default:
        buffer += ch;
        hasContent = true;
    }
  }

  if (quoted) throw new Error("CSV parse error: unterminated quoted field");
  endLine(); // flush a final line that has no trailing newline
  if (!hasContent || table.length === 0) return [];

  const [headerRow, ...dataRows] = table;
  const names = headerRow.map((name) => name.trim());
  return dataRows.map((row) => {
    const record: Record<string, string> = {};
    for (let column = 0; column < names.length; column += 1) {
      const name = names[column];
      if (name) record[name] = row[column] ?? "";
    }
    return record;
  });
}
|
|
352
|
+
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
// Source records
|
|
355
|
+
|
|
356
|
+
/** One record produced by a source adapter, ready for matching and field extraction. */
export type EnrichSourceRecord = {
  /** Source-scoped id, e.g. "apollo:org_abc" or "clay:row-3"; lands on stamps as sourceRecordId. */
  id: string;
  objectType: EnrichObjectType;
  /** Match-key values (key name → raw value), extracted by the source adapter. */
  keys: Record<string, string | undefined>;
  /** Raw source payload; field paths and evidence excerpts are read from it. */
  payload: Record<string, unknown>;
};
|
|
365
|
+
|
|
366
|
+
/**
 * Read a value out of a source payload.
 *
 * The whole `path` is tried first as a literal key (CSV headers may contain
 * dots); otherwise it is split on "." and walked through nested plain objects.
 * Arrays are not traversed. Only own properties are consulted, so names
 * inherited from Object.prototype ("toString", "constructor", …) never
 * resolve to a value.
 *
 * @returns The value found, or undefined when any step of the path is missing.
 */
export function sourceValueAt(payload: Record<string, unknown>, path: string): unknown {
  const owns = (target: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(target, key);

  if (owns(payload, path)) return payload[path];

  let current: unknown = payload;
  for (const segment of path.split(".")) {
    if (!current || typeof current !== "object" || Array.isArray(current)) return undefined;
    if (!owns(current, segment)) return undefined;
    current = (current as Record<string, unknown>)[segment];
  }
  return current;
}
|
|
376
|
+
|
|
377
|
+
/**
 * Look up a match-key value on an ingest row.
 *
 * The first header equal to `key` after trimming and lower-casing wins
 * ("Email" matches key "email"); if no header matches, `key` is resolved via
 * sourceValueAt instead.
 *
 * @returns The value as a trimmed string, or undefined when it is empty or
 *   not a string/number/boolean.
 */
export function ingestKeyValue(row: Record<string, unknown>, key: string): string | undefined {
  const wanted = key.toLowerCase();
  const header = Object.keys(row).find((candidate) => candidate.trim().toLowerCase() === wanted);
  const raw = header !== undefined ? row[header] : sourceValueAt(row, key);
  return valueToString(raw) || undefined;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Render a scalar as text: strings are trimmed, numbers and booleans are
 * stringified, and everything else (null, undefined, objects, …) becomes "".
 */
function valueToString(value: unknown): string {
  switch (typeof value) {
    case "string":
      return value.trim();
    case "number":
    case "boolean":
      return String(value);
    default:
      return "";
  }
}
|
|
396
|
+
|
|
397
|
+
// ---------------------------------------------------------------------------
|
|
398
|
+
// Matching: ordered keys, unique-hit-wins, zero-hits-next-key,
|
|
399
|
+
// multi-hit → onAmbiguous. Ambiguity is surfaced, never resolved by coin flip.
|
|
400
|
+
|
|
401
|
+
/**
 * Result of matching one source record against the snapshot:
 * - "matched": exactly one CRM record hit on `matchedKey`;
 * - "unmatched": no key produced a hit;
 * - "ambiguous": `key` hit several records, listed in `candidateIds`.
 */
export type MatchOutcome =
  | { status: "matched"; recordId: string; matchedKey: string }
  | { status: "unmatched" }
  | { status: "ambiguous"; key: string; candidateIds: string[] };
|
|
405
|
+
|
|
406
|
+
/**
 * Normalize a match-key value for comparison.
 *
 * Every value is stringified via valueToString and lower-cased. A "domain"
 * additionally loses a leading http(s):// scheme, a leading "www.", and
 * everything from the first "/" onward; any other key has runs of whitespace
 * collapsed to a single space.
 *
 * @returns "" when the value has no usable text.
 */
function normalizeKeyValue(key: string, value: unknown): string {
  const lowered = valueToString(value).toLowerCase();
  if (lowered === "") return "";
  if (key !== "domain") return lowered.replace(/\s+/g, " ");
  const withoutScheme = lowered.replace(/^https?:\/\//, "");
  const withoutWww = withoutScheme.replace(/^www\./, "");
  return withoutWww.replace(/\/.*$/, "");
}
|
|
417
|
+
|
|
418
|
+
/**
 * Read and normalize the value of match key `key` from a CRM snapshot record.
 *
 * Companies expose "domain" and "name"; contacts expose "email" and "name",
 * where a contact's name is "<firstName> <lastName>" trimmed.
 *
 * @returns "" for a key the object type does not expose.
 */
function crmKeyValue(
  objectType: EnrichObjectType,
  record: { name?: string; domain?: string; email?: string; firstName?: string; lastName?: string },
  key: string,
): string {
  const isCompany = objectType === "company";
  switch (key) {
    case "domain":
      return isCompany ? normalizeKeyValue("domain", record.domain) : "";
    case "email":
      return isCompany ? "" : normalizeKeyValue("email", record.email);
    case "name": {
      if (isCompany) return normalizeKeyValue("name", record.name);
      const fullName = `${record.firstName ?? ""} ${record.lastName ?? ""}`.trim();
      return normalizeKeyValue("name", fullName);
    }
    default:
      return "";
  }
}
|
|
434
|
+
|
|
435
|
+
/**
 * Match one source record against the snapshot using ordered keys.
 *
 * Keys are tried in order. A key with no usable source value, or with zero
 * CRM hits, falls through to the next key. The first key with hits decides:
 * one hit is a match, several hits are reported as ambiguous (never resolved
 * by picking one).
 *
 * @param snapshot CRM snapshot; companies are matched against `accounts`,
 *   contacts against `contacts`.
 * @param keys Match keys in priority order.
 * @param sourceKeys Raw key values from the source record.
 */
export function matchSourceRecord(
  snapshot: CanonicalGtmSnapshot,
  objectType: EnrichObjectType,
  keys: string[],
  sourceKeys: Record<string, string | undefined>,
): MatchOutcome {
  const pool: Array<Record<string, unknown> & { id: string }> =
    objectType === "company" ? snapshot.accounts : snapshot.contacts;

  for (const key of keys) {
    const target = normalizeKeyValue(key, sourceKeys[key]);
    if (target === "") continue;

    const candidateIds: string[] = [];
    for (const candidate of pool) {
      if (crmKeyValue(objectType, candidate as never, key) === target) {
        candidateIds.push(candidate.id);
      }
    }

    if (candidateIds.length === 0) continue; // zero hits: try the next key
    if (candidateIds.length === 1) {
      return { status: "matched", recordId: candidateIds[0], matchedKey: key };
    }
    return { status: "ambiguous", key, candidateIds };
  }
  return { status: "unmatched" };
}
|
|
457
|
+
|
|
458
|
+
// ---------------------------------------------------------------------------
|
|
459
|
+
// Plan building
|
|
460
|
+
|
|
461
|
+
// 32-bit FNV-1a over the string's UTF-16 code units, returned as 8 lowercase
// hex digits. Mirrors stableHash in rules.ts; duplicated here so enrich.ts
// stays importable without pulling the audit engine (the market.ts precedent).
function fnv1a(value: string): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let digest = FNV_OFFSET_BASIS;
  let index = 0;
  while (index < value.length) {
    // Math.imul keeps the multiply within 32-bit integer arithmetic.
    digest = Math.imul(digest ^ value.charCodeAt(index), FNV_PRIME);
    index += 1;
  }
  return (digest >>> 0).toString(16).padStart(8, "0");
}
|
|
471
|
+
|
|
472
|
+
/** Planning pass: "append" fills blanks; "refresh" revisits stale stamped fields. */
export type EnrichMode = "append" | "refresh";

/** Tallies for one plan build. */
export type EnrichCounts = {
  fetched: number;
  matched: number;
  unmatched: number;
  ambiguous: number;
  opsEmitted: number;
};

/** Record that enrich supplied a value for one (record, field) cell. */
export type EnrichStamp = {
  objectType: EnrichObjectType;
  objectId: string;
  /** Canonical field name. */
  field: string;
  enrichedAt: string;
  sourceRecordId: string;
  /** Source value at stamp time (refresh change-detection observability). */
  value?: unknown;
};

/** A source record whose match key hit more than one CRM record. */
export type EnrichAmbiguity = {
  sourceRecordId: string;
  key: string;
  candidateIds: string[];
};

/** One (record, field) cell in a refresh work set. */
export type EnrichWorkItem = {
  objectType: EnrichObjectType;
  objectId: string;
  /** Canonical field name. */
  field: string;
};

/** Inputs to buildEnrichPlan. */
export type BuildEnrichPlanOptions = {
  config: EnrichConfig;
  /** Id of the source being planned; must be declared in `config.sources`. */
  source: string;
  mode: EnrichMode;
  snapshot: CanonicalGtmSnapshot;
  records: EnrichSourceRecord[];
  /**
   * Refresh only: the stale (record, field) work set computed from run-store
   * stamps. Refresh proposes writes ONLY for work-set cells — fields the
   * ledger proves enrich itself stamped — so policy "never" still never
   * overwrites a value enrich did not put there.
   */
  workSet?: EnrichWorkItem[];
  /** Clock override; the current time is used when omitted. */
  now?: () => Date;
  runLabel: string;
};

/** Everything buildEnrichPlan returns. */
export type EnrichPlanResult = {
  plan: PatchPlan;
  counts: EnrichCounts;
  stamps: EnrichStamp[];
  ambiguities: EnrichAmbiguity[];
  unmatchedSourceIds: string[];
};
|
|
530
|
+
|
|
531
|
+
/** Sentinel `afterValue` on operations emitted for ambiguous matches; a human must supply the real value at approval. */
const PLACEHOLDER_RECORD_SELECTION = "requires_human_record_selection";
|
|
532
|
+
|
|
533
|
+
/** Translate an enrich object type to the patch-operation object type: "company" → "account", "contact" stays "contact". */
function canonicalObjectType(objectType: EnrichObjectType): "account" | "contact" {
  if (objectType === "company") return "account";
  return "contact";
}
|
|
536
|
+
|
|
537
|
+
/**
 * Read the current value of `field` on the snapshot record with id `objectId`.
 * Companies are looked up in `snapshot.accounts`, contacts in `snapshot.contacts`.
 *
 * @returns The field's value, or undefined when no record has that id.
 */
function crmFieldValue(
  snapshot: CanonicalGtmSnapshot,
  objectType: EnrichObjectType,
  objectId: string,
  field: string,
): unknown {
  const pool = objectType === "company" ? snapshot.accounts : snapshot.contacts;
  for (const entry of pool) {
    if (entry.id === objectId) return (entry as unknown as Record<string, unknown>)[field];
  }
  return undefined;
}
|
|
547
|
+
|
|
548
|
+
/** True for undefined, null, and strings that are empty or whitespace-only; false for everything else (including 0 and false). */
function isEmptyValue(value: unknown): boolean {
  if (value == null) return true;
  return typeof value === "string" ? value.trim().length === 0 : false;
}
|
|
551
|
+
|
|
552
|
+
/**
 * Decide whether two field values are equal for enrichment purposes.
 *
 * - An empty value (undefined, null, blank string) equals only another empty
 *   value. This is checked first so numeric coercion cannot equate 0 with
 *   "" or null (Number("") and Number(null) are both 0).
 * - If either side is a number, both are compared numerically.
 * - Otherwise both are compared as trimmed strings.
 */
function sameValue(a: unknown, b: unknown): boolean {
  const aEmpty = isEmptyValue(a);
  const bEmpty = isEmptyValue(b);
  if (aEmpty || bEmpty) return aEmpty && bEmpty;
  if (typeof a === "number" || typeof b === "number") {
    return Number(a) === Number(b);
  }
  return valueToString(a) === valueToString(b);
}
|
|
560
|
+
|
|
561
|
+
/** Human-readable label for a source record: its name key, else domain, else email, else its id. */
function describeSourceRecord(record: EnrichSourceRecord): string {
  const { name, domain, email } = record.keys;
  return String(name ?? domain ?? email ?? record.id);
}
|
|
565
|
+
|
|
566
|
+
/**
 * Build the evidence entry that ties proposed values back to the source
 * payload that produced them.
 *
 * The id is derived from `source` and the record id, so the same pair always
 * yields the same evidence id. The text is the JSON-serialized payload, cut
 * to 1200 characters plus an ellipsis when longer.
 *
 * @param sourceKind "api" sources are recorded with sourceSystem "web".
 * @param format For non-api sources: "csv" is recorded as "csv", anything
 *   else as "manual".
 * @param matchedKey The key that matched, or undefined (stored as null in
 *   metadata) when there was no unique match.
 * @param capturedAt ISO timestamp to stamp on the evidence.
 */
function evidenceFor(
  source: string,
  sourceKind: EnrichSourceKind,
  format: "csv" | "json" | undefined,
  record: EnrichSourceRecord,
  matchedKey: string | undefined,
  capturedAt: string,
): GtmEvidence {
  const MAX_EXCERPT_CHARS = 1200;
  const serialized = JSON.stringify(record.payload);
  const text =
    serialized.length > MAX_EXCERPT_CHARS ? `${serialized.slice(0, MAX_EXCERPT_CHARS)}…` : serialized;

  let sourceSystem: GtmEvidence["sourceSystem"];
  if (sourceKind === "api") {
    sourceSystem = "web";
  } else if (format === "csv") {
    sourceSystem = "csv";
  } else {
    sourceSystem = "manual";
  }

  return {
    id: `ev_enr_${fnv1a(`${source}:${record.id}`)}`,
    sourceSystem,
    sourceObjectType: record.objectType,
    sourceObjectId: record.id,
    title: `${source} payload for ${describeSourceRecord(record)}`,
    text,
    capturedAt,
    metadata: { source, sourceRecordId: record.id, matchedKey: matchedKey ?? null },
  };
}
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Match source records against the snapshot and emit a patch plan under the
|
|
589
|
+
* conflict policy. Append fills blanks only; refresh proposes updates for
|
|
590
|
+
* stale stamped fields whose source value actually changed (beforeValue =
|
|
591
|
+
* current CRM value → apply-time compare-and-set rejects drifted records).
|
|
592
|
+
*/
|
|
593
|
+
export function buildEnrichPlan(options: BuildEnrichPlanOptions): EnrichPlanResult {
|
|
594
|
+
const { config, source, mode, snapshot, records, runLabel } = options;
|
|
595
|
+
const nowIso = (options.now ?? (() => new Date()))().toISOString();
|
|
596
|
+
const sourceConfig = config.sources[source];
|
|
597
|
+
if (!sourceConfig) throw new Error(`enrich: source "${source}" is not declared in the config`);
|
|
598
|
+
|
|
599
|
+
const workSet = options.workSet ?? [];
|
|
600
|
+
const workKeys = new Set(workSet.map((item) => `${item.objectType}|${item.objectId}|${item.field}`));
|
|
601
|
+
|
|
602
|
+
const operations: PatchOperation[] = [];
|
|
603
|
+
const evidence: GtmEvidence[] = [];
|
|
604
|
+
const stamps: EnrichStamp[] = [];
|
|
605
|
+
const ambiguities: EnrichAmbiguity[] = [];
|
|
606
|
+
const unmatchedSourceIds: string[] = [];
|
|
607
|
+
const counts: EnrichCounts = { fetched: records.length, matched: 0, unmatched: 0, ambiguous: 0, opsEmitted: 0 };
|
|
608
|
+
|
|
609
|
+
for (const record of records) {
|
|
610
|
+
const match = config.match[record.objectType];
|
|
611
|
+
const fields = (config.fields[record.objectType] ?? []).filter(
|
|
612
|
+
(field) => field.from[source] !== undefined,
|
|
613
|
+
);
|
|
614
|
+
if (!match || fields.length === 0) {
|
|
615
|
+
counts.unmatched += 1;
|
|
616
|
+
unmatchedSourceIds.push(record.id);
|
|
617
|
+
continue;
|
|
618
|
+
}
|
|
619
|
+
const outcome = matchSourceRecord(snapshot, record.objectType, match.keys, record.keys);
|
|
620
|
+
|
|
621
|
+
if (outcome.status === "unmatched") {
|
|
622
|
+
counts.unmatched += 1;
|
|
623
|
+
unmatchedSourceIds.push(record.id);
|
|
624
|
+
continue;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
if (outcome.status === "ambiguous") {
|
|
628
|
+
counts.ambiguous += 1;
|
|
629
|
+
ambiguities.push({ sourceRecordId: record.id, key: outcome.key, candidateIds: outcome.candidateIds });
|
|
630
|
+
if ((match.onAmbiguous ?? "skip") === "skip") continue;
|
|
631
|
+
// onAmbiguous: suggest — emit placeholder operations (one per candidate
|
|
632
|
+
// per field) so the existing suggest → plans approve --values-from /
|
|
633
|
+
// --value chain resolves the record selection. Apply refuses to write
|
|
634
|
+
// requires_human_* placeholders without an explicit value.
|
|
635
|
+
const recordEvidence = evidenceFor(source, sourceConfig.kind, sourceConfig.format, record, undefined, nowIso);
|
|
636
|
+
let emittedForRecord = false;
|
|
637
|
+
for (const field of fields) {
|
|
638
|
+
const sourceValue = sourceValueAt(record.payload, field.from[source]);
|
|
639
|
+
if (isEmptyValue(sourceValue)) continue;
|
|
640
|
+
const canonicalField = resolveCrmField(record.objectType, field.crm);
|
|
641
|
+
for (const candidateId of outcome.candidateIds) {
|
|
642
|
+
const currentValue = crmFieldValue(snapshot, record.objectType, candidateId, canonicalField);
|
|
643
|
+
if (!isEmptyValue(currentValue)) continue; // policy never: blanks only
|
|
644
|
+
emittedForRecord = true;
|
|
645
|
+
operations.push({
|
|
646
|
+
id: `op_enr_${fnv1a(`${source}:${record.objectType}:${candidateId}:${canonicalField}`)}`,
|
|
647
|
+
objectType: canonicalObjectType(record.objectType),
|
|
648
|
+
objectId: candidateId,
|
|
649
|
+
operation: "set_field",
|
|
650
|
+
field: canonicalField,
|
|
651
|
+
beforeValue: currentValue ?? null,
|
|
652
|
+
afterValue: PLACEHOLDER_RECORD_SELECTION,
|
|
653
|
+
reason:
|
|
654
|
+
`${source} record "${describeSourceRecord(record)}" matched ${outcome.candidateIds.length} CRM ` +
|
|
655
|
+
`records on ${outcome.key} (${outcome.candidateIds.join(", ")}). If ${candidateId} is the right ` +
|
|
656
|
+
`record, approve with --value <opId>=${JSON.stringify(valueToString(sourceValue))}.`,
|
|
657
|
+
sourceRuleOrPolicy: `enrich:${source}:${canonicalField}`,
|
|
658
|
+
riskLevel: "medium",
|
|
659
|
+
approvalRequired: true,
|
|
660
|
+
rollback: "Clear the field (the before value was empty) if the selection was wrong.",
|
|
661
|
+
evidenceIds: [recordEvidence.id],
|
|
662
|
+
});
|
|
663
|
+
counts.opsEmitted += 1;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
if (emittedForRecord) evidence.push(recordEvidence);
|
|
667
|
+
continue;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
// Matched.
|
|
671
|
+
counts.matched += 1;
|
|
672
|
+
const recordEvidence = evidenceFor(
|
|
673
|
+
source,
|
|
674
|
+
sourceConfig.kind,
|
|
675
|
+
sourceConfig.format,
|
|
676
|
+
record,
|
|
677
|
+
outcome.matchedKey,
|
|
678
|
+
nowIso,
|
|
679
|
+
);
|
|
680
|
+
let emittedForRecord = false;
|
|
681
|
+
|
|
682
|
+
for (const field of fields) {
|
|
683
|
+
const canonicalField = resolveCrmField(record.objectType, field.crm);
|
|
684
|
+
const sourceValue = sourceValueAt(record.payload, field.from[source]);
|
|
685
|
+
const currentValue = crmFieldValue(snapshot, record.objectType, outcome.recordId, canonicalField);
|
|
686
|
+
const cellKey = `${record.objectType}|${outcome.recordId}|${canonicalField}`;
|
|
687
|
+
|
|
688
|
+
if (mode === "refresh") {
|
|
689
|
+
// Refresh touches ONLY stamped, stale cells from the work set.
|
|
690
|
+
if (!workKeys.has(cellKey)) continue;
|
|
691
|
+
// Re-stamp every checked cell (changed or not): the staleness clock
|
|
692
|
+
// resets because the source was actually consulted.
|
|
693
|
+
stamps.push({
|
|
694
|
+
objectType: record.objectType,
|
|
695
|
+
objectId: outcome.recordId,
|
|
696
|
+
field: canonicalField,
|
|
697
|
+
enrichedAt: nowIso,
|
|
698
|
+
sourceRecordId: record.id,
|
|
699
|
+
value: sourceValue,
|
|
700
|
+
});
|
|
701
|
+
if (isEmptyValue(sourceValue)) continue; // source went blank: never propose clearing
|
|
702
|
+
if (sameValue(sourceValue, currentValue)) continue; // unchanged: no op
|
|
703
|
+
emittedForRecord = true;
|
|
704
|
+
operations.push({
|
|
705
|
+
id: `op_enr_${fnv1a(`${source}:${record.objectType}:${outcome.recordId}:${canonicalField}`)}`,
|
|
706
|
+
objectType: canonicalObjectType(record.objectType),
|
|
707
|
+
objectId: outcome.recordId,
|
|
708
|
+
operation: "set_field",
|
|
709
|
+
field: canonicalField,
|
|
710
|
+
beforeValue: currentValue ?? null,
|
|
711
|
+
afterValue: typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue),
|
|
712
|
+
reason:
|
|
713
|
+
`${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
|
|
714
|
+
`${outcome.matchedKey}) reports a changed value for ${canonicalField}.`,
|
|
715
|
+
sourceRuleOrPolicy: `enrich:${source}:${canonicalField}`,
|
|
716
|
+
riskLevel: isEmptyValue(currentValue) ? "low" : "medium",
|
|
717
|
+
approvalRequired: true,
|
|
718
|
+
rollback: "Restore the before value if the refreshed value is wrong.",
|
|
719
|
+
evidenceIds: [recordEvidence.id],
|
|
720
|
+
});
|
|
721
|
+
counts.opsEmitted += 1;
|
|
722
|
+
continue;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
// Append: fill blanks only (policy "never").
|
|
726
|
+
if (isEmptyValue(sourceValue)) continue;
|
|
727
|
+
if (!isEmptyValue(currentValue)) continue;
|
|
728
|
+
emittedForRecord = true;
|
|
729
|
+
const afterValue = typeof sourceValue === "number" ? sourceValue : valueToString(sourceValue);
|
|
730
|
+
operations.push({
|
|
731
|
+
id: `op_enr_${fnv1a(`${source}:${record.objectType}:${outcome.recordId}:${canonicalField}`)}`,
|
|
732
|
+
objectType: canonicalObjectType(record.objectType),
|
|
733
|
+
objectId: outcome.recordId,
|
|
734
|
+
operation: "set_field",
|
|
735
|
+
field: canonicalField,
|
|
736
|
+
beforeValue: currentValue ?? null,
|
|
737
|
+
afterValue,
|
|
738
|
+
reason:
|
|
739
|
+
`${source} ${record.objectType} "${describeSourceRecord(record)}" (matched by ` +
|
|
740
|
+
`${outcome.matchedKey}) fills the blank ${canonicalField}.`,
|
|
741
|
+
sourceRuleOrPolicy: `enrich:${source}:${canonicalField}`,
|
|
742
|
+
riskLevel: "low",
|
|
743
|
+
approvalRequired: true,
|
|
744
|
+
rollback: "Clear the field (the before value was empty) if the enrichment is wrong.",
|
|
745
|
+
evidenceIds: [recordEvidence.id],
|
|
746
|
+
});
|
|
747
|
+
counts.opsEmitted += 1;
|
|
748
|
+
stamps.push({
|
|
749
|
+
objectType: record.objectType,
|
|
750
|
+
objectId: outcome.recordId,
|
|
751
|
+
field: canonicalField,
|
|
752
|
+
enrichedAt: nowIso,
|
|
753
|
+
sourceRecordId: record.id,
|
|
754
|
+
value: afterValue,
|
|
755
|
+
});
|
|
756
|
+
}
|
|
757
|
+
if (emittedForRecord) evidence.push(recordEvidence);
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
const plan: PatchPlan = {
|
|
761
|
+
id: `patch_plan_enr_${fnv1a(`${source}:${mode}:${runLabel}:${nowIso}`)}`,
|
|
762
|
+
title: `Enrichment ${mode} — ${source}`,
|
|
763
|
+
createdAt: nowIso,
|
|
764
|
+
status: operations.length > 0 ? "needs_approval" : "draft",
|
|
765
|
+
dryRun: true,
|
|
766
|
+
summary:
|
|
767
|
+
`${counts.opsEmitted} proposed operation(s) from ${source} ${mode} (${counts.fetched} source ` +
|
|
768
|
+
`record(s): ${counts.matched} matched, ${counts.unmatched} unmatched, ${counts.ambiguous} ambiguous). ` +
|
|
769
|
+
`Conflict policy: ${config.policy.overwrite}.`,
|
|
770
|
+
findings: [],
|
|
771
|
+
evidence,
|
|
772
|
+
operations,
|
|
773
|
+
};
|
|
774
|
+
|
|
775
|
+
return { plan, counts, stamps, ambiguities, unmatchedSourceIds };
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
// ---------------------------------------------------------------------------
|
|
779
|
+
// Staleness: compute the refresh work set from run-store stamps.
|
|
780
|
+
|
|
781
|
+
/**
 * Collapse a source's run history into the newest stamp per cell.
 *
 * Runs whose `source` differs are ignored. Cells are keyed as
 * `objectType|objectId|field`; a stamp replaces the one already held only
 * when its `enrichedAt` string compares strictly greater, so on equal
 * timestamps the first stamp encountered is kept.
 *
 * @param runs Runs to scan, in the order given.
 * @param source Source name to keep.
 * @returns Map from cell key to that cell's newest stamp.
 */
export function latestStamps(runs: EnrichRun[], source: string): Map<string, EnrichStamp> {
  const newestByCell = new Map<string, EnrichStamp>();
  runs.forEach((run) => {
    if (run.source !== source) return;
    run.stamps.forEach((stamp) => {
      const cellKey = `${stamp.objectType}|${stamp.objectId}|${stamp.field}`;
      const held = newestByCell.get(cellKey);
      const isNewer = held === undefined || stamp.enrichedAt > held.enrichedAt;
      if (isNewer) newestByCell.set(cellKey, stamp);
    });
  });
  return newestByCell;
}
|
|
794
|
+
|
|
795
|
+
/**
 * Staleness window, in days, for one CRM field of one object type.
 *
 * The first configured field whose `crm` name resolves (via
 * `resolveCrmField`) to `field` decides: its own `staleDays` if set,
 * otherwise `config.policy.defaultStaleDays`, otherwise `DEFAULT_STALE_DAYS`.
 * The same fallback chain applies when no configured field matches.
 */
export function staleDaysFor(config: EnrichConfig, objectType: EnrichObjectType, field: string): number {
  const mappings = config.fields[objectType] ?? [];
  for (const mapping of mappings) {
    if (resolveCrmField(objectType, mapping.crm) !== field) continue;
    // Only the first matching mapping is consulted, even if it has no staleDays.
    return mapping.staleDays ?? config.policy.defaultStaleDays ?? DEFAULT_STALE_DAYS;
  }
  return config.policy.defaultStaleDays ?? DEFAULT_STALE_DAYS;
}
|
|
801
|
+
|
|
802
|
+
/**
 * Compute the refresh work set: the (record, field) cells this source has
 * stamped that are configured for refresh and have aged past their window.
 *
 * For each cell's latest stamp (see `latestStamps`), the configured field is
 * the first one that resolves to the stamped field AND maps a column from
 * `source`; cells without such a field, or whose field has a falsy `refresh`,
 * are skipped. The window is resolved as
 * `options.staleDaysOverride` → field `staleDays` →
 * `config.policy.defaultStaleDays` → `DEFAULT_STALE_DAYS`.
 *
 * @param options.now Clock override; defaults to the current time.
 * @param options.staleDaysOverride Window in days that wins over all config.
 * @returns One work item per stale cell, in `latestStamps` iteration order.
 */
export function selectStaleWork(
  config: EnrichConfig,
  runs: EnrichRun[],
  source: string,
  options: { now?: () => Date; staleDaysOverride?: number } = {},
): EnrichWorkItem[] {
  const MS_PER_DAY = 86_400_000;
  const clock = options.now ?? (() => new Date());
  const nowMs = clock().getTime();
  const staleCells: EnrichWorkItem[] = [];

  latestStamps(runs, source).forEach((stamp) => {
    const { objectType, objectId, field } = stamp;
    const mapping = (config.fields[objectType] ?? []).find(
      (candidate) =>
        resolveCrmField(objectType, candidate.crm) === field && candidate.from[source] !== undefined,
    );
    if (!mapping?.refresh) return;

    const windowDays =
      options.staleDaysOverride ?? mapping.staleDays ?? config.policy.defaultStaleDays ?? DEFAULT_STALE_DAYS;
    // An unparseable enrichedAt makes ageDays NaN, which never exceeds the
    // window, so such a stamp is never selected.
    const ageDays = (nowMs - Date.parse(stamp.enrichedAt)) / MS_PER_DAY;
    if (ageDays > windowDays) {
      staleCells.push({ objectType, objectId, field });
    }
  });

  return staleCells;
}
|
|
831
|
+
|
|
832
|
+
// ---------------------------------------------------------------------------
|
|
833
|
+
// Run store: append-only JSON files under the profile home, one per run label.
|
|
834
|
+
// Checkpoint (cursor), staleness ledger (stamps), and observability surface
|
|
835
|
+
// (enrich status) in one structure — mirrors the market observations store.
|
|
836
|
+
|
|
837
|
+
/** Mode recorded on a stored run: the "ingest" staging mode or any `EnrichMode`. */
export type EnrichRunMode = "ingest" | EnrichMode;
|
|
838
|
+
|
|
839
|
+
/**
 * One stored enrich run. A single record serves as checkpoint (`cursor`,
 * `pulled`), staleness ledger (`stamps`), and status surface (`counts`,
 * `completedAt`).
 */
export type EnrichRun = {
  id: string;
  runLabel: string;
  source: string;
  mode: EnrichRunMode;
  startedAt: string;
  /** Stays null until the run finishes; `status` shows a null here as an interrupted run. */
  completedAt: string | null;
  /** Last processed pull key — where an interrupted pull resumes from. */
  cursor: string | null;
  counts: EnrichCounts;
  planIds: string[];
  stamps: EnrichStamp[];
  /** Ingest mode only: the staged source rows that append/refresh later consume. */
  staged?: Record<string, unknown>[];
  /** Ingest mode only: which object type the staged rows represent. */
  stagedObjectType?: EnrichObjectType;
  /**
   * Api pulls with --save: the source records fetched so far. Combined with
   * `cursor` the checkpoint is complete, so a resumed run replays payloads
   * that were already paid for rather than fetching them again.
   */
  pulled?: EnrichSourceRecord[];
  /** Api pulls with --save: pull keys for which the source returned no data. */
  missedKeys?: string[];
  /** Match collisions kept for review, candidate ids included. */
  ambiguities?: EnrichAmbiguity[];
};
|
|
867
|
+
|
|
868
|
+
/** Deterministic run id: `enr_` followed by `fnv1a` of `"<source>|<runLabel>"`. */
export function enrichRunId(source: string, runLabel: string): string {
  const identity = `${source}|${runLabel}`;
  const digest = fnv1a(identity);
  return `enr_${digest}`;
}
|
|
871
|
+
|
|
872
|
+
/**
 * Directory holding enrich run files: `<base>/enrich/runs`, where `<base>` is
 * `baseDir` when given and `credentialsDir()` otherwise.
 */
export function enrichRunsDir(baseDir?: string): string {
  const root = baseDir ?? credentialsDir();
  const segments = ["enrich", "runs"];
  return join(root, ...segments);
}
|
|
875
|
+
|
|
876
|
+
/** Persistence contract for enrich runs, addressed by run label. */
export interface EnrichRunStore {
  /** Look up one run by label; resolves to null when there is none. */
  get(runLabel: string): Promise<EnrichRun | null>;
  /** All stored runs. */
  list(): Promise<EnrichRun[]>;
  /** The latest run, optionally restricted to a source and/or mode; null when none qualifies. */
  latest(filter?: { source?: string; mode?: EnrichRunMode }): Promise<EnrichRun | null>;
  /** Add a new run. Runs are append-only, so a label that already exists is refused. */
  append(run: EnrichRun): Promise<EnrichRun>;
  /** Rewrite an in-progress run in place (cursor checkpoint, finalization). */
  update(run: EnrichRun): Promise<EnrichRun>;
}
|
|
885
|
+
|
|
886
|
+
/**
 * File-backed `EnrichRunStore`: one `<runLabel>.json` file per run.
 *
 * Reads are best-effort — a missing, unreadable, malformed, or non-run-shaped
 * file is treated as "no run" so one bad file cannot break `list`/`latest`.
 * `append`, however, refuses any label whose file slot is occupied, even by a
 * file that cannot be read or parsed, so an existing run file is never
 * silently overwritten.
 *
 * @param directory Directory for run files; defaults to `enrichRunsDir()`.
 *   Only when it is omitted is the profile home locked down before writing.
 * @returns A store whose methods reject with `Error` on an invalid label
 *   (when writing), a duplicate label (`append`), or a missing / foreign run
 *   (`update`).
 */
export function createFileEnrichRunStore(directory?: string): EnrichRunStore {
  const dir = directory ?? enrichRunsDir();

  /** Path of a label's run file. Labels are limited to word chars, dots and dashes. */
  function fileFor(runLabel: string): string {
    if (!/^[\w.-]+$/.test(runLabel)) throw new Error(`Invalid run label: ${runLabel}`);
    return join(dir, `${runLabel}.json`);
  }

  /** Best-effort read: null for anything that is not a readable, run-shaped JSON object. */
  function read(runLabel: string): EnrichRun | null {
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(fileFor(runLabel), "utf8"));
    } catch {
      return null;
    }
    if (typeof parsed !== "object" || parsed === null) return null;
    // Minimal shape check: sorting and lookups rely on these two strings, and a
    // stray JSON file (e.g. `[]`) must not crash the run listing.
    const shape = parsed as { runLabel?: unknown; startedAt?: unknown };
    if (typeof shape.runLabel !== "string" || typeof shape.startedAt !== "string") return null;
    return parsed as EnrichRun;
  }

  /**
   * Whether the label's file slot is occupied. Only a definite "no such file"
   * counts as free; a file that exists but cannot be read still occupies it.
   */
  function isTaken(runLabel: string): boolean {
    const file = fileFor(runLabel);
    try {
      readFileSync(file);
      return true;
    } catch (error: unknown) {
      const code = typeof error === "object" && error !== null ? (error as { code?: unknown }).code : undefined;
      return code !== "ENOENT";
    }
  }

  function write(run: EnrichRun): EnrichRun {
    // Run files carry CRM record ids and source values; keep them owner-only
    // like plan files (and lock the home down even before any login).
    if (!directory) ensureSecureHomeDir();
    mkdirSync(dir, { recursive: true, mode: 0o700 });
    writeSecureFile(fileFor(run.runLabel), `${JSON.stringify(run, null, 2)}\n`);
    return run;
  }

  /** Every readable run in the directory, oldest `startedAt` first. */
  function listRuns(): EnrichRun[] {
    let names: string[] = [];
    try {
      names = readdirSync(dir).filter((name) => name.endsWith(".json"));
    } catch {
      // No directory yet means no runs.
      return [];
    }
    return names
      .map((name) => read(name.slice(0, -".json".length)))
      .filter((run): run is EnrichRun => run !== null)
      .sort((a, b) => a.startedAt.localeCompare(b.startedAt));
  }

  return {
    async append(run) {
      if (isTaken(run.runLabel)) {
        throw new Error(
          `Run "${run.runLabel}" already exists — enrich runs are append-only; use a new run label`,
        );
      }
      return write(run);
    },
    async update(run) {
      const existing = read(run.runLabel);
      if (!existing) throw new Error(`No enrich run "${run.runLabel}" to update`);
      if (existing.id !== run.id) {
        throw new Error(`Run "${run.runLabel}" belongs to a different run id (${existing.id})`);
      }
      return write(run);
    },
    async get(runLabel) {
      return read(runLabel);
    },
    async list() {
      return listRuns();
    },
    async latest(filter = {}) {
      const runs = listRuns().filter(
        (run) =>
          (filter.source === undefined || run.source === filter.source) &&
          (filter.mode === undefined || run.mode === filter.mode),
      );
      return runs.length ? runs[runs.length - 1] : null;
    },
  };
}
|
|
957
|
+
|
|
958
|
+
// ---------------------------------------------------------------------------
|
|
959
|
+
// Ingest staging helpers
|
|
960
|
+
|
|
961
|
+
/**
 * Work out which object type a batch of staged rows represents.
 *
 * Only the first row is inspected (an empty batch is treated as a row with no
 * columns). An object type qualifies when it has a match config, at least one
 * configured field maps a column from `source`, and at least one of its match
 * keys yields a value on that row via `ingestKeyValue`.
 *
 * @returns The single qualifying object type.
 * @throws Error when no type qualifies, or when more than one does; both
 *   messages ask the caller to pass --objects.
 */
export function inferIngestObjectType(
  config: EnrichConfig,
  source: string,
  rows: Array<Record<string, unknown>>,
): EnrichObjectType {
  const firstRow = rows[0] ?? {};

  const qualifies = (objectType: EnrichObjectType): boolean => {
    const matchConfig = config.match[objectType];
    const mapsFromSource = (config.fields[objectType] ?? []).some(
      (field) => field.from[source] !== undefined,
    );
    if (!matchConfig || !mapsFromSource) return false;
    return matchConfig.keys.some((key) => ingestKeyValue(firstRow, key) !== undefined);
  };
  const candidates = OBJECT_TYPES.filter((objectType) => qualifies(objectType));

  switch (candidates.length) {
    case 1:
      return candidates[0];
    case 0:
      throw new Error(
        `enrich ingest: cannot tell whether these rows are companies or contacts — no configured match key ` +
          `column found. Pass --objects companies|contacts, or add the key column to the export.`,
      );
    default:
      throw new Error(
        `enrich ingest: rows carry match keys for ${candidates.join(" and ")} — pass --objects companies|contacts to disambiguate.`,
      );
  }
}
|
|
989
|
+
|
|
990
|
+
/**
 * Convert the rows staged on an ingest run into source records for the matcher.
 *
 * Each row becomes one record of the run's `stagedObjectType`, with id
 * `<source>:<runLabel>:row-<n>` (n is 1-based), the row itself as payload,
 * and one `keys` entry per configured match key, read with `ingestKeyValue`.
 *
 * @throws Error when the run has no staged object type or no staged rows, or
 *   when the config has no match section for that object type.
 */
export function stagedSourceRecords(
  config: EnrichConfig,
  source: string,
  run: EnrichRun,
): EnrichSourceRecord[] {
  const stagedRows = run.staged ?? [];
  const objectType = run.stagedObjectType;
  if (!objectType || stagedRows.length === 0) {
    throw new Error(
      `enrich: run "${run.runLabel}" has no staged data — stage a Clay export first: ` +
        `fullstackgtm enrich ingest <file.csv|payload.json> --source ${source}`,
    );
  }

  const matchConfig = config.match[objectType];
  if (!matchConfig) throw new Error(`enrich: no match config for ${objectType}`);

  const records: EnrichSourceRecord[] = [];
  for (let rowIndex = 0; rowIndex < stagedRows.length; rowIndex += 1) {
    const row = stagedRows[rowIndex];
    const keys: Record<string, string | undefined> = {};
    for (const keyName of matchConfig.keys) {
      keys[keyName] = ingestKeyValue(row, keyName);
    }
    const rowNumber = rowIndex + 1;
    records.push({
      id: `${source}:${run.runLabel}:row-${rowNumber}`,
      objectType,
      keys,
      payload: row,
    });
  }
  return records;
}
|