@mailwoman/registry 4.8.1 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -0
- package/out/address-key.d.ts +31 -0
- package/out/address-key.d.ts.map +1 -0
- package/out/address-key.js +38 -0
- package/out/address-key.js.map +1 -0
- package/out/geojson.d.ts.map +1 -1
- package/out/geojson.js +2 -0
- package/out/geojson.js.map +1 -1
- package/out/index.d.ts +5 -0
- package/out/index.d.ts.map +1 -1
- package/out/index.js +5 -0
- package/out/index.js.map +1 -1
- package/out/ingest.d.ts +65 -4
- package/out/ingest.d.ts.map +1 -1
- package/out/ingest.js +114 -5
- package/out/ingest.js.map +1 -1
- package/out/learned-scorer.d.ts +59 -0
- package/out/learned-scorer.d.ts.map +1 -0
- package/out/learned-scorer.js +78 -0
- package/out/learned-scorer.js.map +1 -0
- package/out/map-html.d.ts +51 -0
- package/out/map-html.d.ts.map +1 -0
- package/out/map-html.js +262 -0
- package/out/map-html.js.map +1 -0
- package/out/models/dedup-gbt-en-us.d.ts +36 -0
- package/out/models/dedup-gbt-en-us.d.ts.map +1 -0
- package/out/models/dedup-gbt-en-us.js +36 -0
- package/out/models/dedup-gbt-en-us.js.map +1 -0
- package/out/reconcile.d.ts +86 -0
- package/out/reconcile.d.ts.map +1 -0
- package/out/reconcile.js +161 -0
- package/out/reconcile.js.map +1 -0
- package/out/resolve.d.ts +114 -4
- package/out/resolve.d.ts.map +1 -1
- package/out/resolve.js +165 -21
- package/out/resolve.js.map +1 -1
- package/out/types.d.ts +7 -0
- package/out/types.d.ts.map +1 -1
- package/out/types.js.map +1 -1
- package/package.json +7 -4
package/out/reconcile.js
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Coverage reconciliation (#621) — the library home for what `scripts/record-matcher/
|
|
7
|
+
* coverage-reconciliation.ts` does inline, so the CLI (`registry --reconcile`) and any consumer
|
|
8
|
+
* can reuse it.
|
|
9
|
+
*
|
|
10
|
+
* Given entities already resolved ACROSS sources (#618), classify each by which KIND of source its
|
|
11
|
+
* records come from. You tag each source label as either an **eligibility** source (it denotes
|
|
12
|
+
* membership in some base set — e.g. registered providers/facilities) or a **funding/enrollment**
|
|
13
|
+
* source (it denotes participation in a program). Three buckets fall out per entity:
|
|
14
|
+
*
|
|
15
|
+
* - **enrolled** — resolves to BOTH an eligibility and a funding record.
|
|
16
|
+
* - **eligible, not enrolled** — an eligibility record with NO funding record resolving to it (the
|
|
17
|
+
* ANTI-JOIN).
|
|
18
|
+
* - **funded, not in the eligibility set** — a funding record with no eligibility record resolving to
|
|
19
|
+
* it.
|
|
20
|
+
*
|
|
21
|
+
* This is strictly a **set-membership reconciliation, never a determination.** We produce the
|
|
22
|
+
* reconciled join and surface the candidate set; what a gap MEANS — and whether it is real, a
|
|
23
|
+
* sampling artifact, or actionable — is entirely the data consumer's call. Nothing here is an
|
|
24
|
+
* allegation. {@link reconciliationReport} bakes that caveat in by construction.
|
|
25
|
+
*
|
|
26
|
+
* Pairs with {@link toMapHTML}: {@link reconciliationGeoJSON} tags each feature with its `bucket`,
|
|
27
|
+
* which the map auto-detects and colors categorically.
|
|
28
|
+
*/
|
|
29
|
+
/**
 * Decide which reconciliation bucket an entity falls into, given the source labels its records
 * span.
 *
 * @param sources Iterable of source labels present on the entity's records.
 * @param config  Role assignment; `eligibilitySources` and `fundingSources` list the labels that
 *   play each role.
 * @returns `"enrolled"` when both roles are present, `"eligible-not-enrolled"` when only an
 *   eligibility label is present, `"funded-not-eligible"` when only a funding label is present, and
 *   `null` when no label carries either role — so the caller can exclude the entity instead of
 *   silently miscounting it.
 */
export function bucketOf(sources, config) {
    const ELIGIBILITY = 1;
    const FUNDING = 2;
    const eligibilityLabels = new Set(config.eligibilitySources);
    const fundingLabels = new Set(config.fundingSources);
    // Accumulate which roles were seen as a two-bit mask; a label may carry both roles.
    let seen = 0;
    for (const label of sources) {
        if (eligibilityLabels.has(label)) {
            seen |= ELIGIBILITY;
        }
        if (fundingLabels.has(label)) {
            seen |= FUNDING;
        }
    }
    switch (seen) {
        case ELIGIBILITY | FUNDING:
            return "enrolled";
        case ELIGIBILITY:
            return "eligible-not-enrolled";
        case FUNDING:
            return "funded-not-eligible";
        default:
            return null;
    }
}
|
|
53
|
+
/**
 * Sort resolved entities into reconciliation buckets and tally each bucket.
 *
 * For every entity the distinct, non-empty `source` labels of its records are collected and sorted,
 * then handed to {@link bucketOf}. Entities for which `bucketOf` returns `null` (no eligibility- or
 * funding-tagged source) are left out of both the list and the counts.
 *
 * @param entities Iterable of resolved entities, each with a `records` array.
 * @param config   Role assignment passed through to {@link bucketOf}.
 * @returns `{ reconciled, counts }` — the classified entries (`{ entity, sources, bucket }`) in
 *   input order, and the per-bucket totals.
 */
export function reconcileCoverage(entities, config) {
    const tally = {
        enrolled: 0,
        "eligible-not-enrolled": 0,
        "funded-not-eligible": 0,
    };
    const classified = [];
    for (const entity of entities) {
        // De-duplicate the labels first, dropping records that carry no source at all.
        const labels = new Set();
        for (const record of entity.records) {
            if (record.source) {
                labels.add(record.source);
            }
        }
        const sources = Array.from(labels).sort();
        const bucket = bucketOf(sources, config);
        if (bucket) {
            classified.push({ entity, sources, bucket });
            tally[bucket] += 1;
        }
    }
    return { reconciled: classified, counts: tally };
}
|
|
74
|
+
/**
 * A display name for a reconciled entity's representative record.
 *
 * Preference order: the organization's canonical name, then the person name ("given family", using
 * whichever parts are present), then the representative record's `id`.
 *
 * @param entity A resolved entity with a `representative` record.
 * @returns The first non-blank candidate in the order above.
 */
function repName(entity) {
    const rep = entity.representative;
    const person = [rep.name?.given, rep.name?.family].filter(Boolean).join(" ");
    // `||` rather than `??`: a blank canonical name ("") must fall through to the person name / id
    // instead of producing an empty label in the map popup and the report table.
    return rep.organization?.canonical || person || rep.id;
}
|
|
80
|
+
/**
 * Build a GeoJSON FeatureCollection from a reconciliation result: one Point feature per reconciled
 * entity that has a coordinate. Each feature's properties carry the entity id, its `bucket`, its
 * `sources`, a display name and the number of records — the shape {@link toMapHTML} colors
 * categorically by bucket.
 *
 * @param result Output of the reconciliation pass; only `result.reconciled` is read.
 * @returns A `FeatureCollection`; entities without a coordinate are skipped.
 */
export function reconciliationGeoJSON(result) {
    const features = [];
    for (const { entity, bucket, sources } of result.reconciled) {
        const position = entity.coordinate;
        if (!position) {
            continue;
        }
        features.push({
            type: "Feature",
            geometry: {
                type: "Point",
                // GeoJSON positions are [longitude, latitude], in that order.
                coordinates: [position.longitude, position.latitude],
            },
            properties: {
                entityId: entity.id,
                bucket,
                sources,
                name: repName(entity),
                recordCount: entity.records.length,
            },
        });
    }
    return { type: "FeatureCollection", features };
}
|
|
106
|
+
/**
 * Render a markdown reconciliation report: the bucket counts, the enrolled-rate floor, an anti-join
 * spot-check table, and — always, by construction — the neutral caveat. The deliverable is the
 * anti-join SET, not a rate, and never an allegation.
 *
 * @param result  Reconciliation result; reads `counts` (per-bucket totals) and `reconciled`.
 * @param options Optional presentation settings:
 *   - `title`: report heading (a default heading is used when omitted);
 *   - `scopeNote`: emitted in italics under the heading;
 *   - `scorerNote`: emitted verbatim after the scope note;
 *   - `spotCheckLimit`: maximum number of "eligible, not enrolled" rows listed (default 15;
 *     negative values are treated as 0);
 *   - `sampleNote`: sentence prepended to the closing caveat.
 * @returns The report as a single newline-joined markdown string.
 */
export function reconciliationReport(result, options = {}) {
    const { counts, reconciled } = result;
    const title = options.title ?? "Coverage reconciliation — eligibility ↔ enrollment";
    // Clamp at 0: a negative limit would make `slice(0, limit)` drop rows from the END of the
    // anti-join list instead of showing none.
    const spotCheckLimit = Math.max(0, options.spotCheckLimit ?? 15);
    // Free text (ids, source labels, names) goes into a markdown table; an unescaped `|` or a line
    // break would split the cell and corrupt the row.
    const cell = (value) => String(value).replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
    const eligibleTotal = counts.enrolled + counts["eligible-not-enrolled"];
    // Guard the division so an empty eligibility set reports 0.0% rather than NaN.
    const enrolledRate = eligibleTotal > 0 ? (100 * counts.enrolled) / eligibleTotal : 0;
    const lines = [];
    lines.push(`# ${title}`);
    lines.push("");
    if (options.scopeNote) {
        lines.push(`_${options.scopeNote}_`);
        lines.push("");
    }
    if (options.scorerNote) {
        lines.push(options.scorerNote);
        lines.push("");
    }
    lines.push(`## The reconciliation`);
    lines.push("");
    lines.push(`| bucket | entities | meaning |`);
    lines.push(`|---|---:|---|`);
    lines.push(`| **enrolled** | ${counts.enrolled} | resolves to an eligibility record AND a funding record |`);
    lines.push(`| **eligible, not enrolled** | ${counts["eligible-not-enrolled"]} | eligibility record, no funding record resolved (the **anti-join**) |`);
    lines.push(`| **funded, not in eligibility set** | ${counts["funded-not-eligible"]} | funding record, no eligibility record resolved |`);
    lines.push("");
    lines.push(`Of the ${eligibleTotal} entities with an eligibility record, ${enrolledRate.toFixed(1)}% also resolve to a ` +
        `funding record — a **floor**, not a coverage rate (imperfect resolution + any sampling only ever miss ` +
        `links, never invent them). The deliverable is the anti-join SET, not this percentage.`);
    lines.push("");
    lines.push(`## Anti-join spot-check — first ${spotCheckLimit} "eligible, not enrolled"`);
    lines.push("");
    lines.push(`| entity | sources | name | coordinate |`);
    lines.push(`|---|---|---|---|`);
    for (const c of reconciled.filter((x) => x.bucket === "eligible-not-enrolled").slice(0, spotCheckLimit)) {
        const coord = c.entity.coordinate
            ? `${c.entity.coordinate.latitude.toFixed(4)}, ${c.entity.coordinate.longitude.toFixed(4)}`
            : "—";
        const sources = c.sources.map((s) => cell(s)).join(", ");
        lines.push(`| ${cell(c.entity.id)} | ${sources} | ${cell(repName(c.entity))} | ${coord} |`);
    }
    lines.push("");
    lines.push(`## The caveat that matters`);
    lines.push("");
    const sample = options.sampleNote ? `${options.sampleNote} ` : "";
    lines.push(`${sample}This is a **set-membership reconciliation, not a determination**. A missing funding record can mean ` +
        `the entity didn't apply, applied under a name we didn't resolve, is ineligible, or any number of things. We ` +
        `produce the reconciled join and surface the candidate set; **what a gap means, and whether to act on it, is ` +
        `entirely the data consumer's call.** Nothing here is an allegation.`);
    lines.push("");
    return lines.join("\n");
}
|
|
161
|
+
//# sourceMappingURL=reconcile.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reconcile.js","sourceRoot":"","sources":["../reconcile.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AA6BH;;;;GAIG;AACH,MAAM,UAAU,QAAQ,CAAC,OAAyB,EAAE,MAAuB;IAC1E,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAA;IAC/C,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,cAAc,CAAC,CAAA;IAC3C,IAAI,cAAc,GAAG,KAAK,CAAA;IAC1B,IAAI,UAAU,GAAG,KAAK,CAAA;IACtB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,cAAc,GAAG,IAAI,CAAA;QACtC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,UAAU,GAAG,IAAI,CAAA;IACnC,CAAC;IACD,IAAI,cAAc,IAAI,UAAU;QAAE,OAAO,UAAU,CAAA;IACnD,IAAI,cAAc;QAAE,OAAO,uBAAuB,CAAA;IAClD,IAAI,UAAU;QAAE,OAAO,qBAAqB,CAAA;IAC5C,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAmC,EAAE,MAAuB;IAC7F,MAAM,UAAU,GAAuB,EAAE,CAAA;IACzC,MAAM,MAAM,GAAyC;QACpD,QAAQ,EAAE,CAAC;QACX,uBAAuB,EAAE,CAAC;QAC1B,qBAAqB,EAAE,CAAC;KACxB,CAAA;IACD,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACxG,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;QACxC,IAAI,CAAC,MAAM;YAAE,SAAQ;QACrB,UAAU,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,EAAE,CAAA;IACjB,CAAC;IACD,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAA;AAC9B,CAAC;AAED,sEAAsE;AACtE,SAAS,OAAO,CAAC,MAAsB;IACtC,MAAM,GAAG,GAAG,MAAM,CAAC,cAAc,CAAA;IACjC,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC5E,OAAO,GAAG,CAAC,YAAY,EAAE,SAAS,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,EAAE,CAAC,CAAA;AACzD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAA4B;IACjE,OAAO;QACN,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,MAAM,CAAC,UAAU;aACzB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC;aAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACZ,IAAI,EAAE,SAAkB;YACxB,QAAQ,EAAE;gBACT,IAAI
,EAAE,OAAgB;gBACtB,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,UAAW,CAAC,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,UAAW,CAAC,QAAQ,CAAqB;aAChG;YACD,UAAU,EAAE;gBACX,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE;gBACrB,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;gBACvB,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM;aACpC;SACD,CAAC,CAAC;KACJ,CAAA;AACF,CAAC;AAeD;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,MAA4B,EAAE,UAAuC,EAAE;IAC3G,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,CAAA;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,oDAAoD,CAAA;IACnF,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,EAAE,CAAA;IACnD,MAAM,aAAa,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,uBAAuB,CAAC,CAAA;IACvE,MAAM,YAAY,GAAG,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,CAAA;IAEpF,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,EAAE,CAAC,CAAA;IACxB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,CAAA;QACpC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACf,CAAC;IACD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAA;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACf,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAA;IACnC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAA;IAC7C,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;IAC5B,KAAK,CAAC,IAAI,CAAC,oBAAoB,MAAM,CAAC,QAAQ,6DAA6D,CAAC,CAAA;IAC5G,KAAK,CAAC,IAAI,CACT,kCAAkC,MAAM,CAAC,uBAAuB,CAAC,yEAAyE,CAC1I,CAAA;IACD,KAAK,CAAC,IAAI,CACT,0CAA0C,MAAM,CAAC,qBAAqB,CAAC,qDAAqD,CAC5H,CAAA;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CACT,UAAU,aAAa,yCAAyC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB;QAC5G,wGAAwG;QACxG,uFAAuF,CACxF,CAAA;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,mCAAmC,cAAc,2BAA2B,CAAC,CAAA;IACxF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAA;IACtD,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;IAC/B,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,C
AAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,EAAE,CAAC;QACzG,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,UAAU;YAChC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YAC3F,CAAC,CAAC,GAAG,CAAA;QACN,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,KAAK,IAAI,CAAC,CAAA;IAC7F,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,KAAK,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAA;IACxC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;IACjE,KAAK,CAAC,IAAI,CACT,GAAG,MAAM,sGAAsG;QAC9G,8GAA8G;QAC9G,8GAA8G;QAC9G,qEAAqE,CACtE,CAAA;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACd,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACxB,CAAC"}
|
package/out/resolve.d.ts
CHANGED
|
@@ -16,13 +16,56 @@
|
|
|
16
16
|
* key). Wiring mailwoman's parser + geocoder to turn raw rows into `SourceRecord`s is the ingest
|
|
17
17
|
* layer that sits in front of this.
|
|
18
18
|
*/
|
|
19
|
-
import { type BlockingKey, type FellegiSunterModel } from "@mailwoman/match";
|
|
19
|
+
import { type BlockingKey, type FellegiSunterModel, type GBT, type TermFrequencyTable } from "@mailwoman/match";
|
|
20
20
|
import type { ResolvedEntity, SourceRecord } from "./types.js";
|
|
21
21
|
/**
|
|
22
|
-
*
|
|
23
|
-
*
|
|
22
|
+
* Cheap, parse-free normalization for the address-frequency key — uppercase, collapse whitespace,
|
|
23
|
+
* drop punctuation. Used to count how many distinct entities share an address across the WHOLE
|
|
24
|
+
* corpus (computable over millions of rows without geocoding) and to look that frequency up at
|
|
25
|
+
* match time. It's the inverse-frequency signal: a crowded clinic/billing address is weak evidence
|
|
26
|
+
* of identity; a lonely address is strong. (See
|
|
27
|
+
* docs/articles/evals/2026-06-15-nppes-dedup-benchmark.md.)
|
|
24
28
|
*/
|
|
25
|
-
export declare function
|
|
29
|
+
export declare function addressFrequencyKey(raw: string): string;
|
|
30
|
+
/**
 * Options for {@link buildDefaultModel}. Each lever is default-off, so the base model is
 * byte-stable.
 */
export interface DefaultModelOptions {
    /**
     * Corpus-wide address-frequency table (over {@link addressFrequencyKey}) — makes the address-
     * agreement weight **inverse to how shared the address is** (a building with 50 providers makes
     * "same address" near-worthless evidence). The table's `value` is the record's raw address
     * string.
     *
     * When omitted, no frequency adjustment is applied to the address/spatial comparison.
     */
    addressFrequency?: TermFrequencyTable;
    /**
     * **A1 (#625):** collapse the redundant address-key + great-circle-distance comparisons into ONE
     * {@link spatialComparison spatial-agreement} signal — an exact-key tier (where
     * `addressFrequency`, if set, rides) over distance buckets. Removes the double-count that
     * over-merges co-located providers (an exact key match already implies distance ≈ 0).
     *
     * When falsy, the model keeps the legacy pair of separate address and distance comparisons.
     */
    collapseSpatial?: boolean;
    /**
     * **A3 (#625):** add a normalized-phone exact-match comparison — a shared line is strong evidence
     * and the secondary corroborator that lets a true same-entity link survive name drift under A2.
     */
    usePhone?: boolean;
    /**
     * Extra secondary-identifier comparisons drawn from {@link SourceRecord.attributes} (e.g.
     * `["authorizedOfficial"]`). Each becomes an `attr:<key>` comparison AND counts toward A2
     * corroboration — a more reliable discriminator than phone where the data has one (#625).
     *
     * Omitted or empty adds no extra comparisons.
     */
    discriminators?: string[];
}
|
|
61
|
+
/**
 * The default geocode-first scoring model: name + organization + a spatial signal. The spatial
 * signal is either two comparisons (address-key similarity + great-circle distance — the legacy
 * default, which double-counts) or, with {@link DefaultModelOptions.collapseSpatial}, one collapsed
 * {@link spatialComparison}. `addressFrequency` down-weights agreement on a crowded address either
 * way.
 *
 * @param opts Optional levers (see {@link DefaultModelOptions}); may be omitted entirely.
 * @returns A Fellegi-Sunter model over {@link SourceRecord}s.
 */
export declare function buildDefaultModel(opts?: DefaultModelOptions): FellegiSunterModel<SourceRecord>;
|
|
26
69
|
/** The default blocking keys: a union of location, canonical address, phone, and email. */
|
|
27
70
|
export declare function defaultBlockingKeys(): BlockingKey<SourceRecord>[];
|
|
28
71
|
/** Options for {@link resolveEntities}. */
|
|
@@ -40,6 +83,73 @@ export interface ResolveConfig {
|
|
|
40
83
|
* false.
|
|
41
84
|
*/
|
|
42
85
|
trainEM?: boolean;
|
|
86
|
+
/**
|
|
87
|
+
* Address-frequency table (over {@link addressFrequencyKey}) — down-weights address agreement by
|
|
88
|
+
* how shared the address is (a crowded clinic/billing address is weak identity evidence).
|
|
89
|
+
* **Default-on (#625):** when omitted, `resolveEntities` AUTO-COMPUTES the table over the INPUT
|
|
90
|
+
* records' addresses (the right scope for a single dataset — a crowded address within the data is
|
|
91
|
+
* down-weighted). Pass your own {@link TermFrequencyTable} (e.g. a corpus-wide one) to override,
|
|
92
|
+
* or `false` to disable (the legacy bare baseline). Ignored if `model` is supplied.
|
|
93
|
+
*/
|
|
94
|
+
addressFrequency?: TermFrequencyTable | false;
|
|
95
|
+
/**
|
|
96
|
+
* A1 (#625): collapse the redundant address-key + distance pair into one
|
|
97
|
+
* {@link spatialComparison}. **Default-on (true)** — the cleaner, less-over-merging spatial model.
|
|
98
|
+
* Set `false` for the legacy two-signal baseline. Ignored if `model` is supplied.
|
|
99
|
+
*/
|
|
100
|
+
collapseSpatial?: boolean;
|
|
101
|
+
/**
|
|
102
|
+
* A2 (#625): require positive name OR org corroboration ({@link CORROBORATING_FIELDS}) for a link
|
|
103
|
+
* — a shared address alone cannot merge two records. Suppresses the spatial-only links that fuse
|
|
104
|
+
* distinct co-located providers. Default false.
|
|
105
|
+
*/
|
|
106
|
+
requireCorroboration?: boolean;
|
|
107
|
+
/**
|
|
108
|
+
* A3 (#625): add a normalized-phone comparison to the default model — strong evidence and the
|
|
109
|
+
* secondary corroborator that keeps A2 from killing name-drift recall. Ignored if `model` is
|
|
110
|
+
* supplied.
|
|
111
|
+
*/
|
|
112
|
+
usePhone?: boolean;
|
|
113
|
+
/**
|
|
114
|
+
* A4 (#625): clustering linkage. `"single"` (default) = connected components; `"average"` =
|
|
115
|
+
* average-linkage refinement that splits a component whose sub-clusters are joined only by a weak
|
|
116
|
+
* bridge — the principled over-merge fix.
|
|
117
|
+
*/
|
|
118
|
+
linkage?: "single" | "average";
|
|
119
|
+
/**
|
|
120
|
+
* Extra secondary-identifier keys (from {@link SourceRecord.attributes}) to add as comparisons +
|
|
121
|
+
* corroborators — e.g. `["authorizedOfficial"]`. Ignored if `model` is supplied.
|
|
122
|
+
*/
|
|
123
|
+
discriminators?: string[];
|
|
124
|
+
/**
|
|
125
|
+
* Override the Fellegi-Sunter link weight with a LEARNED score (#603). When set, a candidate
|
|
126
|
+
* pair's match weight is this function's return value (same threshold-comparable units as the FS
|
|
127
|
+
* weight) instead of {@link scorePair}'s. Default undefined (pure FS). The blocking + clustering
|
|
128
|
+
* are unchanged, so a trained scorer can be A/B'd against the FS spine on the identical pipeline.
|
|
129
|
+
* The function is responsible for its own feature computation (e.g. the agreement pattern, which
|
|
130
|
+
* is EM-independent, plus any corpus statistics it captured).
|
|
131
|
+
*
|
|
132
|
+
* INTERACTION with {@link requireCorroboration}: the two are independent and compose, but the
|
|
133
|
+
* corroboration gate is still evaluated on the Fellegi-Sunter `contributions` (NOT the learned
|
|
134
|
+
* score) — so a learned-high pair with no positive FS name/org/phone agreement is still gated
|
|
135
|
+
* out. A learned scorer is normally trained to subsume corroboration, so use ONE or the other;
|
|
136
|
+
* combining them lets the FS gate veto the learned score, which is rarely what you want.
|
|
137
|
+
*/
|
|
138
|
+
scorer?: (a: SourceRecord, b: SourceRecord) => number;
|
|
139
|
+
/**
|
|
140
|
+
* **#603: the LEARNED gradient-boosted-tree scorer — DEFAULT-ON.** Omitted or `true` uses the
|
|
141
|
+
* bundled {@link DEDUP_GBT_MODEL} (trained on the NPPES NPI-truth set; beats the Fellegi-Sunter
|
|
142
|
+
* spine ~+5pp dedup F1 held-out within a state and ~+22pp on states it never trained on, cutting
|
|
143
|
+
* the co-located over-merge). `false` opts out to the pure FS spine; pass your own {@link GBT} for
|
|
144
|
+
* a custom model. The scorer is built over the SAME collapsed-spatial + address-frequency feature
|
|
145
|
+
* model as training (via the resolved {@link addressFrequency}), independent of this call's
|
|
146
|
+
* comparison config. An explicit {@link scorer} takes precedence. When the bundled model is active
|
|
147
|
+
* and you don't set {@link threshold}, its CALIBRATED link threshold
|
|
148
|
+
* ({@link DEDUP_GBT_META}.recommendedThreshold) is used — the GBT logit isn't in FS-weight units,
|
|
149
|
+
* so 0 would over-merge. The model is NPPES/US-trained; for a very different domain, A/B it or
|
|
150
|
+
* pass `false`.
|
|
151
|
+
*/
|
|
152
|
+
learnedScorer?: boolean | GBT;
|
|
43
153
|
}
|
|
44
154
|
/** The outcome of a resolve pass. */
|
|
45
155
|
export interface ResolveResult {
|
package/out/resolve.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve.d.ts","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,kBAAkB,
|
|
1
|
+
{"version":3,"file":"resolve.d.ts","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,GAAG,EAER,KAAK,kBAAkB,EAgBvB,MAAM,kBAAkB,CAAA;AAGzB,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAMvD;AAiCD;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IACnC;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,kBAAkB,CAAA;IACrC;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,GAAE,mBAAwB,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAoElG;AAED,2FAA2F;AAC3F,wBAAgB,mBAAmB,IAAI,WAAW,CAAC,YAAY,CAAC,EAAE,CAOjE;AAED,2CAA2C;AAC3C,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,KAAK,CAAC,EAAE,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACxC,wEAAwE;IACxE,YAAY,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,EAAE,CAAA;IAC1C,6FAA6F;IAC7F,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,yEAAyE;IACzE,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB;;;;;;;OAOG;IACH,gBAAgB,CAAC,EAAE,kBAAkB,GAAG,KAAK,CAAA;IAC7C;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAA;IAC9B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB;;;;OAIG;IACH,OAAO,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAA;IAC9B;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;;;;;;;;;;;OAaG;IACH,MAAM,CAAC,EAAE,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,KAAK,MAAM,CAAA;IACrD;;;;;;;;;;;;OAYG;IACH,aAAa,CAAC,EAAE,OAAO,GAAG,GAAG,CAAA;CAC7B;AAED,qCAAqC;AACrC,MAAM,WAAW,aAAa;IAC7B,QAAQ,EAAE,cAAc,EAAE,CAAA;IAC1B,mDAAmD;IACnD,cAAc,EAAE,MAAM,CAAA;IACtB,yEAAyE;IACzE,aAAa,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CACnD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,EAAE,MAAM,GAAE,aAAkB,GAAG,aAAa,CAiG3G"}
|
package/out/resolve.js
CHANGED
|
@@ -16,7 +16,24 @@
|
|
|
16
16
|
* key). Wiring mailwoman's parser + geocoder to turn raw rows into `SourceRecord`s is the ingest
|
|
17
17
|
* layer that sits in front of this.
|
|
18
18
|
*/
|
|
19
|
-
import { DEFAULT_DISTANCE_LEVELS, agreementPattern, block, cluster, distanceComparison, estimateParameters, exactKey, geoCellKey, representative, scorePair, similarityComparison, } from "@mailwoman/match";
|
|
19
|
+
import { DEFAULT_DISTANCE_LEVELS, DEFAULT_SPATIAL_LEVELS, agreementPattern, block, buildTermFrequencyTable, cluster, distanceComparison, estimateParameters, exactKey, geoCellKey, representative, scorePair, similarityComparison, spatialComparison, withTermFrequency, } from "@mailwoman/match";
|
|
20
|
+
import { createGbtScorer } from "./learned-scorer.js";
|
|
21
|
+
import { DEDUP_GBT_META, DEDUP_GBT_MODEL } from "./models/dedup-gbt-en-us.js";
|
|
22
|
+
/**
 * Cheap, parse-free normalization used as the address-frequency key: the input is upper-cased and
 * reduced to its runs of `A-Z` / `0-9` characters, joined by single spaces. Everything else
 * (punctuation, whitespace, any character outside that range) acts as a separator, and leading or
 * trailing separators are dropped.
 *
 * The key is used to count how many distinct entities share an address across the WHOLE corpus
 * (computable over millions of rows without geocoding) and to look that frequency up at match
 * time. It's the inverse-frequency signal: a crowded clinic/billing address is weak evidence of
 * identity; a lonely address is strong. (See
 * docs/articles/evals/2026-06-15-nppes-dedup-benchmark.md.)
 *
 * @param raw The raw address string.
 * @returns The normalized key; `""` when the input holds no letters or digits.
 */
export function addressFrequencyKey(raw) {
    const tokens = raw.toUpperCase().split(/[^A-Z0-9]+/);
    // Splitting leaves empty strings at the edges when the input starts or ends with a separator.
    return tokens.filter((token) => token.length > 0).join(" ");
}
|
|
20
37
|
/** Default tiered levels for a name-like text field. `m`/`u` are EM-estimable seeds. */
|
|
21
38
|
const NAME_LEVELS = [
|
|
22
39
|
{ label: "exact", minSimilarity: 1.0, m: 0.8, u: 0.01 },
|
|
@@ -24,17 +41,89 @@ const NAME_LEVELS = [
|
|
|
24
41
|
{ label: "different", minSimilarity: 0, m: 0.05, u: 0.96 },
|
|
25
42
|
];
|
|
26
43
|
/**
 * Exact-vs-different levels for a normalized phone. A shared line is strong, rarely-coincidental
 * evidence.
 *
 * Only two tiers are defined: "exact" (`minSimilarity: 1.0`) and a catch-all "different"
 * (`minSimilarity: 0`).
 *
 * NOTE(review): `m` / `u` are presumably the per-level match / non-match probabilities of the
 * Fellegi-Sunter model (NAME_LEVELS calls them "EM-estimable seeds") — confirm against
 * `@mailwoman/match`.
 */
const PHONE_LEVELS = [
    { label: "exact", minSimilarity: 1.0, m: 0.6, u: 0.002 },
    { label: "different", minSimilarity: 0, m: 0.4, u: 0.998 },
];
|
|
51
|
+
/**
 * Last-10-digits normalization for phone agreement (drops country code, punctuation, extensions).
 *
 * A trailing extension (`x89`, `ext. 89`, `extension 89`, `#89`, `;ext=89`) is removed BEFORE the
 * digits are extracted; otherwise its digits would be counted as part of the number and the
 * "last 10 digits" would cut off the front of the real number.
 *
 * @param raw The raw phone value; may be missing.
 * @returns The last 10 digits when at least 10 are present, the bare digits when fewer are
 *   present, or `null` when the input is empty or contains no digits.
 */
function normalizePhone(raw) {
    if (!raw)
        return null;
    const text = String(raw);
    const withoutExtension = text.replace(/[\s,;]*(?:ext(?:ension)?|x|#)[\s.:=]*\d+\s*$/i, "");
    // Only honor the strip when a number remains in front of it; an input such as "#42" is kept whole.
    const base = /\d/.test(withoutExtension) ? withoutExtension : text;
    const digits = base.replace(/\D+/g, "");
    return digits.length >= 10 ? digits.slice(-10) : digits || null;
}
|
|
58
|
+
/**
 * The identity-corroborating comparisons (person name, organization, phone). A2 (#625,
 * {@link ResolveConfig.requireCorroboration}) requires at least one of these to _positively_ agree
 * before a pair may link — a shared address alone is not identity. Phone (A3) is the secondary
 * identifier that rescues a true same-entity link across name drift.
 *
 * The entries are comparison names: `"given"`, `"family"`, `"organization"` and `"phone"`.
 *
 * NOTE(review): `attr:<key>` discriminator comparisons are documented as counting toward
 * corroboration but are not listed in this set — presumably they are matched separately where the
 * gate is evaluated; confirm there.
 */
const CORROBORATING_FIELDS = new Set(["given", "family", "organization", "phone"]);
|
|
65
|
+
/**
|
|
66
|
+
* The default geocode-first scoring model: name + organization + a spatial signal. The spatial
|
|
67
|
+
* signal is either two comparisons (address-key similarity + great-circle distance — the legacy
|
|
68
|
+
* default, which double-counts) or, with {@link DefaultModelOptions.collapseSpatial}, one collapsed
|
|
69
|
+
* {@link spatialComparison}. `addressFrequency` down-weights agreement on a crowded address either
|
|
70
|
+
* way.
|
|
71
|
+
*/
|
|
72
|
+
export function buildDefaultModel(opts = {}) {
|
|
73
|
+
const identity = [
|
|
74
|
+
similarityComparison({ name: "given", extract: (r) => r.name?.given, levels: NAME_LEVELS }),
|
|
75
|
+
similarityComparison({ name: "family", extract: (r) => r.name?.family, levels: NAME_LEVELS }),
|
|
76
|
+
similarityComparison({
|
|
77
|
+
name: "organization",
|
|
78
|
+
extract: (r) => r.organization?.canonical,
|
|
79
|
+
levels: NAME_LEVELS,
|
|
80
|
+
}),
|
|
81
|
+
];
|
|
82
|
+
if (opts.usePhone) {
|
|
83
|
+
identity.push(similarityComparison({
|
|
84
|
+
name: "phone",
|
|
85
|
+
extract: (r) => normalizePhone(r.phone),
|
|
86
|
+
similarity: (a, b) => (a === b ? 1 : 0), // exact normalized-digit match only
|
|
87
|
+
levels: PHONE_LEVELS,
|
|
88
|
+
}));
|
|
89
|
+
}
|
|
90
|
+
for (const key of opts.discriminators ?? []) {
|
|
91
|
+
identity.push(similarityComparison({
|
|
92
|
+
name: `attr:${key}`,
|
|
93
|
+
extract: (r) => r.attributes?.[key],
|
|
94
|
+
levels: NAME_LEVELS,
|
|
95
|
+
}));
|
|
96
|
+
}
|
|
97
|
+
if (opts.collapseSpatial) {
|
|
98
|
+
let spatial = spatialComparison({
|
|
99
|
+
name: "spatial",
|
|
100
|
+
key: (r) => r.address?.canonicalKey,
|
|
101
|
+
coordinate: (r) => r.address?.geocode?.coordinate,
|
|
102
|
+
levels: DEFAULT_SPATIAL_LEVELS,
|
|
103
|
+
});
|
|
104
|
+
if (opts.addressFrequency) {
|
|
105
|
+
spatial = withTermFrequency(spatial, {
|
|
106
|
+
table: opts.addressFrequency,
|
|
107
|
+
value: (a) => a.address?.raw ?? null,
|
|
108
|
+
levels: [0], // the exact same-key tier
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
return { lambda: 0.0001, comparisons: [...identity, spatial] };
|
|
112
|
+
}
|
|
113
|
+
// Legacy two-signal spatial: address-key similarity + great-circle distance (redundant; A1 collapses it).
|
|
114
|
+
let address = similarityComparison({
|
|
115
|
+
name: "address",
|
|
116
|
+
extract: (r) => r.address?.canonicalKey,
|
|
117
|
+
levels: NAME_LEVELS,
|
|
118
|
+
});
|
|
119
|
+
if (opts.addressFrequency) {
|
|
120
|
+
address = withTermFrequency(address, { table: opts.addressFrequency, value: (a) => a.address?.raw ?? null });
|
|
121
|
+
}
|
|
31
122
|
return {
|
|
32
123
|
lambda: 0.0001,
|
|
33
124
|
comparisons: [
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
similarityComparison({ name: "organization", extract: (r) => r.organization?.canonical, levels: NAME_LEVELS }),
|
|
37
|
-
similarityComparison({ name: "address", extract: (r) => r.address?.canonicalKey, levels: NAME_LEVELS }),
|
|
125
|
+
...identity,
|
|
126
|
+
address,
|
|
38
127
|
distanceComparison({
|
|
39
128
|
name: "distance",
|
|
40
129
|
extract: (r) => r.address?.geocode?.coordinate,
|
|
@@ -57,33 +146,88 @@ export function defaultBlockingKeys() {
|
|
|
57
146
|
* exactly one entity (a record with no confident link is its own singleton entity).
|
|
58
147
|
*/
|
|
59
148
|
export function resolveEntities(records, config = {}) {
|
|
60
|
-
|
|
149
|
+
// The proven levers are DEFAULT-ON (#625): the address-frequency down-weight (auto-computed over the
|
|
150
|
+
// input records when not supplied; `false` disables) + the collapsed spatial signal (A1). A new
|
|
151
|
+
// caller gets the strong config out of the box; pass explicit values to override.
|
|
152
|
+
const addressFrequency = config.addressFrequency === false
|
|
153
|
+
? undefined
|
|
154
|
+
: (config.addressFrequency ??
|
|
155
|
+
buildTermFrequencyTable(records.map((r) => r.address?.raw), { normalize: addressFrequencyKey }));
|
|
156
|
+
const collapseSpatial = config.collapseSpatial ?? true;
|
|
157
|
+
const model = config.model ??
|
|
158
|
+
buildDefaultModel({
|
|
159
|
+
addressFrequency,
|
|
160
|
+
collapseSpatial,
|
|
161
|
+
usePhone: config.usePhone,
|
|
162
|
+
discriminators: config.discriminators,
|
|
163
|
+
});
|
|
61
164
|
const blockingKeys = config.blockingKeys ?? defaultBlockingKeys();
|
|
62
|
-
|
|
165
|
+
// #603: the learned scorer is DEFAULT-ON. An explicit `scorer` overrides everything; otherwise
|
|
166
|
+
// `learnedScorer === false` opts out to the FS spine, a GBT supplies a custom model, and
|
|
167
|
+
// `true`/omitted uses the bundled DEDUP_GBT_MODEL. The scorer is built over the FIXED
|
|
168
|
+
// collapsed-spatial + address-frequency feature model (matching training, independent of this call's
|
|
169
|
+
// comparison config), using the resolved address-frequency table.
|
|
170
|
+
let scorer = config.scorer;
|
|
171
|
+
let usingBundledModel = false;
|
|
172
|
+
if (!scorer && config.learnedScorer !== false) {
|
|
173
|
+
const gbt = config.learnedScorer === undefined || config.learnedScorer === true ? DEDUP_GBT_MODEL : config.learnedScorer;
|
|
174
|
+
usingBundledModel = gbt === DEDUP_GBT_MODEL;
|
|
175
|
+
scorer = createGbtScorer({
|
|
176
|
+
model: gbt,
|
|
177
|
+
comparisons: buildDefaultModel({ collapseSpatial: true, addressFrequency }).comparisons,
|
|
178
|
+
addressFrequency: addressFrequency ?? buildTermFrequencyTable([], { normalize: addressFrequencyKey }),
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
// Threshold: an explicit value wins; else the bundled model's CALIBRATED threshold when it's active
|
|
182
|
+
// (its logit isn't in FS-weight units, so 0 would over-merge); else 0 (FS spine or a custom model).
|
|
183
|
+
const threshold = config.threshold ?? (usingBundledModel ? DEDUP_GBT_META.recommendedThreshold : 0);
|
|
63
184
|
const { pairs, droppedBlocks } = block(records, blockingKeys, { maxBlockSize: config.maxBlockSize });
|
|
64
185
|
let scoringModel = model;
|
|
65
186
|
if (config.trainEM && pairs.length > 0) {
|
|
66
187
|
const patterns = pairs.map(([a, b]) => agreementPattern(model.comparisons, a, b));
|
|
67
188
|
scoringModel = estimateParameters(model, patterns).model;
|
|
68
189
|
}
|
|
69
|
-
const links = pairs.map(([a, b]) =>
|
|
70
|
-
a,
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
190
|
+
const links = pairs.map(([a, b]) => {
|
|
191
|
+
const score = scorePair(scoringModel, a, b);
|
|
192
|
+
// #603: a learned scorer (explicit `scorer` or the default-on `learnedScorer`) replaces the FS weight
|
|
193
|
+
// (same clustering + threshold semantics).
|
|
194
|
+
let weight = scorer ? scorer(a, b) : score.weight;
|
|
195
|
+
// A2 (#625): a link must carry positive name OR org corroboration — a shared (even down-weighted)
|
|
196
|
+
// address alone is not identity. Spatial-only pairs are suppressed below any threshold.
|
|
197
|
+
if (config.requireCorroboration) {
|
|
198
|
+
const corroborated = score.contributions.some((c) => (CORROBORATING_FIELDS.has(c.name) || c.name.startsWith("attr:")) && c.weight > 0);
|
|
199
|
+
if (!corroborated)
|
|
200
|
+
weight = Number.NEGATIVE_INFINITY;
|
|
201
|
+
}
|
|
202
|
+
return { a, b, weight };
|
|
203
|
+
});
|
|
204
|
+
const clusters = cluster(records, links, { threshold, linkage: config.linkage });
|
|
205
|
+
// Cohesion = the weakest within-cluster link weight (how tightly an entity holds together). Compute it
|
|
206
|
+
// in ONE pass over links via a record→cluster index, not by filtering every link for every cluster —
|
|
207
|
+
// the latter is O(clusters × links) and dominates the resolve at scale.
|
|
208
|
+
const clusterOf = new Map();
|
|
209
|
+
clusters.forEach((group, i) => {
|
|
210
|
+
for (const record of group)
|
|
211
|
+
clusterOf.set(record, i);
|
|
212
|
+
});
|
|
213
|
+
const minIntraWeight = new Array(clusters.length).fill(Infinity);
|
|
214
|
+
for (const link of links) {
|
|
215
|
+
if (link.weight < threshold)
|
|
216
|
+
continue;
|
|
217
|
+
const ci = clusterOf.get(link.a);
|
|
218
|
+
if (ci === undefined || ci !== clusterOf.get(link.b))
|
|
219
|
+
continue;
|
|
220
|
+
if (link.weight < minIntraWeight[ci])
|
|
221
|
+
minIntraWeight[ci] = link.weight;
|
|
222
|
+
}
|
|
75
223
|
const entities = clusters.map((group, i) => {
|
|
76
|
-
const members = new Set(group);
|
|
77
|
-
const intraWeights = links
|
|
78
|
-
.filter((link) => link.weight >= threshold && members.has(link.a) && members.has(link.b))
|
|
79
|
-
.map((link) => link.weight);
|
|
80
224
|
const rep = representative(group) ?? group[0];
|
|
81
225
|
return {
|
|
82
226
|
id: `entity-${i}`,
|
|
83
227
|
records: group,
|
|
84
228
|
representative: rep,
|
|
85
229
|
coordinate: rep.address?.geocode?.coordinate ?? undefined,
|
|
86
|
-
cohesion: group.length > 1 &&
|
|
230
|
+
cohesion: group.length > 1 && minIntraWeight[i] !== Infinity ? minIntraWeight[i] : null,
|
|
87
231
|
};
|
|
88
232
|
});
|
|
89
233
|
return { entities, candidatePairs: pairs.length, droppedBlocks };
|
package/out/resolve.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve.js","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,
|
|
1
|
+
{"version":3,"file":"resolve.js","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EAMN,uBAAuB,EACvB,sBAAsB,EACtB,gBAAgB,EAChB,KAAK,EACL,uBAAuB,EACvB,OAAO,EACP,kBAAkB,EAClB,kBAAkB,EAClB,QAAQ,EACR,UAAU,EACV,cAAc,EACd,SAAS,EACT,oBAAoB,EACpB,iBAAiB,EACjB,iBAAiB,GACjB,MAAM,kBAAkB,CAAA;AACzB,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AAG7E;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC9C,OAAO,GAAG;SACR,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;AACvB,CAAC;AAED,wFAAwF;AACxF,MAAM,WAAW,GAAsB;IACtC,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE;IACvD,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACxD,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;CAC1D,CAAA;AAED;;;GAGG;AACH,MAAM,YAAY,GAAsB;IACvC,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACxD,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;CAC1D,CAAA;AAED,sGAAsG;AACtG,SAAS,cAAc,CAAC,GAA8B;IACrD,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACrB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;IACtC,OAAO,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAA;AAChE,CAAC;AAED;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,OAAO,CAAC,CAAC,CAAA;AAkClF;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAA4B,EAAE;IAC/D,MAAM,QAAQ,GAAG;QAChB,oBAAoB,CAAe,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QACzG,oBAAoB,CAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QAC3G,oBAAoB,CAAe;YAClC,IAAI,EAAE,cAAc;YACpB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,EAAE,SAAS;YACzC,MAAM,
EAAE,WAAW;SACnB,CAAC;KACF,CAAA;IACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACnB,QAAQ,CAAC,IAAI,CACZ,oBAAoB,CAAe;YAClC,IAAI,EAAE,OAAO;YACb,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,KAAK,CAAC;YACvC,UAAU,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,oCAAoC;YAC7E,MAAM,EAAE,YAAY;SACpB,CAAC,CACF,CAAA;IACF,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,cAAc,IAAI,EAAE,EAAE,CAAC;QAC7C,QAAQ,CAAC,IAAI,CACZ,oBAAoB,CAAe;YAClC,IAAI,EAAE,QAAQ,GAAG,EAAE;YACnB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,GAAG,CAAC;YACnC,MAAM,EAAE,WAAW;SACnB,CAAC,CACF,CAAA;IACF,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;QAC1B,IAAI,OAAO,GAAG,iBAAiB,CAAe;YAC7C,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY;YACnC,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU;YACjD,MAAM,EAAE,sBAAsB;SAC9B,CAAC,CAAA;QACF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,OAAO,GAAG,iBAAiB,CAAC,OAAO,EAAE;gBACpC,KAAK,EAAE,IAAI,CAAC,gBAAgB;gBAC5B,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI;gBACpC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,0BAA0B;aACvC,CAAC,CAAA;QACH,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,GAAG,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAA;IAC/D,CAAC;IAED,0GAA0G;IAC1G,IAAI,OAAO,GAAG,oBAAoB,CAAe;QAChD,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY;QACvC,MAAM,EAAE,WAAW;KACnB,CAAC,CAAA;IACF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAC3B,OAAO,GAAG,iBAAiB,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,gBAAgB,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,EAAE,CAAC,CAAA;IAC7G,CAAC;IACD,OAAO;QACN,MAAM,EAAE,MAAM;QACd,WAAW,EAAE;YACZ,GAAG,QAAQ;YACX,OAAO;YACP,kBAAkB,CAAC;gBAClB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU;gBAC9C,MAAM,EAAE,uBAAuB;aAC/B,CAAC;SACF;KACD,CAAA;AACF,CAAC;AAED,2FAA2F;AAC3F,MAAM,UAAU,mBAAmB;IAClC,OAAO;QACN,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC
;QACjD,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY,CAAC;QACxC,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QACxB,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;KACxB,CAAA;AACF,CAAC;AA+FD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,OAAgC,EAAE,SAAwB,EAAE;IAC3F,qGAAqG;IACrG,gGAAgG;IAChG,kFAAkF;IAClF,MAAM,gBAAgB,GACrB,MAAM,CAAC,gBAAgB,KAAK,KAAK;QAChC,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB;YACzB,uBAAuB,CACtB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,CAAC,EAClC,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAClC,CAAC,CAAA;IACL,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,IAAI,CAAA;IACtD,MAAM,KAAK,GACV,MAAM,CAAC,KAAK;QACZ,iBAAiB,CAAC;YACjB,gBAAgB;YAChB,eAAe;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,cAAc,EAAE,MAAM,CAAC,cAAc;SACrC,CAAC,CAAA;IACH,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,EAAE,CAAA;IAEjE,+FAA+F;IAC/F,yFAAyF;IACzF,sFAAsF;IACtF,qGAAqG;IACrG,kEAAkE;IAClE,IAAI,MAAM,GAAG,MAAM,CAAC,MAAM,CAAA;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAA;IAC7B,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,aAAa,KAAK,KAAK,EAAE,CAAC;QAC/C,MAAM,GAAG,GACR,MAAM,CAAC,aAAa,KAAK,SAAS,IAAI,MAAM,CAAC,aAAa,KAAK,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAA;QAC7G,iBAAiB,GAAG,GAAG,KAAK,eAAe,CAAA;QAC3C,MAAM,GAAG,eAAe,CAAC;YACxB,KAAK,EAAE,GAAG;YACV,WAAW,EAAE,iBAAiB,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,WAAW;YACvF,gBAAgB,EAAE,gBAAgB,IAAI,uBAAuB,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC;SACrG,CAAC,CAAA;IACH,CAAC;IACD,oGAAoG;IACpG,oGAAoG;IACpG,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,cAAc,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAEnG,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAA;IAEpG,IAAI,YAAY,GAAG,KAAK,CAAA;IACxB,IAAI,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,gBAAgB,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QACjF,YAAY,GAAG,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAA;IACz
D,CAAC;IAED,MAAM,KAAK,GAA+B,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE;QAC9D,MAAM,KAAK,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;QAC3C,kGAAkG;QAClG,2CAA2C;QAC3C,IAAI,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAA;QACjD,kGAAkG;QAClG,wFAAwF;QACxF,IAAI,MAAM,CAAC,oBAAoB,EAAE,CAAC;YACjC,MAAM,YAAY,GAAG,KAAK,CAAC,aAAa,CAAC,IAAI,CAC5C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CACvF,CAAA;YACD,IAAI,CAAC,YAAY;gBAAE,MAAM,GAAG,MAAM,CAAC,iBAAiB,CAAA;QACrD,CAAC;QACD,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,CAAA;IACxB,CAAC,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAA;IAEhF,uGAAuG;IACvG,qGAAqG;IACrG,wEAAwE;IACxE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAwB,CAAA;IACjD,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC7B,KAAK,MAAM,MAAM,IAAI,KAAK;YAAE,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,MAAM,cAAc,GAAG,IAAI,KAAK,CAAS,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IACxE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,MAAM,GAAG,SAAS;YAAE,SAAQ;QACrC,MAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,SAAQ;QAC9D,IAAI,IAAI,CAAC,MAAM,GAAG,cAAc,CAAC,EAAE,CAAE;YAAE,cAAc,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAA;IACxE,CAAC;IAED,MAAM,QAAQ,GAAqB,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC5D,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAE,CAAA;QAC9C,OAAO;YACN,EAAE,EAAE,UAAU,CAAC,EAAE;YACjB,OAAO,EAAE,KAAK;YACd,cAAc,EAAE,GAAG;YACnB,UAAU,EAAE,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,IAAI,SAAS;YACzD,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,CAAC,CAAC,CAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI;SACzF,CAAA;IACF,CAAC,CAAC,CAAA;IAEF,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,aAAa,EAA
E,CAAA;AACjE,CAAC"}
|
package/out/types.d.ts
CHANGED
|
@@ -21,6 +21,13 @@ export interface SourceRecord {
|
|
|
21
21
|
address?: PostalAddress | null;
|
|
22
22
|
phone?: string | null;
|
|
23
23
|
email?: string | null;
|
|
24
|
+
/**
|
|
25
|
+
* Additional secondary-identifier fields, normalized — anything that helps tell two records apart
|
|
26
|
+
* or confirm they're the same beyond name/org/address/phone (an authorized-official name, a
|
|
27
|
+
* provider taxonomy, a license number…). Used as extra comparisons + corroborators when the model
|
|
28
|
+
* is built with matching `discriminators`. Keyed by a stable field name the model references.
|
|
29
|
+
*/
|
|
30
|
+
attributes?: Record<string, string>;
|
|
24
31
|
/** The original row, verbatim, for audit. */
|
|
25
32
|
raw?: Record<string, string>;
|
|
26
33
|
}
|
package/out/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAEpF,oGAAoG;AACpG,MAAM,WAAW,YAAY;IAC5B,iEAAiE;IACjE,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,wDAAwD;IACxD,IAAI,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;IACxB,kEAAkE;IAClE,YAAY,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAA;IACtC,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAA;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,6CAA6C;IAC7C,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC5B;AAED,uGAAuG;AACvG,MAAM,WAAW,cAAc;IAC9B,yCAAyC;IACzC,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,qEAAqE;IACrE,cAAc,EAAE,YAAY,CAAA;IAC5B,gEAAgE;IAChE,UAAU,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;IACpD;;;OAGG;IACH,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;CACvB;AAID,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAA;IACb,qDAAqD;IACrD,WAAW,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC7B;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,SAAS,CAAA;IACf,QAAQ,EAAE,YAAY,CAAA;IACtB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,mBAAmB,CAAA;IACzB,QAAQ,EAAE,cAAc,EAAE,CAAA;CAC1B"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAEpF,oGAAoG;AACpG,MAAM,WAAW,YAAY;IAC5B,iEAAiE;IACjE,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,wDAAwD;IACxD,IAAI,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;IACxB,kEAAkE;IAClE,YAAY,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAA;IACtC,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAA;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACnC,6CAA6C;IAC7C,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC5B;AAED,uGAAuG;AACvG,MAAM,WAAW,cAAc;IAC9B,yCAAyC;IACzC,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,qEAAqE;IACrE,cAAc,EAAE,YAAY,CAAA;IAC5B,gEAAgE;IAChE,UAAU,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;IACpD;;;OAGG;IACH,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;CACvB;AAID,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAA;IACb,qDAAqD;IACrD,WAAW,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC7B;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,SAAS,CAAA;IACf,QAAQ,EAAE,YAAY,CAAA;IACtB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,mBAAmB,CAAA;IACzB,QAAQ,EAAE,cAAc,EAAE,CAAA;CAC1B"}
|
package/out/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAiEH,YAAY"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/registry",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.11.0",
|
|
4
4
|
"description": "The geocode-first record-matching application: resolve messy contact/organization records into canonical, geocoded entities (block → score → cluster) and export them as GeoJSON for spatial analysis. The clinic-funding use case mailwoman was built for.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -17,9 +17,12 @@
|
|
|
17
17
|
"./ingest": "./out/ingest.js"
|
|
18
18
|
},
|
|
19
19
|
"dependencies": {
|
|
20
|
-
"@mailwoman/
|
|
21
|
-
"@mailwoman/
|
|
22
|
-
"
|
|
20
|
+
"@mailwoman/address-id": "4.11.0",
|
|
21
|
+
"@mailwoman/match": "4.11.0",
|
|
22
|
+
"@mailwoman/record": "4.11.0",
|
|
23
|
+
"@protomaps/basemaps": "^5.7.2",
|
|
24
|
+
"csv-parse": "^5.6.0",
|
|
25
|
+
"spliterator": "^2.0.0"
|
|
23
26
|
},
|
|
24
27
|
"devDependencies": {
|
|
25
28
|
"@types/node": "^25.9.2"
|