@mailwoman/registry 4.8.1 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/resolve.d.ts CHANGED
@@ -16,13 +16,56 @@
16
16
  * key). Wiring mailwoman's parser + geocoder to turn raw rows into `SourceRecord`s is the ingest
17
17
  * layer that sits in front of this.
18
18
  */
19
- import { type BlockingKey, type FellegiSunterModel } from "@mailwoman/match";
19
+ import { type BlockingKey, type FellegiSunterModel, type GBT, type TermFrequencyTable } from "@mailwoman/match";
20
20
  import type { ResolvedEntity, SourceRecord } from "./types.js";
21
21
  /**
22
- * The default geocode-first scoring model: name + organization + address key + great-circle
23
- * distance.
22
+ * Cheap, parse-free normalization for the address-frequency key — uppercase, collapse whitespace,
23
+ * drop punctuation. Used to count how many distinct entities share an address across the WHOLE
24
+ * corpus (computable over millions of rows without geocoding) and to look that frequency up at
25
+ * match time. It's the inverse-frequency signal: a crowded clinic/billing address is weak evidence
26
+ * of identity; a lonely address is strong. (See
27
+ * docs/articles/evals/2026-06-15-nppes-dedup-benchmark.md.)
24
28
  */
25
- export declare function buildDefaultModel(): FellegiSunterModel<SourceRecord>;
29
+ export declare function addressFrequencyKey(raw: string): string;
30
+ /**
31
+ * Options for {@link buildDefaultModel}. Each lever is default-off, so the base model is
32
+ * byte-stable.
33
+ */
34
+ export interface DefaultModelOptions {
35
+ /**
36
+ * Corpus-wide address-frequency table (over {@link addressFrequencyKey}) — makes the address-
37
+ * agreement weight **inverse to how shared the address is** (a building with 50 providers makes
38
+ * "same address" near-worthless evidence). The table's `value` is the record's raw address
39
+ * string.
40
+ */
41
+ addressFrequency?: TermFrequencyTable;
42
+ /**
43
+ * **A1 (#625):** collapse the redundant address-key + great-circle-distance comparisons into ONE
44
+ * {@link spatialComparison spatial-agreement} signal — an exact-key tier (where
45
+ * `addressFrequency`, if set, rides) over distance buckets. Removes the double-count that
46
+ * over-merges co-located providers (an exact key match already implies distance ≈ 0).
47
+ */
48
+ collapseSpatial?: boolean;
49
+ /**
50
+ * **A3 (#625):** add a normalized-phone exact-match comparison — a shared line is strong evidence
51
+ * and the secondary corroborator that lets a true same-entity link survive name drift under A2.
52
+ */
53
+ usePhone?: boolean;
54
+ /**
55
+ * Extra secondary-identifier comparisons drawn from {@link SourceRecord.attributes} (e.g.
56
+ * `["authorizedOfficial"]`). Each becomes an `attr:<key>` comparison AND counts toward A2
57
+ * corroboration — a more reliable discriminator than phone where the data has one (#625).
58
+ */
59
+ discriminators?: string[];
60
+ }
61
+ /**
62
+ * The default geocode-first scoring model: name + organization + a spatial signal. The spatial
63
+ * signal is either two comparisons (address-key similarity + great-circle distance — the legacy
64
+ * default, which double-counts) or, with {@link DefaultModelOptions.collapseSpatial}, one collapsed
65
+ * {@link spatialComparison}. `addressFrequency` down-weights agreement on a crowded address either
66
+ * way.
67
+ */
68
+ export declare function buildDefaultModel(opts?: DefaultModelOptions): FellegiSunterModel<SourceRecord>;
26
69
  /** The default blocking keys: a union of location, canonical address, phone, and email. */
27
70
  export declare function defaultBlockingKeys(): BlockingKey<SourceRecord>[];
28
71
  /** Options for {@link resolveEntities}. */
@@ -40,6 +83,73 @@ export interface ResolveConfig {
40
83
  * false.
41
84
  */
42
85
  trainEM?: boolean;
86
+ /**
87
+ * Address-frequency table (over {@link addressFrequencyKey}) — down-weights address agreement by
88
+ * how shared the address is (a crowded clinic/billing address is weak identity evidence).
89
+ * **Default-on (#625):** when omitted, `resolveEntities` AUTO-COMPUTES the table over the INPUT
90
+ * records' addresses (the right scope for a single dataset — a crowded address within the data is
91
+ * down-weighted). Pass your own {@link TermFrequencyTable} (e.g. a corpus-wide one) to override,
92
+ * or `false` to disable (the legacy bare baseline). Ignored by the FS model if `model` is supplied (the learned scorer still reads the resolved table).
93
+ */
94
+ addressFrequency?: TermFrequencyTable | false;
95
+ /**
96
+ * A1 (#625): collapse the redundant address-key + distance pair into one
97
+ * {@link spatialComparison}. **Default-on (true)** — the cleaner, less-over-merging spatial model.
98
+ * Set `false` for the legacy two-signal baseline. Ignored if `model` is supplied.
99
+ */
100
+ collapseSpatial?: boolean;
101
+ /**
102
+ * A2 (#625): require positive name, org, phone, or `attr:*` corroboration ({@link CORROBORATING_FIELDS}) for a link
103
+ * — a shared address alone cannot merge two records. Suppresses the spatial-only links that fuse
104
+ * distinct co-located providers. Default false.
105
+ */
106
+ requireCorroboration?: boolean;
107
+ /**
108
+ * A3 (#625): add a normalized-phone comparison to the default model — strong evidence and the
109
+ * secondary corroborator that keeps A2 from killing name-drift recall. Ignored if `model` is
110
+ * supplied.
111
+ */
112
+ usePhone?: boolean;
113
+ /**
114
+ * A4 (#625): clustering linkage. `"single"` (default) = connected components; `"average"` =
115
+ * average-linkage refinement that splits a component whose sub-clusters are joined only by a weak
116
+ * bridge — the principled over-merge fix.
117
+ */
118
+ linkage?: "single" | "average";
119
+ /**
120
+ * Extra secondary-identifier keys (from {@link SourceRecord.attributes}) to add as comparisons +
121
+ * corroborators — e.g. `["authorizedOfficial"]`. Ignored if `model` is supplied.
122
+ */
123
+ discriminators?: string[];
124
+ /**
125
+ * Override the Fellegi-Sunter link weight with a LEARNED score (#603). When set, a candidate
126
+ * pair's match weight is this function's return value (same threshold-comparable units as the FS
127
+ * weight) instead of {@link scorePair}'s. Default undefined (pure FS). The blocking + clustering
128
+ * are unchanged, so a trained scorer can be A/B'd against the FS spine on the identical pipeline.
129
+ * The function is responsible for its own feature computation (e.g. the agreement pattern, which
130
+ * is EM-independent, plus any corpus statistics it captured).
131
+ *
132
+ * INTERACTION with {@link requireCorroboration}: the two are independent and compose, but the
133
+ * corroboration gate is still evaluated on the Fellegi-Sunter `contributions` (NOT the learned
134
+ * score) — so a learned-high pair with no positive FS name/org/phone agreement is still gated
135
+ * out. A learned scorer is normally trained to subsume corroboration, so use ONE or the other;
136
+ * combining them lets the FS gate veto the learned score, which is rarely what you want.
137
+ */
138
+ scorer?: (a: SourceRecord, b: SourceRecord) => number;
139
+ /**
140
+ * **#603: the LEARNED gradient-boosted-tree scorer — DEFAULT-ON.** Omitted or `true` uses the
141
+ * bundled {@link DEDUP_GBT_MODEL} (trained on the NPPES NPI-truth set; beats the Fellegi-Sunter
142
+ * spine ~+5pp dedup F1 held-out within a state and ~+22pp on states it never trained on, cutting
143
+ * the co-located over-merge). `false` opts out to the pure FS spine; pass your own {@link GBT} for
144
+ * a custom model. The scorer is built over the SAME collapsed-spatial + address-frequency feature
145
+ * model as training (via the resolved {@link addressFrequency}), independent of this call's
146
+ * comparison config. An explicit {@link scorer} takes precedence. When the bundled model is active
147
+ * and you don't set {@link threshold}, its CALIBRATED link threshold
148
+ * ({@link DEDUP_GBT_META}.recommendedThreshold) is used — the GBT logit isn't in FS-weight units,
149
+ * so 0 would over-merge. The model is NPPES/US-trained; for a very different domain, A/B it or
150
+ * pass `false`.
151
+ */
152
+ learnedScorer?: boolean | GBT;
43
153
  }
44
154
  /** The outcome of a resolve pass. */
45
155
  export interface ResolveResult {
@@ -1 +1 @@
1
- {"version":3,"file":"resolve.d.ts","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,kBAAkB,EAavB,MAAM,kBAAkB,CAAA;AACzB,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAS9D;;;GAGG;AACH,wBAAgB,iBAAiB,IAAI,kBAAkB,CAAC,YAAY,CAAC,CAepE;AAED,2FAA2F;AAC3F,wBAAgB,mBAAmB,IAAI,WAAW,CAAC,YAAY,CAAC,EAAE,CAOjE;AAED,2CAA2C;AAC3C,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,KAAK,CAAC,EAAE,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACxC,wEAAwE;IACxE,YAAY,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,EAAE,CAAA;IAC1C,6FAA6F;IAC7F,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,yEAAyE;IACzE,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;CACjB;AAED,qCAAqC;AACrC,MAAM,WAAW,aAAa;IAC7B,QAAQ,EAAE,cAAc,EAAE,CAAA;IAC1B,mDAAmD;IACnD,cAAc,EAAE,MAAM,CAAA;IACtB,yEAAyE;IACzE,aAAa,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CACnD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,EAAE,MAAM,GAAE,aAAkB,GAAG,aAAa,CAsC3G"}
1
+ {"version":3,"file":"resolve.d.ts","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,GAAG,EAER,KAAK,kBAAkB,EAgBvB,MAAM,kBAAkB,CAAA;AAGzB,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAMvD;AAiCD;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IACnC;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,kBAAkB,CAAA;IACrC;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,GAAE,mBAAwB,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAoElG;AAED,2FAA2F;AAC3F,wBAAgB,mBAAmB,IAAI,WAAW,CAAC,YAAY,CAAC,EAAE,CAOjE;AAED,2CAA2C;AAC3C,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,KAAK,CAAC,EAAE,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACxC,wEAAwE;IACxE,YAAY,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,EAAE,CAAA;IAC1C,6FAA6F;IAC7F,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,yEAAyE;IACzE,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB;;;;;;;OAOG;IACH,gBAAgB,CAAC,EAAE,kBAAkB,GAAG,KAAK,CAAA;IAC7C;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAA;IAC9B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB;;;;OAIG;IACH,OAAO,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAA;IAC9B;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;;;;;;;;;;;OAaG;IACH,MAAM,CAAC,EAAE,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,KAAK,MAAM,CAAA;IACrD;;;;;;;;;;;;OAYG;IACH,aAAa,CAAC,EAAE,OAAO,GAAG,GAAG,CAAA;CAC7B;AAED,qCAAqC;AACrC,MAAM,WAAW,aAAa;IAC7B,QAAQ,EAAE,cAAc,EAAE,CAAA;IAC1B,mDAAmD;IACnD,cAAc,EAAE,MAAM,CAAA;IACtB,yEAAyE;IACzE,aAAa,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CACnD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,EAAE,MAAM,GAAE,aAAkB,GAAG,aAAa,CAiG3G"}
package/out/resolve.js CHANGED
@@ -16,7 +16,24 @@
16
16
  * key). Wiring mailwoman's parser + geocoder to turn raw rows into `SourceRecord`s is the ingest
17
17
  * layer that sits in front of this.
18
18
  */
19
- import { DEFAULT_DISTANCE_LEVELS, agreementPattern, block, cluster, distanceComparison, estimateParameters, exactKey, geoCellKey, representative, scorePair, similarityComparison, } from "@mailwoman/match";
19
+ import { DEFAULT_DISTANCE_LEVELS, DEFAULT_SPATIAL_LEVELS, agreementPattern, block, buildTermFrequencyTable, cluster, distanceComparison, estimateParameters, exactKey, geoCellKey, representative, scorePair, similarityComparison, spatialComparison, withTermFrequency, } from "@mailwoman/match";
20
+ import { createGbtScorer } from "./learned-scorer.js";
21
+ import { DEDUP_GBT_META, DEDUP_GBT_MODEL } from "./models/dedup-gbt-en-us.js";
22
+ /**
23
+ * Cheap, parse-free normalization for the address-frequency key — uppercase, collapse whitespace,
24
+ * drop punctuation. Used to count how many distinct entities share an address across the WHOLE
25
+ * corpus (computable over millions of rows without geocoding) and to look that frequency up at
26
+ * match time. It's the inverse-frequency signal: a crowded clinic/billing address is weak evidence
27
+ * of identity; a lonely address is strong. (See
28
+ * docs/articles/evals/2026-06-15-nppes-dedup-benchmark.md.)
29
+ */
30
+ export function addressFrequencyKey(raw) {
31
+ return raw
32
+ .toUpperCase()
33
+ .replace(/[^A-Z0-9]+/g, " ")
34
+ .trim()
35
+ .replace(/\s+/g, " ");
36
+ }
20
37
  /** Default tiered levels for a name-like text field. `m`/`u` are EM-estimable seeds. */
21
38
  const NAME_LEVELS = [
22
39
  { label: "exact", minSimilarity: 1.0, m: 0.8, u: 0.01 },
@@ -24,17 +41,89 @@ const NAME_LEVELS = [
24
41
  { label: "different", minSimilarity: 0, m: 0.05, u: 0.96 },
25
42
  ];
26
43
  /**
27
- * The default geocode-first scoring model: name + organization + address key + great-circle
28
- * distance.
44
+ * Exact-vs-different levels for a normalized phone. A shared line is strong, rarely-coincidental
45
+ * evidence.
46
+ */
47
+ const PHONE_LEVELS = [
48
+ { label: "exact", minSimilarity: 1.0, m: 0.6, u: 0.002 },
49
+ { label: "different", minSimilarity: 0, m: 0.4, u: 0.998 },
50
+ ];
51
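+ /** Last-10-digits normalization for phone agreement: strips every non-digit and keeps the trailing 10 digits (dropping a leading country code). Inputs with fewer than 10 digits keep all of them; empty or digit-free input yields null. Extension digits are NOT removed — they count toward the trailing 10. */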
52
+ function normalizePhone(raw) {
53
+ if (!raw)
54
+ return null;
55
+ const digits = raw.replace(/\D+/g, "");
56
+ return digits.length >= 10 ? digits.slice(-10) : digits || null;
57
+ }
58
+ /**
59
+ * The identity-corroborating comparisons (person name, organization, phone). A2 (#625,
60
+ * {@link ResolveConfig.requireCorroboration}) requires at least one of these to _positively_ agree
61
+ * before a pair may link — a shared address alone is not identity. Phone (A3) is the secondary
62
+ * identifier that rescues a true same-entity link across name drift.
29
63
  */
30
- export function buildDefaultModel() {
64
+ const CORROBORATING_FIELDS = new Set(["given", "family", "organization", "phone"]);
65
+ /**
66
+ * The default geocode-first scoring model: name + organization + a spatial signal. The spatial
67
+ * signal is either two comparisons (address-key similarity + great-circle distance — the legacy
68
+ * default, which double-counts) or, with {@link DefaultModelOptions.collapseSpatial}, one collapsed
69
+ * {@link spatialComparison}. `addressFrequency` down-weights agreement on a crowded address either
70
+ * way.
71
+ */
72
+ export function buildDefaultModel(opts = {}) {
73
+ const identity = [
74
+ similarityComparison({ name: "given", extract: (r) => r.name?.given, levels: NAME_LEVELS }),
75
+ similarityComparison({ name: "family", extract: (r) => r.name?.family, levels: NAME_LEVELS }),
76
+ similarityComparison({
77
+ name: "organization",
78
+ extract: (r) => r.organization?.canonical,
79
+ levels: NAME_LEVELS,
80
+ }),
81
+ ];
82
+ if (opts.usePhone) {
83
+ identity.push(similarityComparison({
84
+ name: "phone",
85
+ extract: (r) => normalizePhone(r.phone),
86
+ similarity: (a, b) => (a === b ? 1 : 0), // exact normalized-digit match only
87
+ levels: PHONE_LEVELS,
88
+ }));
89
+ }
90
+ for (const key of opts.discriminators ?? []) {
91
+ identity.push(similarityComparison({
92
+ name: `attr:${key}`,
93
+ extract: (r) => r.attributes?.[key],
94
+ levels: NAME_LEVELS,
95
+ }));
96
+ }
97
+ if (opts.collapseSpatial) {
98
+ let spatial = spatialComparison({
99
+ name: "spatial",
100
+ key: (r) => r.address?.canonicalKey,
101
+ coordinate: (r) => r.address?.geocode?.coordinate,
102
+ levels: DEFAULT_SPATIAL_LEVELS,
103
+ });
104
+ if (opts.addressFrequency) {
105
+ spatial = withTermFrequency(spatial, {
106
+ table: opts.addressFrequency,
107
+ value: (a) => a.address?.raw ?? null,
108
+ levels: [0], // the exact same-key tier
109
+ });
110
+ }
111
+ return { lambda: 0.0001, comparisons: [...identity, spatial] };
112
+ }
113
+ // Legacy two-signal spatial: address-key similarity + great-circle distance (redundant; A1 collapses it).
114
+ let address = similarityComparison({
115
+ name: "address",
116
+ extract: (r) => r.address?.canonicalKey,
117
+ levels: NAME_LEVELS,
118
+ });
119
+ if (opts.addressFrequency) {
120
+ address = withTermFrequency(address, { table: opts.addressFrequency, value: (a) => a.address?.raw ?? null });
121
+ }
31
122
  return {
32
123
  lambda: 0.0001,
33
124
  comparisons: [
34
- similarityComparison({ name: "given", extract: (r) => r.name?.given, levels: NAME_LEVELS }),
35
- similarityComparison({ name: "family", extract: (r) => r.name?.family, levels: NAME_LEVELS }),
36
- similarityComparison({ name: "organization", extract: (r) => r.organization?.canonical, levels: NAME_LEVELS }),
37
- similarityComparison({ name: "address", extract: (r) => r.address?.canonicalKey, levels: NAME_LEVELS }),
125
+ ...identity,
126
+ address,
38
127
  distanceComparison({
39
128
  name: "distance",
40
129
  extract: (r) => r.address?.geocode?.coordinate,
@@ -57,33 +146,88 @@ export function defaultBlockingKeys() {
57
146
  * exactly one entity (a record with no confident link is its own singleton entity).
58
147
  */
59
148
  export function resolveEntities(records, config = {}) {
60
- const model = config.model ?? buildDefaultModel();
149
+ // The proven levers are DEFAULT-ON (#625): the address-frequency down-weight (auto-computed over the
150
+ // input records when not supplied; `false` disables) + the collapsed spatial signal (A1). A new
151
+ // caller gets the strong config out of the box; pass explicit values to override.
152
+ const addressFrequency = config.addressFrequency === false
153
+ ? undefined
154
+ : (config.addressFrequency ??
155
+ buildTermFrequencyTable(records.map((r) => r.address?.raw), { normalize: addressFrequencyKey }));
156
+ const collapseSpatial = config.collapseSpatial ?? true;
157
+ const model = config.model ??
158
+ buildDefaultModel({
159
+ addressFrequency,
160
+ collapseSpatial,
161
+ usePhone: config.usePhone,
162
+ discriminators: config.discriminators,
163
+ });
61
164
  const blockingKeys = config.blockingKeys ?? defaultBlockingKeys();
62
- const threshold = config.threshold ?? 0;
165
+ // #603: the learned scorer is DEFAULT-ON. An explicit `scorer` overrides everything; otherwise
166
+ // `learnedScorer === false` opts out to the FS spine, a GBT supplies a custom model, and
167
+ // `true`/omitted uses the bundled DEDUP_GBT_MODEL. The scorer is built over the FIXED
168
+ // collapsed-spatial + address-frequency feature model (matching training, independent of this call's
169
+ // comparison config), using the resolved address-frequency table.
170
+ let scorer = config.scorer;
171
+ let usingBundledModel = false;
172
+ if (!scorer && config.learnedScorer !== false) {
173
+ const gbt = config.learnedScorer === undefined || config.learnedScorer === true ? DEDUP_GBT_MODEL : config.learnedScorer;
174
+ usingBundledModel = gbt === DEDUP_GBT_MODEL;
175
+ scorer = createGbtScorer({
176
+ model: gbt,
177
+ comparisons: buildDefaultModel({ collapseSpatial: true, addressFrequency }).comparisons,
178
+ addressFrequency: addressFrequency ?? buildTermFrequencyTable([], { normalize: addressFrequencyKey }),
179
+ });
180
+ }
181
+ // Threshold: an explicit value wins; else the bundled model's CALIBRATED threshold when it's active
182
+ // (its logit isn't in FS-weight units, so 0 would over-merge); else 0 (FS spine or a custom model).
183
+ const threshold = config.threshold ?? (usingBundledModel ? DEDUP_GBT_META.recommendedThreshold : 0);
63
184
  const { pairs, droppedBlocks } = block(records, blockingKeys, { maxBlockSize: config.maxBlockSize });
64
185
  let scoringModel = model;
65
186
  if (config.trainEM && pairs.length > 0) {
66
187
  const patterns = pairs.map(([a, b]) => agreementPattern(model.comparisons, a, b));
67
188
  scoringModel = estimateParameters(model, patterns).model;
68
189
  }
69
- const links = pairs.map(([a, b]) => ({
70
- a,
71
- b,
72
- weight: scorePair(scoringModel, a, b).weight,
73
- }));
74
- const clusters = cluster(records, links, { threshold });
190
+ const links = pairs.map(([a, b]) => {
191
+ const score = scorePair(scoringModel, a, b);
192
+ // #603: a learned scorer (explicit `scorer` or the default-on `learnedScorer`) replaces the FS weight
193
+ // (same clustering + threshold semantics).
194
+ let weight = scorer ? scorer(a, b) : score.weight;
195
+ // A2 (#625): a link must carry positive name, org, phone, or `attr:*` corroboration — a shared (even down-weighted)
196
+ // address alone is not identity. Spatial-only pairs are suppressed below any threshold.
197
+ if (config.requireCorroboration) {
198
+ const corroborated = score.contributions.some((c) => (CORROBORATING_FIELDS.has(c.name) || c.name.startsWith("attr:")) && c.weight > 0);
199
+ if (!corroborated)
200
+ weight = Number.NEGATIVE_INFINITY;
201
+ }
202
+ return { a, b, weight };
203
+ });
204
+ const clusters = cluster(records, links, { threshold, linkage: config.linkage });
205
+ // Cohesion = the weakest within-cluster link weight (how tightly an entity holds together). Compute it
206
+ // in ONE pass over links via a record→cluster index, not by filtering every link for every cluster —
207
+ // the latter is O(clusters × links) and dominates the resolve at scale.
208
+ const clusterOf = new Map();
209
+ clusters.forEach((group, i) => {
210
+ for (const record of group)
211
+ clusterOf.set(record, i);
212
+ });
213
+ const minIntraWeight = new Array(clusters.length).fill(Infinity);
214
+ for (const link of links) {
215
+ if (link.weight < threshold)
216
+ continue;
217
+ const ci = clusterOf.get(link.a);
218
+ if (ci === undefined || ci !== clusterOf.get(link.b))
219
+ continue;
220
+ if (link.weight < minIntraWeight[ci])
221
+ minIntraWeight[ci] = link.weight;
222
+ }
75
223
  const entities = clusters.map((group, i) => {
76
- const members = new Set(group);
77
- const intraWeights = links
78
- .filter((link) => link.weight >= threshold && members.has(link.a) && members.has(link.b))
79
- .map((link) => link.weight);
80
224
  const rep = representative(group) ?? group[0];
81
225
  return {
82
226
  id: `entity-${i}`,
83
227
  records: group,
84
228
  representative: rep,
85
229
  coordinate: rep.address?.geocode?.coordinate ?? undefined,
86
- cohesion: group.length > 1 && intraWeights.length > 0 ? Math.min(...intraWeights) : null,
230
+ cohesion: group.length > 1 && minIntraWeight[i] !== Infinity ? minIntraWeight[i] : null,
87
231
  };
88
232
  });
89
233
  return { entities, candidatePairs: pairs.length, droppedBlocks };
@@ -1 +1 @@
1
- {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EAIN,uBAAuB,EACvB,gBAAgB,EAChB,KAAK,EACL,OAAO,EACP,kBAAkB,EAClB,kBAAkB,EAClB,QAAQ,EACR,UAAU,EACV,cAAc,EACd,SAAS,EACT,oBAAoB,GACpB,MAAM,kBAAkB,CAAA;AAGzB,wFAAwF;AACxF,MAAM,WAAW,GAAsB;IACtC,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE;IACvD,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACxD,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;CAC1D,CAAA;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB;IAChC,OAAO;QACN,MAAM,EAAE,MAAM;QACd,WAAW,EAAE;YACZ,oBAAoB,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;YAC3F,oBAAoB,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;YAC7F,oBAAoB,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;YAC9G,oBAAoB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;YACvG,kBAAkB,CAAC;gBAClB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU;gBAC9C,MAAM,EAAE,uBAAuB;aAC/B,CAAC;SACF;KACD,CAAA;AACF,CAAC;AAED,2FAA2F;AAC3F,MAAM,UAAU,mBAAmB;IAClC,OAAO;QACN,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC;QACjD,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY,CAAC;QACxC,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QACxB,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;KACxB,CAAA;AACF,CAAC;AA4BD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,OAAgC,EAAE,SAAwB,EAAE;IAC3F,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,iBAAiB,EAAE,CAAA;IACjD,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,EAAE,CAAA;IACjE,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,CAAA;IAEvC,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG
,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAA;IAEpG,IAAI,YAAY,GAAG,KAAK,CAAA;IACxB,IAAI,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,gBAAgB,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QACjF,YAAY,GAAG,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAA;IACzD,CAAC;IAED,MAAM,KAAK,GAA+B,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAChE,CAAC;QACD,CAAC;QACD,MAAM,EAAE,SAAS,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM;KAC5C,CAAC,CAAC,CAAA;IAEH,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;IAEvD,MAAM,QAAQ,GAAqB,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC5D,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAA;QAC9B,MAAM,YAAY,GAAG,KAAK;aACxB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,IAAI,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACxF,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC5B,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAE,CAAA;QAE9C,OAAO;YACN,EAAE,EAAE,UAAU,CAAC,EAAE;YACjB,OAAO,EAAE,KAAK;YACd,cAAc,EAAE,GAAG;YACnB,UAAU,EAAE,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,IAAI,SAAS;YACzD,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI;SACxF,CAAA;IACF,CAAC,CAAC,CAAA;IAEF,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,aAAa,EAAE,CAAA;AACjE,CAAC"}
1
+ {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../resolve.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,EAMN,uBAAuB,EACvB,sBAAsB,EACtB,gBAAgB,EAChB,KAAK,EACL,uBAAuB,EACvB,OAAO,EACP,kBAAkB,EAClB,kBAAkB,EAClB,QAAQ,EACR,UAAU,EACV,cAAc,EACd,SAAS,EACT,oBAAoB,EACpB,iBAAiB,EACjB,iBAAiB,GACjB,MAAM,kBAAkB,CAAA;AACzB,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AAG7E;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC9C,OAAO,GAAG;SACR,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;AACvB,CAAC;AAED,wFAAwF;AACxF,MAAM,WAAW,GAAsB;IACtC,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE;IACvD,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACxD,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;CAC1D,CAAA;AAED;;;GAGG;AACH,MAAM,YAAY,GAAsB;IACvC,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACxD,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;CAC1D,CAAA;AAED,sGAAsG;AACtG,SAAS,cAAc,CAAC,GAA8B;IACrD,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACrB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;IACtC,OAAO,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAA;AAChE,CAAC;AAED;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,OAAO,CAAC,CAAC,CAAA;AAkClF;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAA4B,EAAE;IAC/D,MAAM,QAAQ,GAAG;QAChB,oBAAoB,CAAe,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QACzG,oBAAoB,CAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QAC3G,oBAAoB,CAAe;YAClC,IAAI,EAAE,cAAc;YACpB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,EAAE,SAAS;YACzC,MAA
M,EAAE,WAAW;SACnB,CAAC;KACF,CAAA;IACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACnB,QAAQ,CAAC,IAAI,CACZ,oBAAoB,CAAe;YAClC,IAAI,EAAE,OAAO;YACb,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,KAAK,CAAC;YACvC,UAAU,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,oCAAoC;YAC7E,MAAM,EAAE,YAAY;SACpB,CAAC,CACF,CAAA;IACF,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,cAAc,IAAI,EAAE,EAAE,CAAC;QAC7C,QAAQ,CAAC,IAAI,CACZ,oBAAoB,CAAe;YAClC,IAAI,EAAE,QAAQ,GAAG,EAAE;YACnB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,GAAG,CAAC;YACnC,MAAM,EAAE,WAAW;SACnB,CAAC,CACF,CAAA;IACF,CAAC;IAED,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;QAC1B,IAAI,OAAO,GAAG,iBAAiB,CAAe;YAC7C,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY;YACnC,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU;YACjD,MAAM,EAAE,sBAAsB;SAC9B,CAAC,CAAA;QACF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,OAAO,GAAG,iBAAiB,CAAC,OAAO,EAAE;gBACpC,KAAK,EAAE,IAAI,CAAC,gBAAgB;gBAC5B,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI;gBACpC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,0BAA0B;aACvC,CAAC,CAAA;QACH,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,GAAG,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAA;IAC/D,CAAC;IAED,0GAA0G;IAC1G,IAAI,OAAO,GAAG,oBAAoB,CAAe;QAChD,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY;QACvC,MAAM,EAAE,WAAW;KACnB,CAAC,CAAA;IACF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAC3B,OAAO,GAAG,iBAAiB,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,gBAAgB,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,EAAE,CAAC,CAAA;IAC7G,CAAC;IACD,OAAO;QACN,MAAM,EAAE,MAAM;QACd,WAAW,EAAE;YACZ,GAAG,QAAQ;YACX,OAAO;YACP,kBAAkB,CAAC;gBAClB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU;gBAC9C,MAAM,EAAE,uBAAuB;aAC/B,CAAC;SACF;KACD,CAAA;AACF,CAAC;AAED,2FAA2F;AAC3F,MAAM,UAAU,mBAAmB;IAClC,OAAO;QACN,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,CA
AC;QACjD,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,YAAY,CAAC;QACxC,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QACxB,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;KACxB,CAAA;AACF,CAAC;AA+FD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,OAAgC,EAAE,SAAwB,EAAE;IAC3F,qGAAqG;IACrG,gGAAgG;IAChG,kFAAkF;IAClF,MAAM,gBAAgB,GACrB,MAAM,CAAC,gBAAgB,KAAK,KAAK;QAChC,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,CAAC,MAAM,CAAC,gBAAgB;YACzB,uBAAuB,CACtB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,CAAC,EAClC,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAClC,CAAC,CAAA;IACL,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,IAAI,CAAA;IACtD,MAAM,KAAK,GACV,MAAM,CAAC,KAAK;QACZ,iBAAiB,CAAC;YACjB,gBAAgB;YAChB,eAAe;YACf,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,cAAc,EAAE,MAAM,CAAC,cAAc;SACrC,CAAC,CAAA;IACH,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,EAAE,CAAA;IAEjE,+FAA+F;IAC/F,yFAAyF;IACzF,sFAAsF;IACtF,qGAAqG;IACrG,kEAAkE;IAClE,IAAI,MAAM,GAAG,MAAM,CAAC,MAAM,CAAA;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAA;IAC7B,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,aAAa,KAAK,KAAK,EAAE,CAAC;QAC/C,MAAM,GAAG,GACR,MAAM,CAAC,aAAa,KAAK,SAAS,IAAI,MAAM,CAAC,aAAa,KAAK,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAA;QAC7G,iBAAiB,GAAG,GAAG,KAAK,eAAe,CAAA;QAC3C,MAAM,GAAG,eAAe,CAAC;YACxB,KAAK,EAAE,GAAG;YACV,WAAW,EAAE,iBAAiB,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,WAAW;YACvF,gBAAgB,EAAE,gBAAgB,IAAI,uBAAuB,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC;SACrG,CAAC,CAAA;IACH,CAAC;IACD,oGAAoG;IACpG,oGAAoG;IACpG,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,cAAc,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAEnG,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAA;IAEpG,IAAI,YAAY,GAAG,KAAK,CAAA;IACxB,IAAI,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,gBAAgB,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QACjF,YAAY,GAAG,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAA;IA
CzD,CAAC;IAED,MAAM,KAAK,GAA+B,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE;QAC9D,MAAM,KAAK,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;QAC3C,kGAAkG;QAClG,2CAA2C;QAC3C,IAAI,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAA;QACjD,kGAAkG;QAClG,wFAAwF;QACxF,IAAI,MAAM,CAAC,oBAAoB,EAAE,CAAC;YACjC,MAAM,YAAY,GAAG,KAAK,CAAC,aAAa,CAAC,IAAI,CAC5C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CACvF,CAAA;YACD,IAAI,CAAC,YAAY;gBAAE,MAAM,GAAG,MAAM,CAAC,iBAAiB,CAAA;QACrD,CAAC;QACD,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,CAAA;IACxB,CAAC,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAA;IAEhF,uGAAuG;IACvG,qGAAqG;IACrG,wEAAwE;IACxE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAwB,CAAA;IACjD,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC7B,KAAK,MAAM,MAAM,IAAI,KAAK;YAAE,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,MAAM,cAAc,GAAG,IAAI,KAAK,CAAS,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IACxE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,MAAM,GAAG,SAAS;YAAE,SAAQ;QACrC,MAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,SAAQ;QAC9D,IAAI,IAAI,CAAC,MAAM,GAAG,cAAc,CAAC,EAAE,CAAE;YAAE,cAAc,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAA;IACxE,CAAC;IAED,MAAM,QAAQ,GAAqB,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC5D,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAE,CAAA;QAC9C,OAAO;YACN,EAAE,EAAE,UAAU,CAAC,EAAE;YACjB,OAAO,EAAE,KAAK;YACd,cAAc,EAAE,GAAG;YACnB,UAAU,EAAE,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,IAAI,SAAS;YACzD,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,CAAC,CAAC,CAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,IAAI;SACzF,CAAA;IACF,CAAC,CAAC,CAAA;IAEF,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,aAAa,E
AAE,CAAA;AACjE,CAAC"}
package/out/types.d.ts CHANGED
@@ -21,6 +21,13 @@ export interface SourceRecord {
21
21
  address?: PostalAddress | null;
22
22
  phone?: string | null;
23
23
  email?: string | null;
24
+ /**
25
+ * Additional secondary-identifier fields, normalized — anything that helps tell two records apart
26
+ * or confirm they're the same beyond name/org/address/phone (an authorized-official name, a
27
+ * provider taxonomy, a license number…). Used as extra comparisons + corroborators when the model
28
+ * is built with matching `discriminators`. Keyed by a stable field name the model references.
29
+ */
30
+ attributes?: Record<string, string>;
24
31
  /** The original row, verbatim, for audit. */
25
32
  raw?: Record<string, string>;
26
33
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAEpF,oGAAoG;AACpG,MAAM,WAAW,YAAY;IAC5B,iEAAiE;IACjE,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,wDAAwD;IACxD,IAAI,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;IACxB,kEAAkE;IAClE,YAAY,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAA;IACtC,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAA;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,6CAA6C;IAC7C,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC5B;AAED,uGAAuG;AACvG,MAAM,WAAW,cAAc;IAC9B,yCAAyC;IACzC,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,qEAAqE;IACrE,cAAc,EAAE,YAAY,CAAA;IAC5B,gEAAgE;IAChE,UAAU,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;IACpD;;;OAGG;IACH,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;CACvB;AAID,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAA;IACb,qDAAqD;IACrD,WAAW,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC7B;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,SAAS,CAAA;IACf,QAAQ,EAAE,YAAY,CAAA;IACtB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,mBAAmB,CAAA;IACzB,QAAQ,EAAE,cAAc,EAAE,CAAA;CAC1B"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAEpF,oGAAoG;AACpG,MAAM,WAAW,YAAY;IAC5B,iEAAiE;IACjE,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,wDAAwD;IACxD,IAAI,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;IACxB,kEAAkE;IAClE,YAAY,CAAC,EAAE,gBAAgB,GAAG,IAAI,CAAA;IACtC,oDAAoD;IACpD,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAA;IAC9B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACnC,6CAA6C;IAC7C,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC5B;AAED,uGAAuG;AACvG,MAAM,WAAW,cAAc;IAC9B,yCAAyC;IACzC,EAAE,EAAE,MAAM,CAAA;IACV,uDAAuD;IACvD,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,qEAAqE;IACrE,cAAc,EAAE,YAAY,CAAA;IAC5B,gEAAgE;IAChE,UAAU,CAAC,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;IACpD;;;OAGG;IACH,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;CACvB;AAID,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAA;IACb,qDAAqD;IACrD,WAAW,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC7B;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,SAAS,CAAA;IACf,QAAQ,EAAE,YAAY,CAAA;IACtB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,mBAAmB,CAAA;IACzB,QAAQ,EAAE,cAAc,EAAE,CAAA;CAC1B"}
package/out/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AA0DH,YAAY"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAiEH,YAAY"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/registry",
3
- "version": "4.8.1",
3
+ "version": "4.10.0",
4
4
  "description": "The geocode-first record-matching application: resolve messy contact/organization records into canonical, geocoded entities (block → score → cluster) and export them as GeoJSON for spatial analysis. The clinic-funding use case mailwoman was built for.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {
@@ -17,9 +17,11 @@
17
17
  "./ingest": "./out/ingest.js"
18
18
  },
19
19
  "dependencies": {
20
- "@mailwoman/match": "4.8.1",
21
- "@mailwoman/record": "4.8.1",
22
- "csv-parse": "^5.6.0"
20
+ "@mailwoman/address-id": "4.10.0",
21
+ "@mailwoman/match": "4.10.0",
22
+ "@mailwoman/record": "4.10.0",
23
+ "csv-parse": "^5.6.0",
24
+ "spliterator": "^2.0.0"
23
25
  },
24
26
  "devDependencies": {
25
27
  "@types/node": "^25.9.2"