@mailwoman/match 4.8.1 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,95 @@
1
+ # @mailwoman/match
2
+
3
+ **The geocode-first record matcher** — a three-stage entity resolution pipeline:
4
+ **block → score → cluster**. Resolves whether two records refer to the same
5
+ real-world entity by matching the resolved _place_ (not the address string),
6
+ then comparing names and other fields.
7
+
8
+ ```ts
9
+ import { block, scorePair, cluster, defaultBlockingKeys } from "@mailwoman/match"
10
+
11
+ // Stage 1: Block — geo-first candidate generation
12
+ const { pairs } = block(records, {
13
+ keys: [defaultBlockingKeys.geoCell, defaultBlockingKeys.canonical],
14
+ })
15
+
16
+ // Stage 2: Score — Fellegi-Sunter probabilistic match
17
+ const { probability } = scorePair(recordA, recordB, { model })
18
+
19
+ // Stage 3: Cluster — connected-components resolution
20
+ const entities = cluster(records, links, { threshold: 0.5 })
21
+ ```
22
+
23
+ ## The three-stage pipeline
24
+
25
+ ### 1. Block — geo-first candidate generation
26
+
27
+ Instead of comparing every record to every other (O(n²)), blocking generates
28
+ candidate pairs via cheap, high-recall keys:
29
+
30
+ - **Geo cell key** — a generous H3 cell (~5.5 km) so two records at the same
31
+ place meet regardless of how their address is spelled
32
+ - **Canonical address key** — the formatter's deterministic match key
33
+ - **Exact keys** — phone, email, domain for exact-match joins
34
+
35
+ ### 2. Score — Fellegi-Sunter probabilistic matching
36
+
37
+ The `scorePair` function computes a match probability using:
38
+
39
+ - **String comparators** — Jaro-Winkler similarity over names and addresses
40
+ - **Distance comparison** — great-circle distance bucketed into same-building /
41
+ same-block / same-area / far
42
+ - **Fellegi-Sunter weight model** — agreement-level log-likelihood ratios
43
+ (`log2(m/u)`) converted to a probability
44
+ - **Label-free EM estimation** — `m`/`u` parameters learned via
45
+ expectation-maximization without labeled training data
46
+ - **Term frequency adjustment** — rare-value agreement (e.g., an unusual
47
+ organization name) up-weighted; common-value agreement down-weighted
48
+ - **Learned (GBT) scorer** — optional gradient-boosted tree scorer for
49
+ single-dataset dedup, available via the `scorer` hook
50
+
51
+ ### 3. Cluster — connected-components
52
+
53
+ Non-transitive pairwise links (A↔B, B↔C, but not A↔C) are resolved into
54
+ canonical entities via union-find with path compression.
55
+
56
+ ## API
57
+
58
+ ```ts
59
+ // Blocking — generate candidate record pairs
60
+ block(records, opts: BlockOpts): { pairs: Pair[]; droppedBlocks: BlockDrop[] }
61
+
62
+ // Scoring — pairwise Fellegi-Sunter match probability
63
+ scorePair(a: SourceRecord, b: SourceRecord, opts: ScoreOpts): ScoreResult
64
+
65
+ // Clustering — resolve pairwise links into entities
66
+ cluster(records: SourceRecord[], links: ScoredLink[], opts: ClusterOptions): SourceRecord[][]
67
+
68
+ // Distance — great-circle comparison levels
69
+ haversineKm(a: LatLon, b: LatLon): number
70
+ distanceComparison<R>(config): Comparison<R> // config carries the comparison name, a coordinate accessor, and the levels
71
+
72
+ // Learned scorer — GBT for single-dataset dedup
73
+ trainGBT(pairs: TrainingPair[], opts?: GBTOpts): GBT
74
+ gbtScore(model: GBT, features: number[]): number
75
+
76
+ // Label-free EM parameter estimation
77
+ estimateParameters(pairs: Pair[]): EMResult
78
+
79
+ // Term frequency adjustment
80
+ withTermFrequency(model: FSModel, records: SourceRecord[]): FSModel
81
+ ```
82
+
83
+ ## Related
84
+
85
+ - [`@mailwoman/record`](../record) — record schemas and normalizers consumed by the matcher
86
+ - [`@mailwoman/formatter`](../formatter) — `canonicalKey` used for blocking
87
+ - [`@mailwoman/address-id`](../address-id) — complementary exact-match join key
88
+ - [`@mailwoman/registry`](../registry) — high-level `resolveEntities` that composes this pipeline
89
+ - [Geocode-First Record Matching](https://mailwoman.sister.software/articles/concepts/geocode-first-record-matching/)
90
+ - [Dedup Entity Truth](https://mailwoman.sister.software/articles/concepts/dedup-entity-truth/)
91
+ - [Spatial Expectation & Density](https://mailwoman.sister.software/articles/concepts/spatial-expectation-and-density/)
92
+
93
+ ## License
94
+
95
+ [AGPL-3.0-only](https://www.gnu.org/licenses/agpl-3.0.html)
@@ -32,6 +32,24 @@ export interface ClusterOptions {
32
32
  * knob.
33
33
  */
34
34
  threshold: number;
35
+ /**
36
+ * How the above-threshold link graph resolves into clusters:
37
+ *
38
+ * - `"single"` (default) — connected components (union-find). Fast; ANY above-threshold link fuses
39
+ * two groups, so a single weak link can over-merge unrelated records through a transitive
40
+ * chain.
41
+ * - `"average"` — agglomerative average-linkage refinement WITHIN each connected component: two
42
+ * sub-clusters merge only when the AVERAGE weight of the links between them clears the
43
+ * threshold, so a lone weak bridge no longer fuses two otherwise-dense groups. The documented
44
+ * over-merge fix (Dedupe). Falls back to single-linkage for any component larger than
45
+ * {@link maxAverageLinkageComponent}.
46
+ */
47
+ linkage?: "single" | "average";
48
+ /**
49
+ * Components larger than this skip the O(k³) average-linkage refine and keep single-linkage.
50
+ * Default 64.
51
+ */
52
+ maxAverageLinkageComponent?: number;
35
53
  }
36
54
  /**
37
55
  * Cluster records into canonical entities by connected components of the above-threshold link
@@ -1 +1 @@
1
- {"version":3,"file":"clustering.d.ts","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,iGAAiG;AACjG,MAAM,WAAW,UAAU,CAAC,CAAC;IAC5B,CAAC,EAAE,CAAC,CAAA;IACJ,CAAC,EAAE,CAAC,CAAA;IACJ,MAAM,EAAE,MAAM,CAAA;CACd;AAED,mCAAmC;AACnC,MAAM,WAAW,cAAc;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAA;CACjB;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,GAAG,CAAC,EAAE,EAAE,CAgD7G;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,CAAC,SAAS,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,CAgBnF"}
1
+ {"version":3,"file":"clustering.d.ts","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,iGAAiG;AACjG,MAAM,WAAW,UAAU,CAAC,CAAC;IAC5B,CAAC,EAAE,CAAC,CAAA;IACJ,CAAC,EAAE,CAAC,CAAA;IACJ,MAAM,EAAE,MAAM,CAAA;CACd;AAED,mCAAmC;AACnC,MAAM,WAAW,cAAc;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAA;IACjB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAA;IAC9B;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,CAAA;CACnC;AA6CD;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,GAAG,CAAC,EAAE,EAAE,CA4E7G;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,CAAC,SAAS,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,CAgBnF"}
package/out/clustering.js CHANGED
@@ -19,6 +19,47 @@
19
19
  * over-merge risk is already damped: blocking keeps candidate sets local, so chains can't run
20
20
  * across the whole dataset.
21
21
  */
22
+ /**
23
+ * Refine one connected component by agglomerative average-linkage. Starts with every member a
24
+ * singleton and repeatedly merges the cluster pair with the highest _average_ inter-cluster link
25
+ * weight while that average is at or above `threshold`; clusters with no link between them never
26
+ * merge. Every round rescans all edges for every cluster pair — roughly O(k³·E) for k members and E edges — so callers gate it on a size cap.
27
+ */
28
+ function averageLinkageRefine(members, edges, threshold) {
29
+ const clusters = members.map((_, i) => [i]);
30
+ const crossAverage = (a, b) => {
31
+ const inA = new Set(a);
32
+ const inB = new Set(b);
33
+ let sum = 0;
34
+ let count = 0;
35
+ for (const [i, j, w] of edges) {
36
+ if ((inA.has(i) && inB.has(j)) || (inA.has(j) && inB.has(i))) {
37
+ sum += w;
38
+ count++;
39
+ }
40
+ }
41
+ return count > 0 ? sum / count : null;
42
+ };
43
+ for (;;) {
44
+ let bestAvg = -Infinity;
45
+ let bestPair = null;
46
+ for (let p = 0; p < clusters.length; p++) {
47
+ for (let q = p + 1; q < clusters.length; q++) {
48
+ const avg = crossAverage(clusters[p], clusters[q]);
49
+ if (avg !== null && avg > bestAvg) {
50
+ bestAvg = avg;
51
+ bestPair = [p, q];
52
+ }
53
+ }
54
+ }
55
+ if (!bestPair || bestAvg < threshold)
56
+ break;
57
+ const [p, q] = bestPair;
58
+ clusters[p] = clusters[p].concat(clusters[q]);
59
+ clusters.splice(q, 1);
60
+ }
61
+ return clusters.map((local) => local.map((i) => members[i]));
62
+ }
22
63
  /**
23
64
  * Cluster records into canonical entities by connected components of the above-threshold link
24
65
  * graph. Every input record lands in exactly one cluster — a record with no qualifying link is a
@@ -56,14 +97,18 @@ export function cluster(records, links, opts) {
56
97
  rank[rx]++;
57
98
  }
58
99
  };
100
+ // Collect ALL valid links (not just above-threshold): connected components form from the
101
+ // above-threshold ones, but the average-linkage refinement needs the full sub-graph — a weak or
102
+ // disagreeing below-threshold edge between two sub-clusters is exactly what should pull them apart.
103
+ const allLinks = [];
59
104
  for (const link of links) {
60
- if (link.weight < opts.threshold)
61
- continue;
62
105
  const ia = index.get(link.a);
63
106
  const ib = index.get(link.b);
64
107
  if (ia === undefined || ib === undefined)
65
108
  continue;
66
- union(ia, ib);
109
+ allLinks.push(link);
110
+ if (link.weight >= opts.threshold)
111
+ union(ia, ib);
67
112
  }
68
113
  const groups = new Map();
69
114
  records.forEach((record, i) => {
@@ -74,7 +119,33 @@ export function cluster(records, links, opts) {
74
119
  else
75
120
  groups.set(root, [record]);
76
121
  });
77
- return [...groups.values()];
122
+ if (opts.linkage !== "average")
123
+ return [...groups.values()];
124
+ // Average-linkage refinement: split each component where its sub-clusters are joined only by a weak
125
+ // bridge (the average inter-cluster link weight, over ALL edges between them, falls below the threshold).
126
+ const maxComponent = opts.maxAverageLinkageComponent ?? 64;
127
+ const localOf = new Map(); // member → its index within its own group
128
+ for (const members of groups.values())
129
+ members.forEach((m, i) => localOf.set(m, i));
130
+ const groupEdges = new Map();
131
+ for (const link of allLinks) {
132
+ const root = find(index.get(link.a));
133
+ if (root !== find(index.get(link.b)))
134
+ continue; // cross-component edge — not part of any refinement
135
+ const list = groupEdges.get(root) ?? [];
136
+ list.push([localOf.get(link.a), localOf.get(link.b), link.weight]);
137
+ groupEdges.set(root, list);
138
+ }
139
+ const result = [];
140
+ for (const [root, members] of groups) {
141
+ if (members.length <= 1 || members.length > maxComponent) {
142
+ result.push(members);
143
+ continue;
144
+ }
145
+ for (const sub of averageLinkageRefine(members, groupEdges.get(root) ?? [], opts.threshold))
146
+ result.push(sub);
147
+ }
148
+ return result;
78
149
  }
79
150
  /**
80
151
  * Pick a cluster's most complete record as its canonical representative — the one with the fewest
@@ -1 +1 @@
1
- {"version":3,"file":"clustering.js","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAkBH;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAI,OAAqB,EAAE,KAA8B,EAAE,IAAoB;IACrG,MAAM,KAAK,GAAG,IAAI,GAAG,EAAa,CAAA;IAClC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;IAEpD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACvC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAS,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAEtD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;QAClC,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,IAAI;YAAE,IAAI,GAAG,MAAM,CAAC,IAAI,CAAE,CAAA;QAClD,oBAAoB;QACpB,OAAO,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YAChB,CAAC,GAAG,IAAI,CAAA;QACT,CAAC;QACD,OAAO,IAAI,CAAA;IACZ,CAAC,CAAA;IAED,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;QAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,IAAI,EAAE,KAAK,EAAE;YAAE,OAAM;QACrB,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aACrC,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aAC1C,CAAC;YACL,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;YACf,IAAI,CAAC,EAAE,CAAE,EAAE,CAAA;QACZ,CAAC;IACF,CAAC,CAAA;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS;YAAE,SAAQ;QAC1C,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS;YAAE,SAAQ;QAClD,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;IACd,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAA;IACrC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;;YACxB,MAAM,CAAC,GAAG
,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;AAC5B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAmB,KAAmB;IACnE,IAAI,IAAmB,CAAA;IACvB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;IAEnB,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,EAAE;gBAAE,MAAM,EAAE,CAAA;QACpE,CAAC;QACD,IAAI,MAAM,GAAG,UAAU,EAAE,CAAC;YACzB,UAAU,GAAG,MAAM,CAAA;YACnB,IAAI,GAAG,MAAM,CAAA;QACd,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC"}
1
+ {"version":3,"file":"clustering.js","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAoCH;;;;;GAKG;AACH,SAAS,oBAAoB,CAAI,OAAY,EAAE,KAAsC,EAAE,SAAiB;IACvG,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC3C,MAAM,YAAY,GAAG,CAAC,CAAW,EAAE,CAAW,EAAiB,EAAE;QAChE,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA;QACtB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA;QACtB,IAAI,GAAG,GAAG,CAAC,CAAA;QACX,IAAI,KAAK,GAAG,CAAC,CAAA;QACb,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC;YAC/B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9D,GAAG,IAAI,CAAC,CAAA;gBACR,KAAK,EAAE,CAAA;YACR,CAAC;QACF,CAAC;QACD,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAA;IACtC,CAAC,CAAA;IAED,SAAS,CAAC;QACT,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAA;QACvB,IAAI,QAAQ,GAA4B,IAAI,CAAA;QAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAE,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;gBACpD,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,GAAG,OAAO,EAAE,CAAC;oBACnC,OAAO,GAAG,GAAG,CAAA;oBACb,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;QACF,CAAC;QACD,IAAI,CAAC,QAAQ,IAAI,OAAO,GAAG,SAAS;YAAE,MAAK;QAC3C,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,QAAQ,CAAA;QACvB,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;QAC/C,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACtB,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAI,OAAqB,EAAE,KAA8B,EAAE,IAAoB;IACrG,MAAM,KAAK,GAAG,IAAI,GAAG,EAAa,CAAA;IAClC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,
EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;IAEpD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACvC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAS,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAEtD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;QAClC,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,IAAI;YAAE,IAAI,GAAG,MAAM,CAAC,IAAI,CAAE,CAAA;QAClD,oBAAoB;QACpB,OAAO,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YAChB,CAAC,GAAG,IAAI,CAAA;QACT,CAAC;QACD,OAAO,IAAI,CAAA;IACZ,CAAC,CAAA;IAED,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;QAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,IAAI,EAAE,KAAK,EAAE;YAAE,OAAM;QACrB,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aACrC,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aAC1C,CAAC;YACL,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;YACf,IAAI,CAAC,EAAE,CAAE,EAAE,CAAA;QACZ,CAAC;IACF,CAAC,CAAA;IAED,yFAAyF;IACzF,gGAAgG;IAChG,oGAAoG;IACpG,MAAM,QAAQ,GAAoB,EAAE,CAAA;IACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS;YAAE,SAAQ;QAClD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnB,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS;YAAE,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;IACjD,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAA;IACrC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;;YACxB,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS;QAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;IAE3D,oGAAoG;IACpG,0GAA0G;IAC1G,MAAM,YAAY,GA
AG,IAAI,CAAC,0BAA0B,IAAI,EAAE,CAAA;IAC1D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAa,CAAA,CAAC,0CAA0C;IAC/E,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;IACnF,MAAM,UAAU,GAAG,IAAI,GAAG,EAA2C,CAAA;IACrE,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,CAAA;QACrC,IAAI,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC;YAAE,SAAQ,CAAC,oDAAoD;QACpG,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;QACvC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;QACpE,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,CAAC;IAED,MAAM,MAAM,GAAU,EAAE,CAAA;IACxB,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;YAC1D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,SAAQ;QACT,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,oBAAoB,CAAC,OAAO,EAAE,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9G,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAmB,KAAmB;IACnE,IAAI,IAAmB,CAAA;IACvB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;IAEnB,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,EAAE;gBAAE,MAAM,EAAE,CAAA;QACpE,CAAC;QACD,IAAI,MAAM,GAAG,UAAU,EAAE,CAAC;YACzB,UAAU,GAAG,MAAM,CAAA;YACnB,IAAI,GAAG,MAAM,CAAA;QACd,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC"}
package/out/distance.d.ts CHANGED
@@ -38,4 +38,36 @@ export declare function distanceComparison<R>(config: {
38
38
  * error.
39
39
  */
40
40
  export declare const DEFAULT_DISTANCE_LEVELS: ComparisonLevel[];
41
+ /**
42
+ * The collapsed spatial-agreement comparison — ONE non-redundant geographic signal.
43
+ *
44
+ * The first matcher carried TWO spatial comparisons: canonical-address-key similarity AND
45
+ * great-circle distance. They double-count — an exact key match implies distance ≈ 0, so a
46
+ * co-located pair banked the same evidence twice, and the redundant vote is exactly what
47
+ * over-merges distinct providers at a shared clinic address. This folds them into one comparison:
48
+ *
49
+ * - **level 0 `same-key`** — an EXACT canonical-key match: the strongest tier, and the one the
50
+ * inverse-address-frequency adjustment rides ({@link withTermFrequency} on level 0), so agreement
51
+ * on a crowded shared key is down-weighted toward worthless while a rare one keeps full weight.
52
+ * - **levels 1…n** — great-circle distance buckets for pairs whose keys DIFFER, so "123 Main St" vs
53
+ * "123 Main Street Apt 2" that geocode to the same rooftop still earns near-agreement (the
54
+ * geo-first point of the whole design).
55
+ * - Keys differ and no usable coordinate → no evidence.
56
+ *
57
+ * Exactly one spatial vote, no redundancy. Pass {@link DEFAULT_SPATIAL_LEVELS} or your own; index 0
58
+ * must be the exact-key tier, indices 1…n the distance buckets nearest → far by `maxKm` (last =
59
+ * `far`).
60
+ */
61
+ export declare function spatialComparison<R>(config: {
62
+ name: string;
63
+ key: (record: R) => string | null | undefined;
64
+ coordinate: (record: R) => LatLon | null | undefined;
65
+ levels: ComparisonLevel[];
66
+ }): Comparison<R>;
67
+ /**
68
+ * Default levels for {@link spatialComparison}: an exact same-key tier on top of the distance
69
+ * buckets. `m`/`u` are EM-estimable seeds (m decreasing, u increasing down the tiers; each column ≈
70
+ * sums to 1).
71
+ */
72
+ export declare const DEFAULT_SPATIAL_LEVELS: ComparisonLevel[];
41
73
  //# sourceMappingURL=distance.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"distance.d.ts","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAKtE,uEAAuE;AACvE,wBAAgB,WAAW,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CASxD;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC7C,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACjD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAmBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,EAAE,eAAe,EAKpD,CAAA"}
1
+ {"version":3,"file":"distance.d.ts","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAKtE,uEAAuE;AACvE,wBAAgB,WAAW,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CASxD;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC7C,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACjD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAmBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,EAAE,eAAe,EAKpD,CAAA;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC5C,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IAC7C,UAAU,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACpD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAuBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,sBAAsB,EAAE,eAAe,EAMnD,CAAA"}
package/out/distance.js CHANGED
@@ -65,4 +65,59 @@ export const DEFAULT_DISTANCE_LEVELS = [
65
65
  { label: "same-area", maxKm: 5, m: 0.08, u: 0.2 },
66
66
  { label: "far", m: 0.02, u: 0.779 },
67
67
  ];
68
+ /**
69
+ * The collapsed spatial-agreement comparison — ONE non-redundant geographic signal.
70
+ *
71
+ * The first matcher carried TWO spatial comparisons: canonical-address-key similarity AND
72
+ * great-circle distance. They double-count — an exact key match implies distance ≈ 0, so a
73
+ * co-located pair banked the same evidence twice, and the redundant vote is exactly what
74
+ * over-merges distinct providers at a shared clinic address. This folds them into one comparison:
75
+ *
76
+ * - **level 0 `same-key`** — an EXACT canonical-key match: the strongest tier, and the one the
77
+ * inverse-address-frequency adjustment rides ({@link withTermFrequency} on level 0), so agreement
78
+ * on a crowded shared key is down-weighted toward worthless while a rare one keeps full weight.
79
+ * - **levels 1…n** — great-circle distance buckets for pairs whose keys DIFFER, so "123 Main St" vs
80
+ * "123 Main Street Apt 2" that geocode to the same rooftop still earns near-agreement (the
81
+ * geo-first point of the whole design).
82
+ * - Keys differ and no usable coordinate → no evidence.
83
+ *
84
+ * Exactly one spatial vote, no redundancy. Pass {@link DEFAULT_SPATIAL_LEVELS} or your own; index 0
85
+ * must be the exact-key tier, indices 1…n the distance buckets nearest → far by `maxKm` (last =
86
+ * `far`).
87
+ */
88
+ export function spatialComparison(config) {
89
+ const valid = (c) => !!c && Number.isFinite(c.latitude) && Number.isFinite(c.longitude);
90
+ return {
91
+ name: config.name,
92
+ levels: config.levels,
93
+ assess(a, b) {
94
+ const ka = config.key(a);
95
+ const kb = config.key(b);
96
+ if (ka && kb && ka.trim() && ka === kb)
97
+ return 0; // exact canonical-key match — one strong vote
98
+ const ca = config.coordinate(a);
99
+ const cb = config.coordinate(b);
100
+ if (!valid(ca) || !valid(cb))
101
+ return -1; // keys differ and no coordinate → no spatial evidence
102
+ const km = haversineKm(ca, cb);
103
+ for (let i = 1; i < config.levels.length; i++) {
104
+ if (km <= (config.levels[i].maxKm ?? Infinity))
105
+ return i;
106
+ }
107
+ return config.levels.length - 1;
108
+ },
109
+ };
110
+ }
111
+ /**
112
+ * Default levels for {@link spatialComparison}: an exact same-key tier on top of the distance
113
+ * buckets. `m`/`u` are EM-estimable seeds (m decreasing, u increasing down the tiers; each column
114
+ * sums to ≈ 1).
115
+ */
116
+ export const DEFAULT_SPATIAL_LEVELS = [
117
+ { label: "same-key", m: 0.85, u: 0.01 },
118
+ { label: "same-building", maxKm: 0.05, m: 0.1, u: 0.02 },
119
+ { label: "same-block", maxKm: 0.5, m: 0.03, u: 0.05 },
120
+ { label: "same-area", maxKm: 5, m: 0.015, u: 0.2 },
121
+ { label: "far", m: 0.005, u: 0.72 },
122
+ ];
68
123
  //# sourceMappingURL=distance.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"distance.js","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAKH,sCAAsC;AACtC,MAAM,eAAe,GAAG,SAAS,CAAA;AAEjC,uEAAuE;AACvE,MAAM,UAAU,WAAW,CAAC,CAAS,EAAE,CAAS;IAC/C,MAAM,KAAK,GAAG,CAAC,OAAe,EAAU,EAAE,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAA;IACpE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAE9B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAA;IAC7F,OAAO,CAAC,GAAG,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAClE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAI,MAIrC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA;YAEvC,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACzD,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACzD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAA
E,IAAI,EAAE;IACpD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE;IACjD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE;CACnC,CAAA"}
1
+ {"version":3,"file":"distance.js","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAKH,sCAAsC;AACtC,MAAM,eAAe,GAAG,SAAS,CAAA;AAEjC,uEAAuE;AACvE,MAAM,UAAU,WAAW,CAAC,CAAS,EAAE,CAAS;IAC/C,MAAM,KAAK,GAAG,CAAC,OAAe,EAAU,EAAE,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAA;IACpE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAE9B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAA;IAC7F,OAAO,CAAC,GAAG,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAClE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAI,MAIrC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA;YAEvC,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACzD,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACzD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAA
E,IAAI,EAAE;IACpD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE;IACjD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE;CACnC,CAAA;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,iBAAiB,CAAI,MAKpC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACxB,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACxB,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE;gBAAE,OAAO,CAAC,CAAA,CAAC,8CAA8C;YAE/F,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;YAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;YAC/B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA,CAAC,sDAAsD;YAE9F,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAsB;IACxD,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACvC,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE;IACxD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACrD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE;IAClD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE;CACnC,CAAA"}
package/out/gbt.d.ts ADDED
@@ -0,0 +1,51 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Gradient-boosted shallow regression trees (logistic loss), pure-Node — the learned scorer #603
7
+ * names: an offline-trained model (this trainer, or XGBoost/LightGBM exported to the same
8
+ * {@link GBT} shape) plus a trivial evaluator, no new runtime dependency. It sits behind the
9
+ * matcher's `scorer` hook to replace the Fellegi-Sunter link weight where labels (or a held-out
10
+ * truth like an NPI) let a tree learn the over-merge signature the hand-weights miss.
11
+ *
12
+ * This module is feature-agnostic: feature vectors are caller-defined `number[]` (the record
13
+ * matcher builds them in `@mailwoman/registry`'s learned-scorer module — one-hot agreement
14
+ * levels, interaction terms, and corpus statistics).
15
+ *
16
+ * It only fits ({@link trainGBT}) and scores
17
+ * ({@link gbtScore}). The trained {@link GBT} is plain JSON (`{trees, lr, base}`), so a model
18
+ * trains offline once and ships as a data file.
19
+ */
20
+ /** A trained tree: an internal split (feature `f` ≤ `thr` → `lo`, else `hi`) or a `leaf` value. Evaluation tells the two shapes apart by the presence of the `f` key. */
21
+ export type TreeNode = {
22
+ leaf: number; // value returned when evaluation ends at this node
23
+ } | {
24
+ f: number; // index into the feature vector
25
+ thr: number; // split threshold, compared with `<=`
26
+ lo: TreeNode; // child followed when x[f] <= thr
27
+ hi: TreeNode; // child followed otherwise
28
+ };
29
+ /**
29
+ * Per-feature candidate split thresholds: midpoints for few-valued/binary features, quantiles for
30
+ * continuous. `X` holds one feature vector per sample; the result holds one threshold list per feature column.
31
+ */
32
+ export declare function buildThresholds(X: number[][]): number[][];
34
+ /** A trained gradient-boosted-tree model: an additive ensemble over a base log-odds. Plain JSON. */
34
+ export interface GBT {
35
+ trees: TreeNode[]; // trees whose outputs gbtScore sums, each scaled by `lr`
36
+ lr: number; // learning rate: multiplier applied to every tree's output when scoring
37
+ base: number; // starting score (log-odds) before any tree contribution is added
38
+ }
40
+ /** Hyperparameters for {@link trainGBT}. */
41
+ export interface GBTOpts {
42
+ rounds: number; // number of boosting rounds; one tree is fitted per round
43
+ depth: number; // maximum number of split levels per tree (0 yields a single leaf)
44
+ lr: number; // learning rate applied to each tree's prediction and copied onto the returned model
45
+ minLeaf: number; // minimum number of rows (a count, not a weight) required in each child of a split
46
+ }
47
+ /** Gradient-boosted regression trees on logistic loss, with per-sample class weights `w`. `X` holds one feature vector per sample, `y` the labels (1 = positive), and `w` one weight per sample. */
48
+ export declare function trainGBT(X: number[][], y: number[], w: number[], opts: GBTOpts): GBT;
49
+ /** GBT score (logit) for one feature vector. Threshold-comparable like the FS weight. Computed as `m.base` plus `m.lr` times each tree's leaf value for `x`. */
50
+ export declare function gbtScore(m: GBT, x: number[]): number;
51
+ //# sourceMappingURL=gbt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gbt.d.ts","sourceRoot":"","sources":["../gbt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,mGAAmG;AACnG,MAAM,MAAM,QAAQ,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,QAAQ,CAAC;IAAC,EAAE,EAAE,QAAQ,CAAA;CAAE,CAAA;AAIhG;;;GAGG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,EAAE,CAoBzD;AA0ED,oGAAoG;AACpG,MAAM,WAAW,GAAG;IACnB,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;CACZ;AAED,4CAA4C;AAC5C,MAAM,WAAW,OAAO;IACvB,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,EAAE,EAAE,MAAM,CAAA;IACV,OAAO,EAAE,MAAM,CAAA;CACf;AAED,6FAA6F;AAC7F,wBAAgB,QAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,GAAG,GAAG,CAqBpF;AAED,yFAAyF;AACzF,wBAAgB,QAAQ,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAIpD"}
package/out/gbt.js ADDED
@@ -0,0 +1,149 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Gradient-boosted shallow regression trees (logistic loss), pure-Node — the learned scorer #603
7
+ * names: an offline-trained model (this trainer, or XGBoost/LightGBM exported to the same
8
+ * {@link GBT} shape) plus a trivial evaluator, no new runtime dependency. It sits behind the
9
+ * matcher's `scorer` hook to replace the Fellegi-Sunter link weight where labels (or a held-out
10
+ * truth like an NPI) let a tree learn the over-merge signature the hand-weights miss.
11
+ *
12
+ * This module is feature-agnostic: feature vectors are caller-defined `number[]` (the record
13
+ * matcher builds them in `@mailwoman/registry`'s learned-scorer module — one-hot agreement
14
+ * levels, interaction terms, and corpus statistics).
15
+ *
16
+ * It only fits ({@link trainGBT}) and scores
17
+ * ({@link gbtScore}). The trained {@link GBT} is plain JSON (`{trees, lr, base}`), so a model
18
+ * trains offline once and ships as a data file.
19
+ */
20
+ const sigmoid = (z) => 1 / (1 + Math.exp(-Math.max(-30, Math.min(30, z)))); // logistic function; z is clamped to [-30, 30] before exponentiation
21
+ /**
22
+ * Per-feature candidate split thresholds: midpoints for few-valued/binary features, quantiles for
23
+ * continuous. `X` holds one feature vector per sample; the result holds one threshold list per feature column.
24
+ */
25
+ export function buildThresholds(X) {
26
+ const dim = X[0]?.length ?? 0; // feature count is read from the first row; an empty X yields no features
27
+ const out = [];
28
+ for (let f = 0; f < dim; f++) {
29
+ const vals = X.map((r) => r[f]); // column f across all rows
30
+ const uniq = [...new Set(vals)].sort((p, q) => p - q); // distinct values, ascending
31
+ if (uniq.length <= 1) { // constant column: nothing to split on
32
+ out.push([]);
33
+ }
34
+ else if (uniq.length <= 5) { // 2 to 5 distinct values: midpoint between each adjacent pair
35
+ const t = [];
36
+ for (let k = 0; k < uniq.length - 1; k++)
37
+ t.push((uniq[k] + uniq[k + 1]) / 2);
38
+ out.push(t);
39
+ }
40
+ else { // more than 5 distinct values: sample the sorted column at positions q/7, q = 1..6
41
+ const sorted = [...vals].sort((p, q) => p - q); // copy before sorting so `vals` is not mutated
42
+ const t = [];
43
+ for (let q = 1; q <= 6; q++)
44
+ t.push(sorted[Math.floor((q / 7) * (sorted.length - 1))]);
45
+ out.push([...new Set(t)]); // drop repeated quantile values
46
+ }
47
+ }
48
+ return out;
49
+ }
50
+ /** Weighted SSE of target `g` over `rows` around their weighted mean. `rows` holds sample indices into `g` and `w`. */
51
+ function nodeSSE(rows, g, w) {
52
+ let wsum = 0; // total weight of the rows
53
+ let wg = 0; // weight-scaled sum of the targets
54
+ for (const i of rows) {
55
+ wsum += w[i];
56
+ wg += w[i] * g[i];
57
+ }
58
+ const mean = wsum > 0 ? wg / wsum : 0; // falls back to 0 when the rows carry no positive total weight
59
+ let sse = 0;
60
+ for (const i of rows) {
61
+ const d = g[i] - mean;
62
+ sse += w[i] * d * d;
63
+ }
64
+ return sse;
65
+ }
66
+ /** Greedy depth-limited weighted regression tree on target `g` (the boosting residual). `rows` holds the sample indices at this node; `thresholds[f]` lists the candidate cut points for feature `f`. */
67
+ function fitRegTree(rows, X, g, w, thresholds, depth, minLeaf) {
68
+ let wsum = 0;
69
+ let wg = 0;
70
+ for (const i of rows) {
71
+ wsum += w[i];
72
+ wg += w[i] * g[i];
73
+ }
74
+ const leaf = wsum > 0 ? wg / wsum : 0; // leaf value is the weighted mean of `g` over the rows (0 without positive total weight)
75
+ if (depth === 0 || rows.length < 2 * minLeaf) // depth exhausted, or too few rows for both children to reach minLeaf
76
+ return { leaf };
77
+ const parentSSE = nodeSSE(rows, g, w);
78
+ let bestGain = 1e-12; // a split must reduce the SSE by more than this to be accepted
79
+ let bestF = -1; // -1 means no acceptable split has been found
80
+ let bestThr = 0;
81
+ let bestLo = [];
82
+ let bestHi = [];
83
+ for (let f = 0; f < thresholds.length; f++) {
84
+ for (const thr of thresholds[f]) {
85
+ const lo = [];
86
+ const hi = [];
87
+ for (const i of rows)
88
+ (X[i][f] <= thr ? lo : hi).push(i); // same `<=` routing that predictTree uses
89
+ if (lo.length < minLeaf || hi.length < minLeaf) // minLeaf is checked on row counts, not on weights
90
+ continue;
91
+ const gain = parentSSE - (nodeSSE(lo, g, w) + nodeSSE(hi, g, w));
92
+ if (gain > bestGain) { // strict comparison: on ties the earliest feature/threshold is kept
93
+ bestGain = gain;
94
+ bestF = f;
95
+ bestThr = thr;
96
+ bestLo = lo;
97
+ bestHi = hi;
98
+ }
99
+ }
100
+ }
101
+ if (bestF < 0) // no candidate split passed the size and gain checks
102
+ return { leaf };
103
+ return {
104
+ f: bestF,
105
+ thr: bestThr,
106
+ lo: fitRegTree(bestLo, X, g, w, thresholds, depth - 1, minLeaf),
107
+ hi: fitRegTree(bestHi, X, g, w, thresholds, depth - 1, minLeaf),
108
+ };
109
+ }
110
+ function predictTree(t, x) { // walks tree `t` for feature vector `x` and returns the value of the leaf it reaches
111
+ let n = t;
112
+ while ("f" in n) // internal nodes carry an `f` key; leaves do not
113
+ n = x[n.f] <= n.thr ? n.lo : n.hi; // a NaN or missing feature fails the `<=` test and goes to `hi`
114
+ return n.leaf;
115
+ }
116
+ /** Gradient-boosted regression trees on logistic loss, with per-sample class weights `w`. `X` holds one feature vector per sample, `y` the labels (1 = positive), and `w` one weight per sample. Returns `{ trees, lr, base }`. */
117
+ export function trainGBT(X, y, w, opts) {
118
+ const N = X.length;
119
+ const thresholds = buildThresholds(X); // candidate cut points are computed once and reused for every tree
120
+ const rowsAll = Array.from({ length: N }, (_, i) => i); // every tree is fitted on all sample indices
121
+ let wpos = 0; // total weight of samples labelled 1
122
+ let wtot = 0; // total weight of all samples
123
+ for (let i = 0; i < N; i++) {
124
+ wtot += w[i];
125
+ if (y[i] === 1) // only a label strictly equal to 1 counts as positive
126
+ wpos += w[i];
127
+ }
128
+ const base = Math.log((wpos + 1) / (wtot - wpos + 1)); // weighted base log-odds; the +1 on each side keeps the ratio finite when one class has zero weight
129
+ const F = new Array(N).fill(base); // current raw score per sample, starting at the base
130
+ const trees = [];
131
+ for (let m = 0; m < opts.rounds; m++) {
132
+ const g = new Array(N);
133
+ for (let i = 0; i < N; i++)
134
+ g[i] = y[i] - sigmoid(F[i]); // negative gradient of logistic loss
135
+ const tree = fitRegTree(rowsAll, X, g, w, thresholds, opts.depth, opts.minLeaf);
136
+ for (let i = 0; i < N; i++)
137
+ F[i] += opts.lr * predictTree(tree, X[i]); // shrink the new tree's output by the learning rate
138
+ trees.push(tree);
139
+ }
140
+ return { trees, lr: opts.lr, base };
141
+ }
142
+ /** GBT score (logit) for one feature vector. Threshold-comparable like the FS weight. Computed as `m.base` plus `m.lr` times each tree's leaf value for `x`. */
143
+ export function gbtScore(m, x) {
144
+ let f = m.base; // start from the model's base score
145
+ for (const t of m.trees)
146
+ f += m.lr * predictTree(t, x); // same accumulation trainGBT applies to its running scores
147
+ return f; // raw score; no sigmoid is applied here
148
+ }
149
+ //# sourceMappingURL=gbt.js.map
package/out/gbt.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gbt.js","sourceRoot":"","sources":["../gbt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAKH,MAAM,OAAO,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAE1F;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,CAAa;IAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;IAC7B,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;QAChC,MAAM,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACrD,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACtB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QACb,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,GAAa,EAAE,CAAA;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE;gBAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAE,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YAC/E,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACZ,CAAC;aAAM,CAAC;YACP,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;YAC9C,MAAM,CAAC,GAAa,EAAE,CAAA;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;gBAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;YACvF,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAC1B,CAAC;IACF,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,yEAAyE;AACzE,SAAS,OAAO,CAAC,IAAc,EAAE,CAAW,EAAE,CAAW;IACxD,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,EAAE,GAAG,CAAC,CAAA;IACV,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,EAAE,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,EAA
E,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IACrC,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,IAAI,CAAA;QACtB,GAAG,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,GAAG,CAAC,CAAA;IACrB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,2FAA2F;AAC3F,SAAS,UAAU,CAClB,IAAc,EACd,CAAa,EACb,CAAW,EACX,CAAW,EACX,UAAsB,EACtB,KAAa,EACb,OAAe;IAEf,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,EAAE,GAAG,CAAC,CAAA;IACV,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,EAAE,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IACrC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,OAAO;QAAE,OAAO,EAAE,IAAI,EAAE,CAAA;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IACrC,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAG,CAAC,CAAC,CAAA;IACd,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,MAAM,GAAa,EAAE,CAAA;IACzB,IAAI,MAAM,GAAa,EAAE,CAAA;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,CAAC,CAAE,EAAE,CAAC;YAClC,MAAM,EAAE,GAAa,EAAE,CAAA;YACvB,MAAM,EAAE,GAAa,EAAE,CAAA;YACvB,KAAK,MAAM,CAAC,IAAI,IAAI;gBAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC1D,IAAI,EAAE,CAAC,MAAM,GAAG,OAAO,IAAI,EAAE,CAAC,MAAM,GAAG,OAAO;gBAAE,SAAQ;YACxD,MAAM,IAAI,GAAG,SAAS,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;YAChE,IAAI,IAAI,GAAG,QAAQ,EAAE,CAAC;gBACrB,QAAQ,GAAG,IAAI,CAAA;gBACf,KAAK,GAAG,CAAC,CAAA;gBACT,OAAO,GAAG,GAAG,CAAA;gBACb,MAAM,GAAG,EAAE,CAAA;gBACX,MAAM,GAAG,EAAE,CAAA;YACZ,CAAC;QACF,CAAC;IACF,CAAC;IACD,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,CAAA;IAC9B,OAAO;QACN,CAAC,EAAE,KAAK;QACR,GAAG,EAAE,OAAO;QACZ,EAAE,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,KAAK,GAAG,CAAC,EAAE,OAAO,CAAC;QAC/D,EAAE,EAAE,UAAU,CAA
C,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,KAAK,GAAG,CAAC,EAAE,OAAO,CAAC;KAC/D,CAAA;AACF,CAAC;AAED,SAAS,WAAW,CAAC,CAAW,EAAE,CAAW;IAC5C,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,GAAG,IAAI,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACnD,OAAO,CAAC,CAAC,IAAI,CAAA;AACd,CAAC;AAiBD,6FAA6F;AAC7F,MAAM,UAAU,QAAQ,CAAC,CAAa,EAAE,CAAW,EAAE,CAAW,EAAE,IAAa;IAC9E,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,UAAU,GAAG,eAAe,CAAC,CAAC,CAAC,CAAA;IACrC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACtD,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;IAC9B,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA,CAAC,yBAAyB;IAC/E,MAAM,CAAC,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACzC,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA,CAAC,qCAAqC;QAC/F,MAAM,IAAI,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAA;QAC/E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,CAAC,CAAC,CAAC,CAAE,IAAI,IAAI,CAAC,EAAE,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;QACvE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACjB,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,IAAI,EAAE,CAAA;AACpC,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,QAAQ,CAAC,CAAM,EAAE,CAAW;IAC3C,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAA;IACd,KAAK,MAAM,CAAC,IA
AI,CAAC,CAAC,KAAK;QAAE,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACtD,OAAO,CAAC,CAAA;AACT,CAAC"}
package/out/index.d.ts CHANGED
@@ -21,5 +21,6 @@ export * from "./comparators.js";
21
21
  export * from "./distance.js";
22
22
  export * from "./em.js";
23
23
  export * from "./fellegi-sunter.js";
24
+ export * from "./gbt.js";
24
25
  export * from "./tf.js";
25
26
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,SAAS,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,UAAU,CAAA;AACxB,cAAc,SAAS,CAAA"}
package/out/index.js CHANGED
@@ -21,5 +21,6 @@ export * from "./comparators.js";
21
21
  export * from "./distance.js";
22
22
  export * from "./em.js";
23
23
  export * from "./fellegi-sunter.js";
24
+ export * from "./gbt.js";
24
25
  export * from "./tf.js";
25
26
  //# sourceMappingURL=index.js.map
package/out/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,SAAS,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,UAAU,CAAA;AACxB,cAAc,SAAS,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/match",
3
- "version": "4.8.1",
3
+ "version": "4.11.0",
4
4
  "description": "The geocode-first record matcher: block → score → cluster. This first cut ships the string comparators (Jaro / Jaro-Winkler + an edit-distance fallback for compound surnames) that the Fellegi-Sunter scorer is built on.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {