@mailwoman/match 4.8.1 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,24 @@ export interface ClusterOptions {
32
32
  * knob.
33
33
  */
34
34
  threshold: number;
35
+ /**
36
+ * How the above-threshold link graph resolves into clusters:
37
+ *
38
+ * - `"single"` (default) — connected components (union-find). Fast; ANY above-threshold link fuses
39
+ * two groups, so a single weak link can over-merge unrelated records through a transitive
40
+ * chain.
41
+ * - `"average"` — agglomerative average-linkage refinement WITHIN each connected component: two
42
+ * sub-clusters merge only when the AVERAGE weight of the links between them clears the
43
+ * threshold, so a lone weak bridge no longer fuses two otherwise-dense groups. The documented
44
+ * over-merge fix (Dedupe). Falls back to single-linkage for any component larger than
45
+ * {@link maxAverageLinkageComponent}.
46
+ */
47
+ linkage?: "single" | "average";
48
+ /**
49
+ * Components larger than this skip the O(k³) average-linkage refine and keep single-linkage.
50
+ * Default 64.
51
+ */
52
+ maxAverageLinkageComponent?: number;
35
53
  }
36
54
  /**
37
55
  * Cluster records into canonical entities by connected components of the above-threshold link
@@ -1 +1 @@
1
- {"version":3,"file":"clustering.d.ts","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,iGAAiG;AACjG,MAAM,WAAW,UAAU,CAAC,CAAC;IAC5B,CAAC,EAAE,CAAC,CAAA;IACJ,CAAC,EAAE,CAAC,CAAA;IACJ,MAAM,EAAE,MAAM,CAAA;CACd;AAED,mCAAmC;AACnC,MAAM,WAAW,cAAc;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAA;CACjB;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,GAAG,CAAC,EAAE,EAAE,CAgD7G;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,CAAC,SAAS,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,CAgBnF"}
1
+ {"version":3,"file":"clustering.d.ts","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,iGAAiG;AACjG,MAAM,WAAW,UAAU,CAAC,CAAC;IAC5B,CAAC,EAAE,CAAC,CAAA;IACJ,CAAC,EAAE,CAAC,CAAA;IACJ,MAAM,EAAE,MAAM,CAAA;CACd;AAED,mCAAmC;AACnC,MAAM,WAAW,cAAc;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAA;IACjB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAA;IAC9B;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,CAAA;CACnC;AA6CD;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,GAAG,CAAC,EAAE,EAAE,CA4E7G;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,CAAC,SAAS,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,CAgBnF"}
package/out/clustering.js CHANGED
@@ -19,6 +19,47 @@
19
19
  * over-merge risk is already damped: blocking keeps candidate sets local, so chains can't run
20
20
  * across the whole dataset.
21
21
  */
22
/**
 * Refine one connected component by agglomerative average-linkage.
 *
 * Starts with every member as a singleton and repeatedly merges the cluster pair with the highest
 * _average_ inter-cluster link weight while that average is at or above `threshold`; clusters with
 * no link between them never merge. When several pairs tie on the average, the pair that comes
 * first in cluster order wins, and a merged cluster keeps the position of its earlier half (its
 * members are listed first).
 *
 * Each round aggregates the edge list exactly once instead of rescanning it for every cluster
 * pair, so a component of k members with E edges costs O(k · (E + k)) — inside the O(k³) budget
 * callers assume when they gate this on a size cap.
 *
 * @param members Records of one connected component.
 * @param edges `[i, j, weight]` triples whose `i`/`j` are indices into `members`. Triples that
 *   point outside `members` are ignored.
 * @param threshold Minimum average inter-cluster weight required to merge two clusters.
 * @returns The refined clusters, each an array of records taken from `members`.
 */
function averageLinkageRefine(members, edges, threshold) {
    const clusters = members.map((_, i) => [i]);
    // owner[i] is the position in `clusters` of the cluster that currently holds local member i.
    const owner = members.map((_, i) => i);
    for (;;) {
        const width = clusters.length;
        // Aggregate every cross-cluster edge in a single pass. A pair (p, q) with p < q is keyed by
        // p * width + q, so ascending keys follow the same (p, q) order a nested loop would visit.
        const totals = new Map();
        for (const [i, j, w] of edges) {
            const ci = owner[i];
            const cj = owner[j];
            if (ci === undefined || cj === undefined || ci === cj)
                continue; // dangling index, or both ends already share a cluster
            const slot = ci < cj ? ci * width + cj : cj * width + ci;
            let total = totals.get(slot);
            if (!total) {
                total = [0, 0]; // [sum of weights, number of edges]
                totals.set(slot, total);
            }
            total[0] += w;
            total[1]++;
        }
        let bestAvg = -Infinity;
        let bestSlot = -1;
        for (const [slot, total] of totals) {
            const avg = total[0] / total[1];
            // Strictly better average wins; an exact tie goes to the earlier pair. A NaN average
            // fails both comparisons and is therefore never selected.
            if (avg > bestAvg || (avg === bestAvg && bestSlot !== -1 && slot < bestSlot)) {
                bestAvg = avg;
                bestSlot = slot;
            }
        }
        if (bestSlot === -1 || bestAvg < threshold)
            break;
        const p = Math.floor(bestSlot / width);
        const q = bestSlot % width;
        clusters[p] = clusters[p].concat(clusters[q]);
        clusters.splice(q, 1);
        // Re-point members of the absorbed cluster and close the gap left by the splice.
        for (let i = 0; i < owner.length; i++) {
            if (owner[i] === q)
                owner[i] = p;
            else if (owner[i] > q)
                owner[i]--;
        }
    }
    return clusters.map((local) => local.map((i) => members[i]));
}
22
63
  /**
23
64
  * Cluster records into canonical entities by connected components of the above-threshold link
24
65
  * graph. Every input record lands in exactly one cluster — a record with no qualifying link is a
@@ -56,14 +97,18 @@ export function cluster(records, links, opts) {
56
97
  rank[rx]++;
57
98
  }
58
99
  };
100
+ // Collect ALL valid links (not just above-threshold): connected components form from the
101
+ // above-threshold ones, but the average-linkage refinement needs the full sub-graph — a weak or
102
+ // disagreeing below-threshold edge between two sub-clusters is exactly what should pull them apart.
103
+ const allLinks = [];
59
104
  for (const link of links) {
60
- if (link.weight < opts.threshold)
61
- continue;
62
105
  const ia = index.get(link.a);
63
106
  const ib = index.get(link.b);
64
107
  if (ia === undefined || ib === undefined)
65
108
  continue;
66
- union(ia, ib);
109
+ allLinks.push(link);
110
+ if (link.weight >= opts.threshold)
111
+ union(ia, ib);
67
112
  }
68
113
  const groups = new Map();
69
114
  records.forEach((record, i) => {
@@ -74,7 +119,33 @@ export function cluster(records, links, opts) {
74
119
  else
75
120
  groups.set(root, [record]);
76
121
  });
77
- return [...groups.values()];
122
+ if (opts.linkage !== "average")
123
+ return [...groups.values()];
124
+ // Average-linkage refinement: split each component where its sub-clusters are joined only by a weak
125
+ // bridge (the average inter-cluster link weight, over ALL edges between them, falls below the threshold).
126
+ const maxComponent = opts.maxAverageLinkageComponent ?? 64;
127
+ const localOf = new Map(); // member → its index within its own group
128
+ for (const members of groups.values())
129
+ members.forEach((m, i) => localOf.set(m, i));
130
+ const groupEdges = new Map();
131
+ for (const link of allLinks) {
132
+ const root = find(index.get(link.a));
133
+ if (root !== find(index.get(link.b)))
134
+ continue; // cross-component edge — not part of any refinement
135
+ const list = groupEdges.get(root) ?? [];
136
+ list.push([localOf.get(link.a), localOf.get(link.b), link.weight]);
137
+ groupEdges.set(root, list);
138
+ }
139
+ const result = [];
140
+ for (const [root, members] of groups) {
141
+ if (members.length <= 1 || members.length > maxComponent) {
142
+ result.push(members);
143
+ continue;
144
+ }
145
+ for (const sub of averageLinkageRefine(members, groupEdges.get(root) ?? [], opts.threshold))
146
+ result.push(sub);
147
+ }
148
+ return result;
78
149
  }
79
150
  /**
80
151
  * Pick a cluster's most complete record as its canonical representative — the one with the fewest
@@ -1 +1 @@
1
- {"version":3,"file":"clustering.js","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAkBH;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAI,OAAqB,EAAE,KAA8B,EAAE,IAAoB;IACrG,MAAM,KAAK,GAAG,IAAI,GAAG,EAAa,CAAA;IAClC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;IAEpD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACvC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAS,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAEtD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;QAClC,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,IAAI;YAAE,IAAI,GAAG,MAAM,CAAC,IAAI,CAAE,CAAA;QAClD,oBAAoB;QACpB,OAAO,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YAChB,CAAC,GAAG,IAAI,CAAA;QACT,CAAC;QACD,OAAO,IAAI,CAAA;IACZ,CAAC,CAAA;IAED,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;QAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,IAAI,EAAE,KAAK,EAAE;YAAE,OAAM;QACrB,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aACrC,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aAC1C,CAAC;YACL,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;YACf,IAAI,CAAC,EAAE,CAAE,EAAE,CAAA;QACZ,CAAC;IACF,CAAC,CAAA;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS;YAAE,SAAQ;QAC1C,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS;YAAE,SAAQ;QAClD,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;IACd,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAA;IACrC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;;YACxB,MAAM,CAAC,GAAG
,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;AAC5B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAmB,KAAmB;IACnE,IAAI,IAAmB,CAAA;IACvB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;IAEnB,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,EAAE;gBAAE,MAAM,EAAE,CAAA;QACpE,CAAC;QACD,IAAI,MAAM,GAAG,UAAU,EAAE,CAAC;YACzB,UAAU,GAAG,MAAM,CAAA;YACnB,IAAI,GAAG,MAAM,CAAA;QACd,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC"}
1
+ {"version":3,"file":"clustering.js","sourceRoot":"","sources":["../clustering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAoCH;;;;;GAKG;AACH,SAAS,oBAAoB,CAAI,OAAY,EAAE,KAAsC,EAAE,SAAiB;IACvG,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC3C,MAAM,YAAY,GAAG,CAAC,CAAW,EAAE,CAAW,EAAiB,EAAE;QAChE,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA;QACtB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA;QACtB,IAAI,GAAG,GAAG,CAAC,CAAA;QACX,IAAI,KAAK,GAAG,CAAC,CAAA;QACb,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC;YAC/B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9D,GAAG,IAAI,CAAC,CAAA;gBACR,KAAK,EAAE,CAAA;YACR,CAAC;QACF,CAAC;QACD,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAA;IACtC,CAAC,CAAA;IAED,SAAS,CAAC;QACT,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAA;QACvB,IAAI,QAAQ,GAA4B,IAAI,CAAA;QAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAE,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;gBACpD,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,GAAG,OAAO,EAAE,CAAC;oBACnC,OAAO,GAAG,GAAG,CAAA;oBACb,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;QACF,CAAC;QACD,IAAI,CAAC,QAAQ,IAAI,OAAO,GAAG,SAAS;YAAE,MAAK;QAC3C,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,QAAQ,CAAA;QACvB,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;QAC/C,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACtB,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAI,OAAqB,EAAE,KAA8B,EAAE,IAAoB;IACrG,MAAM,KAAK,GAAG,IAAI,GAAG,EAAa,CAAA;IAClC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,
EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;IAEpD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACvC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAS,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAEtD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;QAClC,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,IAAI;YAAE,IAAI,GAAG,MAAM,CAAC,IAAI,CAAE,CAAA;QAClD,oBAAoB;QACpB,OAAO,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YAChB,CAAC,GAAG,IAAI,CAAA;QACT,CAAC;QACD,OAAO,IAAI,CAAA;IACZ,CAAC,CAAA;IAED,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;QAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,IAAI,EAAE,KAAK,EAAE;YAAE,OAAM;QACrB,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aACrC,IAAI,IAAI,CAAC,EAAE,CAAE,GAAG,IAAI,CAAC,EAAE,CAAE;YAAE,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;aAC1C,CAAC;YACL,MAAM,CAAC,EAAE,CAAC,GAAG,EAAE,CAAA;YACf,IAAI,CAAC,EAAE,CAAE,EAAE,CAAA;QACZ,CAAC;IACF,CAAC,CAAA;IAED,yFAAyF;IACzF,gGAAgG;IAChG,oGAAoG;IACpG,MAAM,QAAQ,GAAoB,EAAE,CAAA;IACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS;YAAE,SAAQ;QAClD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnB,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS;YAAE,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;IACjD,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAA;IACrC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;;YACxB,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS;QAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;IAE3D,oGAAoG;IACpG,0GAA0G;IAC1G,MAAM,YAAY,GA
AG,IAAI,CAAC,0BAA0B,IAAI,EAAE,CAAA;IAC1D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAa,CAAA,CAAC,0CAA0C;IAC/E,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE;QAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;IACnF,MAAM,UAAU,GAAG,IAAI,GAAG,EAA2C,CAAA;IACrE,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,CAAA;QACrC,IAAI,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC;YAAE,SAAQ,CAAC,oDAAoD;QACpG,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;QACvC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;QACpE,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,CAAC;IAED,MAAM,MAAM,GAAU,EAAE,CAAA;IACxB,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;YAC1D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,SAAQ;QACT,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,oBAAoB,CAAC,OAAO,EAAE,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9G,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAmB,KAAmB;IACnE,IAAI,IAAmB,CAAA;IACvB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;IAEnB,KAAK,MAAM,MAAM,IAAI,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,EAAE;gBAAE,MAAM,EAAE,CAAA;QACpE,CAAC;QACD,IAAI,MAAM,GAAG,UAAU,EAAE,CAAC;YACzB,UAAU,GAAG,MAAM,CAAA;YACnB,IAAI,GAAG,MAAM,CAAA;QACd,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC"}
package/out/distance.d.ts CHANGED
@@ -38,4 +38,36 @@ export declare function distanceComparison<R>(config: {
38
38
  * error.
39
39
  */
40
40
  export declare const DEFAULT_DISTANCE_LEVELS: ComparisonLevel[];
41
+ /**
42
+ * The collapsed spatial-agreement comparison — ONE non-redundant geographic signal.
43
+ *
44
+ * The first matcher carried TWO spatial comparisons: canonical-address-key similarity AND
45
+ * great-circle distance. They double-count — an exact key match implies distance ≈ 0, so a
46
+ * co-located pair banked the same evidence twice, and the redundant vote is exactly what
47
+ * over-merges distinct providers at a shared clinic address. This folds them into one comparison:
48
+ *
49
+ * - **level 0 `same-key`** — an EXACT canonical-key match: the strongest tier, and the one the
50
+ * inverse-address-frequency adjustment rides ({@link withTermFrequency} on level 0), so agreement
51
+ * on a crowded shared key is down-weighted toward worthless while a rare one keeps full weight.
52
+ * - **levels 1…n** — great-circle distance buckets for every pair that is NOT an exact key match
53
+ * (keys differ, or a key is missing or blank), so "123 Main St" vs "123 Main Street Apt 2" that
54
+ * geocode to the same rooftop still earns near-agreement (the geo-first point of the whole design).
55
+ * - No exact key match and no usable coordinate (finite latitude AND longitude on both records) → no evidence (`assess` returns -1).
56
+ *
57
+ * Exactly one spatial vote, no redundancy. Pass {@link DEFAULT_SPATIAL_LEVELS} or your own; index 0
58
+ * must be the exact-key tier, indices 1…n the distance buckets ordered nearest → far by `maxKm`
59
+ * (the last one, `far`, has no `maxKm` and catches everything beyond).
60
+ */
61
+ export declare function spatialComparison<R>(config: {
62
+ name: string; // copied onto the returned comparison
63
+ key: (record: R) => string | null | undefined; // canonical address key; a missing or whitespace-only key never matches
64
+ coordinate: (record: R) => LatLon | null | undefined; // position used for the distance tiers when there is no exact key match
65
+ levels: ComparisonLevel[]; // index 0 = same-key tier, 1…n = distance buckets
66
+ }): Comparison<R>;
67
+ /**
68
+ * Default levels for {@link spatialComparison}: an exact same-key tier (index 0) followed by the
69
+ * distance buckets, nearest → far. `m`/`u` are EM-estimable seeds (m decreasing, u increasing down
70
+ * the tiers; each column sums to ≈ 1).
71
+ */
72
+ export declare const DEFAULT_SPATIAL_LEVELS: ComparisonLevel[];
41
73
  //# sourceMappingURL=distance.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"distance.d.ts","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAKtE,uEAAuE;AACvE,wBAAgB,WAAW,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CASxD;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC7C,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACjD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAmBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,EAAE,eAAe,EAKpD,CAAA"}
1
+ {"version":3,"file":"distance.d.ts","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAKtE,uEAAuE;AACvE,wBAAgB,WAAW,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CASxD;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC7C,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACjD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAmBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,EAAE,eAAe,EAKpD,CAAA;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE;IAC5C,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IAC7C,UAAU,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,GAAG,IAAI,GAAG,SAAS,CAAA;IACpD,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,GAAG,UAAU,CAAC,CAAC,CAAC,CAuBhB;AAED;;;;GAIG;AACH,eAAO,MAAM,sBAAsB,EAAE,eAAe,EAMnD,CAAA"}
package/out/distance.js CHANGED
@@ -65,4 +65,59 @@ export const DEFAULT_DISTANCE_LEVELS = [
65
65
  { label: "same-area", maxKm: 5, m: 0.08, u: 0.2 },
66
66
  { label: "far", m: 0.02, u: 0.779 },
67
67
  ];
68
/**
 * The collapsed spatial-agreement comparison — ONE non-redundant geographic signal.
 *
 * The first matcher carried TWO spatial comparisons: canonical-address-key similarity AND
 * great-circle distance. They double-count — an exact key match implies distance ≈ 0, so a
 * co-located pair banked the same evidence twice, and the redundant vote is exactly what
 * over-merges distinct providers at a shared clinic address. This folds them into one comparison:
 *
 * - **level 0 `same-key`** — an EXACT canonical-key match: the strongest tier, and the one the
 *   inverse-address-frequency adjustment rides ({@link withTermFrequency} on level 0), so agreement
 *   on a crowded shared key is down-weighted toward worthless while a rare one keeps full weight.
 * - **levels 1…n** — great-circle distance buckets for every pair that is NOT an exact key match
 *   (keys differ, or a key is missing or blank), so "123 Main St" vs "123 Main Street Apt 2" that
 *   geocode to the same rooftop still earns near-agreement (the geo-first point of the whole design).
 * - No exact key match and no usable coordinate → no evidence (`-1`).
 *
 * Exactly one spatial vote, no redundancy. Pass {@link DEFAULT_SPATIAL_LEVELS} or your own; index 0
 * must be the exact-key tier, indices 1…n the distance buckets nearest → far by `maxKm` (last =
 * `far`).
 *
 * @param config `name` and `levels` are copied onto the returned comparison; `key` and
 *   `coordinate` extract the canonical address key and the position from a record.
 * @returns A comparison whose `assess(a, b)` yields `0` for an exact key match, the index (≥ 1) of
 *   the first distance bucket whose `maxKm` covers the pair, or `-1` when there is no evidence.
 */
export function spatialComparison(config) {
    // A coordinate is usable only when both of its components are finite numbers.
    const valid = (c) => !!c && Number.isFinite(c.latitude) && Number.isFinite(c.longitude);
    return {
        name: config.name,
        levels: config.levels,
        assess(a, b) {
            const ka = config.key(a);
            const kb = config.key(b);
            // Whitespace-only keys carry no information, so they never count as a match.
            if (ka && kb && ka.trim() && ka === kb)
                return 0; // exact canonical-key match — one strong vote
            const ca = config.coordinate(a);
            const cb = config.coordinate(b);
            if (!valid(ca) || !valid(cb))
                return -1; // keys differ and no coordinate → no spatial evidence
            // With no distance bucket configured there is nothing to report for a non-matching
            // pair; falling through would return index 0 and mislabel it as an exact key match.
            if (config.levels.length < 2)
                return -1;
            const km = haversineKm(ca, cb);
            for (let i = 1; i < config.levels.length; i++) {
                // A bucket without `maxKm` is open-ended and accepts any distance.
                if (km <= (config.levels[i].maxKm ?? Infinity))
                    return i;
            }
            return config.levels.length - 1;
        },
    };
}
111
+ /**
112
+ * Default levels for {@link spatialComparison}: an exact same-key tier (index 0) followed by the
113
+ * distance buckets, nearest → far. `m`/`u` are EM-estimable seeds (m decreasing, u increasing down
114
+ * the tiers; each column sums to ≈ 1).
115
+ */
116
+ export const DEFAULT_SPATIAL_LEVELS = [
117
+ { label: "same-key", m: 0.85, u: 0.01 }, // level 0: exact canonical-key match
118
+ { label: "same-building", maxKm: 0.05, m: 0.1, u: 0.02 }, // no key match, at most 0.05 km apart
119
+ { label: "same-block", maxKm: 0.5, m: 0.03, u: 0.05 }, // at most 0.5 km apart
120
+ { label: "same-area", maxKm: 5, m: 0.015, u: 0.2 }, // at most 5 km apart
121
+ { label: "far", m: 0.005, u: 0.72 }, // no maxKm: catch-all for anything farther
122
+ ];
68
123
  //# sourceMappingURL=distance.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"distance.js","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAKH,sCAAsC;AACtC,MAAM,eAAe,GAAG,SAAS,CAAA;AAEjC,uEAAuE;AACvE,MAAM,UAAU,WAAW,CAAC,CAAS,EAAE,CAAS;IAC/C,MAAM,KAAK,GAAG,CAAC,OAAe,EAAU,EAAE,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAA;IACpE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAE9B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAA;IAC7F,OAAO,CAAC,GAAG,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAClE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAI,MAIrC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA;YAEvC,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACzD,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACzD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAA
E,IAAI,EAAE;IACpD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE;IACjD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE;CACnC,CAAA"}
1
+ {"version":3,"file":"distance.js","sourceRoot":"","sources":["../distance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAKH,sCAAsC;AACtC,MAAM,eAAe,GAAG,SAAS,CAAA;AAEjC,uEAAuE;AACvE,MAAM,UAAU,WAAW,CAAC,CAAS,EAAE,CAAS;IAC/C,MAAM,KAAK,GAAG,CAAC,OAAe,EAAU,EAAE,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAA;IACpE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;IAE9B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAA;IAC7F,OAAO,CAAC,GAAG,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAClE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAI,MAIrC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA;YAEvC,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACzD,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE;IACzD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAA
E,IAAI,EAAE;IACpD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE;IACjD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE;CACnC,CAAA;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,iBAAiB,CAAI,MAKpC;IACA,MAAM,KAAK,GAAG,CAAC,CAA4B,EAAe,EAAE,CAC3D,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAEnE,OAAO;QACN,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,CAAC,CAAC,EAAE,CAAC;YACV,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACxB,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACxB,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE;gBAAE,OAAO,CAAC,CAAA,CAAC,8CAA8C;YAE/F,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;YAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;YAC/B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,OAAO,CAAC,CAAC,CAAA,CAAC,sDAAsD;YAE9F,MAAM,EAAE,GAAG,WAAW,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,QAAQ,CAAC;oBAAE,OAAO,CAAC,CAAA;YAC1D,CAAC;YACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAChC,CAAC;KACD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAsB;IACxD,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACvC,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE;IACxD,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE;IACrD,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE;IAClD,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE;CACnC,CAAA"}
package/out/gbt.d.ts ADDED
@@ -0,0 +1,49 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Gradient-boosted shallow regression trees (logistic loss), pure-Node — the learned scorer #603
7
+ * names: an offline-trained model (this trainer, or XGBoost/LightGBM exported to the same
8
+ * {@link GBT} shape) plus a trivial evaluator, no new runtime dependency. It sits behind the
9
+ * matcher's `scorer` hook to replace the Fellegi-Sunter link weight where labels (or a held-out
10
+ * truth like an NPI) let a tree learn the over-merge signature the hand-weights miss.
11
+ *
12
+ * This module is feature-agnostic: feature vectors are caller-defined `number[]` (the record
13
+ * matcher builds them in `@mailwoman/registry`'s learned-scorer module — one-hot agreement levels
14
+ * + interaction terms + corpus statistics). It only fits ({@link trainGBT}) and scores
15
+ * ({@link gbtScore}). The trained {@link GBT} is plain JSON (`{trees, lr, base}`), so a model
16
+ * trains offline once and ships as a data file.
17
+ */
18
+ /** A trained tree: an internal split (feature `f` ≤ `thr` → `lo`, else `hi`) or a `leaf` value. Recursive, so a whole tree is one nested plain object. */
19
+ export type TreeNode = {
20
+ leaf: number; // terminal node: the value this branch contributes
21
+ } | {
22
+ f: number; // index of the feature tested at this split
23
+ thr: number; // split threshold, compared with `≤`
24
+ lo: TreeNode; // subtree taken when feature `f` ≤ `thr`
25
+ hi: TreeNode; // subtree taken otherwise
26
+ };
27
+ /**
28
+ * Per-feature candidate split thresholds: midpoints for few-valued/binary features, quantiles for
29
+ * continuous. `X` holds one feature vector per row (the feature count is read from the first row); the result has one threshold list per feature, empty when the feature takes a single value.
30
+ */
31
+ export declare function buildThresholds(X: number[][]): number[][];
32
+ /** A trained gradient-boosted-tree model: an additive ensemble over a base log-odds. Plain JSON. */
33
+ export interface GBT {
34
+ trees: TreeNode[]; // the ensemble's trees
35
+ lr: number; // presumably the learning rate (shrinkage) scaling each tree's output — confirm in gbtScore
36
+ base: number; // base log-odds the ensemble is added to
37
+ }
38
+ /** Hyperparameters for {@link trainGBT}. Field meanings below are inferred from their names — NOTE(review): confirm against the trainer. */
39
+ export interface GBTOpts {
40
+ rounds: number; // presumably the number of boosting rounds (one tree per round)
41
+ depth: number; // presumably the maximum depth of each tree
42
+ lr: number; // presumably the learning rate stored on the resulting GBT
43
+ minLeaf: number; // presumably the minimum number of samples allowed in a leaf
44
+ }
45
+ /** Gradient-boosted regression trees on logistic loss, with per-sample class weights `w`. `X` holds one feature vector per sample; `y` and `w` presumably align with `X` row for row (labels and weights) — TODO confirm against the implementation. */
46
+ export declare function trainGBT(X: number[][], y: number[], w: number[], opts: GBTOpts): GBT;
47
+ /** GBT score (logit) for one feature vector `x` under model `m`. Threshold-comparable like the FS (Fellegi-Sunter) link weight; the result is a logit, not a probability. */
48
+ export declare function gbtScore(m: GBT, x: number[]): number;
49
+ //# sourceMappingURL=gbt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gbt.d.ts","sourceRoot":"","sources":["../gbt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,mGAAmG;AACnG,MAAM,MAAM,QAAQ,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,QAAQ,CAAC;IAAC,EAAE,EAAE,QAAQ,CAAA;CAAE,CAAA;AAIhG;;;GAGG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,EAAE,CAoBzD;AA0ED,oGAAoG;AACpG,MAAM,WAAW,GAAG;IACnB,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;CACZ;AAED,4CAA4C;AAC5C,MAAM,WAAW,OAAO;IACvB,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,EAAE,EAAE,MAAM,CAAA;IACV,OAAO,EAAE,MAAM,CAAA;CACf;AAED,6FAA6F;AAC7F,wBAAgB,QAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,GAAG,GAAG,CAqBpF;AAED,yFAAyF;AACzF,wBAAgB,QAAQ,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAIpD"}
package/out/gbt.js ADDED
@@ -0,0 +1,147 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Gradient-boosted shallow regression trees (logistic loss), pure-Node — the learned scorer #603
7
+ * names: an offline-trained model (this trainer, or XGBoost/LightGBM exported to the same
8
+ * {@link GBT} shape) plus a trivial evaluator, no new runtime dependency. It sits behind the
9
+ * matcher's `scorer` hook to replace the Fellegi-Sunter link weight where labels (or a held-out
10
+ * truth like an NPI) let a tree learn the over-merge signature the hand-weights miss.
11
+ *
12
+ * This module is feature-agnostic: feature vectors are caller-defined `number[]` (the record
13
+ * matcher builds them in `@mailwoman/registry`'s learned-scorer module — one-hot agreement levels
14
+ * + interaction terms + corpus statistics). It only fits ({@link trainGBT}) and scores
15
+ * ({@link gbtScore}). The trained {@link GBT} is plain JSON (`{trees, lr, base}`), so a model
16
+ * trains offline once and ships as a data file.
17
+ */
18
/** Logistic function; the argument is clamped to [-30, 30] before it is exponentiated. */
const sigmoid = (z) => {
    const clamped = Math.min(30, Math.max(-30, z));
    return 1 / (1 + Math.exp(-clamped));
};
19
/**
 * Per-feature candidate split thresholds: midpoints for few-valued/binary features, quantiles for
 * continuous.
 *
 * The number of feature columns is taken from the first row. For each column:
 *
 * - no or one distinct finite value: no thresholds (the column cannot split anything);
 * - two to five distinct finite values: the midpoint between each pair of neighbouring values;
 * - otherwise: the values at the 1/7 .. 6/7 positions of the sorted column, de-duplicated.
 *
 * Non-finite entries (NaN, ±Infinity, or `undefined` from a row shorter than the first) are ignored
 * when thresholds are derived. Feeding them to the numeric sort comparator would make it return
 * NaN, which leaves the sort order unspecified and can produce NaN thresholds.
 *
 * @param X Feature rows, one `number[]` per sample.
 * @returns One array of candidate thresholds per feature column, in column order.
 */
export function buildThresholds(X) {
    const dim = X[0]?.length ?? 0;
    const out = [];
    for (let f = 0; f < dim; f++) {
        // Keep only values that can be ordered numerically, then sort once for both branches.
        const sorted = [];
        for (const row of X) {
            const v = row[f];
            if (Number.isFinite(v))
                sorted.push(v);
        }
        sorted.sort((p, q) => p - q);
        // A Set iterates in insertion order, so the distinct values come out already sorted.
        const uniq = [...new Set(sorted)];
        if (uniq.length <= 1) {
            out.push([]);
        }
        else if (uniq.length <= 5) {
            const mids = [];
            for (let k = 0; k < uniq.length - 1; k++)
                mids.push((uniq[k] + uniq[k + 1]) / 2);
            out.push(mids);
        }
        else {
            const cuts = [];
            for (let q = 1; q <= 6; q++)
                cuts.push(sorted[Math.floor((q / 7) * (sorted.length - 1))]);
            // Heavily repeated values can land on several quantiles; keep each cut once.
            out.push([...new Set(cuts)]);
        }
    }
    return out;
}
48
/**
 * Weighted sum of squared deviations of target `g` from its weighted mean, restricted to the sample
 * indices listed in `rows`. The mean is taken as 0 when the total weight is not positive.
 */
function nodeSSE(rows, g, w) {
    let totalWeight = 0;
    let weightedTarget = 0;
    rows.forEach((idx) => {
        totalWeight += w[idx];
        weightedTarget += w[idx] * g[idx];
    });
    const center = totalWeight > 0 ? weightedTarget / totalWeight : 0;
    return rows.reduce((acc, idx) => {
        const dev = g[idx] - center;
        return acc + w[idx] * dev * dev;
    }, 0);
}
64
/**
 * Greedily grow one depth-limited, weighted regression tree on target `g` (the boosting residual)
 * over the sample indices in `rows`.
 *
 * A node becomes a leaf holding the weighted mean of `g` when `depth` has reached 0, when it has
 * fewer than `2 * minLeaf` rows, or when no candidate split improves the weighted SSE by more than
 * 1e-12. Otherwise it splits on the (feature, threshold) pair with the largest SSE reduction; the
 * earliest candidate wins ties. Both children are then grown with `depth - 1`.
 */
function fitRegTree(rows, X, g, w, thresholds, depth, minLeaf) {
    // Value this node predicts if it ends up a leaf: weighted mean of the target (0 without weight).
    let weightTotal = 0;
    let weightedTarget = 0;
    for (let k = 0; k < rows.length; k++) {
        const idx = rows[k];
        weightTotal += w[idx];
        weightedTarget += w[idx] * g[idx];
    }
    const leaf = weightTotal > 0 ? weightedTarget / weightTotal : 0;
    const outOfDepth = depth === 0;
    const tooFewRows = rows.length < 2 * minLeaf;
    if (outOfDepth || tooFewRows) {
        return { leaf };
    }
    const parentSSE = nodeSSE(rows, g, w);
    // A candidate must beat this floor, so numerically-zero gains never produce a split.
    let gainToBeat = 1e-12;
    let chosen = null;
    for (let f = 0; f < thresholds.length; f++) {
        for (const thr of thresholds[f]) {
            const lo = [];
            const hi = [];
            for (const i of rows) {
                if (X[i][f] <= thr) {
                    lo.push(i);
                }
                else {
                    hi.push(i);
                }
            }
            // Reject splits that would leave either child under-populated.
            if (lo.length < minLeaf || hi.length < minLeaf) {
                continue;
            }
            const childSSE = nodeSSE(lo, g, w) + nodeSSE(hi, g, w);
            const gain = parentSSE - childSSE;
            if (gain > gainToBeat) {
                gainToBeat = gain;
                chosen = { f, thr, lo, hi };
            }
        }
    }
    if (chosen === null) {
        return { leaf };
    }
    return {
        f: chosen.f,
        thr: chosen.thr,
        lo: fitRegTree(chosen.lo, X, g, w, thresholds, depth - 1, minLeaf),
        hi: fitRegTree(chosen.hi, X, g, w, thresholds, depth - 1, minLeaf),
    };
}
108
/**
 * Walk tree `t` from the root for feature vector `x` and return the value of the leaf reached.
 * At a split node, `x[f] <= thr` descends into `lo`; anything else descends into `hi`.
 */
function predictTree(t, x) {
    for (let node = t;; node = x[node.f] <= node.thr ? node.lo : node.hi) {
        // Split nodes carry an `f` key; a node without one is a leaf.
        if (!("f" in node)) {
            return node.leaf;
        }
    }
}
114
/**
 * Gradient-boosted regression trees on logistic loss, with per-sample class weights `w`.
 *
 * Starts every sample at a weighted, add-one-smoothed base log-odds, then for `opts.rounds` rounds
 * fits a tree of at most `opts.depth` levels to the negative gradient `y - sigmoid(F)` and adds
 * `opts.lr` times that tree's prediction to each sample's running logit `F`.
 *
 * @param X Feature rows, one per training sample.
 * @param y Labels; a label of exactly `1` counts as positive for the base log-odds.
 * @param w Per-sample weights.
 * @param opts Boosting hyperparameters (`rounds`, `depth`, `lr`, `minLeaf`).
 * @returns The fitted model `{ trees, lr, base }`.
 * @throws {RangeError} If `y` or `w` has fewer entries than `X` has rows.
 */
export function trainGBT(X, y, w, opts) {
    const N = X.length;
    // A missing label or weight reads as `undefined`, which turns the base log-odds, the gradients
    // and every leaf into NaN without any error. Reject the call instead of returning such a model.
    if (y.length < N || w.length < N) {
        throw new RangeError(`trainGBT: expected a label and a weight for each of the ${N} rows, got ${y.length} labels and ${w.length} weights`);
    }
    const thresholds = buildThresholds(X);
    const rowsAll = Array.from({ length: N }, (_, i) => i);
    let wpos = 0;
    let wtot = 0;
    for (let i = 0; i < N; i++) {
        wtot += w[i];
        if (y[i] === 1)
            wpos += w[i];
    }
    // Weighted base log-odds; the +1 terms keep it finite when one class carries no weight.
    const base = Math.log((wpos + 1) / (wtot - wpos + 1));
    const F = new Array(N).fill(base);
    const trees = [];
    for (let m = 0; m < opts.rounds; m++) {
        const g = new Array(N);
        for (let i = 0; i < N; i++)
            g[i] = y[i] - sigmoid(F[i]); // negative gradient of logistic loss
        const tree = fitRegTree(rowsAll, X, g, w, thresholds, opts.depth, opts.minLeaf);
        // Move every sample's logit by the shrunken tree prediction before the next round.
        for (let i = 0; i < N; i++)
            F[i] += opts.lr * predictTree(tree, X[i]);
        trees.push(tree);
    }
    return { trees, lr: opts.lr, base };
}
140
/**
 * GBT score (logit) for one feature vector. Threshold-comparable like the FS weight.
 *
 * Starts from `m.base` and adds `m.lr` times each tree's prediction for `x`, in tree order.
 */
export function gbtScore(m, x) {
    return m.trees.reduce((logit, tree) => logit + m.lr * predictTree(tree, x), m.base);
}
147
+ //# sourceMappingURL=gbt.js.map
package/out/gbt.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gbt.js","sourceRoot":"","sources":["../gbt.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAKH,MAAM,OAAO,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAE1F;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,CAAa;IAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;IAC7B,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;QAChC,MAAM,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACrD,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACtB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QACb,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,GAAa,EAAE,CAAA;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE;gBAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAE,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YAC/E,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACZ,CAAC;aAAM,CAAC;YACP,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;YAC9C,MAAM,CAAC,GAAa,EAAE,CAAA;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;gBAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;YACvF,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAC1B,CAAC;IACF,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,yEAAyE;AACzE,SAAS,OAAO,CAAC,IAAc,EAAE,CAAW,EAAE,CAAW;IACxD,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,EAAE,GAAG,CAAC,CAAA;IACV,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,EAAE,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,
GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IACrC,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,IAAI,CAAA;QACtB,GAAG,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,GAAG,CAAC,CAAA;IACrB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,2FAA2F;AAC3F,SAAS,UAAU,CAClB,IAAc,EACd,CAAa,EACb,CAAW,EACX,CAAW,EACX,UAAsB,EACtB,KAAa,EACb,OAAe;IAEf,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,EAAE,GAAG,CAAC,CAAA;IACV,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,EAAE,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAA;IACpB,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IACrC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,OAAO;QAAE,OAAO,EAAE,IAAI,EAAE,CAAA;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IACrC,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAG,CAAC,CAAC,CAAA;IACd,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,MAAM,GAAa,EAAE,CAAA;IACzB,IAAI,MAAM,GAAa,EAAE,CAAA;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,CAAC,CAAE,EAAE,CAAC;YAClC,MAAM,EAAE,GAAa,EAAE,CAAA;YACvB,MAAM,EAAE,GAAa,EAAE,CAAA;YACvB,KAAK,MAAM,CAAC,IAAI,IAAI;gBAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC1D,IAAI,EAAE,CAAC,MAAM,GAAG,OAAO,IAAI,EAAE,CAAC,MAAM,GAAG,OAAO;gBAAE,SAAQ;YACxD,MAAM,IAAI,GAAG,SAAS,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;YAChE,IAAI,IAAI,GAAG,QAAQ,EAAE,CAAC;gBACrB,QAAQ,GAAG,IAAI,CAAA;gBACf,KAAK,GAAG,CAAC,CAAA;gBACT,OAAO,GAAG,GAAG,CAAA;gBACb,MAAM,GAAG,EAAE,CAAA;gBACX,MAAM,GAAG,EAAE,CAAA;YACZ,CAAC;QACF,CAAC;IACF,CAAC;IACD,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,CAAA;IAC9B,OAAO;QACN,CAAC,EAAE,KAAK;QACR,GAAG,EAAE,OAAO;QACZ,EAAE,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,KAAK,GAAG,CAAC,EAAE,OAAO,CAAC;QAC/D,EAAE,EAAE,UAAU,CAAC,
MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,KAAK,GAAG,CAAC,EAAE,OAAO,CAAC;KAC/D,CAAA;AACF,CAAC;AAED,SAAS,WAAW,CAAC,CAAW,EAAE,CAAW;IAC5C,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,GAAG,IAAI,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACnD,OAAO,CAAC,CAAC,IAAI,CAAA;AACd,CAAC;AAiBD,6FAA6F;AAC7F,MAAM,UAAU,QAAQ,CAAC,CAAa,EAAE,CAAW,EAAE,CAAW,EAAE,IAAa;IAC9E,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,UAAU,GAAG,eAAe,CAAC,CAAC,CAAC,CAAA;IACrC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAA;IACtD,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;QACb,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC,CAAC,CAAE,CAAA;IAC9B,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA,CAAC,yBAAyB;IAC/E,MAAM,CAAC,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACzC,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA,CAAC,qCAAqC;QAC/F,MAAM,IAAI,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAA;QAC/E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,CAAC,CAAC,CAAC,CAAE,IAAI,IAAI,CAAC,EAAE,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,CAAA;QACvE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACjB,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,IAAI,EAAE,CAAA;AACpC,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,QAAQ,CAAC,CAAM,EAAE,CAAW;IAC3C,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAA;IACd,KAAK,MAAM,CAAC,IAAI
,CAAC,CAAC,KAAK;QAAE,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACtD,OAAO,CAAC,CAAA;AACT,CAAC"}
package/out/index.d.ts CHANGED
@@ -21,5 +21,6 @@ export * from "./comparators.js";
21
21
  export * from "./distance.js";
22
22
  export * from "./em.js";
23
23
  export * from "./fellegi-sunter.js";
24
+ export * from "./gbt.js";
24
25
  export * from "./tf.js";
25
26
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,SAAS,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,UAAU,CAAA;AACxB,cAAc,SAAS,CAAA"}
package/out/index.js CHANGED
@@ -21,5 +21,6 @@ export * from "./comparators.js";
21
21
  export * from "./distance.js";
22
22
  export * from "./em.js";
23
23
  export * from "./fellegi-sunter.js";
24
+ export * from "./gbt.js";
24
25
  export * from "./tf.js";
25
26
  //# sourceMappingURL=index.js.map
package/out/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,SAAS,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,cAAc,eAAe,CAAA;AAC7B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,eAAe,CAAA;AAC7B,cAAc,SAAS,CAAA;AACvB,cAAc,qBAAqB,CAAA;AACnC,cAAc,UAAU,CAAA;AACxB,cAAc,SAAS,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/match",
3
- "version": "4.8.1",
3
+ "version": "4.10.0",
4
4
  "description": "The geocode-first record matcher: block → score → cluster. This first cut ships the string comparators (Jaro / Jaro-Winkler + an edit-distance fallback for compound surnames) that the Fellegi-Sunter scorer is built on.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {