@mailwoman/registry 4.8.1 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * The learned scorer (#603) — the production wiring for the gradient-boosted-tree model behind
7
+ * {@link ResolveConfig.scorer}. Two pieces:
8
+ *
9
+ * 1. {@link createMatchFeaturizer} — the ONE feature extractor for a candidate pair, used identically
10
+ * at train time (`scripts/record-matcher/train-gbt.ts`), eval time (the learned-scorer
11
+ * evals), and inference time (here). A pair → one-hot of each comparison's agreement level +
12
+ * the over-merge interaction terms (co-located × name/org disagreement) + address
13
+ * crowdedness.
14
+ * 2. {@link createGbtScorer} — wraps a trained {@link GBT} + the featurizer into the `(a, b) => number`
15
+ * the resolve pipeline's `scorer` hook expects (a logit, threshold-comparable with the
16
+ * Fellegi-Sunter weight it replaces).
17
+ *
18
+ * Both take the comparison set as INPUT (rather than importing {@link buildDefaultModel}) so this
19
+ * module has no dependency cycle with `resolve.ts`. The contract that keeps train ≡ inference:
20
+ * feed the comparisons from `buildDefaultModel({ collapseSpatial: true, addressFrequency })` —
21
+ * the model's structure (and thus the feature layout) is fixed by that config; only the frequency
22
+ * VALUES differ between the training corpus and the matched set, which is the point (the model
23
+ * generalizes, as the cross-state eval showed).
24
+ */
25
+ import { agreementPattern, gbtScore } from "@mailwoman/match";
26
/**
 * Create the per-pair feature extractor shared by training, evaluation, and inference.
 *
 * The returned function lays its vector out in this order:
 *
 * 1. one one-hot block per comparison, with one slot per agreement level of that comparison;
 * 2. two over-merge interaction flags: spatial at level 0 combined with BOTH `given` and `family`
 *    at their last (catch-all) level, then spatial at level 0 combined with `organization` at its
 *    last level;
 * 3. address crowdedness: the frequency of `a`'s raw address multiplied by 1000 and capped at 1
 *    (0 when `a` carries no raw address).
 *
 * Nothing here depends on EM state, so the layout is fixed by the comparison set alone.
 *
 * @param config - `comparisons` (each exposing `name` and a `levels` array) plus an
 *   `addressFrequency` object exposing `frequency(rawAddress)`.
 * @returns A function `(a, b) => number[]` producing the feature vector for one candidate pair.
 */
export function createMatchFeaturizer(config) {
    const { comparisons, addressFrequency } = config;
    const slotsPerComparison = comparisons.map((comparison) => comparison.levels.length);
    const positionByName = Object.fromEntries(comparisons.map((comparison, position) => [comparison.name, position]));
    const spatialPos = positionByName["spatial"];
    const givenPos = positionByName["given"];
    const familyPos = positionByName["family"];
    const orgPos = positionByName["organization"];
    return function featurize(a, b) {
        const pattern = agreementPattern(comparisons, a, b);
        const features = [];
        // One-hot encode the observed agreement level of every comparison.
        for (let position = 0; position < pattern.length; position++) {
            const observed = pattern[position];
            const width = slotsPerComparison[position];
            for (let level = 0; level < width; level++) {
                features.push(observed === level ? 1 : 0);
            }
        }
        // True when the named comparison exists and landed on its final (catch-all) level.
        const atLastLevel = (position) => position !== undefined && pattern[position] === slotsPerComparison[position] - 1;
        // Co-located means the spatial comparison reported level 0 (exact).
        const coLocated = spatialPos !== undefined && pattern[spatialPos] === 0;
        const namesDisagree = atLastLevel(givenPos) && atLastLevel(familyPos);
        const orgDisagrees = atLastLevel(orgPos);
        // The over-merge signature: same place, names (or organization) disagree.
        features.push(coLocated && namesDisagree ? 1 : 0);
        features.push(coLocated && orgDisagrees ? 1 : 0);
        // Address crowdedness (how shared this address is) — high → "same address" is weak evidence.
        const rawAddress = a.address?.raw;
        const frequency = rawAddress ? addressFrequency.frequency(rawAddress) : 0;
        features.push(Math.min(1, frequency * 1000)); // scale into a usable range
        return features;
    };
}
67
/**
 * Turn a trained {@link GBT} into the `(a, b) => number` link scorer that
 * {@link ResolveConfig.scorer} expects.
 *
 * The featurizer is built once from `config`; every call then featurizes the pair and hands the
 * vector to `gbtScore` together with `config.model`. The result is the model's logit, which is
 * threshold-comparable with the Fellegi-Sunter weight it replaces, so the pipeline's clustering and
 * threshold semantics stay as they were.
 *
 * @param config - The featurizer configuration (`comparisons`, `addressFrequency`) plus `model`.
 * @returns A function scoring one candidate pair.
 */
export function createGbtScorer(config) {
    const { model } = config;
    const toFeatures = createMatchFeaturizer(config);
    return function scorePair(a, b) {
        const features = toFeatures(a, b);
        return gbtScore(model, features);
    };
}
78
+ //# sourceMappingURL=learned-scorer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"learned-scorer.js","sourceRoot":"","sources":["../learned-scorer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAE,gBAAgB,EAA6B,QAAQ,EAA2B,MAAM,kBAAkB,CAAA;AAmBjH;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAA4B;IACjE,MAAM,EAAE,WAAW,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAAA;IAChD,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IAC3D,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAuC,CAAA;IAC9G,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,CAAA;IACjC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,CAAA;IAC7B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAA;IAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,cAAc,CAAC,CAAA;IAClC,MAAM,SAAS,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,WAAW,CAAC,CAAC,CAAE,GAAG,CAAC,CAAA;IAE5D,OAAO,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACf,MAAM,GAAG,GAAG,gBAAgB,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;QAC/C,MAAM,CAAC,GAAa,EAAE,CAAA;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAE,EAAE,CAAC,EAAE;gBAAE,CAAC,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QACpE,CAAC;QACD,8FAA8F;QAC9F,MAAM,YAAY,GAAG,QAAQ,KAAK,SAAS,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAC1E,MAAM,YAAY,GACjB,MAAM,KAAK,SAAS;YACpB,OAAO,KAAK,SAAS;YACrB,GAAG,CAAC,MAAM,CAAC,KAAK,SAAS,CAAC,MAAM,CAAC;YACjC,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,CAAC,OAAO,CAAC;YAClC,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,CAAC,CAAA;QACL,MAAM,WAAW,GAAG,IAAI,KAAK,SAAS,IAAI,GAAG,CAAC,IAAI,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAC/E,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC,CAAA,CAAC,uDAAuD;QAC3F,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC,CAAA;QAClC,6FAA6F;QAC7F,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,C
AAA;QAC3E,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC,CAAA,CAAC,4BAA4B;QAC7D,OAAO,CAAC,CAAA;IACT,CAAC,CAAA;AACF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAC9B,MAA6C;IAE7C,MAAM,SAAS,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAA;IAC/C,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAA;IACxB,OAAO,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;AAClD,CAAC"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Render resolved entities as a standalone map page — the visual complement to {@link toGeoJSON}'s
7
+ * QGIS/analyst export. `toMapHTML(featureCollection)` returns one HTML file you open in a
8
+ * browser; no build step and no backend — a static file server is enough (see the note below).
9
+ *
10
+ * It renders on the HOUSE stack: MapLibre GL + a Protomaps basemap (`@protomaps/basemaps` generates
11
+ * the `layers()` for a named flavor) over the `basemap-v4` vector tiles served from R2 at
12
+ * `tiles.sister.software` — the same basemap the demo uses. Each entity is a circle sized by
13
+ * records-merged and colored by cross-dataset-link status (≥2 sources stand out), or
14
+ * categorically by `bucket` when the reconciliation output carries one. Pure: GeoJSON in, HTML
15
+ * string out (the Protomaps layer specs are generated at this point and inlined). The generated
16
+ * page fetches MapLibre, the basemap tiles, and glyphs/sprite over the network when opened (the
17
+ * house infra).
18
+ *
19
+ * SERVE IT OVER LOCALHOST, don't open it as a file. The house tile server (`tiles.sister.software`)
20
+ * CORS-restricts to localhost + the docs domains, so a `file://` page shows the (accurate)
21
+ * markers on a blank basemap. `npx serve` / `python3 -m http.server` in the output directory is
22
+ * enough; the page also surfaces a hint banner when it detects it's running from `file://`.
23
+ *
24
+ * Neutral entity-resolution view: it shows what resolved to what and how confidently (cohesion).
25
+ * Bucket labels render verbatim from the data, never editorialized.
26
+ */
27
+ import type { GeoJsonFeatureCollection } from "./types.js";
28
+ /** Protomaps stock flavors (shipped by `@protomaps/basemaps`). */
29
+ export type MapFlavor = "light" | "dark" | "white" | "grayscale" | "black";
30
+ export interface MapHTMLOptions {
31
+ /** Document `<title>` + on-map heading. Default: "Mailwoman — resolved entities". */
32
+ title?: string;
33
+ /** Protomaps basemap flavor. Default: "light" (data points read cleanly over it). */
34
+ flavor?: MapFlavor;
35
+ /**
36
+ * How to color the markers:
37
+ *
38
+ * - `"auto"` (default) — by `bucket` if any feature carries one (reconciliation output), else by
39
+ * cross-dataset-link status.
40
+ * - `"sources"` — always by cross-dataset-link status (≥2 sources stand out).
41
+ * - `"bucket"` — always by the `bucket` property.
42
+ */
43
+ colorBy?: "auto" | "sources" | "bucket";
44
+ }
45
+ /**
46
+ * Render `geojson` (a {@link toGeoJSON} / reconciliation FeatureCollection) as a complete,
47
+ * standalone HTML document. Entities without a coordinate are already absent from those
48
+ * collections; an empty collection renders a friendly empty state rather than a broken map.
49
+ */
50
+ export declare function toMapHTML(geojson: GeoJsonFeatureCollection, options?: MapHTMLOptions): string;
51
+ //# sourceMappingURL=map-html.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"map-html.d.ts","sourceRoot":"","sources":["../map-html.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAuB1D,kEAAkE;AAClE,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,WAAW,GAAG,OAAO,CAAA;AAE1E,MAAM,WAAW,cAAc;IAC9B,qFAAqF;IACrF,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,qFAAqF;IACrF,MAAM,CAAC,EAAE,SAAS,CAAA;IAClB;;;;;;;OAOG;IACH,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,QAAQ,CAAA;CACvC;AAyBD;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,OAAO,EAAE,wBAAwB,EAAE,OAAO,GAAE,cAAmB,GAAG,MAAM,CAmMjG"}
@@ -0,0 +1,262 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Render resolved entities as a standalone map page — the visual complement to {@link toGeoJSON}'s
7
+ * QGIS/analyst export. `toMapHTML(featureCollection)` returns one HTML file you open in a
8
+ * browser; no build step and no backend — a static file server is enough (see the note below).
9
+ *
10
+ * It renders on the HOUSE stack: MapLibre GL + a Protomaps basemap (`@protomaps/basemaps` generates
11
+ * the `layers()` for a named flavor) over the `basemap-v4` vector tiles served from R2 at
12
+ * `tiles.sister.software` — the same basemap the demo uses. Each entity is a circle sized by
13
+ * records-merged and colored by cross-dataset-link status (≥2 sources stand out), or
14
+ * categorically by `bucket` when the reconciliation output carries one. Pure: GeoJSON in, HTML
15
+ * string out (the Protomaps layer specs are generated at this point and inlined). The generated
16
+ * page fetches MapLibre, the basemap tiles, and glyphs/sprite over the network when opened (the
17
+ * house infra).
18
+ *
19
+ * SERVE IT OVER LOCALHOST, don't open it as a file. The house tile server (`tiles.sister.software`)
20
+ * CORS-restricts to localhost + the docs domains, so a `file://` page shows the (accurate)
21
+ * markers on a blank basemap. `npx serve` / `python3 -m http.server` in the output directory is
22
+ * enough; the page also surfaces a hint banner when it detects it's running from `file://`.
23
+ *
24
+ * Neutral entity-resolution view: it shows what resolved to what and how confidently (cohesion).
25
+ * Bucket labels render verbatim from the data, never editorialized.
26
+ */
27
+ import { layers, namedFlavor } from "@protomaps/basemaps";
28
+ /** MapLibre GL release the page pins (CDN + SRI). Matches the workspace's `maplibre-gl` major. */
29
+ const MAPLIBRE_VERSION = "5.24.0";
30
+ const MAPLIBRE_JS_SRI = "sha384-5+cfbwT0iiub6VsQAdn6yz16nr6sDiQoHx6tm4O8OVYXHYOxcffFmCJBL0dgdvGp";
31
+ const MAPLIBRE_CSS_SRI = "sha384-uTttxo/aOKbdE5RlD/SPzSDoDmNvGlUYPjONi2MN/b7c9HPSvW07OIuyP7uL6jxK";
32
+ /**
33
+ * The house Protomaps basemap: `basemap-v4` PMTiles (tile-worker → R2 at `tiles.sister.software`,
34
+ * which sends CORS for localhost + the docs domains).
35
+ *
36
+ * Glyphs + sprite come from the UPSTREAM Protomaps assets (GitHub Pages, `ACAO: *`), not the house
37
+ * mirror at `public.sister.software` — that bucket sends no CORS headers, so the mirror can't be
38
+ * fetched cross-origin (`cartographer/base/composition.ts` flags the same: "Currently upstream
39
+ * URLs; we mirror these … but no public route fronts that bucket yet"). The upstream assets target
40
+ * the v4 schema, matching the `basemap-v4` tiles. Swap to the house mirror once it has a
41
+ * CORS-enabled route.
42
+ */
43
+ const BASEMAP_SOURCE_ID = "basemap-v4";
44
+ const BASEMAP_TILEJSON_URL = "https://tiles.sister.software/basemap-v4.json";
45
+ const GLYPHS_URL = "https://protomaps.github.io/basemaps-assets/fonts/{fontstack}/{range}.pbf";
46
+ const SPRITE_URL = "https://protomaps.github.io/basemaps-assets/sprites/v4/light";
47
+ /** Categorical palette (reused for buckets; cycles if there are more buckets than entries). */
48
+ const PALETTE = ["#2f9e44", "#f08c00", "#1971c2", "#e8590c", "#9c36b5", "#0c8599", "#e03131", "#5c940d"];
49
+ const SINGLE_COLOR = "#3388ff"; // single-source entity
50
+ const CROSS_COLOR = "#e8590c"; // cross-dataset link (≥2 sources)
51
/**
 * Serialize `value` as JSON that is safe to inline inside an HTML `<script>` element.
 *
 * `JSON.stringify` alone isn't enough — a record value containing `</script>` would close the block
 * early. Escaping `<`, `>` and `&` to `\uXXXX` keeps the JSON valid and makes a breakout impossible.
 * U+2028 / U+2029 are escaped as well: they are legal raw inside JSON strings but are line
 * terminators in pre-ES2019 JavaScript, where they would turn the inlined literal into a syntax
 * error.
 *
 * @param value - Any JSON-serializable value.
 * @returns The escaped JSON text. Values `JSON.stringify` cannot represent at the top level
 *   (`undefined`, functions, symbols) are emitted as `null` rather than throwing.
 */
function safeJsonForScript(value) {
    // JSON.stringify returns undefined (not a string) for undefined / functions / symbols.
    const json = JSON.stringify(value) ?? "null";
    return json.replace(/[<>&\u2028\u2029]/g, (ch) => "\\u" + ch.charCodeAt(0).toString(16).padStart(4, "0"));
}
59
/** Replacement entity for each character that is significant in HTML text or attribute values. */
const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
/**
 * Escape text for the HTML document body (title/heading), not the inlined script.
 *
 * Done in a single pass so an already-produced entity is never re-scanned. The single quote is
 * escaped too, so the result stays safe if it is ever placed inside a single-quoted attribute.
 *
 * @param text - The raw string to escape.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
function escapeHtml(text) {
    return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}
63
/**
 * Count the entries of a feature's `sources` property.
 *
 * @param props - A GeoJSON feature's `properties`; may be `null`/`undefined`, which GeoJSON allows.
 * @returns The length of `props.sources` when it is an array, otherwise 0.
 */
function sourceCount(props) {
    const sources = props?.["sources"];
    return Array.isArray(sources) ? sources.length : 0;
}
66
/**
 * Render `geojson` (a {@link toGeoJSON} / reconciliation FeatureCollection) as a complete,
 * standalone HTML document. An empty collection renders a friendly empty state rather than a
 * broken map.
 *
 * The input is never mutated: each feature is shallow-copied with a precomputed `_color` property
 * so the circle layer can read its color with a plain `["get", "_color"]`.
 *
 * Defensive behavior:
 * - `properties: null` (legal GeoJSON) is treated as an empty property bag.
 * - A feature whose geometry is missing or whose coordinates are not finite numbers is kept in the
 *   data but excluded from the bounding box; with no usable coordinate at all the map keeps its
 *   default view instead of fitting to invalid bounds.
 * - Bucket colors are tracked in a `Map`, so bucket names that collide with `Object.prototype`
 *   members (`"constructor"`, `"toString"`, …) get a palette color like any other, and the legend
 *   keeps first-seen order even for integer-like bucket names.
 *
 * @param geojson - The FeatureCollection to render; point features are expected.
 * @param options - `title` (document title + heading), `flavor` (Protomaps flavor name, default
 *   `"light"`), and `colorBy` (`"auto"` | `"sources"` | `"bucket"`, default `"auto"`).
 * @returns The HTML document as a string.
 */
export function toMapHTML(geojson, options = {}) {
    const title = options.title ?? "Mailwoman — resolved entities";
    const flavorName = options.flavor ?? "light";
    const colorBy = options.colorBy ?? "auto";
    // GeoJSON allows `properties: null`; normalize once so every reader below is null-safe.
    const propsOf = (f) => f.properties ?? {};
    // Features without a bucket share the "—" placeholder bucket.
    const bucketOf = (props) => (props["bucket"] != null ? String(props["bucket"]) : "—");
    const hasBuckets = geojson.features.some((f) => propsOf(f)["bucket"] != null);
    const mode = colorBy === "auto" ? (hasBuckets ? "bucket" : "sources") : colorBy;
    // Assign a color to each distinct bucket value, in first-seen order (cycling through PALETTE).
    const bucketColors = new Map();
    if (mode === "bucket") {
        for (const f of geojson.features) {
            const bucket = bucketOf(propsOf(f));
            if (!bucketColors.has(bucket)) {
                bucketColors.set(bucket, PALETTE[bucketColors.size % PALETTE.length]);
            }
        }
    }
    const colorFor = (props) => {
        if (mode === "bucket") {
            return bucketColors.get(bucketOf(props)) ?? SINGLE_COLOR;
        }
        return sourceCount(props) >= 2 ? CROSS_COLOR : SINGLE_COLOR;
    };
    // Precompute the per-feature color (`_color`) and the bounding box, both at generate time.
    let minLng = Infinity;
    let minLat = Infinity;
    let maxLng = -Infinity;
    let maxLat = -Infinity;
    const features = geojson.features.map((f) => {
        const props = propsOf(f);
        const [lng, lat] = f.geometry?.coordinates ?? [];
        // Only finite coordinate pairs may widen the bbox; Infinity/NaN would serialize as null.
        if (Number.isFinite(lng) && Number.isFinite(lat)) {
            minLng = Math.min(minLng, lng);
            minLat = Math.min(minLat, lat);
            maxLng = Math.max(maxLng, lng);
            maxLat = Math.max(maxLat, lat);
        }
        return { ...f, properties: { ...props, _color: colorFor(props) } };
    });
    // min <= max holds exactly when at least one finite coordinate pair was seen.
    const bbox = minLng <= maxLng ? [[minLng, minLat], [maxLng, maxLat]] : null;
    // The full MapLibre style: the Protomaps basemap layers (generated here) over the house basemap-v4
    // vector source, plus our inlined entities source + a circle layer keyed off the precomputed color.
    const style = {
        version: 8,
        glyphs: GLYPHS_URL,
        // NOTE(review): SPRITE_URL is one fixed sprite set regardless of `flavor` — confirm whether
        // non-default flavors should point at a matching sprite.
        sprite: SPRITE_URL,
        sources: {
            [BASEMAP_SOURCE_ID]: { type: "vector", url: BASEMAP_TILEJSON_URL },
            entities: { type: "geojson", data: { type: "FeatureCollection", features } },
        },
        layers: [
            ...layers(BASEMAP_SOURCE_ID, namedFlavor(flavorName), { lang: "en" }),
            {
                id: "mw-entities",
                type: "circle",
                source: "entities",
                paint: {
                    // Radius grows with records merged: 1 → 5px, 5 → 9px, 25 → 15px, 100 → 22px.
                    "circle-radius": [
                        "interpolate",
                        ["linear"],
                        ["coalesce", ["get", "recordCount"], 1],
                        1,
                        5,
                        5,
                        9,
                        25,
                        15,
                        100,
                        22,
                    ],
                    "circle-color": ["get", "_color"],
                    "circle-stroke-color": "#ffffff",
                    "circle-stroke-width": 1.2,
                    "circle-opacity": 0.9,
                },
            },
        ],
    };
    // Legend rows, built here so arbitrary bucket sets render without client-side guessing.
    const legendRows = mode === "bucket"
        ? Array.from(bucketColors, ([bucket, color]) => `<div><i style="background:${color}"></i>${escapeHtml(bucket)}</div>`).join("")
        : `<div><i style="background:${CROSS_COLOR}"></i>cross-dataset link (&ge;2 sources)</div>` +
            `<div><i style="background:${SINGLE_COLOR}"></i>single-source entity</div>` +
            `<div class="muted" style="margin-top:4px">marker size = records merged</div>`;
    const crossLinks = geojson.features.filter((f) => sourceCount(propsOf(f)) >= 2).length;
    const summary = `${geojson.features.length} entities` + (mode === "sources" ? ` &middot; ${crossLinks} cross-dataset links` : "");
    // The client script avoids template literals, backslashes and `${` so it survives this outer
    // template verbatim.
    return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml(title)}</title>
<link
  rel="stylesheet"
  href="https://unpkg.com/maplibre-gl@${MAPLIBRE_VERSION}/dist/maplibre-gl.css"
  integrity="${MAPLIBRE_CSS_SRI}"
  crossorigin="anonymous" />
<style>
  html, body { margin: 0; height: 100%; font-family: system-ui, -apple-system, "Segoe UI", sans-serif; }
  #map { position: absolute; inset: 0; }
  .mw-panel {
    position: absolute; z-index: 1; background: rgba(255,255,255,0.94); padding: 10px 12px;
    border-radius: 8px; box-shadow: 0 1px 6px rgba(0,0,0,0.3); font-size: 12px; line-height: 1.5; color: #1a1a1a;
  }
  .mw-title { top: 10px; left: 10px; max-width: 60%; }
  .mw-title h1 { font-size: 14px; margin: 0 0 4px; }
  .mw-legend { bottom: 22px; right: 10px; }
  .mw-legend i { display: inline-block; width: 12px; height: 12px; margin-right: 6px; border-radius: 50%; vertical-align: -1px; }
  .muted { color: #666; }
  .mw-popup { font-size: 12px; line-height: 1.5; max-width: 260px; }
  .mw-popup .nm { font-weight: 600; font-size: 13px; }
  .mw-popup dt { color: #666; display: inline; }
  .mw-popup .link { color: ${CROSS_COLOR}; font-weight: 600; }
  .mw-empty { position: absolute; inset: 0; display: grid; place-items: center; z-index: 1; }
  .mw-warn { bottom: 10px; left: 10px; max-width: 52%; background: rgba(255,243,205,0.97); }
  .mw-warn code { background: rgba(0,0,0,0.06); padding: 0 3px; border-radius: 3px; }
</style>
</head>
<body>
<div id="map"></div>
<div class="mw-panel mw-title"><h1>${escapeHtml(title)}</h1><div class="muted">${summary}</div></div>
${features.length ? `<div class="mw-panel mw-legend">${legendRows}</div>` : `<div class="mw-empty"><div class="mw-panel"><h1>${escapeHtml(title)}</h1><div class="muted">No geocoded entities to display.</div></div></div>`}
<script
  src="https://unpkg.com/maplibre-gl@${MAPLIBRE_VERSION}/dist/maplibre-gl.js"
  integrity="${MAPLIBRE_JS_SRI}"
  crossorigin="anonymous"></script>
<script>
  "use strict";
  var STYLE = ${safeJsonForScript(style)};
  var BBOX = ${safeJsonForScript(bbox)};

  function esc(v) {
    if (v === null || v === undefined) return "";
    return String(v).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
  }

  // Features handed to layer events come back from the tiled representation, where array/object
  // properties arrive as JSON strings. Accept both the real array and its stringified form.
  function asList(v) {
    if (Array.isArray(v)) return v;
    if (typeof v !== "string" || v === "") return [];
    try {
      var parsed = JSON.parse(v);
      return Array.isArray(parsed) ? parsed : [];
    } catch (err) {
      return [];
    }
  }

  function popupHtml(p) {
    var rows = [];
    var heading = p.name || p.organization || p.entityId || "entity";
    rows.push('<div class="nm">' + esc(heading) + '</div>');
    if (p.organization && p.organization !== p.name) rows.push('<div>' + esc(p.organization) + '</div>');
    if (p.address) rows.push('<div class="muted">' + esc(p.address) + '</div>');
    rows.push('<hr style="border:none;border-top:1px solid #eee;margin:6px 0" />');
    rows.push('<div><dt>records merged:</dt> ' + esc(p.recordCount) + '</div>');
    var srcs = asList(p.sources);
    if (srcs.length) {
      var cls = srcs.length >= 2 ? ' class="link"' : '';
      rows.push('<div><dt>sources:</dt> <span' + cls + '>' + esc(srcs.join(", ")) + '</span>'
        + (srcs.length >= 2 ? ' (cross-dataset link)' : '') + '</div>');
    }
    if (p.bucket !== null && p.bucket !== undefined) rows.push('<div><dt>bucket:</dt> ' + esc(p.bucket) + '</div>');
    if (p.cohesion !== null && p.cohesion !== undefined) rows.push('<div><dt>cohesion:</dt> ' + esc(p.cohesion) + ' bits</div>');
    if (p.geocodeTier) rows.push('<div><dt>geocode tier:</dt> ' + esc(p.geocodeTier) + '</div>');
    return '<div class="mw-popup">' + rows.join("") + '</div>';
  }

  var map = new maplibregl.Map({ container: "map", style: STYLE, center: [-98, 39], zoom: 3, attributionControl: { compact: true } });
  map.addControl(new maplibregl.NavigationControl({ showCompass: false }), "top-right");

  map.on("load", function () {
    if (BBOX) map.fitBounds(BBOX, { padding: 56, maxZoom: 15, duration: 0 });
  });
  map.on("click", "mw-entities", function (e) {
    var f = e.features && e.features[0];
    if (!f) return;
    new maplibregl.Popup({ closeButton: true }).setLngLat(e.lngLat).setHTML(popupHtml(f.properties || {})).addTo(map);
  });
  map.on("mouseenter", "mw-entities", function () { map.getCanvas().style.cursor = "pointer"; });
  map.on("mouseleave", "mw-entities", function () { map.getCanvas().style.cursor = ""; });

  // The house basemap tiles are CORS-restricted to localhost + the docs domain, so a page opened
  // straight off disk (file://) shows the markers on a blank basemap. Make that explicit rather than
  // silent — the entity positions are correct regardless of whether the basemap paints.
  if (location.protocol === "file:") {
    var warn = document.createElement("div");
    warn.className = "mw-panel mw-warn";
    warn.innerHTML = "⚠ Basemap tiles are blocked from <code>file://</code>. Serve this over "
      + "<code>http://localhost</code> (e.g. <code>npx serve</code> or <code>python3 -m http.server</code>) "
      + "to see the map background — the markers are accurate either way.";
    document.body.appendChild(warn);
  }
</script>
</body>
</html>
`;
}
262
+ //# sourceMappingURL=map-html.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"map-html.js","sourceRoot":"","sources":["../map-html.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAGzD,kGAAkG;AAClG,MAAM,gBAAgB,GAAG,QAAQ,CAAA;AACjC,MAAM,eAAe,GAAG,yEAAyE,CAAA;AACjG,MAAM,gBAAgB,GAAG,yEAAyE,CAAA;AAElG;;;;;;;;;;GAUG;AACH,MAAM,iBAAiB,GAAG,YAAY,CAAA;AACtC,MAAM,oBAAoB,GAAG,+CAA+C,CAAA;AAC5E,MAAM,UAAU,GAAG,2EAA2E,CAAA;AAC9F,MAAM,UAAU,GAAG,8DAA8D,CAAA;AAqBjF,+FAA+F;AAC/F,MAAM,OAAO,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAA;AACxG,MAAM,YAAY,GAAG,SAAS,CAAA,CAAC,uBAAuB;AACtD,MAAM,WAAW,GAAG,SAAS,CAAA,CAAC,kCAAkC;AAEhE;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAc;IACxC,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;AACxG,CAAC;AAED,sFAAsF;AACtF,SAAS,UAAU,CAAC,IAAY;IAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACvG,CAAC;AAED,SAAS,WAAW,CAAC,KAA8B;IAClD,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;AACrE,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,SAAS,CAAC,OAAiC,EAAE,UAA0B,EAAE;IACxF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,+BAA+B,CAAA;IAC9D,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAA;IAC5C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,MAAM,CAAA;IAEzC,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAA;IACjF,MAAM,IAAI,GAAG,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAA;IAE/E,qEAAqE;IACrE,MAAM,YAAY,GAA2B,EAAE,CAAA;IAC/C,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACvB,IAAI,CAAC,GAAG,CAAC,CAAA;QACT,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YAClC,MAAM,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,
CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YACjF,IAAI,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC;gBAAE,YAAY,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,MAAM,CAAE,CAAA;QAC3E,CAAC;IACF,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,KAA8B,EAAU,EAAE;QAC3D,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YACjE,OAAO,YAAY,CAAC,CAAC,CAAC,IAAI,YAAY,CAAA;QACvC,CAAC;QACD,OAAO,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,YAAY,CAAA;IAC5D,CAAC,CAAA;IAED,kGAAkG;IAClG,0FAA0F;IAC1F,IAAI,MAAM,GAAG,QAAQ,CAAA;IACrB,IAAI,MAAM,GAAG,QAAQ,CAAA;IACrB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAA;IACtB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAA;IACtB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC3C,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAA;QACzC,IAAI,GAAG,GAAG,MAAM;YAAE,MAAM,GAAG,GAAG,CAAA;QAC9B,IAAI,GAAG,GAAG,MAAM;YAAE,MAAM,GAAG,GAAG,CAAA;QAC9B,IAAI,GAAG,GAAG,MAAM;YAAE,MAAM,GAAG,GAAG,CAAA;QAC9B,IAAI,GAAG,GAAG,MAAM;YAAE,MAAM,GAAG,GAAG,CAAA;QAC9B,OAAO,EAAE,GAAG,CAAC,EAAE,UAAU,EAAE,EAAE,GAAG,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,EAAE,CAAA;IACjF,CAAC,CAAC,CAAA;IACF,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,MAAM,CAAU,EAAE,CAAC,MAAM,EAAE,MAAM,CAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAE5F,mGAAmG;IACnG,oGAAoG;IACpG,MAAM,KAAK,GAAG;QACb,OAAO,EAAE,CAAC;QACV,MAAM,EAAE,UAAU;QAClB,MAAM,EAAE,UAAU;QAClB,OAAO,EAAE;YACR,CAAC,iBAAiB,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,oBAAoB,EAAE;YAClE,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE,QAAQ,EAAE,EAAE;SAC5E;QACD,MAAM,EAAE;YACP,GAAI,MAAM,CAAC,iBAAiB,EAAE,WAAW,CAAC,UAAU,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAe;YACpF;gBACC,EAAE,EAAE,aAAa;gBACjB,IAAI,EAAE,QAAQ;gBACd,MAAM,EAAE,UAAU;gBAClB,KAAK,EAAE;oBACN,eAAe,EAAE;wBAChB,aAAa;wBACb,CAAC,QAAQ,CAAC;wBACV,CAAC,UAAU,EAAE,CAAC,KAAK,EAAE,aAAa,CAAC,EAAE,CAAC,CAAC;wBACvC,CAAC;wBACD,CAAC;wBACD,CAAC;wBACD,CAAC;wBACD,EAAE;wBACF,EAAE;wBACF,
GAAG;wBACH,EAAE;qBACF;oBACD,cAAc,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC;oBACjC,qBAAqB,EAAE,SAAS;oBAChC,qBAAqB,EAAE,GAAG;oBAC1B,gBAAgB,EAAE,GAAG;iBACrB;aACD;SACD;KACD,CAAA;IAED,wFAAwF;IACxF,MAAM,UAAU,GACf,IAAI,KAAK,QAAQ;QAChB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC;aAC3B,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,6BAA6B,CAAC,SAAS,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;aAC7E,IAAI,CAAC,EAAE,CAAC;QACX,CAAC,CAAC,6BAA6B,WAAW,gDAAgD;YACzF,6BAA6B,YAAY,kCAAkC;YAC3E,8EAA8E,CAAA;IAEjF,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAA;IACxF,MAAM,OAAO,GACZ,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,WAAW,GAAG,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,UAAU,sBAAsB,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IAElH,mGAAmG;IACnG,OAAO;;;;;SAKC,UAAU,CAAC,KAAK,CAAC;;;uCAGa,gBAAgB;cACzC,gBAAgB;;;;;;;;;;;;;;;;;4BAiBF,WAAW;;;;;;;;qCAQF,UAAU,CAAC,KAAK,CAAC,2BAA2B,OAAO;EACtF,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,mCAAmC,UAAU,QAAQ,CAAC,CAAC,CAAC,mDAAmD,UAAU,CAAC,KAAK,CAAC,4EAA4E;;sCAEtL,gBAAgB;cACxC,eAAe;;;;cAIf,iBAAiB,CAAC,KAAK,CAAC;aACzB,iBAAiB,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuDnC,CAAA;AACD,CAAC"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * GENERATED by scripts/record-matcher/train-gbt.ts — DO NOT edit by hand; retrain to update.
7
+ *
8
+ * The default learned-scorer model (#603): a gradient-boosted-tree dedup scorer trained on the
9
+ * NPPES NPI-truth set (TX, 3000 NPIs → 707975 candidate pairs). Validated to generalize across
10
+ * states by learned-scorer-crossstate-eval.ts. Used by resolveEntities' opt-in learnedScorer hook
11
+ * via createGbtScorer. The trained {@link GBT} is plain data.
12
+ */
13
+ import type { GBT } from "@mailwoman/match";
14
+ /** Provenance for the bundled model — what it was trained on. */
15
+ export declare const DEDUP_GBT_META: {
16
+ readonly version: "1.0.0";
17
+ readonly locale: "en-US";
18
+ readonly trainedOn: "2026-06-15";
19
+ readonly state: "TX";
20
+ readonly npis: 3000;
21
+ readonly records: 8602;
22
+ readonly pairs: 707975;
23
+ readonly posRate: 0.0088;
24
+ readonly hyperparams: {
25
+ readonly rounds: 120;
26
+ readonly depth: 3;
27
+ readonly lr: 0.3;
28
+ readonly minLeaf: 20;
29
+ };
30
+ readonly recommendedThreshold: 2.7143;
31
+ readonly features: 17;
32
+ readonly addressFrequencyDistinct: 3317267;
33
+ readonly addressFrequencyTotal: 9260504;
34
+ };
35
+ export declare const DEDUP_GBT_MODEL: GBT;
36
+ //# sourceMappingURL=dedup-gbt-en-us.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dedup-gbt-en-us.d.ts","sourceRoot":"","sources":["../../models/dedup-gbt-en-us.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,kBAAkB,CAAA;AAE3C,iEAAiE;AACjE,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;;CAmBjB,CAAA;AAGV,eAAO,MAAM,eAAe,EAAE,GAAo0vD,CAAA"}