@mailwoman/address-id 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/index.d.ts +76 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +91 -0
- package/out/index.js.map +1 -0
- package/package.json +33 -0
package/out/index.d.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `@mailwoman/address-id` — turn a canonicalized + geocoded address into a STABLE, parseable
|
|
7
|
+
* primary key: `<state>.<H3-cell>.<hash>`. The deterministic, exact-match complement to the fuzzy
|
|
8
|
+
* matcher (`@mailwoman/match`): where the matcher decides whether two messy records are probably
|
|
9
|
+
* the same entity, the address-id is a content-addressed key you can GROUP BY / JOIN ON without
|
|
10
|
+
* running the matcher at all — for the common "same canonical address" case.
|
|
11
|
+
*
|
|
12
|
+
* The three parts (see {@link createPostalAddressID}):
|
|
13
|
+
*
|
|
14
|
+
* - **state** — a coarse region prefix (`tx`, `ca`, …), from a supplied state or plucked from the
|
|
15
|
+
* address's ZIP ({@link @mailwoman/codex}); `xx` when unknown. Makes the key region-sortable.
|
|
16
|
+
* - **H3 cell** — a jitter-stable locality token from the resolved coordinate (`h3-js`'s
|
|
17
|
+
* `latLngToCell` at {@link ADDRESS_H3_RESOLUTION}). Coarse on purpose: two geocodes of the
|
|
18
|
+
* same place a few metres apart land in the same cell.
|
|
19
|
+
* - **hash** — a content hash of the address canonicalized by {@link @mailwoman/normalize} (so `123
|
|
20
|
+
* Main St` and `123 MAIN STREET` hash identically). This is the identity; the cell + state
|
|
21
|
+
* localize and partition it.
|
|
22
|
+
*
|
|
23
|
+
* Lineage: the isp-nexus `createPostalAddressID` / `parsePostalAddressID`. `@mailwoman/normalize`
|
|
24
|
+
* is the descendant of that era's `sanitize`, re-scoped to parser-input prep — this layer is the
|
|
25
|
+
* keying purpose, kept separate by design. (Self-contained on `h3-js`, not `@mailwoman/spatial`,
|
|
26
|
+
* which isn't published.)
|
|
27
|
+
*/
|
|
28
|
+
/**
 * A point on the globe, in the shape geocoders and resolvers hand back.
 *
 * NOTE(review): both values are presumably decimal degrees (this is the coordinate that gets
 * converted to an H3 cell) — confirm against the geocoder that produces them.
 */
export interface LatLng {
    /** North–south component of the coordinate. */
    latitude: number;
    /** East–west component of the coordinate. */
    longitude: number;
}
|
|
33
|
+
/**
 * Default H3 resolution used for the locality cell of an address key.
 *
 * Resolution 9 is deliberately coarse (hexagon edge of roughly 174 m): geocodes of one place that
 * differ by a few metres — situs point vs. interpolated point, ordinary geocoder jitter — normally
 * fall inside the same cell, which keeps the key stable. Fine-grained identity is carried by the
 * address hash, not by the cell. The value is defined in this package rather than taken from
 * `@mailwoman/spatial`, because that package is not published.
 */
export declare const ADDRESS_H3_RESOLUTION = 9;
|
|
40
|
+
/**
 * A stable address primary key of the form `<state>.<H3-cell>.<hash>`.
 *
 * The type is branded: the `__postalAddressID` member exists only in the type system, so an
 * arbitrary `string` is not assignable to it without going through a cast or a type guard.
 */
export type PostalAddressID = string & {
    readonly __postalAddressID: unique symbol;
};
|
|
47
|
+
/** Everything {@link createPostalAddressID} needs to build a key. */
export interface CreatePostalAddressIDInput {
    /** Resolved coordinate (the geocoder's output); it determines the locality cell of the key. */
    coordinate: LatLng;
    /** Address text to content-hash. It is canonicalized via {@link normalize} before hashing. */
    address: string;
    /**
     * 2-letter region/state used as the key prefix. When omitted, the state is plucked from the
     * address's ZIP; if that fails too, the prefix is `xx`.
     */
    state?: string;
    /** H3 resolution for the cell. Defaults to {@link ADDRESS_H3_RESOLUTION} (jitter-stable). */
    resolution?: number;
}
|
|
58
|
+
/** The three dot-delimited components of a {@link PostalAddressID}, as returned by the parser. */
export interface ParsedPostalAddressID {
    /** First component: the region/state prefix. */
    state: string;
    /** Second component: the H3 cell token. */
    cell: string;
    /** Third component: the address content hash. */
    hash: string;
}
|
|
64
|
+
/**
 * Build a stable {@link PostalAddressID} from a geocoded, canonicalizable address.
 *
 * The result is deterministic: the same (coordinate cell, canonical address, state) triple always
 * produces the same key. Two records that resolve to the same place and share a canonical address
 * therefore receive the SAME id, which makes it usable as a join/dedup key without running a
 * matcher. Canonical address strings that differ produce different keys; equivalence that is only
 * semantic (not string-identical after canonicalization) is left to the fuzzy matcher.
 *
 * @param input Coordinate, address text and optional state/resolution overrides.
 * @returns The key in `<state>.<H3-cell>.<hash>` form.
 */
export declare function createPostalAddressID(input: CreatePostalAddressIDInput): PostalAddressID;
|
|
72
|
+
/**
 * Split a {@link PostalAddressID} into its state, cell and hash components.
 *
 * @param id Candidate key string.
 * @returns The parsed parts, or `null` when `id` is not a well-formed key.
 */
export declare function parsePostalAddressID(id: string): ParsedPostalAddressID | null;
|
|
74
|
+
/**
 * Type guard for {@link PostalAddressID}.
 *
 * @param value Candidate string.
 * @returns `true` when `value` is a well-formed key, narrowing it to {@link PostalAddressID}.
 */
export declare function isPostalAddressID(value: string): value is PostalAddressID;
|
|
76
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAOH,6DAA6D;AAC7D,MAAM,WAAW,MAAM;IACtB,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;CACjB;AAED;;;;;GAKG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAA;AAEtC;;;GAGG;AACH,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG;IAAE,QAAQ,CAAC,iBAAiB,EAAE,OAAO,MAAM,CAAA;CAAE,CAAA;AAWpF,gDAAgD;AAChD,MAAM,WAAW,0BAA0B;IAC1C,kFAAkF;IAClF,UAAU,EAAE,MAAM,CAAA;IAClB,8FAA8F;IAC9F,OAAO,EAAE,MAAM,CAAA;IACf,qGAAqG;IACrG,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yFAAyF;IACzF,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,qDAAqD;AACrD,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;CACZ;AAwBD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,0BAA0B,GAAG,eAAe,CASxF;AAED,+EAA+E;AAC/E,wBAAgB,oBAAoB,CAAC,EAAE,EAAE,MAAM,GAAG,qBAAqB,GAAG,IAAI,CAI7E;AAED,oEAAoE;AACpE,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,IAAI,eAAe,CAEzE"}
|
package/out/index.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `@mailwoman/address-id` — turn a canonicalized + geocoded address into a STABLE, parseable
|
|
7
|
+
* primary key: `<state>.<H3-cell>.<hash>`. The deterministic, exact-match complement to the fuzzy
|
|
8
|
+
* matcher (`@mailwoman/match`): where the matcher decides whether two messy records are probably
|
|
9
|
+
* the same entity, the address-id is a content-addressed key you can GROUP BY / JOIN ON without
|
|
10
|
+
* running the matcher at all — for the common "same canonical address" case.
|
|
11
|
+
*
|
|
12
|
+
* The three parts (see {@link createPostalAddressID}):
|
|
13
|
+
*
|
|
14
|
+
* - **state** — a coarse region prefix (`tx`, `ca`, …), from a supplied state or plucked from the
|
|
15
|
+
* address's ZIP ({@link @mailwoman/codex}); `xx` when unknown. Makes the key region-sortable.
|
|
16
|
+
* - **H3 cell** — a jitter-stable locality token from the resolved coordinate (`h3-js`'s
|
|
17
|
+
* `latLngToCell` at {@link ADDRESS_H3_RESOLUTION}). Coarse on purpose: two geocodes of the
|
|
18
|
+
* same place a few metres apart land in the same cell.
|
|
19
|
+
* - **hash** — a content hash of the address canonicalized by {@link @mailwoman/normalize} (so `123
|
|
20
|
+
* Main St` and `123 MAIN STREET` hash identically). This is the identity; the cell + state
|
|
21
|
+
* localize and partition it.
|
|
22
|
+
*
|
|
23
|
+
* Lineage: the isp-nexus `createPostalAddressID` / `parsePostalAddressID`. `@mailwoman/normalize`
|
|
24
|
+
* is the descendant of that era's `sanitize`, re-scoped to parser-input prep — this layer is the
|
|
25
|
+
* keying purpose, kept separate by design. (Self-contained on `h3-js`, not `@mailwoman/spatial`,
|
|
26
|
+
* which isn't published.)
|
|
27
|
+
*/
|
|
28
|
+
import { us } from "@mailwoman/codex";
|
|
29
|
+
import { normalize } from "@mailwoman/normalize";
|
|
30
|
+
import { latLngToCell } from "h3-js";
|
|
31
|
+
import { createHash } from "node:crypto";
|
|
32
|
+
/**
 * Default H3 resolution used for the locality cell of an address key.
 *
 * Resolution 9 is deliberately coarse (hexagon edge of roughly 174 m): geocodes of one place that
 * differ by a few metres — situs point vs. interpolated point, ordinary geocoder jitter — normally
 * fall inside the same cell, which keeps the key stable. Fine-grained identity is carried by the
 * address hash, not by the cell. The value is defined here rather than imported from
 * `@mailwoman/spatial`, because that package is not published.
 */
export const ADDRESS_H3_RESOLUTION = 9;
/**
 * Shape of a serialized key: `<state>.<cell>.<hash>` — dot-delimited, lowercase only.
 *
 * - group 1: exactly two letters `a`–`z` (the state prefix);
 * - group 2: 1 to 15 hex digits (the H3 cell);
 * - group 3: 8 or more hex digits (the content hash).
 *
 * The pattern is anchored at both ends and carries no `g`/`y` flag, so `exec` and `test` keep no
 * `lastIndex` state between calls.
 */
const POSTAL_ADDRESS_ID_PATTERN = /^([a-z]{2})\.([0-9a-f]{1,15})\.([0-9a-f]{8,})$/;
/**
 * Number of leading hex characters of the address digest kept in the key (16 hex chars = 64 bits).
 *
 * NOTE(review): taken globally, 64 bits is not collision-free at billions of hashes (the birthday
 * bound sits at a few billion). A clash only yields a duplicate key when the two addresses also
 * share the same state prefix and H3 cell, which makes it negligible in practice.
 */
const HASH_LENGTH = 16;
|
|
46
|
+
/**
 * Reduce an address string to the canonical text that gets content-hashed.
 *
 * The input is run through {@link normalize}; the `normalized` string of its result is then
 * upper-cased and trimmed, so inputs that normalize to the same text (casing, spacing and
 * abbreviation variants) produce the same hash input.
 *
 * @param address Raw address string.
 * @returns The upper-cased, trimmed normalized form of `address`.
 */
function canonicalizeForHash(address) {
    const { normalized } = normalize(address);
    const uppercased = normalized.toUpperCase();
    return uppercased.trim();
}
|
|
53
|
+
/**
 * Best-effort extraction of a 2-letter US state abbreviation from a full address string.
 *
 * Every `ST ZIP` occurrence is collected: two letters, then spaces/commas, then a 5-digit ZIP with
 * an optional `-NNNN` suffix. codex's `pluckStateZIPCode` is anchored to a bare snippet and cannot
 * read a whole address, hence the scan here. Candidates are checked from the END of the string
 * backwards, because addresses finish with state + ZIP; the first one codex recognises as a state
 * abbreviation wins.
 *
 * @param address Full address text.
 * @returns The upper-case state abbreviation, or `null` when no valid `ST ZIP` pair is found.
 */
function deriveState(address) {
    const stateZipPattern = /\b([A-Za-z]{2})[ ,]+\d{5}(?:-\d{4})?\b/g;
    const abbreviations = Array.from(address.matchAll(stateZipPattern), (hit) => hit[1].toUpperCase());
    // Reverse so the right-most occurrence is tried first.
    const lastValid = abbreviations.reverse().find((candidate) => us.isUsStateAbbreviation(candidate));
    return lastValid ?? null;
}
|
|
67
|
+
/**
 * Build a stable {@link PostalAddressID} from a geocoded, canonicalizable address.
 *
 * The result is deterministic: the same (coordinate cell, canonical address, state) triple always
 * produces the same key. Two records that resolve to the same place and share a canonical address
 * therefore receive the SAME id — a join/dedup key that needs no matcher. Canonical address
 * strings that differ produce different keys; equivalence that is only semantic is the fuzzy
 * matcher's job.
 *
 * State prefix resolution, in order:
 *
 * 1. `input.state`, trimmed and lower-cased, when it is exactly two ASCII letters;
 * 2. otherwise the state derived from the address's trailing `ST ZIP`;
 * 3. otherwise `xx`.
 *
 * A supplied state that is not a 2-letter code (empty, padded, a full name such as `Texas`) is
 * not copied into the key verbatim: doing so would emit an id that `parsePostalAddressID` and
 * `isPostalAddressID` reject.
 *
 * @param input Coordinate, address text and optional `state` / `resolution` overrides.
 * @returns The key in `<state>.<H3-cell>.<hash>` form.
 */
export function createPostalAddressID(input) {
    const { coordinate, address } = input;
    const cell = latLngToCell(coordinate.latitude, coordinate.longitude, input.resolution ?? ADDRESS_H3_RESOLUTION);
    const hash = createHash("sha256").update(canonicalizeForHash(address)).digest("hex").slice(0, HASH_LENGTH);
    // Only accept a caller-supplied state that can form a well-formed key prefix.
    const supplied = typeof input.state === "string" ? input.state.trim().toLowerCase() : "";
    const state = /^[a-z]{2}$/.test(supplied) ? supplied : (deriveState(address) ?? "xx").toLowerCase();
    return `${state}.${cell}.${hash}`;
}
|
|
80
|
+
/**
 * Split a {@link PostalAddressID} into its three dot-delimited components.
 *
 * @param id Candidate key string.
 * @returns `{ state, cell, hash }` when `id` matches the key pattern, otherwise `null`.
 */
export function parsePostalAddressID(id) {
    const parts = POSTAL_ADDRESS_ID_PATTERN.exec(id);
    if (parts === null) {
        return null;
    }
    // Index 0 is the whole match; the capture groups follow in key order.
    const [, state, cell, hash] = parts;
    return { state, cell, hash };
}
|
|
87
|
+
/**
 * Type guard for {@link PostalAddressID}.
 *
 * @param value Candidate string.
 * @returns `true` when `value` matches the `<state>.<cell>.<hash>` key pattern, else `false`.
 */
export function isPostalAddressID(value) {
    const wellFormed = POSTAL_ADDRESS_ID_PATTERN.exec(value) !== null;
    return wellFormed;
}
|
|
91
|
+
//# sourceMappingURL=index.js.map
|
package/out/index.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAA;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAA;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,OAAO,CAAA;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAQxC;;;;;GAKG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AAQtC,2EAA2E;AAC3E,MAAM,yBAAyB,GAAG,gDAAgD,CAAA;AAElF;;;GAGG;AACH,MAAM,WAAW,GAAG,EAAE,CAAA;AAqBtB;;;GAGG;AACH,SAAS,mBAAmB,CAAC,OAAe;IAC3C,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAA;AAC1D,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,OAAe;IACnC,MAAM,UAAU,GAAG,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,yCAAyC,CAAC,CAAC,CAAA;IACnF,KAAK,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,MAAM,YAAY,GAAG,UAAU,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAA;QACrD,IAAI,EAAE,CAAC,qBAAqB,CAAC,YAAY,CAAC;YAAE,OAAO,YAAY,CAAA;IAChE,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAiC;IACtE,MAAM,IAAI,GAAG,YAAY,CACxB,KAAK,CAAC,UAAU,CAAC,QAAQ,EACzB,KAAK,CAAC,UAAU,CAAC,SAAS,EAC1B,KAAK,CAAC,UAAU,IAAI,qBAAqB,CACzC,CAAA;IACD,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAA;IAChH,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,WAAW,EAAE,CAAA;IAC/E,OAAO,GAAG,KAAK,IAAI,IAAI,IAAI,IAAI,EAAqB,CAAA;AACrD,CAAC;AAED,+EAA+E;AAC/E,MAAM,UAAU,oBAAoB,CAAC,EAAU;IAC9C,MAAM,KAAK,GAAG,yBAAyB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAChD,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IACvB,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAE,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAE,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAE,EAAE,CAAA;AAC9D,CAAC;AAED,oEAAoE;AACpE,MAAM,UAAU,iBAAiB,CAAC,KAAa;IAC9C,OAAO,yBAAyB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;AAC7C,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mailwoman/address-id",
|
|
3
|
+
"version": "4.9.0",
|
|
4
|
+
"description": "Turn a canonicalized + geocoded address into a stable, parseable primary key (`<state>.<H3-cell>.<hash>`) for deterministic record joins / dedup — the exact-match complement to the fuzzy matcher.",
|
|
5
|
+
"license": "AGPL-3.0-only",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/sister-software/mailwoman.git",
|
|
9
|
+
"directory": "address-id"
|
|
10
|
+
},
|
|
11
|
+
"type": "module",
|
|
12
|
+
"exports": {
|
|
13
|
+
"./package.json": "./package.json",
|
|
14
|
+
".": "./out/index.js"
|
|
15
|
+
},
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@mailwoman/codex": "workspace:*",
|
|
18
|
+
"@mailwoman/normalize": "workspace:*",
|
|
19
|
+
"h3-js": "^4.4.0"
|
|
20
|
+
},
|
|
21
|
+
"devDependencies": {
|
|
22
|
+
"@types/node": "^25.9.2"
|
|
23
|
+
},
|
|
24
|
+
"files": [
|
|
25
|
+
"out/**/*.js",
|
|
26
|
+
"out/**/*.js.map",
|
|
27
|
+
"out/**/*.d.ts",
|
|
28
|
+
"out/**/*.d.ts.map"
|
|
29
|
+
],
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
}
|
|
33
|
+
}
|