@mailwoman/corpus 4.0.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapters/geonames/adapter.d.ts +35 -0
- package/out/src/adapters/geonames/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames/adapter.js +161 -0
- package/out/src/adapters/geonames/adapter.js.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts +30 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.js +96 -0
- package/out/src/adapters/geonames-postal/adapter.js.map +1 -0
- package/out/src/adapters/index.d.ts +3 -0
- package/out/src/adapters/index.d.ts.map +1 -1
- package/out/src/adapters/index.js +9 -0
- package/out/src/adapters/index.js.map +1 -1
- package/out/src/adapters/tiger/adapter.d.ts +0 -3
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
- package/out/src/adapters/tiger/adapter.js +0 -3
- package/out/src/adapters/tiger/adapter.js.map +1 -1
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts +26 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js +115 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js.map +1 -0
- package/out/src/codex/us-fips-state.d.ts +0 -5
- package/out/src/codex/us-fips-state.d.ts.map +1 -1
- package/out/src/codex/us-fips-state.js +0 -5
- package/out/src/codex/us-fips-state.js.map +1 -1
- package/out/src/parquet-wrapper/index.d.ts +0 -3
- package/out/src/parquet-wrapper/index.d.ts.map +1 -1
- package/out/src/parquet-wrapper/index.js +0 -3
- package/out/src/parquet-wrapper/index.js.map +1 -1
- package/out/src/parquet-wrapper/reader.d.ts +0 -7
- package/out/src/parquet-wrapper/reader.d.ts.map +1 -1
- package/out/src/parquet-wrapper/reader.js +0 -7
- package/out/src/parquet-wrapper/reader.js.map +1 -1
- package/out/src/parquet-wrapper/writer.d.ts +0 -5
- package/out/src/parquet-wrapper/writer.d.ts.map +1 -1
- package/out/src/parquet-wrapper/writer.js +0 -5
- package/out/src/parquet-wrapper/writer.js.map +1 -1
- package/out/src/parquet.d.ts +2 -3
- package/out/src/parquet.d.ts.map +1 -1
- package/out/src/parquet.js +2 -3
- package/out/src/parquet.js.map +1 -1
- package/out/src/synthesize-german.d.ts +10 -8
- package/out/src/synthesize-german.d.ts.map +1 -1
- package/out/src/synthesize-german.js +10 -9
- package/out/src/synthesize-german.js.map +1 -1
- package/out/src/synthesize-intersection.d.ts +9 -11
- package/out/src/synthesize-intersection.d.ts.map +1 -1
- package/out/src/synthesize-intersection.js +28 -14
- package/out/src/synthesize-intersection.js.map +1 -1
- package/out/src/synthesize-street.d.ts +2 -2
- package/out/src/synthesize.d.ts +19 -0
- package/out/src/synthesize.d.ts.map +1 -1
- package/out/src/synthesize.js +65 -1
- package/out/src/synthesize.js.map +1 -1
- package/package.json +8 -8
- package/out/src/codex/us-street-suffix.d.ts +0 -260
- package/out/src/codex/us-street-suffix.d.ts.map +0 -1
- package/out/src/codex/us-street-suffix.js +0 -286
- package/out/src/codex/us-street-suffix.js.map +0 -1
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `usgov-irs-bmf`: IRS Exempt Organizations Business Master File (EO BMF) CSV consumer.
|
|
7
|
+
*
|
|
8
|
+
* The EO BMF is the IRS's authoritative registry of US tax-exempt organizations (charities,
|
|
9
|
+
* churches, foundations, ...), published as per-region CSVs at
|
|
10
|
+
* `https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf`
|
|
11
|
+
* (`eo1.csv`..`eo4.csv`, `eo_pr.csv`, `eo_xx.csv`). Each row carries an organization NAME plus
|
|
12
|
+
* its mailing address. It complements `usgov-nppes` with a DIFFERENT venue population
|
|
13
|
+
* (non-profits vs healthcare providers) and, notably, a high share of PO-box addresses — useful
|
|
14
|
+
* `po_box`-tag signal (a tag with historically low recall).
|
|
15
|
+
*
|
|
16
|
+
* Output: one row per record with a usable city + postcode. NAME → `venue`; the street line becomes
|
|
17
|
+
* `po_box` when it's a PO-box, else `house_number` + `street`; CITY/STATE/ZIP fill the locality
|
|
18
|
+
* line. STATE is already a USPS abbreviation in the source. License: `"Public Domain"` (US
|
|
19
|
+
* federal).
|
|
20
|
+
*/
|
|
21
|
+
import { parse as csvParse } from "csv-parse";
|
|
22
|
+
import { createReadStream } from "node:fs";
|
|
23
|
+
import { stableSourceId } from "../../adapter.js";
|
|
24
|
+
import { reconcileComponents } from "../../format.js";
|
|
25
|
+
/** Stable identifier for this adapter; used below as the adapter `id`, the row `source`, and the `source_id` prefix. */
export const USGOV_IRS_BMF_ADAPTER_ID = "usgov-irs-bmf";
|
|
26
|
+
/** License string exposed as the adapter's `defaultLicense` and stamped on every emitted row. */
export const USGOV_IRS_BMF_DEFAULT_LICENSE = "Public Domain";
|
|
27
|
+
/**
 * Leading house number followed by the remainder of the street line.
 * Group 1: digits, optionally a hyphenated second number (`12-14`) and/or one trailing letter (`12B`).
 * Group 2: everything after the separating whitespace.
 */
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;
// PO box in its many written forms: "PO BOX 12", "P.O. BOX 12", "P O BOX 12", "POB 12", "BOX 12",
// including a number sign before the box id ("PO BOX #12", "BOX # 12").
const PO_BOX = /^\s*(?:P\.?\s?O\.?\s*BOX|POB|BOX)\s+#?\s*\w/i;
/**
 * Classify the street line into a `po_box` or a `{house_number?, street}` split.
 *
 * @param {string | null | undefined} street - Raw street line; surrounding whitespace is ignored.
 * @returns {{ po_box: string } | { house_number?: string, street: string } | null}
 *   `null` when the line is missing or blank; `{ po_box }` (the whole trimmed line) when it reads
 *   as a PO box; `{ house_number, street }` when it starts with a house number; otherwise
 *   `{ street }` holding the whole trimmed line.
 */
function splitStreetLine(street) {
    // Treat a missing value like a blank line instead of throwing on `.trim()`.
    const trimmed = (street ?? "").trim();
    if (!trimmed) {
        return null;
    }
    // PO-box detection runs first so a box line is never split into house number + street.
    if (PO_BOX.test(trimmed)) {
        return { po_box: trimmed };
    }
    const m = HOUSE_NUMBER_PREFIX.exec(trimmed);
    if (m) {
        return { house_number: m[1], street: m[2].trim() };
    }
    return { street: trimmed };
}
|
|
42
|
+
/**
 * Assemble the single-line raw address string from its parts.
 *
 * Layout: `venue, streetPart, city, state postcode`. Any empty or missing part is dropped together
 * with its separator, so the result never contains dangling commas.
 *
 * @param {string | undefined} venue - Organization name, if any.
 * @param {string | undefined} streetPart - PO-box text or "house_number street".
 * @param {string | null | undefined} city - City name; surrounding whitespace is trimmed.
 * @param {string | undefined} state - State/region text.
 * @param {string | undefined} postcode - Postal code text.
 * @returns {string} The joined address line; `""` when every part is empty.
 */
function composeRaw(venue, streetPart, city, state, postcode) {
    // "STATE POSTCODE", tolerating either half being absent.
    const regionLine = [state, postcode].filter(Boolean).join(" ").trim();
    // `city` is null-tolerant like every other parameter (they are all filtered by truthiness).
    const cityPart = [(city ?? "").trim(), regionLine].filter(Boolean).join(", ");
    return [venue, streetPart, cityPart].filter(Boolean).join(", ");
}
|
|
46
|
+
/**
 * Build the `usgov-irs-bmf` corpus adapter.
 *
 * The returned object carries the adapter `id`, its `defaultLicense`, a human-readable
 * `description`, and an async generator `rows(opts)` that streams an EO BMF CSV file and yields
 * one labelled row per usable record.
 *
 * @returns {{ id: string, defaultLicense: string, description: string, rows: Function }}
 */
export function createUsgovIrsBmfAdapter() {
    return {
        id: USGOV_IRS_BMF_ADAPTER_ID,
        defaultLicense: USGOV_IRS_BMF_DEFAULT_LICENSE,
        description: "IRS Exempt Organizations Business Master File — US non-profit venue+address (public-domain), with strong PO-box coverage.",
        /**
         * Stream `opts.inputPath` as a header-keyed CSV and yield corpus rows.
         *
         * Reads `opts.country` (must be unset or `"US"`), `opts.inputPath`, `opts.signal` (checked
         * between records) and `opts.limit` (maximum number of rows to yield). Records without a
         * city, a postcode, or a street line are skipped.
         *
         * @throws {Error} When `opts.country` is set to anything other than `"US"`, or when
         *   reading/parsing the input file fails.
         */
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`usgov-irs-bmf adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = csvParse({ columns: true, skip_empty_lines: true, relax_quotes: true, relax_column_count: true, trim: true });
            // `pipe()` does not forward source errors to its destination. Without this handler a
            // failed read (missing file, permission error, ...) is an unhandled 'error' event
            // instead of a rejection the consumer of this generator can catch.
            stream.on("error", (err) => parser.destroy(err));
            stream.pipe(parser);
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const ein = (record.EIN ?? "").trim();
                    const venue = (record.NAME ?? "").trim() || undefined;
                    const street = (record.STREET ?? "").trim();
                    const city = (record.CITY ?? "").trim();
                    const state = (record.STATE ?? "").trim();
                    const zipRaw = (record.ZIP ?? "").trim();
                    const postcode = zipRaw.split("-")[0].trim(); // 5-digit; drop the optional +4
                    // Checked after the +4 split so a value like "-1234" cannot yield an empty postcode.
                    if (!city || !postcode)
                        continue;
                    const split = splitStreetLine(street);
                    if (!split)
                        continue;
                    const streetPart = "po_box" in split ? split.po_box : [split.house_number, split.street].filter(Boolean).join(" ");
                    // Key order follows the order the parts appear in `raw`.
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...("po_box" in split
                            ? { po_box: split.po_box }
                            : { ...(split.house_number ? { house_number: split.house_number } : {}), street: split.street }),
                        locality: city,
                        ...(state ? { region: state } : {}),
                        postcode,
                    };
                    const raw = composeRaw(venue, streetPart, city, state, postcode);
                    if (!raw)
                        continue;
                    // NOTE(review): `reconcileComponents` lives in ../../format.js; presumably it
                    // keeps only components that line up with `raw` — confirm there.
                    const aligned = reconcileComponents(components, raw);
                    // Skip rows left with two or fewer components.
                    if (Object.keys(aligned).length <= 2)
                        continue;
                    // Prefer the EIN as the id; fall back to an id derived from the components.
                    const sourceId = ein
                        ? `${USGOV_IRS_BMF_ADAPTER_ID}-${ein}`
                        : stableSourceId(USGOV_IRS_BMF_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: USGOV_IRS_BMF_ADAPTER_ID,
                        source_id: sourceId,
                        // Left empty here; presumably filled in downstream — TODO confirm.
                        corpus_version: "",
                        license: USGOV_IRS_BMF_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Release the file handle on completion, early exit, or error.
                stream.destroy();
            }
        },
    };
}
|
|
114
|
+
/** Ready-made adapter instance, created once when this module is loaded. */
export const usgovIrsBmfAdapter = createUsgovIrsBmfAdapter();
|
|
115
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-irs-bmf/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,wBAAwB,GAAG,eAAe,CAAA;AACvD,MAAM,CAAC,MAAM,6BAA6B,GAAG,eAAe,CAAA;AAE5D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAC9D,kGAAkG;AAClG,MAAM,MAAM,GAAG,yCAAyC,CAAA;AAWxD,qFAAqF;AACrF,SAAS,eAAe,CAAC,MAAc;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IAC7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IACpD,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,SAAS,UAAU,CAClB,KAAyB,EACzB,UAAkB,EAClB,IAAY,EACZ,KAAa,EACb,QAAgB;IAEhB,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC7G,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAChE,CAAC;AAED,MAAM,UAAU,wBAAwB;IACvC,OAAO;QACN,EAAE,EAAE,wBAAwB;QAC5B,cAAc,EAAE,6BAA6B;QAC7C,WAAW,EACV,2HAA2H;QAE5H,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,yDAAyD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACzF,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAC7G,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAkC,EAAE,CAAC;oBAC/D,IAAI,IAAI,CAAC,MAA
M,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAA;oBACrD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACxC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM;wBAAE,SAAQ;oBAC9B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAA,CAAC,gCAAgC;oBAE9E,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;oBACrC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GACf,QAAQ,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAEhG,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,QAAQ,IAAI,KAAK;4BACpB,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE;4BAC1B,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;wBACjG,QAAQ,EAAE,IAAI;wBACd,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnC,QAAQ;qBACR,CAAA;oBAED,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;oBAChE,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,MAAM,QAAQ,GAAG,GAAG;wBACnB,CAAC,CAAC,GAAG,wBAAwB,IAAI,GAAG,EAAE;wBACtC,CAAC,CAAC,cAAc,CAAC,wBAAwB,EAAE,OAAO,CAAC,CAAA;oBAEpD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,wBAAwB;wBAChC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO
,EAAE,6BAA6B;qBACtC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAG,wBAAwB,EAAE,CAAA"}
|
|
@@ -11,11 +11,6 @@
|
|
|
11
11
|
* numeric ID used by every TIGER product). It is reproduced here so the TIGER adapter can resolve
|
|
12
12
|
* `statefp` columns (e.g. `"50"`) into a `region` component (e.g. `"VT"`) without an extra DB
|
|
13
13
|
* join.
|
|
14
|
-
*
|
|
15
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
-
* `tiger/state.ts` (originally copyright Sister Software; both projects are AGPL-3.0). isp-nexus
|
|
17
|
-
* ships these as TypeScript enums + a TypeORM-backed service layer; mailwoman only needs the flat
|
|
18
|
-
* lookup so the file is a plain `Record` plus a small helper.
|
|
19
14
|
*/
|
|
20
15
|
/** Per-state record: two-letter postal abbreviation + full canonical display name. */
|
|
21
16
|
export interface UsStateInfo {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"us-fips-state.d.ts","sourceRoot":"","sources":["../../../src/codex/us-fips-state.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"us-fips-state.d.ts","sourceRoot":"","sources":["../../../src/codex/us-fips-state.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,sFAAsF;AACtF,MAAM,WAAW,WAAW;IAC3B,YAAY,EAAE,MAAM,CAAA;IACpB,IAAI,EAAE,MAAM,CAAA;CACZ;AAED;;;;GAIG;AACH,eAAO,MAAM,aAAa,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,CA0D9D,CAAA;AAEF,uEAAuE;AACvE,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,WAAW,GAAG,IAAI,CAGtF;AAED;;;;GAIG;AACH,eAAO,MAAM,wBAAwB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,CAE1E,CAAA;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,WAAW,GAAG,IAAI,CAGnG"}
|
|
@@ -11,11 +11,6 @@
|
|
|
11
11
|
* numeric ID used by every TIGER product). It is reproduced here so the TIGER adapter can resolve
|
|
12
12
|
* `statefp` columns (e.g. `"50"`) into a `region` component (e.g. `"VT"`) without an extra DB
|
|
13
13
|
* join.
|
|
14
|
-
*
|
|
15
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
-
* `tiger/state.ts` (originally copyright Sister Software; both projects are AGPL-3.0). isp-nexus
|
|
17
|
-
* ships these as TypeScript enums + a TypeORM-backed service layer; mailwoman only needs the flat
|
|
18
|
-
* lookup so the file is a plain `Record` plus a small helper.
|
|
19
14
|
*/
|
|
20
15
|
/**
|
|
21
16
|
* FIPS state-or-territory code → `{ abbreviation, name }`. Includes all 50 states, DC, and the five
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"us-fips-state.js","sourceRoot":"","sources":["../../../src/codex/us-fips-state.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"us-fips-state.js","sourceRoot":"","sources":["../../../src/codex/us-fips-state.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAQH;;;;GAIG;AACH,MAAM,CAAC,MAAM,aAAa,GAA0C,MAAM,CAAC,MAAM,CAAC;IACjF,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC5C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE;IAChD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;IACjD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,sBAAsB,EAAE;IAC1D,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC5C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;IAC3C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;IAC1C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC5C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE;IAC/C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;IAC3C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,eAAe,EAAE;IACnD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE;IAC/C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;IACjD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC5C,IAAI,EAAE
,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,eAAe,EAAE;IACnD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE;IAChD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE;IAChD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,gBAAgB,EAAE;IACpD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE;IAClD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;IAC1C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC5C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE;IAClD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE;IAClD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,gBAAgB,EAAE;IACpD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE;IAClD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE;IAC/C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;IAC3C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;IAC1C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;IAC9C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE;IAChD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,eAAe,EAAE;IACnD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE;IAC/C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;IAC7C,cAAc;IACd,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,gBAAgB,EAAE;IACpD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;IAC1C,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,0BAA0B,EAAE;IAC9D,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;IACjD,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,gBAAgB,EAAE;CACpD,CAAC,CAAA;AAEF,uEAAuE;AACvE,MAAM,UAAU,eAAe,CAAC,OAAkC;IACjE,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,aAAa,CAAC,OAAO,CAAC,IAAI,IAAI,CAAA;AACtC,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAA0C,MAAM,CAAC,MAAM,CAC3F,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAA
C,CAAC,CAAC,CACzF,CAAA;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CAAC,YAAuC;IAC9E,IAAI,CAAC,YAAY;QAAE,OAAO,IAAI,CAAA;IAC9B,OAAO,wBAAwB,CAAC,YAAY,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAA;AACpE,CAAC"}
|
|
@@ -2,9 +2,6 @@
|
|
|
2
2
|
* @copyright Sister Software
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
|
-
*
|
|
6
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
7
|
-
* `sdk/parquet/` (originally copyright Sister Software; both projects are AGPL-3.0).
|
|
8
5
|
*/
|
|
9
6
|
export * from "./reader.js";
|
|
10
7
|
export * from "./schema.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA"}
|
|
@@ -2,9 +2,6 @@
|
|
|
2
2
|
* @copyright Sister Software
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
|
-
*
|
|
6
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
7
|
-
* `sdk/parquet/` (originally copyright Sister Software; both projects are AGPL-3.0).
|
|
8
5
|
*/
|
|
9
6
|
export * from "./reader.js";
|
|
10
7
|
export * from "./schema.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA"}
|
|
@@ -6,13 +6,6 @@
|
|
|
6
6
|
* Typed wrapper around `@dsnp/parquetjs`'s `ParquetReader` that narrows the row-iterator generic to
|
|
7
7
|
* a user-supplied record type and adds `AsyncDisposable` support so `await using` cleans up the
|
|
8
8
|
* envelope reader without an explicit `close()`.
|
|
9
|
-
*
|
|
10
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
11
|
-
* `sdk/parquet/reader.ts` (originally copyright Sister Software; both projects are AGPL-3.0). Two
|
|
12
|
-
* trims relative to the original: (a) removed the
|
|
13
|
-
* `@mailwoman/core/polyfills/promises/withResolvers` import — Node 22 has it native; (b) replaced
|
|
14
|
-
* the `PathBuilderLike` (path-ts) type on `openFile` with the plain `string | URL` the
|
|
15
|
-
* `@dsnp/parquetjs` envelope reader accepts directly.
|
|
16
9
|
*/
|
|
17
10
|
import { ParquetReader as BaseParquetReader } from "@dsnp/parquetjs";
|
|
18
11
|
import type { BufferReaderOptions } from "@dsnp/parquetjs/dist/lib/bufferReader.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reader.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"reader.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACpE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0CAA0C,CAAA;AACnF,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,EAAE,KAAK,iBAAiB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAEnE,gEAAgE;AAChE,qBAAa,aAAa,CAAC,CAAC,SAAS,iBAAiB,CAAE,SAAQ,iBAAkB,YAAW,eAAe;IACnG,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAA;WAEV,QAAQ,CAAC,CAAC,SAAS,iBAAiB,EACzD,QAAQ,EAAE,MAAM,GAAG,GAAG,EACtB,OAAO,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;WAMN,UAAU,CAAC,CAAC,SAAS,iBAAiB,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB;WAMrF,kBAAkB,CAAC,CAAC,SAAS,iBAAiB,EACnE,cAAc,EAAE,qBAAqB,EACrC,IAAI,CAAC,EAAE,mBAAmB;IAkBX,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,cAAc,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC;IAI7D,CAAC,MAAM,CAAC,YAAY,CAAC;IAIrB,OAAO;CAGpB"}
|
|
@@ -6,13 +6,6 @@
|
|
|
6
6
|
* Typed wrapper around `@dsnp/parquetjs`'s `ParquetReader` that narrows the row-iterator generic to
|
|
7
7
|
* a user-supplied record type and adds `AsyncDisposable` support so `await using` cleans up the
|
|
8
8
|
* envelope reader without an explicit `close()`.
|
|
9
|
-
*
|
|
10
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
11
|
-
* `sdk/parquet/reader.ts` (originally copyright Sister Software; both projects are AGPL-3.0). Two
|
|
12
|
-
* trims relative to the original: (a) removed the
|
|
13
|
-
* `@mailwoman/core/polyfills/promises/withResolvers` import — Node 22 has it native; (b) replaced
|
|
14
|
-
* the `PathBuilderLike` (path-ts) type on `openFile` with the plain `string | URL` the
|
|
15
|
-
* `@dsnp/parquetjs` envelope reader accepts directly.
|
|
16
9
|
*/
|
|
17
10
|
import { ParquetReader as BaseParquetReader } from "@dsnp/parquetjs";
|
|
18
11
|
import { ParquetEnvelopeReader } from "@dsnp/parquetjs/dist/lib/reader.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reader.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"reader.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,EAA0B,aAAa,EAAE,MAAM,aAAa,CAAA;AAEnE,gEAAgE;AAChE,MAAM,OAAO,aAA2C,SAAQ,iBAAiB;IAGhF,MAAM,CAAU,KAAK,CAAC,QAAQ,CAC7B,QAAsB,EACtB,OAA6B;QAE7B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAA;QAEzF,OAAO,aAAa,CAAC,kBAAkB,CAAI,cAAc,EAAE,OAAO,CAAC,CAAA;IACpE,CAAC;IAED,MAAM,CAAU,KAAK,CAAC,UAAU,CAA8B,MAAc,EAAE,OAA6B;QAC1G,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAE9E,OAAO,IAAI,CAAC,kBAAkB,CAAI,cAAc,EAAE,OAAO,CAAC,CAAA;IAC3D,CAAC;IAED,MAAM,CAAU,KAAK,CAAC,kBAAkB,CACvC,cAAqC,EACrC,IAA0B;QAE1B,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;YACpB,OAAO,IAAI,aAAa,CAAI,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;QACjE,CAAC;QAED,IAAI,CAAC;YACJ,MAAM,cAAc,CAAC,UAAU,EAAE,CAAA;YAEjC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,UAAU,EAAE,CAAA;YAElD,OAAO,IAAI,aAAa,CAAI,QAAQ,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;QAC5D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,cAAc,CAAC,KAAK,EAAE,CAAA;YAC5B,MAAM,GAAG,CAAA;QACV,CAAC;IACF,CAAC;IAEe,CAAC,MAAM,CAAC,aAAa,CAAC;QACrC,OAAO,KAAK,CAAC,MAAM,CAAC,aAAa,CAAC,EAAsC,CAAA;IACzE,CAAC;IAEM,KAAK,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACjC,OAAO,IAAI,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC;IAEM,KAAK,CAAC,OAAO;QACnB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;IACnC,CAAC;CACD"}
|
|
@@ -11,11 +11,6 @@
|
|
|
11
11
|
* Implements `AsyncDisposable` so `await using writer = await ParquetWriter.openFile(...)` flushes
|
|
12
12
|
* and closes cleanly. `close()` internally serializes against any in-flight flush so back-to-back
|
|
13
13
|
* dispose calls don't race.
|
|
14
|
-
*
|
|
15
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
-
* `sdk/parquet/writer.ts` (originally copyright Sister Software; both projects are AGPL-3.0). One
|
|
17
|
-
* trim relative to the original: dropped the `@mailwoman/core/polyfills/promises/withResolvers`
|
|
18
|
-
* import — Node 22 (mailwoman's runtime) has `Promise.withResolvers` natively.
|
|
19
14
|
*/
|
|
20
15
|
import { ParquetWriter as BaseParquetWriter } from "@dsnp/parquetjs";
|
|
21
16
|
import type { WriterOptions } from "@dsnp/parquetjs/dist/lib/declare.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACpE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qCAAqC,CAAA;AACxE,OAAO,EAAU,KAAK,kBAAkB,EAAE,MAAM,kCAAkC,CAAA;AAIlF,OAAO,EACN,KAAK,iBAAiB,EACtB,aAAa,EACb,KAAK,uBAAuB,EAC5B,4BAA4B,EAC5B,MAAM,aAAa,CAAA;AAEpB,gEAAgE;AAChE,qBAAa,aAAa,CAAC,CAAC,SAAS,iBAAiB,CAAE,SAAQ,iBAAkB,YAAW,eAAe;;IACnG,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAA;IAChC,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,qBAAqB,+BAAqC;WAG9D,UAAU,CAAC,CAAC,SAAS,iBAAiB,EAC3D,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,uBAAuB,CAAC,CAAC,CAAC,EACzD,YAAY,EAAE,kBAAkB,EAChC,IAAI,GAAE,aAAkB,GACtB,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAW5B,oGAAoG;WAC9E,QAAQ,CAAC,CAAC,SAAS,iBAAiB,EACzD,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,uBAAuB,CAAC,CAAC,CAAC,EACzD,UAAU,EAAE,MAAM,GAAG,MAAM,GAAG,GAAG,EACjC,IAAI,CAAC,EAAE,aAAa,GAClB,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAU5B,mDAAmD;IACnC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAI7D,2FAA2F;IACrE,SAAS,CAAC,GAAG,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAItD,8EAA8E;IACxD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAa/B,CAAC,MAAM,CAAC,YAAY,CAAC;IAIrB,OAAO;CAGpB"}
|
|
@@ -11,11 +11,6 @@
|
|
|
11
11
|
* Implements `AsyncDisposable` so `await using writer = await ParquetWriter.openFile(...)` flushes
|
|
12
12
|
* and closes cleanly. `close()` internally serializes against any in-flight flush so back-to-back
|
|
13
13
|
* dispose calls don't race.
|
|
14
|
-
*
|
|
15
|
-
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
-
* `sdk/parquet/writer.ts` (originally copyright Sister Software; both projects are AGPL-3.0). One
|
|
17
|
-
* trim relative to the original: dropped the `@mailwoman/core/polyfills/promises/withResolvers`
|
|
18
|
-
* import — Node 22 (mailwoman's runtime) has `Promise.withResolvers` natively.
|
|
19
14
|
*/
|
|
20
15
|
import { ParquetWriter as BaseParquetWriter } from "@dsnp/parquetjs";
|
|
21
16
|
import { osopen } from "@dsnp/parquetjs/dist/lib/util.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"writer.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"writer.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,MAAM,EAA2B,MAAM,kCAAkC,CAAA;AAClF,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,KAAK,EAAE,MAAM,kBAAkB,CAAA;AACtC,OAAO,KAAK,IAAI,MAAM,WAAW,CAAA;AACjC,OAAO,EAEN,aAAa,EAEb,4BAA4B,GAC5B,MAAM,aAAa,CAAA;AAEpB,gEAAgE;AAChE,MAAM,OAAO,aAA2C,SAAQ,iBAAiB;IAEtE,MAAM,CAAU,qBAAqB,GAAG,IAAI,4BAA4B,EAAE,CAAA;IACpF,SAAS,GAAkB,OAAO,CAAC,OAAO,EAAE,CAAA;IAE5C,MAAM,CAAU,KAAK,CAAC,UAAU,CAC/B,UAAyD,EACzD,YAAgC,EAChC,OAAsB,EAAE;QAExB,MAAM,MAAM,GACX,UAAU,YAAY,aAAa;YAClC,CAAC,CAAC,UAAU;YACZ,CAAC,CAAC,aAAa,CAAC,qBAAqB,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAA;QAEtE,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,IAAI,CAAC,CAAA;QAEzF,OAAO,IAAI,aAAa,CAAI,MAAM,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;IAC1D,CAAC;IAED,oGAAoG;IACpG,MAAM,CAAU,KAAK,CAAC,QAAQ,CAC7B,UAAyD,EACzD,UAAiC,EACjC,IAAoB;QAEpB,IAAI,OAAO,UAAU,KAAK,QAAQ,EAAE,CAAC;YACpC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QAC9D,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;QACnD,OAAO,aAAa,CAAC,UAAU,CAAI,UAAU,EAAE,YAAY,EAAE,IAAI,CAAC,CAAA;IACnE,CAAC;IAED,6DAA6D;IAC7D,mDAAmD;IACnC,WAAW,CAAC,GAAW,EAAE,KAAa;QACrD,OAAO,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IACrC,CAAC;IAED,2FAA2F;IAC3E,KAAK,CAAC,SAAS,CAAC,GAAM;QACrC,OAAO,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAA;IAC5B,CAAC;IAED,8EAA8E;IAC9D,KAAK,CAAC,KAAK;QAC1B,MAAM,IAAI,CAAC,SAAS,CAAA;QACpB,IAAI,IAAI,CAAC,MAAM;YAAE,OAAM;QAEvB,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,aAAa,EAAQ,CAAA;QAElE,KAAK,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;QAEnC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAA;QAExB,OAAO,IAAI,CAAC,SAAS,CAAA;IACtB,CAAC;IAEM,KAAK,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACjC,OAAO,IAAI,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC;IAEM,KAAK,CAAC,OAAO;QACnB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;IACnC,CAAC"}
|
package/out/src/parquet.d.ts
CHANGED
|
@@ -7,9 +7,8 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Phase 1 (#9) shipped JSONL shards + a Python (PyArrow) converter as the path to binary Parquet —
|
|
9
9
|
* bridging until the JS toolchain caught up. Phase 1.5 (#18 §4) replaced that with a native JS
|
|
10
|
-
* writer
|
|
11
|
-
*
|
|
12
|
-
* only remaining Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
10
|
+
* writer. The build pipeline no longer touches Python at all in its hot path; the only remaining
|
|
11
|
+
* Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
13
12
|
*
|
|
14
13
|
* Compression: `SNAPPY`. The plan in #18 §4 specified `zstd`, but `@dsnp/parquetjs` 1.7.0 only
|
|
15
14
|
* supports UNCOMPRESSED / GZIP / SNAPPY / BROTLI (see `node_modules/@dsnp/parquetjs/dist/lib/
|
package/out/src/parquet.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAMH,OAAO,EAAiB,KAAK,uBAAuB,EAAE,MAAM,4BAA4B,CAAA;AACxF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE5C,sFAAsF;AACtF,eAAO,MAAM,cAAc,QAAS,CAAA;AAEpC,mFAAmF;AACnF,eAAO,MAAM,iBAAiB,EAAG,QAAiB,CAAA;AAElD;;;;GAIG;AACH,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,mEAAmE;AACnE,eAAO,MAAM,eAAe,gJAYlB,CAAA;AAEV;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,uBAAuB,CAAC,UAAU,CAYlE,CAAA;AAED,sDAAsD;AACtD,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,SAAS,CAAA;IAChB,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,SAAS,CAAA;IACjB,WAAW,EAAE,OAAO,iBAAiB,CAAA;IACrC,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,eAAe,EAAE,MAAM,CAAA;IACvB,cAAc,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,aAAa;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,cAAc,EAAE,MAAM,CAAA;IACtB,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,eAAe,EAAE,CAAA;IACzB,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACjC,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IAClC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IAEjB,yEAAyE;IACzE,aAAa,EAAE,MAAM,CAAA;IAErB,6EAA6E;IAC7E,YAAY,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,SAAS,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;AAEhF,mDAAmD;AACnD,wBAAgB,YAAY,CAAC,GAAG,EAAE,UAAU,GAAG,UAAU,CAcxD;AAwBD;;;;;;;;GAQG;AACH,wBAAsB,WAAW,CAAC,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,aAAa,CAAC,CAwF1G"}
|
package/out/src/parquet.js
CHANGED
|
@@ -7,9 +7,8 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Phase 1 (#9) shipped JSONL shards + a Python (PyArrow) converter as the path to binary Parquet —
|
|
9
9
|
* bridging until the JS toolchain caught up. Phase 1.5 (#18 §4) replaced that with a native JS
|
|
10
|
-
* writer
|
|
11
|
-
*
|
|
12
|
-
* only remaining Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
10
|
+
* writer. The build pipeline no longer touches Python at all in its hot path; the only remaining
|
|
11
|
+
* Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
13
12
|
*
|
|
14
13
|
* Compression: `SNAPPY`. The plan in #18 §4 specified `zstd`, but `@dsnp/parquetjs` 1.7.0 only
|
|
15
14
|
* supports UNCOMPRESSED / GZIP / SNAPPY / BROTLI (see `node_modules/@dsnp/parquetjs/dist/lib/
|
package/out/src/parquet.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACzD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,aAAa,EAAgC,MAAM,4BAA4B,CAAA;AAIxF,sFAAsF;AACtF,MAAM,CAAC,MAAM,cAAc,GAAG,MAAM,CAAA;AAEpC,mFAAmF;AACnF,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAiB,CAAA;AAsBlD,mEAAmE;AACnE,MAAM,CAAC,MAAM,eAAe,GAAG;IAC9B,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,WAAW;IACX,gBAAgB;IAChB,SAAS;IACT,cAAc;IACd,eAAe;CACN,CAAA;AAEV;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAwC;IACtE,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACrD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxD,SAAS,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC3D,cAAc,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAChE,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,YAAY,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC9E,aAAa,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;CAC/E,CAAA;AA6CD,mDAAmD;AACnD,MAAM,UAAU,YAAY,CAAC,GAAe;IAC3C,OAAO;QACN,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI;QAC1B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,YAAY,EAAE,GAAG,CAAC,KAAK,EAAE,MAAM,IAAI,IAAI;QACvC,aAAa,EAAE,GAAG,CAAC,KAAK,EAAE,cAAc,IAAI,IAAI;KAChD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,SAA
S,WAAW,CAAC,GAAe;IACnC,MAAM,GAAG,GAA4B;QACpC,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;KACpB,CAAA;IACD,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI;QAAE,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAA;IAChD,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI;QAAE,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,YAAY,CAAA;IAClE,IAAI,GAAG,CAAC,aAAa,KAAK,IAAI;QAAE,GAAG,CAAC,aAAa,GAAG,GAAG,CAAC,aAAa,CAAA;IACrE,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAsB,EAAE,IAAwB;IACjF,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAA;IACnD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;IACvE,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE3C,MAAM,MAAM,GAAsB,EAAE,CAAA;IACpC,MAAM,MAAM,GAA8B,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAA;IACvE,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,KAAK,MAAM,KAAK,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAU,EAAE,CAAC;QACvD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAA;QAC5B,IAAI,CAAC,IAAI;YAAE,SAAQ;QAEnB,IAAI,UAAU,GAAG,CAAC,CAAA;QAClB,IAAI,MAAM,GAAqC,IAAI,CAAA;QACnD,IAAI,IAAI,GAAG,EAAE,CAAA;QACb,IAAI,SAAS,GAAG,CAAC,CAAA;QACjB,IAAI,aAAa,GAAG,EAAE,CAAA;QACtB,IAAI,YAAY,GAAG,EAAE,CAAA;QAErB,MAAM,SAAS,GAAG,KAAK,IAAmB,EAAE;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;YACvC,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,CAAA;YAC5E,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAa,kBAAkB,EAAE,IAAI,EAAE;gBAC3E,YAAY,EAAE,cAAc;aAC5B,CAAC,CAAA;YACF,MAAM,CAAC,WAAW,CAAC,0BAA0B,EAAE,IAAI,CAAC,aAAa,CAAC,CAAA;YAClE,MAAM,CAAC,WAAW,CAAC,iBAAiB,EAAE,KAAK,CAAC,CAAA;YAC5C,MAAM,CAAC,WAAW,CAAC,uBAAuB,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAA;YAC/D,SAAS,GAAG,CAAC,CAAA;YACb,aAAa,GAAG,EAAE,CAAA;YAClB,YAAY,GAAG,EAAE,CAAA;QAClB,CAAC,CAAA;QAED,MAAM,UAAU,GAAG,KA
AK,IAAmB,EAAE;YAC5C,IAAI,CAAC,MAAM;gBAAE,OAAM;YACnB,MAAM,MAAM,CAAC,KAAK,EAAE,CAAA;YACpB,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAA;gBACnC,MAAM,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,IAAI;oBACJ,MAAM,EAAE,SAAS;oBACjB,WAAW,EAAE,iBAAiB;oBAC9B,IAAI,EAAE,SAAS;oBACf,KAAK,EAAE,QAAQ,CAAC,IAAI;oBACpB,MAAM;oBACN,eAAe,EAAE,aAAa;oBAC9B,cAAc,EAAE,YAAY;iBAC5B,CAAC,CAAA;YACH,CAAC;YACD,MAAM,GAAG,IAAI,CAAA;QACd,CAAC,CAAA;QAED,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,MAAM;gBAAE,MAAM,SAAS,EAAE,CAAA;YAC9B,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;YAC5B,MAAM,MAAO,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,CAA0B,CAAC,CAAA;YACjE,IAAI,SAAS,KAAK,CAAC;gBAAE,aAAa,GAAG,GAAG,CAAC,SAAS,CAAA;YAClD,YAAY,GAAG,GAAG,CAAC,SAAS,CAAA;YAC5B,SAAS,EAAE,CAAA;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;YACf,SAAS,EAAE,CAAA;YAEX,IAAI,SAAS,IAAI,YAAY,EAAE,CAAC;gBAC/B,MAAM,UAAU,EAAE,CAAA;gBAClB,UAAU,EAAE,CAAA;YACb,CAAC;QACF,CAAC;QAED,MAAM,UAAU,EAAE,CAAA;IACnB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAE5G,MAAM,QAAQ,GAAkB;QAC/B,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,MAAM,EAAE,eAAe;QACvB,cAAc,EAAE,YAAY;QAC5B,cAAc,EAAE,cAAc;QAC9B,MAAM;QACN,MAAM;QACN,UAAU,EAAE,SAAS;KACrB,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACnG,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,+FAA+F;AAC/F,KAAK,UAAU,QAAQ,CAAC,IAAY;IACnC,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAA;IACjC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;IACrC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM;QAAE,IAAI,CAAC,MAAM,CAAC,KAAe,CAAC,CAAA;IAC9D,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;AAC1B,CAAC"}
|
|
@@ -44,10 +44,11 @@ export interface LocaleSynthesisOpts {
|
|
|
44
44
|
/**
|
|
45
45
|
* Rendering order for the SAME components. `"native"` (default) uses the country's own template
|
|
46
46
|
* (DE → house-AFTER-street, postcode-BEFORE-city). `"international"` renders house-FIRST,
|
|
47
|
-
* postcode-AFTER-city — the US/GB layout that international feeds, US-centric systems, and our
|
|
48
|
-
* OpenAddresses de-sample impose on non-US addresses. Training both teaches the model that a
|
|
47
|
+
* postcode-AFTER-city — the US/GB layout that international feeds, US-centric systems, and our
|
|
48
|
+
* own OpenAddresses de-sample impose on non-US addresses. Training both teaches the model that a
|
|
49
49
|
* German address can arrive either way, so the eval's US-order rendering stops reading as a
|
|
50
|
-
* collapse. See `docs/articles/evals/2026-06-06-anchor-pilot.md` (the order-artifact
|
|
50
|
+
* collapse. See `docs/articles/evals/2026-06-06-anchor-pilot.md` (the order-artifact
|
|
51
|
+
* correction).
|
|
51
52
|
*/
|
|
52
53
|
order?: "native" | "international";
|
|
53
54
|
}
|
|
@@ -62,12 +63,13 @@ export type GermanSynthesisOpts = LocaleSynthesisOpts;
|
|
|
62
63
|
*
|
|
63
64
|
* Region handling is order-dependent: NATIVE order omits it (the native template absorbs the admin
|
|
64
65
|
* region into the postcode/city line, so it rarely renders verbatim and would break BIO alignment),
|
|
65
|
-
* while INTERNATIONAL order includes it in the tail ("City, Region Postcode" — the US/feed layout
|
|
66
|
-
* eval uses; v0.9.3 / #327).
|
|
66
|
+
* while INTERNATIONAL order includes it in the tail ("City, Region Postcode" — the US/feed layout
|
|
67
|
+
* the eval uses; v0.9.3 / #327).
|
|
67
68
|
*
|
|
68
|
-
* Pass `opts.order: "international"` to render the same components house-first /
|
|
69
|
-
* instead (see {@link LocaleSynthesisOpts.order}) — the layout international
|
|
70
|
-
* addresses, and the one a native-order-trained model treats as a
|
|
69
|
+
* Pass `opts.order: "international"` to render the same components house-first /
|
|
70
|
+
* postcode-after-city instead (see {@link LocaleSynthesisOpts.order}) — the layout international
|
|
71
|
+
* feeds impose on foreign addresses, and the one a native-order-trained model treats as a
|
|
72
|
+
* "collapse."
|
|
71
73
|
*/
|
|
72
74
|
export declare function synthesizeLocaleRow(base: LocaleBaseTuple, country: string, opts?: LocaleSynthesisOpts): SynthesizedLocaleRow | null;
|
|
73
75
|
/** German wrapper over {@link synthesizeLocaleRow}. Kept for the build-german-shard caller + tests. */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-german.d.ts","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,oGAAoG;AACpG,MAAM,WAAW,eAAe;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,MAAM,CAAA;CACjB;AACD,+CAA+C;AAC/C,MAAM,MAAM,eAAe,GAAG,eAAe,CAAA;AAE7C,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;CACd;AACD,oDAAoD;AACpD,MAAM,MAAM,oBAAoB,GAAG,oBAAoB,CAAA;AAEvD,MAAM,WAAW,mBAAmB;IACnC,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;IACrB
|
|
1
|
+
{"version":3,"file":"synthesize-german.d.ts","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,oGAAoG;AACpG,MAAM,WAAW,eAAe;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,MAAM,CAAA;CACjB;AACD,+CAA+C;AAC/C,MAAM,MAAM,eAAe,GAAG,eAAe,CAAA;AAE7C,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;CACd;AACD,oDAAoD;AACpD,MAAM,MAAM,oBAAoB,GAAG,oBAAoB,CAAA;AAEvD,MAAM,WAAW,mBAAmB;IACnC,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;IACrB;;;;;;;;OAQG;IACH,KAAK,CAAC,EAAE,QAAQ,GAAG,eAAe,CAAA;CAClC;AACD,mDAAmD;AACnD,MAAM,MAAM,mBAAmB,GAAG,mBAAmB,CAAA;AAuCrD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,eAAe,EACrB,OAAO,EAAE,MAAM,EACf,IAAI,GAAE,mBAAwB,GAC5B,oBAAoB,GAAG,IAAI,CA+B7B;AAED,uGAAuG;AACvG,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,eAAe,EACrB,IAAI,GAAE,mBAAwB,GAC5B,oBAAoB,GAAG,IAAI,CAE7B"}
|
|
@@ -33,10 +33,10 @@ const LOCALE_TAG = {
|
|
|
33
33
|
US: "en-US",
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
|
-
* Canonicalize a postcode to the form the country's template renders, so the stored component
|
|
37
|
-
* verbatim against `raw`. NL is the case that needs it: OA stores `1011AB` but the OpenCage
|
|
38
|
-
* emits the conventional spaced `1011 AB` (4 digits + space + 2 letters), which
|
|
39
|
-
* alignment and drops the row. Other countries pass through unchanged.
|
|
36
|
+
* Canonicalize a postcode to the form the country's template renders, so the stored component
|
|
37
|
+
* aligns verbatim against `raw`. NL is the case that needs it: OA stores `1011AB` but the OpenCage
|
|
38
|
+
* NL template emits the conventional spaced `1011 AB` (4 digits + space + 2 letters), which
|
|
39
|
+
* otherwise fails verbatim alignment and drops the row. Other countries pass through unchanged.
|
|
40
40
|
*/
|
|
41
41
|
function normalizePostcode(postcode, country) {
|
|
42
42
|
if (country === "NL") {
|
|
@@ -68,12 +68,13 @@ function tokenPresent(raw, value) {
|
|
|
68
68
|
*
|
|
69
69
|
* Region handling is order-dependent: NATIVE order omits it (the native template absorbs the admin
|
|
70
70
|
* region into the postcode/city line, so it rarely renders verbatim and would break BIO alignment),
|
|
71
|
-
* while INTERNATIONAL order includes it in the tail ("City, Region Postcode" — the US/feed layout
|
|
72
|
-
* eval uses; v0.9.3 / #327).
|
|
71
|
+
* while INTERNATIONAL order includes it in the tail ("City, Region Postcode" — the US/feed layout
|
|
72
|
+
* the eval uses; v0.9.3 / #327).
|
|
73
73
|
*
|
|
74
|
-
* Pass `opts.order: "international"` to render the same components house-first /
|
|
75
|
-
* instead (see {@link LocaleSynthesisOpts.order}) — the layout international
|
|
76
|
-
* addresses, and the one a native-order-trained model treats as a
|
|
74
|
+
* Pass `opts.order: "international"` to render the same components house-first /
|
|
75
|
+
* postcode-after-city instead (see {@link LocaleSynthesisOpts.order}) — the layout international
|
|
76
|
+
* feeds impose on foreign addresses, and the one a native-order-trained model treats as a
|
|
77
|
+
* "collapse."
|
|
77
78
|
*/
|
|
78
79
|
export function synthesizeLocaleRow(base, country, opts = {}) {
|
|
79
80
|
const random = opts.random ?? Math.random;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-german.js","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;
|
|
1
|
+
{"version":3,"file":"synthesize-german.js","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAsC3C,yFAAyF;AACzF,MAAM,UAAU,GAA2B;IAC1C,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;CACX,CAAA;AAED;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,QAAgB,EAAE,OAAe;IAC3D,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,2BAA2B,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACpD,IAAI,CAAC;YAAE,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,EAAE,CAAA;IAC/C,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,qGAAqG;AACrG,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa;IAC/C,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACtC,2FAA2F;IAC3F,MAAM,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IAC5B,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IACzB,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAA;IACnC,MAAM,OAAO,GAAG,CAAC,CAAqB,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAA;IAClF,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAA;IAC5E,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,mBAAmB,CAClC,IAAqB,EACrB,OAAe,EACf,OAA4B,EAAE;IAE9B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAA;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,QAAQ,CAAA;IACpC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAE/C,MAAM,UAAU,GAA+B,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC/F,+EAA+E;IAC/E,IAAI,IAAI,CAAC,YAAY,IAAI,MAAM,EAAE,GAAG,GAAG;QAAE,UAAU,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAA;IACpF,wFAAwF;IACxF,IAAI,IAAI,CAAC,QAAQ,IAAI,MAAM,EAAE,GAAG,IAAI;QAAE,UAAU,CAAC,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;IACrG,iGAAiG;IACjG,oGAAoG;IACpG,kGAAkG;IAClG,mGAAmG;IACnG,6FAA6F;IAC7F,IAAI,KAAK,KAAK,eAAe,IAAI,IAAI,CAAC,MAAM;QAAE,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,MAA
M,CAAA;IAE7E,mGAAmG;IACnG,+FAA+F;IAC/F,oGAAoG;IACpG,MAAM,aAAa,GAAG,KAAK,KAAK,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAA;IAChE,MAAM,GAAG,GAAG,aAAa,CAAC,UAAU,EAAE,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACzE,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IAErB,8FAA8F;IAC9F,4DAA4D;IAC5D,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/C,IAAI,CAAC,KAAK,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;IACrD,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,EAAE,CAAA;AACjF,CAAC;AAED,uGAAuG;AACvG,MAAM,UAAU,mBAAmB,CAClC,IAAqB,EACrB,OAA4B,EAAE;IAE9B,OAAO,mBAAmB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;AAC7C,CAAC"}
|
|
@@ -5,18 +5,16 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Intersection synthesizer — v0.7 coverage fix (night-3, DeepSeek-decided).
|
|
7
7
|
*
|
|
8
|
-
* The 2026-05-29 harness diagnostic found the neural model emits
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* missing signal as a small targeted supplement shard (synthesis-as-supplement
|
|
15
|
-
* discipline: weight < 0.25, one-and-done).
|
|
8
|
+
* The 2026-05-29 harness diagnostic found the neural model emits `intersection_a`/`intersection_b`
|
|
9
|
+
* with ~0.0001 probability on canonical intersections ("Broadway & W 42nd St") — it never learned
|
|
10
|
+
* the tags, because the corpus has NO intersection training signal (no generator, and real-data
|
|
11
|
+
* adapters don't emit intersection-formatted rows). Intersections are 65 of the 376 harness
|
|
12
|
+
* assertions (17%), all 0% neural. This generator produces the missing signal as a small targeted
|
|
13
|
+
* supplement shard (synthesis-as-supplement discipline: weight < 0.25, one-and-done).
|
|
16
14
|
*
|
|
17
|
-
* Output is a `CanonicalRow` ({raw, components}); the corpus aligner turns it
|
|
18
|
-
*
|
|
19
|
-
*
|
|
15
|
+
* Output is a `CanonicalRow` ({raw, components}); the corpus aligner turns it into BIO labels
|
|
16
|
+
* (B-/I-intersection_a, O on the connector, B-/I-intersection_b). Surface forms of both streets
|
|
17
|
+
* MUST occur verbatim in `raw` so alignment lands.
|
|
20
18
|
*
|
|
21
19
|
* US-idiomatic only (the harness intersection cases are US: "X & Y, City, ST ZIP").
|
|
22
20
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-intersection.d.ts","sourceRoot":"","sources":["../../src/synthesize-intersection.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"synthesize-intersection.d.ts","sourceRoot":"","sources":["../../src/synthesize-intersection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAyD9C,MAAM,WAAW,qBAAqB;IACrC,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,2EAA2E;IAC3E,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,0BAA0B;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,yBAAyB;IACzC,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;CACrB;AAkBD;;;GAGG;AACH,wBAAgB,yBAAyB,CACxC,IAAI,EAAE,qBAAqB,EAC3B,IAAI,GAAE,yBAA8B,GAClC,0BAA0B,GAAG,IAAI,CAoCnC;AAED,wFAAwF;AACxF,eAAO,MAAM,gBAAgB,EAAE,aAAa,CAAC,qBAAqB,CAWjE,CAAA;AAED,gFAAgF;AAChF,wBAAgB,wBAAwB,CACvC,KAAK,EAAE,MAAM,EACb,KAAK,GAAE,aAAa,CAAC,qBAAqB,CAAoB,EAC9D,IAAI,GAAE,yBAA8B,GAClC,0BAA0B,EAAE,CAU9B"}
|
|
@@ -5,18 +5,16 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Intersection synthesizer — v0.7 coverage fix (night-3, DeepSeek-decided).
|
|
7
7
|
*
|
|
8
|
-
* The 2026-05-29 harness diagnostic found the neural model emits
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* missing signal as a small targeted supplement shard (synthesis-as-supplement
|
|
15
|
-
* discipline: weight < 0.25, one-and-done).
|
|
8
|
+
* The 2026-05-29 harness diagnostic found the neural model emits `intersection_a`/`intersection_b`
|
|
9
|
+
* with ~0.0001 probability on canonical intersections ("Broadway & W 42nd St") — it never learned
|
|
10
|
+
* the tags, because the corpus has NO intersection training signal (no generator, and real-data
|
|
11
|
+
* adapters don't emit intersection-formatted rows). Intersections are 65 of the 376 harness
|
|
12
|
+
* assertions (17%), all 0% neural. This generator produces the missing signal as a small targeted
|
|
13
|
+
* supplement shard (synthesis-as-supplement discipline: weight < 0.25, one-and-done).
|
|
16
14
|
*
|
|
17
|
-
* Output is a `CanonicalRow` ({raw, components}); the corpus aligner turns it
|
|
18
|
-
*
|
|
19
|
-
*
|
|
15
|
+
* Output is a `CanonicalRow` ({raw, components}); the corpus aligner turns it into BIO labels
|
|
16
|
+
* (B-/I-intersection_a, O on the connector, B-/I-intersection_b). Surface forms of both streets
|
|
17
|
+
* MUST occur verbatim in `raw` so alignment lands.
|
|
20
18
|
*
|
|
21
19
|
* US-idiomatic only (the harness intersection cases are US: "X & Y, City, ST ZIP").
|
|
22
20
|
*/
|
|
@@ -47,11 +45,27 @@ const STREET_CORES = [
|
|
|
47
45
|
];
|
|
48
46
|
/** Bare proper-noun streets that idiomatically take NO suffix. */
|
|
49
47
|
const BARE_NAMES = ["Broadway", "Wall", "Bourbon", "Esplanade", "Riverside", "Lakeshore"];
|
|
50
|
-
const ORDINALS = [
|
|
48
|
+
const ORDINALS = [
|
|
49
|
+
"1st",
|
|
50
|
+
"2nd",
|
|
51
|
+
"3rd",
|
|
52
|
+
"4th",
|
|
53
|
+
"5th",
|
|
54
|
+
"6th",
|
|
55
|
+
"7th",
|
|
56
|
+
"8th",
|
|
57
|
+
"9th",
|
|
58
|
+
"10th",
|
|
59
|
+
"42nd",
|
|
60
|
+
"23rd",
|
|
61
|
+
"34th",
|
|
62
|
+
];
|
|
51
63
|
const SUFFIXES = ["St", "Ave", "Blvd", "Rd", "Dr", "Ln", "Way", "Pl", "Ct", "Pkwy", "Ter", "Cir"];
|
|
52
64
|
const DIRECTIONALS = ["N", "S", "E", "W", "NE", "NW", "SE", "SW"];
|
|
53
|
-
/**
|
|
54
|
-
*
|
|
65
|
+
/**
|
|
66
|
+
* Connectors between the two streets. Whitespace-padded forms keep tokens clean for alignment. `@`
|
|
67
|
+
* added in v0.7.2 — the harness uses it ("Main St @ Second Ave") and v0.7.1 had never seen it.
|
|
68
|
+
*/
|
|
55
69
|
const CONNECTORS = [" & ", " and ", " at ", " / ", " @ "];
|
|
56
70
|
function pick(arr, random) {
|
|
57
71
|
return arr[Math.floor(random() * arr.length)];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-intersection.js","sourceRoot":"","sources":["../../src/synthesize-intersection.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"synthesize-intersection.js","sourceRoot":"","sources":["../../src/synthesize-intersection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH,kFAAkF;AAClF,MAAM,YAAY,GAAG;IACpB,MAAM;IACN,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,YAAY;IACZ,SAAS;IACT,WAAW;IACX,SAAS;IACT,UAAU;IACV,QAAQ;IACR,OAAO;IACP,QAAQ;IACR,MAAM;IACN,UAAU;IACV,QAAQ;IACR,OAAO;IACP,QAAQ;CACC,CAAA;AAEV,kEAAkE;AAClE,MAAM,UAAU,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,CAAU,CAAA;AAElG,MAAM,QAAQ,GAAG;IAChB,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;CACG,CAAA;AAEV,MAAM,QAAQ,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,CAAU,CAAA;AAE1G,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU,CAAA;AAE1E;;;GAGG;AACH,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,CAAU,CAAA;AAoBlE,SAAS,IAAI,CAAI,GAAqB,EAAE,MAAoB;IAC3D,OAAO,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC,CAAE,CAAA;AAC/C,CAAC;AAED,gGAAgG;AAChG,SAAS,eAAe,CAAC,MAAoB;IAC5C,uFAAuF;IACvF,IAAI,MAAM,EAAE,GAAG,GAAG;QAAE,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;IAEnD,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,MAAM,EAAE,GAAG,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;IAC3D,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;IACjF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAA;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,yBAAyB,CACxC,IAA2B,EAC3B,OAAkC,EAAE;IAEpC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAA;IACzC,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAEtC,MAAM,CAAC,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;IACjC,IAAI,CAAC,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;IAC/B,yGAAyG;IACz
G,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,GAAG,CAAC;QAAE,CAAC,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;IAC9F,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;IAC1C,gFAAgF;IAChF,MAAM,YAAY,GAAG,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;IAEvD,MAAM,UAAU,GAA+B,EAAE,cAAc,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,CAAA;IAEvF,kGAAkG;IAClG,4FAA4F;IAC5F,8EAA8E;IAC9E,MAAM,IAAI,GAAG,MAAM,EAAE,GAAG,GAAG,CAAA;IAC3B,IAAI,GAAW,CAAA;IACf,IAAI,IAAI,EAAE,CAAC;QACV,GAAG,GAAG,GAAG,YAAY,GAAG,CAAC,GAAG,SAAS,GAAG,CAAC,EAAE,CAAA;IAC5C,CAAC;SAAM,CAAC;QACP,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,MAAM,EAAE,GAAG,GAAG,CAAA;QAC/D,MAAM,IAAI,GAAG,eAAe;YAC3B,CAAC,CAAC,KAAK,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE;YACvD,CAAC,CAAC,KAAK,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,MAAM,EAAE,CAAA;QACvC,GAAG,GAAG,GAAG,YAAY,GAAG,CAAC,GAAG,SAAS,GAAG,CAAC,GAAG,IAAI,EAAE,CAAA;QAClD,UAAU,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAA;QACnC,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;QAC/B,IAAI,eAAe;YAAE,UAAU,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAA;IACzD,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC5C,CAAC;AAED,wFAAwF;AACxF,MAAM,CAAC,MAAM,gBAAgB,GAAyC;IACrE,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACxE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACvE,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IAC3E,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACvE,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACtE,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACxE,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;I
ACtE,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACtE,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;IACrE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE;CACvE,CAAA;AAED,gFAAgF;AAChF,MAAM,UAAU,wBAAwB,CACvC,KAAa,EACb,QAA8C,gBAAgB,EAC9D,OAAkC,EAAE;IAEpC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAA;IACzC,MAAM,GAAG,GAAiC,EAAE,CAAA;IAC5C,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,OAAO,GAAG,CAAC,MAAM,GAAG,KAAK,IAAI,KAAK,EAAE,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAE,CAAA;QAC9C,MAAM,GAAG,GAAG,yBAAyB,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,CAAA;QACvD,IAAI,GAAG;YAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACvB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|