@mailwoman/corpus 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapter.d.ts +96 -0
- package/out/src/adapter.d.ts.map +1 -0
- package/out/src/adapter.js +107 -0
- package/out/src/adapter.js.map +1 -0
- package/out/src/adapters/ban/adapter.d.ts +32 -0
- package/out/src/adapters/ban/adapter.d.ts.map +1 -0
- package/out/src/adapters/ban/adapter.js +133 -0
- package/out/src/adapters/ban/adapter.js.map +1 -0
- package/out/src/adapters/fcc-bdc/adapter.d.ts +61 -0
- package/out/src/adapters/fcc-bdc/adapter.d.ts.map +1 -0
- package/out/src/adapters/fcc-bdc/adapter.js +153 -0
- package/out/src/adapters/fcc-bdc/adapter.js.map +1 -0
- package/out/src/adapters/index.d.ts +42 -0
- package/out/src/adapters/index.d.ts.map +1 -0
- package/out/src/adapters/index.js +76 -0
- package/out/src/adapters/index.js.map +1 -0
- package/out/src/adapters/openaddresses/adapter.d.ts +60 -0
- package/out/src/adapters/openaddresses/adapter.d.ts.map +1 -0
- package/out/src/adapters/openaddresses/adapter.js +174 -0
- package/out/src/adapters/openaddresses/adapter.js.map +1 -0
- package/out/src/adapters/state-ia-contractors/adapter.d.ts +23 -0
- package/out/src/adapters/state-ia-contractors/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-ia-contractors/adapter.js +113 -0
- package/out/src/adapters/state-ia-contractors/adapter.js.map +1 -0
- package/out/src/adapters/state-ny-notaries/adapter.d.ts +21 -0
- package/out/src/adapters/state-ny-notaries/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-ny-notaries/adapter.js +132 -0
- package/out/src/adapters/state-ny-notaries/adapter.js.map +1 -0
- package/out/src/adapters/state-tx-notaries/adapter.d.ts +22 -0
- package/out/src/adapters/state-tx-notaries/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-tx-notaries/adapter.js +125 -0
- package/out/src/adapters/state-tx-notaries/adapter.js.map +1 -0
- package/out/src/adapters/tiger/adapter.d.ts +45 -0
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -0
- package/out/src/adapters/tiger/adapter.js +179 -0
- package/out/src/adapters/tiger/adapter.js.map +1 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts +36 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.js +147 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.js.map +1 -0
- package/out/src/adapters/usgov-imls-pls/adapter.d.ts +25 -0
- package/out/src/adapters/usgov-imls-pls/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-imls-pls/adapter.js +118 -0
- package/out/src/adapters/usgov-imls-pls/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts +37 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.js +227 -0
- package/out/src/adapters/usgov-nad/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nppes/adapter.d.ts +28 -0
- package/out/src/adapters/usgov-nppes/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-nppes/adapter.js +123 -0
- package/out/src/adapters/usgov-nppes/adapter.js.map +1 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts +35 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js +162 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js.map +1 -0
- package/out/src/adapters/wof-admin-json/adapter.d.ts +85 -0
- package/out/src/adapters/wof-admin-json/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-admin-json/adapter.js +241 -0
- package/out/src/adapters/wof-admin-json/adapter.js.map +1 -0
- package/out/src/adapters/wof-postalcode-json/adapter.d.ts +63 -0
- package/out/src/adapters/wof-postalcode-json/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-postalcode-json/adapter.js +178 -0
- package/out/src/adapters/wof-postalcode-json/adapter.js.map +1 -0
- package/out/src/align.d.ts +58 -0
- package/out/src/align.d.ts.map +1 -0
- package/out/src/align.js +139 -0
- package/out/src/align.js.map +1 -0
- package/out/src/build.d.ts +104 -0
- package/out/src/build.d.ts.map +1 -0
- package/out/src/build.js +201 -0
- package/out/src/build.js.map +1 -0
- package/out/src/codex/us-fips-state.d.ts +44 -0
- package/out/src/codex/us-fips-state.d.ts.map +1 -0
- package/out/src/codex/us-fips-state.js +105 -0
- package/out/src/codex/us-fips-state.js.map +1 -0
- package/out/src/codex/us-street-suffix.d.ts +259 -0
- package/out/src/codex/us-street-suffix.d.ts.map +1 -0
- package/out/src/codex/us-street-suffix.js +285 -0
- package/out/src/codex/us-street-suffix.js.map +1 -0
- package/out/src/format.d.ts +79 -0
- package/out/src/format.d.ts.map +1 -0
- package/out/src/format.js +151 -0
- package/out/src/format.js.map +1 -0
- package/out/src/golden.d.ts +50 -0
- package/out/src/golden.d.ts.map +1 -0
- package/out/src/golden.js +104 -0
- package/out/src/golden.js.map +1 -0
- package/out/src/index.d.ts +18 -0
- package/out/src/index.d.ts.map +1 -0
- package/out/src/index.js +18 -0
- package/out/src/index.js.map +1 -0
- package/out/src/parquet-wrapper/index.d.ts +12 -0
- package/out/src/parquet-wrapper/index.d.ts.map +1 -0
- package/out/src/parquet-wrapper/index.js +12 -0
- package/out/src/parquet-wrapper/index.js.map +1 -0
- package/out/src/parquet-wrapper/reader.d.ts +31 -0
- package/out/src/parquet-wrapper/reader.d.ts.map +1 -0
- package/out/src/parquet-wrapper/reader.js +54 -0
- package/out/src/parquet-wrapper/reader.js.map +1 -0
- package/out/src/parquet-wrapper/schema.d.ts +45 -0
- package/out/src/parquet-wrapper/schema.d.ts.map +1 -0
- package/out/src/parquet-wrapper/schema.js +55 -0
- package/out/src/parquet-wrapper/schema.js.map +1 -0
- package/out/src/parquet-wrapper/writer.d.ts +41 -0
- package/out/src/parquet-wrapper/writer.d.ts.map +1 -0
- package/out/src/parquet-wrapper/writer.js +71 -0
- package/out/src/parquet-wrapper/writer.js.map +1 -0
- package/out/src/parquet.d.ts +122 -0
- package/out/src/parquet.d.ts.map +1 -0
- package/out/src/parquet.js +220 -0
- package/out/src/parquet.js.map +1 -0
- package/out/src/runner.d.ts +100 -0
- package/out/src/runner.d.ts.map +1 -0
- package/out/src/runner.js +183 -0
- package/out/src/runner.js.map +1 -0
- package/out/src/split.d.ts +108 -0
- package/out/src/split.d.ts.map +1 -0
- package/out/src/split.js +191 -0
- package/out/src/split.js.map +1 -0
- package/out/src/synthesize.d.ts +146 -0
- package/out/src/synthesize.d.ts.map +1 -0
- package/out/src/synthesize.js +472 -0
- package/out/src/synthesize.js.map +1 -0
- package/out/src/tokenize.d.ts +47 -0
- package/out/src/tokenize.d.ts.map +1 -0
- package/out/src/tokenize.js +49 -0
- package/out/src/tokenize.js.map +1 -0
- package/out/src/types.d.ts +168 -0
- package/out/src/types.d.ts.map +1 -0
- package/out/src/types.js +19 -0
- package/out/src/types.js.map +1 -0
- package/out/src/wof-json.d.ts +105 -0
- package/out/src/wof-json.d.ts.map +1 -0
- package/out/src/wof-json.js +174 -0
- package/out/src/wof-json.js.map +1 -0
- package/package.json +36 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Render a `ComponentTag`-keyed dict into a country-localized string.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1's adapters carry ground-truth components but need a plausible `raw` string for the model
|
|
9
|
+
* to learn from. This module bridges Mailwoman's `ComponentTag` schema to OpenCage's
|
|
10
|
+
* `address-formatting` templates (vendored via `@fragaria/address-formatter`, MIT) so adapters
|
|
11
|
+
* can synthesize idiomatic per-country output without each one reinventing the template logic.
|
|
12
|
+
*
|
|
13
|
+
* `@fragaria/address-formatter` returns multi-line strings (newline-separated). This module honors
|
|
14
|
+
* that by default and exposes a `separator` option to coerce to single-line when an adapter wants
|
|
15
|
+
* `", "` joined output for the corpus.
|
|
16
|
+
*
|
|
17
|
+
* Limitations (documented, not blockers for Phase 1):
|
|
18
|
+
*
|
|
19
|
+
* - `unit`: OpenCage's vocabulary doesn't have a slot, so units are appended to the road line
|
|
20
|
+
* (`"Pennsylvania Ave NW Apt 4B"`). Phase 2 or beyond can change this if needed.
|
|
21
|
+
* - `intersection_a` / `intersection_b`: joined as `"<a> & <b>"` into the road field.
|
|
22
|
+
* - `cedex` (FR): folded into `postcode` (e.g. `"75008 CEDEX 08"`) so the FR template renders it in
|
|
23
|
+
* the right slot.
|
|
24
|
+
* - JP-specific tags (`prefecture`, `municipality`, ...): no mapping yet — left for Phase 6 when JP
|
|
25
|
+
* becomes a live locale.
|
|
26
|
+
*/
|
|
27
|
+
import addressFormatter from "@fragaria/address-formatter";
|
|
28
|
+
/**
 * After rendering, drop any component whose value isn't actually present in the formatted `raw`.
 * OpenCage's per-country templates legitimately omit some inputs:
 *
 * - **FR**: regions are absorbed by the postcode and rarely render verbatim (`"75008 Paris, France"`
 *   — `"Île-de-France"` is dropped).
 * - **US**: state names are abbreviated (`"Oregon"` → `"OR"`); the verbatim form may not survive.
 *
 * Alignment downstream requires `components[tag]` to occur in `raw` (within fuzzy-match tolerance).
 * If a component was dropped or transformed beyond Levenshtein reach, keeping it in the dict
 * guarantees a quarantine reject. Reconciliation prunes the dropouts up front so the row arrives at
 * alignment with a self-consistent (`raw`, `components`) pair.
 *
 * Comparison is case- and whitespace-insensitive: both sides are lower-cased, runs of whitespace
 * collapse to a single space, and leading/trailing whitespace on a component value is ignored.
 * Values that are empty, whitespace-only, or not strings are dropped. The retained value in the
 * result is the **original** input, not the normalized form.
 *
 * @param {Record<string, string>} components Tag-keyed component values to check.
 * @param {string} raw The rendered address string to search in.
 * @returns {Record<string, string>} A new dict holding only the components found in `raw`.
 */
export function reconcileComponents(components, raw) {
    const normalize = (text) => text.toLowerCase().replace(/\s+/g, " ");
    const haystack = normalize(raw);
    const out = {};
    for (const [tag, value] of Object.entries(components)) {
        // Non-string values cannot occur verbatim in `raw`; skip them instead of throwing.
        if (typeof value !== "string") {
            continue;
        }
        const needle = normalize(value).trim();
        // A whitespace-only value would otherwise normalize to " " and match any space in `raw`.
        if (!needle) {
            continue;
        }
        if (haystack.includes(needle)) {
            out[tag] = value;
        }
    }
    return out;
}
|
|
57
|
+
/**
 * Render a component dict into an idiomatic per-country address string.
 *
 * The components are first translated with `toOpenCageComponents`; if nothing survives the
 * translation (empty dict, only unsupported tags) the result is an empty string rather than an
 * error. Otherwise the translated dict is handed to `addressFormatter.format` and trailing
 * whitespace is removed from its output.
 *
 * @param components `ComponentTag`-keyed values to render.
 * @param country Country code passed through to `toOpenCageComponents` for template selection.
 * @param opts Optional settings:
 *   - `abbreviate` (default `false`) and `appendCountry` (default `false`) are forwarded to the
 *     formatter.
 *   - `separator`: when defined, every run of newlines in the output is replaced by this text,
 *     inserted literally.
 * @returns The rendered address, multi-line unless `opts.separator` is given.
 */
export function formatAddress(components, country, opts = {}) {
    const ocComponents = toOpenCageComponents(components, country);
    if (Object.keys(ocComponents).length === 0) {
        return "";
    }
    const raw = addressFormatter.format(ocComponents, {
        abbreviate: opts.abbreviate ?? false,
        appendCountry: opts.appendCountry ?? false,
    });
    const trimmed = raw.trimEnd();
    const { separator } = opts;
    if (separator === undefined) {
        return trimmed;
    }
    // A replacer function keeps `$&`, `$1`, `$$` etc. in the separator from being interpreted as
    // replacement patterns, which a plain replacement string would do.
    return trimmed.replace(/\n+/g, () => separator);
}
|
|
74
|
+
/**
 * Translate a `ComponentTag` dict to the OpenCage vocabulary that `@fragaria/address-formatter`
 * expects. Exported for testing and for adapters that want to pre-build the dict for batch
 * formatting.
 *
 * Mapping performed here:
 *
 * - `road` comes from `composeRoad`, `postcode` from `composePostcode`.
 * - `house_number`, `po_box`, `attention` and `country` keep their names.
 * - `venue` → `house`, `locality` → `city`, `dependent_locality` → `suburb`,
 *   `subregion` → `county`, `region` → `state`.
 *
 * Empty / missing values are left out of the result.
 *
 * @param components `ComponentTag`-keyed values.
 * @param country Country code; trimmed and lower-cased into `country_code`. A non-string value is
 *   treated as "no country" instead of throwing.
 * @returns A new dict in OpenCage vocabulary; empty when no component produced output.
 */
export function toOpenCageComponents(components, country) {
    const out = {};
    // Copy a component across under its OpenCage key, skipping empty / missing values.
    const copy = (tag, key) => {
        const value = components[tag];
        if (value) {
            out[key] = value;
        }
    };
    const road = composeRoad(components);
    if (road) {
        out.road = road;
    }
    copy("house_number", "house_number");
    copy("venue", "house");
    copy("locality", "city");
    copy("dependent_locality", "suburb");
    copy("subregion", "county");
    copy("region", "state");
    const postcode = composePostcode(components);
    if (postcode) {
        out.postcode = postcode;
    }
    copy("po_box", "po_box");
    copy("attention", "attention");
    copy("country", "country");
    // country_code drives template selection, not output. Only emit it if at least one other
    // component is present — otherwise the FR/US templates render the bare code as a fallback
    // line ("US"), which is never what a corpus consumer wants.
    const cc = typeof country === "string" ? country.trim().toLowerCase() : "";
    if (cc && Object.keys(out).length > 0) {
        out.country_code = cc;
    }
    return out;
}
|
|
113
|
+
/**
 * Build the `road` line from prefix / particle / street / suffix / unit / intersection components.
 * Order:
 *
 * ```
 * [intersection_a & intersection_b]
 *   OR
 * [street_prefix] [street_prefix_particle] [street] [street_suffix] [unit]
 * ```
 *
 * The intersection form is used only when BOTH `intersection_a` and `intersection_b` are present;
 * it then takes precedence over the street parts. In either form, runs of whitespace collapse to a
 * single space and the result is trimmed.
 *
 * @param components `ComponentTag`-keyed values.
 * @returns The composed road line, or an empty string when no relevant component is set.
 */
function composeRoad(components) {
    const tidy = (text) => text.replace(/\s+/g, " ").trim();
    const { intersection_a: first, intersection_b: second } = components;
    if (first && second) {
        // Normalize here too, so both output forms follow the same whitespace rules.
        return tidy(`${first} & ${second}`);
    }
    const parts = [
        components.street_prefix,
        components.street_prefix_particle,
        components.street,
        components.street_suffix,
        components.unit,
    ].filter(Boolean);
    return tidy(parts.join(" "));
}
|
|
140
|
+
/**
 * Merge `postcode` and `cedex` into the single postcode slot used for FR-style output.
 *
 * Both values are trimmed first. When both are present they are joined with one space and any
 * whitespace runs are collapsed (`"75008"` + `"CEDEX 08"` → `"75008 CEDEX 08"`). When only one is
 * present it is returned trimmed; when neither is, the result is an empty string.
 */
function composePostcode(components) {
    const [code, cedexPart] = [components.postcode, components.cedex].map((value) => value?.trim() ?? "");
    if (!code) {
        return cedexPart;
    }
    if (!cedexPart) {
        return code;
    }
    return [code, cedexPart].join(" ").replace(/\s+/g, " ");
}
|
|
151
|
+
//# sourceMappingURL=format.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"format.js","sourceRoot":"","sources":["../../src/format.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,gBAAgB,MAAM,6BAA6B,CAAA;AA2B1D;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,mBAAmB,CAAC,UAAyB,EAAE,GAAW;IACzE,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IACvD,MAAM,GAAG,GAAkB,EAAE,CAAA;IAC7B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACjD,IAAI,CAAC,CAAC;YAAE,SAAQ;QAChB,MAAM,MAAM,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;QACnD,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC/B,GAAG,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAA;QAC3B,CAAC;IACF,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,UAAyB,EAAE,OAAe,EAAE,OAA6B,EAAE;IACxG,MAAM,YAAY,GAAG,oBAAoB,CAAC,UAAU,EAAE,OAAO,CAAC,CAAA;IAC9D,IAAI,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAErD,MAAM,GAAG,GAAG,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE;QACjD,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,KAAK;QACpC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,KAAK;KAC1C,CAAC,CAAA;IAEF,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAA;IACxC,OAAO,IAAI,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAA;AACxF,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,UAAyB,EAAE,OAAe;IAC9E,MAAM,GAAG,GAA2B,EAAE,CAAA;IAEtC,MAAM,IAAI,GAAG,WAAW,CAAC,UAAU,CAAC,CAAA;IACpC,IAAI,IAAI;QAAE,GAAG,CAAC,IAAI,GAAG,IAAI,CAAA;IAEzB,IAAI,UAAU,CAAC,YAAY;QAAE,GAAG,CAAC,YAAY,GAAG,UAAU,CAAC,YAAY,CAAA;IAEvE,IAAI,UAAU,CAAC,KAAK;QAAE,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,CAAA;IAElD,IAAI,UAAU,CAAC,QAAQ;QAAE,GAAG,CAAC,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAA;IACvD,IAAI,UAAU,CAAC,kBAAkB;QAAE,GAAG,CAAC,MAAM,GAAG,UAAU,CAAC,kBAAkB,CAAA;IAC7E,IAAI,UAAU,CAAC,SAAS;QAAE,GAAG,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,CAAA;IAC3D,IAAI,UAAU,CAAC,MAAM;QAAE,GAAG,CAAC,KAAK,GAAG,UAAU,CAAC,MAAM,CAAA;IAEpD,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;IAC5C,IAAI,QAAQ;QAAE,GAAG,CAAC,QAAQ,GAAG,QAAQ,CAAA;IAErC,IAAI,UAAU,CAAC,MAAM;Q
AAE,GAAG,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;IACrD,IAAI,UAAU,CAAC,SAAS;QAAE,GAAG,CAAC,SAAS,GAAG,UAAU,CAAC,SAAS,CAAA;IAE9D,IAAI,UAAU,CAAC,OAAO;QAAE,GAAG,CAAC,OAAO,GAAG,UAAU,CAAC,OAAO,CAAA;IAExD,yFAAyF;IACzF,0FAA0F;IAC1F,4DAA4D;IAC5D,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IACvC,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,GAAG,CAAC,YAAY,GAAG,EAAE,CAAA;IAE5D,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,WAAW,CAAC,UAAyB;IAC7C,IAAI,UAAU,CAAC,cAAc,IAAI,UAAU,CAAC,cAAc,EAAE,CAAC;QAC5D,OAAO,GAAG,UAAU,CAAC,cAAc,MAAM,UAAU,CAAC,cAAc,EAAE,CAAA;IACrE,CAAC;IAED,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,UAAU,CAAC,aAAa;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;IAClE,IAAI,UAAU,CAAC,sBAAsB;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,sBAAsB,CAAC,CAAA;IACpF,IAAI,UAAU,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAA;IACpD,IAAI,UAAU,CAAC,aAAa;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;IAClE,IAAI,UAAU,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAA;IAEhD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACnD,CAAC;AAED;;;GAGG;AACH,SAAS,eAAe,CAAC,UAAyB;IACjD,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC9C,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC5C,IAAI,IAAI,IAAI,KAAK;QAAE,OAAO,GAAG,IAAI,IAAI,KAAK,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IACjE,OAAO,IAAI,IAAI,KAAK,CAAA;AACrB,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Golden eval-set validator (Phase 1 task #9 in the plan).
|
|
7
|
+
*
|
|
8
|
+
* The golden set is hand-labeled ground truth for the neural classifier. Each entry must carry
|
|
9
|
+
* components whose surface forms actually occur in `raw` — otherwise the entry will silently rot
|
|
10
|
+
* the eval signal. This module:
|
|
11
|
+
*
|
|
12
|
+
* - Defines `GoldenEntry` (schema check).
|
|
13
|
+
* - Loads `.jsonl` files (one entry per line).
|
|
14
|
+
* - Validates every entry: schema shape, ComponentTag membership, reachability of each component in
|
|
15
|
+
* `raw` via the same `reconcileComponents` helper alignment uses.
|
|
16
|
+
* - Returns a structured report of per-entry errors so the CLI / CI surface can act on it.
|
|
17
|
+
*
|
|
18
|
+
* The 1000-entry target (500 US + 500 FR) is a human task. This module catches the regressions that
|
|
19
|
+
* creep in over time as new entries land.
|
|
20
|
+
*/
|
|
21
|
+
import { type ComponentTag } from "@mailwoman/core/types";
|
|
22
|
+
/**
 * One hand-labeled record of a golden `.jsonl` file, in the shape `parseGoldenLine` returns.
 */
export interface GoldenEntry {
    /** The address text the components are labeled against. */
    raw: string;
    /** Labeled component values, keyed by `ComponentTag`; every tag is optional. */
    components: {
        [Tag in ComponentTag]?: string;
    };
    /** Country code of the entry. */
    country: string;
    /** Constant discriminator marking the row as golden data. */
    source: "golden";
    /** Optional free-form annotation. */
    notes?: string;
}
|
|
30
|
+
/**
 * A single validation failure, tied to the file and line it was found on.
 */
export interface GoldenIssue {
    /** Path of the `.jsonl` file containing the offending entry. */
    file: string;
    /** Line number of the offending entry within `file`. */
    line: number;
    /** Human-readable description of what was wrong. */
    reason: string;
}
|
|
36
|
+
/**
 * Summary returned by `validateGoldenDir` for a whole golden directory.
 */
export interface GoldenReport {
    /** Total number of entries seen across all files. */
    entries: number;
    /** Number of files that were validated. */
    files: number;
    /** Every issue found, across all files. */
    issues: Array<GoldenIssue>;
}
|
|
42
|
+
/** Parse a single JSONL line into a `GoldenEntry`. Throws on schema violations. */
|
|
43
|
+
export declare function parseGoldenLine(line: string): GoldenEntry;
|
|
44
|
+
/** Check that every component in `entry` appears in `entry.raw` (reconciliation-equivalent). */
|
|
45
|
+
export declare function unreachableComponents(entry: GoldenEntry): ComponentTag[];
|
|
46
|
+
/** Validate one `.jsonl` file end-to-end, returning a list of issues. */
|
|
47
|
+
export declare function validateGoldenFile(path: string): Promise<GoldenIssue[]>;
|
|
48
|
+
/** Validate every `.jsonl` in a golden directory. */
|
|
49
|
+
export declare function validateGoldenDir(dir: string): Promise<GoldenReport>;
|
|
50
|
+
//# sourceMappingURL=golden.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"golden.d.ts","sourceRoot":"","sources":["../../src/golden.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAkB,KAAK,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAOzE,2CAA2C;AAC3C,MAAM,WAAW,WAAW;IAC3B,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;IACjD,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,QAAQ,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;CACd;AAED,oCAAoC;AACpC,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,MAAM,CAAA;CACd;AAED,iDAAiD;AACjD,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,WAAW,EAAE,CAAA;CACrB;AAED,mFAAmF;AACnF,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,CAyBzD;AAED,gGAAgG;AAChG,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,WAAW,GAAG,YAAY,EAAE,CAOxE;AAED,yEAAyE;AACzE,wBAAsB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAsB7E;AAED,qDAAqD;AACrD,wBAAsB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAY1E"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Golden eval-set validator (Phase 1 task #9 in the plan).
|
|
7
|
+
*
|
|
8
|
+
* The golden set is hand-labeled ground truth for the neural classifier. Each entry must carry
|
|
9
|
+
* components whose surface forms actually occur in `raw` — otherwise the entry will silently rot
|
|
10
|
+
* the eval signal. This module:
|
|
11
|
+
*
|
|
12
|
+
* - Defines `GoldenEntry` (schema check).
|
|
13
|
+
* - Loads `.jsonl` files (one entry per line).
|
|
14
|
+
* - Validates every entry: schema shape, ComponentTag membership, reachability of each component in
|
|
15
|
+
* `raw` via the same `reconcileComponents` helper alignment uses.
|
|
16
|
+
* - Returns a structured report of per-entry errors so the CLI / CI surface can act on it.
|
|
17
|
+
*
|
|
18
|
+
* The 1000-entry target (500 US + 500 FR) is a human task. This module catches the regressions that
|
|
19
|
+
* creep in over time as new entries land.
|
|
20
|
+
*/
|
|
21
|
+
import { COMPONENT_TAGS } from "@mailwoman/core/types";
|
|
22
|
+
import { readdir, readFile } from "node:fs/promises";
|
|
23
|
+
import { extname, join } from "node:path";
|
|
24
|
+
import { reconcileComponents } from "./format.js";
|
|
25
|
+
// Membership set built from COMPONENT_TAGS (presumably the full list of valid ComponentTag
// names — confirm in @mailwoman/core); used to reject unknown component keys.
const TAG_SET = new Set(COMPONENT_TAGS);
|
|
26
|
+
/**
 * Parse a single JSONL line into a `GoldenEntry`. Throws on schema violations.
 *
 * Checks, in order:
 *
 * 1. The line is valid JSON and its top-level value is a plain object.
 * 2. `raw` is a non-empty string.
 * 3. `country` is two upper-case ASCII letters.
 * 4. `source` is exactly `"golden"`.
 * 5. `components`, when present and non-null, is a plain object whose keys are all in `TAG_SET`
 *    and whose values are all non-empty strings. A missing or `null` `components` becomes `{}`.
 *
 * @param {string} line One line of a golden `.jsonl` file.
 * @returns A `GoldenEntry`; `notes` is `undefined` unless the input carried a string `notes`.
 * @throws {SyntaxError} From `JSON.parse` when the line is not valid JSON.
 * @throws {Error} When any of the schema checks above fails.
 */
export function parseGoldenLine(line) {
    const obj = JSON.parse(line);
    // `null`, arrays and primitives are valid JSON but not entries; reject them with a clear
    // message instead of failing on a property access.
    if (obj === null || typeof obj !== "object" || Array.isArray(obj)) {
        throw new Error("entry must be a JSON object");
    }
    if (typeof obj.raw !== "string" || obj.raw.length === 0) {
        throw new Error("missing/empty raw");
    }
    if (typeof obj.country !== "string" || !/^[A-Z]{2}$/u.test(obj.country)) {
        throw new Error(`country must be ISO 3166-1 alpha-2 (got ${JSON.stringify(obj.country)})`);
    }
    if (obj.source !== "golden") {
        throw new Error(`source must be "golden" (got ${JSON.stringify(obj.source)})`);
    }
    const components = obj.components ?? {};
    // Without this guard a number, boolean or empty array would have no entries to check and
    // would be returned as the entry's `components`.
    if (typeof components !== "object" || Array.isArray(components)) {
        throw new Error("components must be an object");
    }
    for (const [tag, value] of Object.entries(components)) {
        if (!TAG_SET.has(tag)) {
            throw new Error(`unknown ComponentTag: ${tag}`);
        }
        if (typeof value !== "string" || value.length === 0) {
            throw new Error(`components.${tag} must be a non-empty string`);
        }
    }
    return {
        raw: obj.raw,
        components,
        country: obj.country,
        source: "golden",
        notes: typeof obj.notes === "string" ? obj.notes : undefined,
    };
}
|
|
54
|
+
/**
 * List the tags of `entry.components` that `reconcileComponents` does not keep for `entry.raw`,
 * i.e. the components whose value could not be found in the raw string.
 *
 * @returns The missing tags, in the key order of `entry.components`; empty when all are reachable.
 */
export function unreachableComponents(entry) {
    const { components, raw } = entry;
    const reachable = reconcileComponents(components, raw);
    return Object.keys(components).filter((tag) => !(tag in reachable));
}
|
|
64
|
+
/**
 * Validate one `.jsonl` file end-to-end and collect everything that is wrong with it.
 *
 * The file is read as UTF-8 and split on `\n`. Blank lines are skipped but still counted, so
 * reported line numbers are 1-based positions in the file. A line yields an issue when
 * `parseGoldenLine` throws (the error message becomes the reason) or when
 * `unreachableComponents` reports tags missing from `raw`.
 *
 * @param {string} path Path of the file to validate.
 * @returns A promise of the issues found, in file order; empty when the file is clean.
 */
export async function validateGoldenFile(path) {
    const contents = await readFile(path, "utf8");
    const issues = [];
    contents.split("\n").forEach((rawLine, index) => {
        const line = rawLine.trim();
        if (!line) {
            return;
        }
        const report = (reason) => {
            issues.push({ file: path, line: index + 1, reason });
        };
        try {
            const missing = unreachableComponents(parseGoldenLine(line));
            if (missing.length > 0) {
                report(`components not reachable in raw: ${missing.join(", ")}`);
            }
        }
        catch (err) {
            report(err.message);
        }
    });
    return issues;
}
|
|
90
|
+
/**
 * Validate every `.jsonl` file directly inside a golden directory.
 *
 * Files are processed one at a time in sorted name order. Each is run through
 * `validateGoldenFile`, then read again to count its non-blank lines as entries (lines that fail
 * validation are counted too).
 *
 * @param {string} dir Directory to scan (not recursive).
 * @returns A promise of `{ entries, files, issues }`: total non-blank lines, number of `.jsonl`
 *   files, and all issues in file order.
 */
export async function validateGoldenDir(dir) {
    const names = await readdir(dir);
    const files = names.filter((name) => extname(name) === ".jsonl").sort();
    const issues = [];
    let entries = 0;
    for (const name of files) {
        const fullPath = join(dir, name);
        issues.push(...(await validateGoldenFile(fullPath)));
        const text = await readFile(fullPath, "utf8");
        for (const row of text.split("\n")) {
            if (row.trim()) {
                entries += 1;
            }
        }
    }
    return { entries, files: files.length, issues };
}
|
|
104
|
+
//# sourceMappingURL=golden.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"golden.js","sourceRoot":"","sources":["../../src/golden.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,cAAc,EAAqB,MAAM,uBAAuB,CAAA;AACzE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AACpD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAA;AAEjD,MAAM,OAAO,GAAG,IAAI,GAAG,CAAS,cAAmC,CAAC,CAAA;AAyBpE,mFAAmF;AACnF,MAAM,UAAU,eAAe,CAAC,IAAY;IAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAmD,CAAA;IAC9E,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAA;IACrC,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QACzE,MAAM,IAAI,KAAK,CAAC,2CAA2C,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC3F,CAAC;IACD,IAAI,GAAG,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,gCAAgC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;IAC/E,CAAC;IACD,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,UAAU,IAAI,EAAE,CAA4B,CAAA;IACpE,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACjD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,EAAE,CAAC,CAAA;QAClE,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,6BAA6B,CAAC,CAAA;QAC9D,CAAC;IACF,CAAC;IACD,OAAO;QACN,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,UAAU,EAAE,UAAuC;QACnD,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,QAAQ;QAChB,KAAK,EAAE,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;KAC5D,CAAA;AACF,CAAC;AAED,gGAAgG;AAChG,MAAM,UAAU,qBAAqB,CAAC,KAAkB;IACvD,MAAM,UAAU,GAAG,mBAAmB,CAAC,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAA;IACnE,MAAM,OAAO,GAAmB,EAAE,CAAA;IAClC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAmB,EAAE,CAAC;QACnE,IAAI,CAAC,CAAC,GAAG,IAAI,UAAU,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC5C,CAAC;IACD,OAAO,OAAO,CAAA;AACf,CAAC;AAED,yEAAyE;AACzE,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,IAAY;IACp
D,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC9B,MAAM,MAAM,GAAkB,EAAE,CAAA;IAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAA;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAQ;QACnB,IAAI,CAAC;YACJ,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,CAAA;YACnC,MAAM,WAAW,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAA;YAChD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,CAAC,GAAG,CAAC;oBACX,MAAM,EAAE,oCAAoC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;iBACpE,CAAC,CAAA;YACH,CAAC;QACF,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,EAAE,MAAM,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAA;QACzE,CAAC;IACF,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED,qDAAqD;AACrD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,GAAW;IAClD,MAAM,KAAK,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAA;IAChF,MAAM,MAAM,GAAkB,EAAE,CAAA;IAChC,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAA;QAChC,MAAM,UAAU,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAA;QACrD,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAA;QAC1B,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;QAC7C,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAA;IAC3D,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,CAAA;AAChD,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
export * from "./adapter.js";
|
|
7
|
+
export * from "./adapters/index.js";
|
|
8
|
+
export * from "./align.js";
|
|
9
|
+
export * from "./build.js";
|
|
10
|
+
export * from "./format.js";
|
|
11
|
+
export * from "./golden.js";
|
|
12
|
+
export * from "./parquet.js";
|
|
13
|
+
export * from "./runner.js";
|
|
14
|
+
export * from "./split.js";
|
|
15
|
+
export * from "./synthesize.js";
|
|
16
|
+
export * from "./tokenize.js";
|
|
17
|
+
export * from "./types.js";
|
|
18
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
|
package/out/src/index.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
export * from "./adapter.js";
|
|
7
|
+
export * from "./adapters/index.js";
|
|
8
|
+
export * from "./align.js";
|
|
9
|
+
export * from "./build.js";
|
|
10
|
+
export * from "./format.js";
|
|
11
|
+
export * from "./golden.js";
|
|
12
|
+
export * from "./parquet.js";
|
|
13
|
+
export * from "./runner.js";
|
|
14
|
+
export * from "./split.js";
|
|
15
|
+
export * from "./synthesize.js";
|
|
16
|
+
export * from "./tokenize.js";
|
|
17
|
+
export * from "./types.js";
|
|
18
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
7
|
+
* `sdk/parquet/` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0).
|
|
8
|
+
*/
|
|
9
|
+
export * from "./reader.js";
|
|
10
|
+
export * from "./schema.js";
|
|
11
|
+
export * from "./writer.js";
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
7
|
+
* `sdk/parquet/` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0).
|
|
8
|
+
*/
|
|
9
|
+
export * from "./reader.js";
|
|
10
|
+
export * from "./schema.js";
|
|
11
|
+
export * from "./writer.js";
|
|
12
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed wrapper around `@dsnp/parquetjs`'s `ParquetReader` that narrows the row-iterator generic to
|
|
7
|
+
* a user-supplied record type and adds `AsyncDisposable` support so `await using` cleans up the
|
|
8
|
+
* envelope reader without an explicit `close()`.
|
|
9
|
+
*
|
|
10
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
11
|
+
* `sdk/parquet/reader.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). Two
|
|
12
|
+
* trims relative to the original: (a) removed the
|
|
13
|
+
* `@isp.nexus/core/polyfills/promises/withResolvers` import — Node 22 has it native; (b) replaced
|
|
14
|
+
* the `PathBuilderLike` (path-ts) type on `openFile` with the plain `string | URL` the
|
|
15
|
+
* `@dsnp/parquetjs` envelope reader accepts directly.
|
|
16
|
+
*/
|
|
17
|
+
import { ParquetReader as BaseParquetReader } from "@dsnp/parquetjs";
|
|
18
|
+
import type { BufferReaderOptions } from "@dsnp/parquetjs/dist/lib/bufferReader.js";
|
|
19
|
+
import { ParquetEnvelopeReader } from "@dsnp/parquetjs/dist/lib/reader.js";
|
|
20
|
+
import { type ParquetRecordLike, ParquetSchema } from "./schema.js";
|
|
21
|
+
/** A typed Parquet reader, wrapping the base Parquet reader. */
|
|
22
|
+
export declare class ParquetReader<T extends ParquetRecordLike> extends BaseParquetReader implements AsyncDisposable {
|
|
23
|
+
/** Type-level narrowing of the schema to `T`; the compiled class defines no `schema` field of its own. */
schema: ParquetSchema<T>;
|
|
24
|
+
/** Opens an envelope reader for `filePath`, then delegates to `openEnvelopeReader`. */
static openFile<T extends ParquetRecordLike>(filePath: string | URL, options?: BufferReaderOptions): Promise<ParquetReader<T>>;
|
|
25
|
+
/** Opens an envelope reader over an in-memory `buffer`, then delegates to `openEnvelopeReader`. */
static openBuffer<T extends ParquetRecordLike>(buffer: Buffer, options?: BufferReaderOptions): Promise<ParquetReader<T>>;
|
|
26
|
+
/** Uses `opts.metadata` when supplied; otherwise reads the header and footer, closing the envelope reader if either read fails. */
static openEnvelopeReader<T extends ParquetRecordLike>(envelopeReader: ParquetEnvelopeReader, opts?: BufferReaderOptions): Promise<ParquetReader<T>>;
|
|
27
|
+
/** Delegates to the base reader's async iterator; only the yielded row type is narrowed to `T`. */
[Symbol.asyncIterator](): AsyncGenerator<T, void, unknown>;
|
|
28
|
+
/** Resolves with the result of `this.close()`, which is what lets `await using` release the reader. */
[Symbol.asyncDispose](): Promise<void>;
|
|
29
|
+
/** Named alias that calls `[Symbol.asyncDispose]()`. */
dispose(): Promise<void>;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=reader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reader.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACpE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0CAA0C,CAAA;AACnF,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,EAAE,KAAK,iBAAiB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAEnE,gEAAgE;AAChE,qBAAa,aAAa,CAAC,CAAC,SAAS,iBAAiB,CAAE,SAAQ,iBAAkB,YAAW,eAAe;IACnG,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAA;WAEV,QAAQ,CAAC,CAAC,SAAS,iBAAiB,EACzD,QAAQ,EAAE,MAAM,GAAG,GAAG,EACtB,OAAO,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;WAMN,UAAU,CAAC,CAAC,SAAS,iBAAiB,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB;WAMrF,kBAAkB,CAAC,CAAC,SAAS,iBAAiB,EACnE,cAAc,EAAE,qBAAqB,EACrC,IAAI,CAAC,EAAE,mBAAmB;IAkBX,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,cAAc,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC;IAI7D,CAAC,MAAM,CAAC,YAAY,CAAC;IAIrB,OAAO;CAGpB"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed wrapper around `@dsnp/parquetjs`'s `ParquetReader` that narrows the row-iterator generic to
|
|
7
|
+
* a user-supplied record type and adds `AsyncDisposable` support so `await using` cleans up the
|
|
8
|
+
* envelope reader without an explicit `close()`.
|
|
9
|
+
*
|
|
10
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
11
|
+
* `sdk/parquet/reader.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). Two
|
|
12
|
+
* trims relative to the original: (a) removed the
|
|
13
|
+
* `@isp.nexus/core/polyfills/promises/withResolvers` import — Node 22 has it native; (b) replaced
|
|
14
|
+
* the `PathBuilderLike` (path-ts) type on `openFile` with the plain `string | URL` the
|
|
15
|
+
* `@dsnp/parquetjs` envelope reader accepts directly.
|
|
16
|
+
*/
|
|
17
|
+
import { ParquetReader as BaseParquetReader } from "@dsnp/parquetjs";
|
|
18
|
+
import { ParquetEnvelopeReader } from "@dsnp/parquetjs/dist/lib/reader.js";
|
|
19
|
+
import { ParquetSchema } from "./schema.js";
|
|
20
|
+
/** A typed Parquet reader, wrapping the base Parquet reader. */
|
|
21
|
+
export class ParquetReader extends BaseParquetReader {
  /**
   * Open a Parquet file and wrap it in a typed reader.
   *
   * @param {string | URL} filePath - Location of the Parquet file.
   * @param {object} [options] - Buffer-reader options forwarded to the envelope reader and the reader.
   * @returns {Promise<ParquetReader>} A reader positioned over the opened file.
   */
  static async openFile(filePath, options) {
    // A `URL` is handed over untouched: stringifying it produces a `file://...` href, which the
    // filesystem layer would treat as a literal (and non-existent) path. Every other input keeps
    // the original `toString()` normalisation.
    const target = filePath instanceof URL ? filePath : filePath.toString();
    const envelopeReader = await ParquetEnvelopeReader.openFile(target, options);
    return ParquetReader.openEnvelopeReader(envelopeReader, options);
  }

  /**
   * Open a Parquet payload held in memory and wrap it in a typed reader.
   *
   * @param {Buffer} buffer - The complete Parquet file contents.
   * @param {object} [options] - Buffer-reader options forwarded to the envelope reader and the reader.
   * @returns {Promise<ParquetReader>} A reader over the buffer.
   */
  static async openBuffer(buffer, options) {
    const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options);
    return this.openEnvelopeReader(envelopeReader, options);
  }

  /**
   * Build a reader on top of an already-open envelope reader.
   *
   * When `opts.metadata` is supplied it is used as-is and nothing is read from the envelope.
   * Otherwise the header and footer are read; if either read fails the envelope reader is closed
   * and the read failure is rethrown.
   *
   * @param {ParquetEnvelopeReader} envelopeReader - The open envelope reader to wrap.
   * @param {object} [opts] - Buffer-reader options; may carry pre-parsed `metadata`.
   * @returns {Promise<ParquetReader>} The typed reader.
   */
  static async openEnvelopeReader(envelopeReader, opts) {
    if (opts?.metadata) {
      return new ParquetReader(opts.metadata, envelopeReader, opts);
    }
    try {
      await envelopeReader.readHeader();
      const metadata = await envelopeReader.readFooter();
      return new ParquetReader(metadata, envelopeReader, opts);
    } catch (err) {
      try {
        await envelopeReader.close();
      } catch {
        // Closing is best-effort cleanup here; the read failure below is the error worth reporting,
        // so a secondary close failure must not replace it.
      }
      throw err;
    }
  }

  /** Iterate rows by delegating to the base reader's async iterator. */
  [Symbol.asyncIterator]() {
    return super[Symbol.asyncIterator]();
  }

  /** Close the reader; this is the hook `await using` invokes. */
  async [Symbol.asyncDispose]() {
    return this.close();
  }

  /** Explicitly-named equivalent of `[Symbol.asyncDispose]()`. */
  async dispose() {
    return this[Symbol.asyncDispose]();
  }
}
|
|
54
|
+
//# sourceMappingURL=reader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reader.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/reader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,EAA0B,aAAa,EAAE,MAAM,aAAa,CAAA;AAEnE,gEAAgE;AAChE,MAAM,OAAO,aAA2C,SAAQ,iBAAiB;IAGhF,MAAM,CAAU,KAAK,CAAC,QAAQ,CAC7B,QAAsB,EACtB,OAA6B;QAE7B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAA;QAEzF,OAAO,aAAa,CAAC,kBAAkB,CAAI,cAAc,EAAE,OAAO,CAAC,CAAA;IACpE,CAAC;IAED,MAAM,CAAU,KAAK,CAAC,UAAU,CAA8B,MAAc,EAAE,OAA6B;QAC1G,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAE9E,OAAO,IAAI,CAAC,kBAAkB,CAAI,cAAc,EAAE,OAAO,CAAC,CAAA;IAC3D,CAAC;IAED,MAAM,CAAU,KAAK,CAAC,kBAAkB,CACvC,cAAqC,EACrC,IAA0B;QAE1B,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;YACpB,OAAO,IAAI,aAAa,CAAI,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;QACjE,CAAC;QAED,IAAI,CAAC;YACJ,MAAM,cAAc,CAAC,UAAU,EAAE,CAAA;YAEjC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,UAAU,EAAE,CAAA;YAElD,OAAO,IAAI,aAAa,CAAI,QAAQ,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;QAC5D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,cAAc,CAAC,KAAK,EAAE,CAAA;YAC5B,MAAM,GAAG,CAAA;QACV,CAAC;IACF,CAAC;IAEe,CAAC,MAAM,CAAC,aAAa,CAAC;QACrC,OAAO,KAAK,CAAC,MAAM,CAAC,aAAa,CAAC,EAAsC,CAAA;IACzE,CAAC;IAEM,KAAK,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACjC,OAAO,IAAI,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC;IAEM,KAAK,CAAC,OAAO;QACnB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;IACnC,CAAC;CACD"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed wrapper around `@dsnp/parquetjs`'s schema definition. Adds:
|
|
7
|
+
*
|
|
8
|
+
* - `ParquetSchema<T>`: a generic class narrowing the base schema's `schema` property to a
|
|
9
|
+
* field-by-field typed dict.
|
|
10
|
+
* - `ParquetSchemaDefinitionCache`: an LRU lookup so hot paths that compute the same schema
|
|
11
|
+
* repeatedly pay the cost once. Implements `Disposable` so `using` works.
|
|
12
|
+
* - `createBloomFilters<T>`: helper that takes a schema and a list of columns and returns the
|
|
13
|
+
* `@dsnp/parquetjs`-shaped bloom-filter spec array.
|
|
14
|
+
*
|
|
15
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
+
* `sdk/parquet/schema.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). Two
|
|
17
|
+
* trims relative to the original: (a) `ParquetSchemaDefinitionCache`'s LRU generics use
|
|
18
|
+
* `ParquetRecordLike` instead of `any` to satisfy this project's eslint config; (b)
|
|
19
|
+
* `Symbol.dispose` is sync (the original was async, but `Disposable`'s contract is sync — async
|
|
20
|
+
* cleanup belongs on `AsyncDisposable`).
|
|
21
|
+
*/
|
|
22
|
+
import { ParquetSchema as BaseParquetSchema } from "@dsnp/parquetjs";
|
|
23
|
+
import type { createSBBFParams as BloomFilterCreation } from "@dsnp/parquetjs/dist/lib/bloomFilterIO/bloomFilterWriter.js";
|
|
24
|
+
import type { FieldDefinition } from "@dsnp/parquetjs/dist/lib/declare.js";
|
|
25
|
+
import { LRUCache } from "lru-cache";
|
|
26
|
+
/** A Parquet record-like object: a record with string keys whose values are typed `unknown`. JSON-serializability is the stated intent but is not enforced by this type. */
|
|
27
|
+
export type ParquetRecordLike = {
|
|
28
|
+
// `unknown` already admits `undefined`, so the `| undefined` is redundant; any value type-checks here.
[key: string]: unknown | undefined;
|
|
29
|
+
};
|
|
30
|
+
/** Typed Parquet schema definition. */
|
|
31
|
+
export type ParquetSchemaDefinition<T = ParquetRecordLike> = {
|
|
32
|
+
// One `FieldDefinition` per string-named key of `T`; `Extract<keyof T, string>` drops number and symbol keys.
[field in Extract<keyof T, string>]: FieldDefinition;
|
|
33
|
+
};
|
|
34
|
+
/** Typed Parquet schema. */
|
|
35
|
+
export declare class ParquetSchema<T> extends BaseParquetSchema {
|
|
36
|
+
// Re-declares the `schema` property with the `T`-keyed definition type; this is a type-level narrowing only.
schema: ParquetSchemaDefinition<T>;
|
|
37
|
+
}
|
|
38
|
+
/** Given a Parquet schema and a list of columns, create a list of Bloom filters for those columns. */
|
|
39
|
+
// Returns one `{ column }` spec per requested column, in input order; the implementation throws an `Error` when a requested column has no entry in `parquetSchemaDef`.
export declare function createBloomFilters<T>(parquetSchemaDef: ParquetSchemaDefinition<T>, columns: Extract<keyof T, string>[]): BloomFilterCreation[];
|
|
40
|
+
// LRU cache mapping a schema-definition object to its `ParquetSchema`.
// NOTE(review): the definition object itself is the key, so lookups are presumably by reference rather than by structural equality; confirm against lru-cache.
export declare class ParquetSchemaDefinitionCache extends LRUCache<ParquetSchemaDefinition<ParquetRecordLike>, ParquetSchema<ParquetRecordLike>> implements Disposable {
|
|
41
|
+
/** `max` is forwarded to the LRU cache as its `max` option; the implementation defaults it to 1000. */
constructor(max?: number);
|
|
42
|
+
/** Returns the cached schema for `schemaDef`, constructing and storing a new `ParquetSchema` on a miss. */
findOrCreateSchema<T extends ParquetRecordLike>(schemaDef: ParquetSchemaDefinition<T>): ParquetSchema<T>;
|
|
43
|
+
/** Synchronous disposal: calls `clear()` on the cache so `using` works. */
[Symbol.dispose](): void;
|
|
44
|
+
}
|
|
45
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACpE,OAAO,KAAK,EAAE,gBAAgB,IAAI,mBAAmB,EAAE,MAAM,6DAA6D,CAAA;AAC1H,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qCAAqC,CAAA;AAC1E,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAA;AAEpC,iGAAiG;AACjG,MAAM,MAAM,iBAAiB,GAAG;IAC/B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,CAAA;CAClC,CAAA;AAED,uCAAuC;AACvC,MAAM,MAAM,uBAAuB,CAAC,CAAC,GAAG,iBAAiB,IAAI;KAC3D,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,GAAG,eAAe;CACpD,CAAA;AAED,4BAA4B;AAC5B,qBAAa,aAAa,CAAC,CAAC,CAAE,SAAQ,iBAAiB;IAC9C,MAAM,EAAE,uBAAuB,CAAC,CAAC,CAAC,CAAA;CAC1C;AAED,sGAAsG;AACtG,wBAAgB,kBAAkB,CAAC,CAAC,EACnC,gBAAgB,EAAE,uBAAuB,CAAC,CAAC,CAAC,EAC5C,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,EAAE,yBAanC;AAED,qBAAa,4BACZ,SAAQ,QAAQ,CAAC,uBAAuB,CAAC,iBAAiB,CAAC,EAAE,aAAa,CAAC,iBAAiB,CAAC,CAC7F,YAAW,UAAU;gBAET,GAAG,SAAO;IAIf,kBAAkB,CAAC,CAAC,SAAS,iBAAiB,EAAE,SAAS,EAAE,uBAAuB,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC;IAYxG,CAAC,MAAM,CAAC,OAAO,CAAC;CAGvB"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed wrapper around `@dsnp/parquetjs`'s schema definition. Adds:
|
|
7
|
+
*
|
|
8
|
+
* - `ParquetSchema<T>`: a generic class narrowing the base schema's `schema` property to a
|
|
9
|
+
* field-by-field typed dict.
|
|
10
|
+
* - `ParquetSchemaDefinitionCache`: an LRU lookup so hot paths that compute the same schema
|
|
11
|
+
* repeatedly pay the cost once. Implements `Disposable` so `using` works.
|
|
12
|
+
* - `createBloomFilters<T>`: helper that takes a schema and a list of columns and returns the
|
|
13
|
+
* `@dsnp/parquetjs`-shaped bloom-filter spec array.
|
|
14
|
+
*
|
|
15
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
+
* `sdk/parquet/schema.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). Two
|
|
17
|
+
* trims relative to the original: (a) `ParquetSchemaDefinitionCache`'s LRU generics use
|
|
18
|
+
* `ParquetRecordLike` instead of `any` to satisfy this project's eslint config; (b)
|
|
19
|
+
* `Symbol.dispose` is sync (the original was async, but `Disposable`'s contract is sync — async
|
|
20
|
+
* cleanup belongs on `AsyncDisposable`).
|
|
21
|
+
*/
|
|
22
|
+
import { ParquetSchema as BaseParquetSchema } from "@dsnp/parquetjs";
|
|
23
|
+
import { LRUCache } from "lru-cache";
|
|
24
|
+
/** Typed Parquet schema. */
|
|
25
|
+
export class ParquetSchema extends BaseParquetSchema {
// No runtime members: the class body is empty, and the typed `schema` narrowing exists only in the declaration file.
|
|
26
|
+
}
|
|
27
|
+
/** Given a Parquet schema and a list of columns, create a list of Bloom filters for those columns. */
|
|
28
|
+
/**
 * Build the bloom-filter spec list for a set of columns.
 *
 * @param {object} parquetSchemaDef - Schema definition keyed by column name.
 * @param {Iterable<string>} columns - Names of the columns that should get a bloom filter.
 * @returns {{ column: string }[]} One `{ column }` spec per requested column, in input order.
 * @throws {Error} When a requested column is not defined on the schema definition itself.
 */
export function createBloomFilters(parquetSchemaDef, columns) {
  const bloomFilters = [];
  for (const column of columns) {
    // Require an own property: a bare truthiness lookup also matches names inherited from
    // Object.prototype ("toString", "constructor", ...), which are not schema columns.
    // The truthiness test is kept so a key explicitly set to null/undefined is still rejected.
    const isDefined = Object.hasOwn(parquetSchemaDef, column) && Boolean(parquetSchemaDef[column]);
    if (!isDefined) {
      throw new Error(`Bloom filter column ${column} not found in Parquet schema`);
    }
    bloomFilters.push({ column });
  }
  return bloomFilters;
}
|
|
38
|
+
/**
 * LRU-backed lookup from a schema definition to its constructed `ParquetSchema`, so callers that
 * present the same definition repeatedly reuse one schema instance. Supports `using` via
 * `Symbol.dispose`.
 */
export class ParquetSchemaDefinitionCache extends LRUCache {
  /**
   * @param {number} [max=1000] - Forwarded to the LRU cache as its `max` option.
   */
  constructor(max = 1000) {
    const cacheOptions = { max };
    super(cacheOptions);
  }

  /**
   * Return the schema cached for `schemaDef`, building and storing one on a miss.
   *
   * @param {object} schemaDef - The schema definition; the object itself is the cache key.
   * @returns {ParquetSchema} The cached or freshly constructed schema.
   */
  findOrCreateSchema(schemaDef) {
    const cached = this.get(schemaDef);
    if (cached) {
      return cached;
    }

    const created = new ParquetSchema(schemaDef);
    this.set(schemaDef, created);
    return created;
  }

  /** Synchronous disposal: empties the cache. */
  [Symbol.dispose]() {
    this.clear();
  }
}
|
|
55
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAGpE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAA;AAYpC,4BAA4B;AAC5B,MAAM,OAAO,aAAiB,SAAQ,iBAAiB;CAEtD;AAED,sGAAsG;AACtG,MAAM,UAAU,kBAAkB,CACjC,gBAA4C,EAC5C,OAAmC;IAEnC,MAAM,YAAY,GAA0B,EAAE,CAAA;IAE9C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,8BAA8B,CAAC,CAAA;QAC7E,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;IAC9B,CAAC;IAED,OAAO,YAAY,CAAA;AACpB,CAAC;AAED,MAAM,OAAO,4BACZ,SAAQ,QAAsF;IAG9F,YAAY,GAAG,GAAG,IAAI;QACrB,KAAK,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;IACf,CAAC;IAEM,kBAAkB,CAA8B,SAAqC;QAC3F,MAAM,GAAG,GAAG,SAAuD,CAAA;QACnE,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAiC,CAAA;QAE1D,IAAI,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,GAAG,IAAI,aAAa,CAAI,SAAS,CAAC,CAAA;YACxC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,MAA0C,CAAC,CAAA;QAC1D,CAAC;QAED,OAAO,MAAM,CAAA;IACd,CAAC;IAEM,CAAC,MAAM,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,KAAK,EAAE,CAAA;IACb,CAAC;CACD"}
|