@mailwoman/corpus 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapters/usgov-imls-pls/adapter.d.ts.map +1 -1
- package/out/src/adapters/usgov-imls-pls/adapter.js +4 -1
- package/out/src/adapters/usgov-imls-pls/adapter.js.map +1 -1
- package/out/src/align.d.ts +30 -1
- package/out/src/align.d.ts.map +1 -1
- package/out/src/align.js +99 -8
- package/out/src/align.js.map +1 -1
- package/out/src/build.d.ts.map +1 -1
- package/out/src/build.js +29 -2
- package/out/src/build.js.map +1 -1
- package/out/src/parquet.d.ts +12 -2
- package/out/src/parquet.d.ts.map +1 -1
- package/out/src/parquet.js +36 -1
- package/out/src/parquet.js.map +1 -1
- package/out/src/split.d.ts +6 -0
- package/out/src/split.d.ts.map +1 -1
- package/out/src/split.js +7 -0
- package/out/src/split.js.map +1 -1
- package/out/src/synthesize.d.ts +10 -0
- package/out/src/synthesize.d.ts.map +1 -1
- package/out/src/synthesize.js +34 -1
- package/out/src/synthesize.js.map +1 -1
- package/out/src/types.d.ts +22 -1
- package/out/src/types.d.ts.map +1 -1
- package/package.json +3 -3
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-imls-pls/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,yBAAyB,mBAAmB,CAAA;AACzD,eAAO,MAAM,8BAA8B,kBAAkB,CAAA;AAsB7D,wBAAgB,yBAAyB,IAAI,aAAa,
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-imls-pls/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,yBAAyB,mBAAmB,CAAA;AACzD,eAAO,MAAM,8BAA8B,kBAAkB,CAAA;AAsB7D,wBAAgB,yBAAyB,IAAI,aAAa,CAyFzD;AAED,eAAO,MAAM,mBAAmB,eAA8B,CAAA"}
|
|
@@ -78,7 +78,10 @@ export function createUsgovImlsPlsAdapter() {
|
|
|
78
78
|
locality: city,
|
|
79
79
|
region: state.abbreviation,
|
|
80
80
|
postcode: zip,
|
|
81
|
-
|
|
81
|
+
// #552: no subregion — US postal addresses don't surface the county, so emitting
|
|
82
|
+
// subregion creates a phantom component with no raw-span to align to, quarantining
|
|
83
|
+
// ~21% of rows. The county is still available in the source CSV; it just isn't
|
|
84
|
+
// a postal-surface component here.
|
|
82
85
|
};
|
|
83
86
|
const streetPart = [split.house_number, split.street].filter(Boolean).join(" ").trim();
|
|
84
87
|
const raw = [
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-imls-pls/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,yBAAyB,GAAG,gBAAgB,CAAA;AACzD,MAAM,CAAC,MAAM,8BAA8B,GAAG,eAAe,CAAA;AAE7D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAY9D,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,MAAM,UAAU,yBAAyB;IACxC,OAAO;QACN,EAAE,EAAE,yBAAyB;QAC7B,cAAc,EAAE,8BAA8B;QAC9C,WAAW,EAAE,yFAAyF;QAEtG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,0DAA0D,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC1F,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAsC,EAAE,CAAC;oBACnE,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7
C,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAEzC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAEvC,MAAM,KAAK,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAA;oBAChD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,CAAC,CAAA;oBACnC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GAA+B;wBAC9C,KAAK,EAAE,OAAO;wBACd,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,QAAQ,EAAE,GAAG;wBACb,
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-imls-pls/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,yBAAyB,GAAG,gBAAgB,CAAA;AACzD,MAAM,CAAC,MAAM,8BAA8B,GAAG,eAAe,CAAA;AAE7D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAY9D,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,MAAM,UAAU,yBAAyB;IACxC,OAAO;QACN,EAAE,EAAE,yBAAyB;QAC7B,cAAc,EAAE,8BAA8B;QAC9C,WAAW,EAAE,yFAAyF;QAEtG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,0DAA0D,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC1F,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAsC,EAAE,CAAC;oBACnE,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7
C,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAEzC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAEvC,MAAM,KAAK,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAA;oBAChD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,CAAC,CAAA;oBACnC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GAA+B;wBAC9C,KAAK,EAAE,OAAO;wBACd,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,QAAQ,EAAE,GAAG;wBACb,iFAAiF;wBACjF,mFAAmF;wBACnF,+EAA+E;wBAC/E,mCAAmC;qBACnC,CAAA;oBAED,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;oBACtF,MAAM,GAAG,GAAG;wBACX,OAAO;wBACP,UAAU;wBACV,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;qBAC7E;yBACC,MAAM,CAAC,OAAO,CAAC;yBACf,IAAI,CAAC,IAAI,CAAC,CAAA;oBAEZ,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,OAAO;wBACvB,CAAC,CAAC,GAAG,yBAAyB,IAAI,OAAO,EAAE;wBAC3C,CAAC,CAAC,cAAc,CAAC,yBAAyB,EAAE,OAAO,CAAC,CAAA;oBAErD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,yBAAyB;wBACjC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,8BAA8B;qBACvC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,mBAAmB,GAAG,yBAAyB,EAAE,CAAA"}
|
package/out/src/align.d.ts
CHANGED
|
@@ -18,13 +18,21 @@
|
|
|
18
18
|
* 4. For each token: walk the list of component spans, pick the one whose span contains the token's
|
|
19
19
|
* character range. First token in a component span → `B-<tag>`; subsequent tokens →
|
|
20
20
|
* `I-<tag>`; no overlap → `O`.
|
|
21
|
+
* 5. Emit the located char spans verbatim as `span_starts[]` / `span_ends[]` / `span_tags[]` (the
|
|
22
|
+
* v0.5.0 char-offset format, #519). The token quantization in step 4 is the part the v0.5.0
|
|
23
|
+
* rebuild deletes; during the transition both representations ride on every labeled row.
|
|
21
24
|
*
|
|
22
|
-
*
|
|
25
|
+
* Structural invariants the function preserves (the span ones loudly — a violation throws rather
|
|
26
|
+
* than quarantines, because it indicates a bug here, not bad source data):
|
|
23
27
|
*
|
|
24
28
|
* - `tokens.length === labels.length` always.
|
|
25
29
|
* - Each component contributes at most one contiguous BIO run (no `B-tag … O … I-tag` gaps). This is
|
|
26
30
|
* enforced by greedy first-match span assignment + ordered token iteration.
|
|
31
|
+
* - The span triple is sorted ascending by start and non-overlapping.
|
|
32
|
+
* - `raw` is NFC-normalized (asserted per row; a non-NFC raw makes char offsets ambiguous downstream
|
|
33
|
+
* — NFD `é` occupies two code units where NFC `é` occupies one — and silently so).
|
|
27
34
|
*/
|
|
35
|
+
import type { ComponentTag } from "@mailwoman/core/types";
|
|
28
36
|
import { type Tokenizer } from "./tokenize.js";
|
|
29
37
|
import type { CanonicalRow, LabeledRow, QuarantinedRow } from "./types.js";
|
|
30
38
|
/** Options for `alignRow`. */
|
|
@@ -53,6 +61,27 @@ export type AlignmentResult = {
|
|
|
53
61
|
kind: "quarantined";
|
|
54
62
|
row: QuarantinedRow;
|
|
55
63
|
};
|
|
64
|
+
/**
|
|
65
|
+
* One located char-offset label span over a row's `raw` ([start, end) in UTF-16 code units). The
|
|
66
|
+
* element type behind the parallel `span_starts[]`/`span_ends[]`/`span_tags[]` triple on
|
|
67
|
+
* `LabeledRow` (#519).
|
|
68
|
+
*/
|
|
69
|
+
export interface ComponentSpan {
|
|
70
|
+
tag: ComponentTag;
|
|
71
|
+
start: number;
|
|
72
|
+
end: number;
|
|
73
|
+
}
|
|
56
74
|
/** Align a single row. */
|
|
57
75
|
export declare function alignRow(row: CanonicalRow, opts?: AlignOptions): AlignmentResult;
|
|
76
|
+
/**
|
|
77
|
+
* Enforce the #519 span-triple invariants — in-bounds, sorted ascending by start, non-overlapping —
|
|
78
|
+
* loudly.
|
|
79
|
+
*
|
|
80
|
+
* For `alignRow`: `claimed`-span bookkeeping in `locateSpan` already makes overlap impossible and
|
|
81
|
+
* the caller sorts, so a violation here is a bug in this file, not bad source data: throw (naming
|
|
82
|
+
* the row) rather than quarantine, so the corruption can't ride into a corpus. Exported for every
|
|
83
|
+
* OTHER span producer (`composeAdversarialRow`'s offset arithmetic, future synthesis paths) — any
|
|
84
|
+
* code that emits the triple without going through `alignRow` must pass its output through this.
|
|
85
|
+
*/
|
|
86
|
+
export declare function assertSpanInvariants(spans: readonly ComponentSpan[], row: Pick<CanonicalRow, "raw" | "source" | "source_id">): void;
|
|
58
87
|
//# sourceMappingURL=align.d.ts.map
|
package/out/src/align.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"align.d.ts","sourceRoot":"","sources":["../../src/align.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"align.d.ts","sourceRoot":"","sources":["../../src/align.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,KAAK,EAAY,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEnE,OAAO,EAAuC,KAAK,SAAS,EAAE,MAAM,eAAe,CAAA;AACnF,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAE1E,8BAA8B;AAC9B,MAAM,WAAW,YAAY;IAC5B,6DAA6D;IAC7D,SAAS,CAAC,EAAE,SAAS,CAAA;IAErB;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAA;IAExB;;;OAGG;IACH,eAAe,CAAC,EAAE,OAAO,CAAA;CACzB;AAED,4DAA4D;AAC5D,MAAM,MAAM,eAAe,GAAG;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,GAAG,EAAE,UAAU,CAAA;CAAE,GAAG;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,GAAG,EAAE,cAAc,CAAA;CAAE,CAAA;AAEjH;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC7B,GAAG,EAAE,YAAY,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAED,0BAA0B;AAC1B,wBAAgB,QAAQ,CAAC,GAAG,EAAE,YAAY,EAAE,IAAI,GAAE,YAAiB,GAAG,eAAe,CAgFpF;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,SAAS,aAAa,EAAE,EAC/B,GAAG,EAAE,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,QAAQ,GAAG,WAAW,CAAC,GACrD,IAAI,CAwBN"}
|
package/out/src/align.js
CHANGED
|
@@ -18,12 +18,19 @@
|
|
|
18
18
|
* 4. For each token: walk the list of component spans, pick the one whose span contains the token's
|
|
19
19
|
* character range. First token in a component span → `B-<tag>`; subsequent tokens →
|
|
20
20
|
* `I-<tag>`; no overlap → `O`.
|
|
21
|
+
* 5. Emit the located char spans verbatim as `span_starts[]` / `span_ends[]` / `span_tags[]` (the
|
|
22
|
+
* v0.5.0 char-offset format, #519). The token quantization in step 4 is the part the v0.5.0
|
|
23
|
+
* rebuild deletes; during the transition both representations ride on every labeled row.
|
|
21
24
|
*
|
|
22
|
-
*
|
|
25
|
+
* Structural invariants the function preserves (the span ones loudly — a violation throws rather
|
|
26
|
+
* than quarantines, because it indicates a bug here, not bad source data):
|
|
23
27
|
*
|
|
24
28
|
* - `tokens.length === labels.length` always.
|
|
25
29
|
* - Each component contributes at most one contiguous BIO run (no `B-tag … O … I-tag` gaps). This is
|
|
26
30
|
* enforced by greedy first-match span assignment + ordered token iteration.
|
|
31
|
+
* - The span triple is sorted ascending by start and non-overlapping.
|
|
32
|
+
* - `raw` is NFC-normalized (asserted per row; a non-NFC raw makes char offsets ambiguous downstream
|
|
33
|
+
* — NFD `é` occupies two code units where NFC `é` occupies one — and silently so).
|
|
27
34
|
*/
|
|
28
35
|
import { distance as levenshteinDistance } from "fastest-levenshtein";
|
|
29
36
|
import { whitespaceTokenizer } from "./tokenize.js";
|
|
@@ -35,33 +42,99 @@ export function alignRow(row, opts = {}) {
|
|
|
35
42
|
if (!row.raw) {
|
|
36
43
|
return { kind: "quarantined", row: { row, reason: "raw-empty" } };
|
|
37
44
|
}
|
|
45
|
+
// #519 NFC handling — relaxed from a hard throw to normalization (2026-06-12, DeepSeek-validated):
|
|
46
|
+
// one non-NFC row (e.g. a non-Latin name variant like "দক্ষিণ কোরিয়া") must not crash a multi-hour
|
|
47
|
+
// build. Normalize `raw` AND every component value to NFC, compute spans over the NFC raw, and
|
|
48
|
+
// store the NFC raw — preserving the #519 single-normalization-form principle while keeping the row.
|
|
49
|
+
const raw = row.raw.normalize("NFC");
|
|
50
|
+
const components = { ...row.components };
|
|
51
|
+
for (const key in components) {
|
|
52
|
+
const v = components[key];
|
|
53
|
+
if (typeof v === "string")
|
|
54
|
+
components[key] = v.normalize("NFC");
|
|
55
|
+
}
|
|
38
56
|
const componentSpans = [];
|
|
39
57
|
const claimed = [];
|
|
40
|
-
const haystack = caseInsensitive ?
|
|
41
|
-
|
|
58
|
+
const haystack = caseInsensitive ? raw.toLowerCase() : raw;
|
|
59
|
+
// Longest value first: a short component must not claim a word that a longer, more specific
|
|
60
|
+
// component owns ("Alaska Regional Dr, Alaska" — region "Alaska" stealing the street's first
|
|
61
|
+
// word quarantined the street; pilot2's residual class). Emit order is unaffected — spans are
|
|
62
|
+
// re-sorted by start below.
|
|
63
|
+
const entries = Object.entries(components).sort((a, b) => (b[1]?.length ?? 0) - (a[1]?.length ?? 0));
|
|
64
|
+
for (const [tag, value] of entries) {
|
|
42
65
|
if (!value)
|
|
43
66
|
continue;
|
|
44
67
|
const needle = caseInsensitive ? value.toLowerCase() : value;
|
|
45
|
-
const span = locateSpan({ haystack, needle, raw
|
|
68
|
+
const span = locateSpan({ haystack, needle, raw, claimed, maxEditDistance });
|
|
46
69
|
if (!span) {
|
|
47
70
|
return {
|
|
48
71
|
kind: "quarantined",
|
|
49
72
|
row: { row, reason: `component-not-found:${tag}` },
|
|
50
73
|
};
|
|
51
74
|
}
|
|
75
|
+
// Defensive bounds quarantine (2026-06-12): locateSpan's fuzzy/boundary logic can over-run the
|
|
76
|
+
// raw by a code unit on some non-Latin / combining-mark strings (e.g. Bengali name variants),
|
|
77
|
+
// yielding a span past the end. An out-of-bounds offset can never be a valid char span —
|
|
78
|
+
// quarantine the row rather than crash assertSpanInvariants and take down a multi-hour build.
|
|
79
|
+
// (If this class proves large in the quarantine report, locateSpan's boundary logic needs a
|
|
80
|
+
// combining-mark fix to KEEP these non-Latin rows.)
|
|
81
|
+
if (span.start < 0 || span.end > raw.length || span.start >= span.end) {
|
|
82
|
+
return {
|
|
83
|
+
kind: "quarantined",
|
|
84
|
+
row: { row, reason: `span-out-of-bounds:${tag}` },
|
|
85
|
+
};
|
|
86
|
+
}
|
|
52
87
|
componentSpans.push({ tag, start: span.start, end: span.end });
|
|
53
88
|
claimed.push([span.start, span.end]);
|
|
54
89
|
}
|
|
55
90
|
componentSpans.sort((a, b) => a.start - b.start);
|
|
56
|
-
|
|
91
|
+
assertSpanInvariants(componentSpans, { ...row, raw });
|
|
92
|
+
const tokens = tokenizer.tokenize(raw);
|
|
57
93
|
const labels = labelTokens(tokens, componentSpans);
|
|
58
94
|
const labeled = {
|
|
59
95
|
...row,
|
|
96
|
+
raw,
|
|
97
|
+
components,
|
|
60
98
|
tokens: tokens.map((t) => t.text),
|
|
61
99
|
labels,
|
|
100
|
+
// The v0.5.0 char-offset triple (#519): the located spans, emitted verbatim. The token
|
|
101
|
+
// quantization above is what the rebuild deletes; both ride during the transition.
|
|
102
|
+
span_starts: componentSpans.map((s) => s.start),
|
|
103
|
+
span_ends: componentSpans.map((s) => s.end),
|
|
104
|
+
span_tags: componentSpans.map((s) => s.tag),
|
|
62
105
|
};
|
|
63
106
|
return { kind: "labeled", row: labeled };
|
|
64
107
|
}
|
|
108
|
+
/**
 * Enforce the #519 span-triple invariants — integer offsets, in-bounds, sorted ascending by
 * start, non-overlapping — loudly.
 *
 * For `alignRow`: `claimed`-span bookkeeping in `locateSpan` already makes overlap impossible and
 * the caller sorts, so a violation here is a bug in this file, not bad source data: throw (naming
 * the row) rather than quarantine, so the corruption can't ride into a corpus. Exported for every
 * OTHER span producer (`composeAdversarialRow`'s offset arithmetic, future synthesis paths) — any
 * code that emits the triple without going through `alignRow` must pass its output through this.
 *
 * Offsets are indices into `row.raw` in UTF-16 code units, so they must be integers. A fractional
 * or non-finite offset is rejected explicitly: a fractional value satisfies every ordering
 * comparison below and would otherwise pass unnoticed.
 *
 * @param spans Spans to validate, each `{ tag, start, end }` describing `[start, end)`. Expected
 *   to be already sorted by `start`; this function checks the order, it does not sort.
 * @param row The row the spans index into; only `raw`, `source` and `source_id` are read (the
 *   latter two only to name the row in error messages).
 * @returns Nothing. Returns normally when every invariant holds (including for an empty list).
 * @throws {Error} On the first span whose offsets are not integers, that is out of bounds or
 *   empty (`start >= end`), that starts before its predecessor, or that overlaps its predecessor.
 */
export function assertSpanInvariants(spans, row) {
    const origin = `(source=${row.source}, source_id=${row.source_id})`;
    const describe = (span) => `${span.tag}@[${span.start}, ${span.end})`;
    const rawLength = row.raw.length;
    let prev;
    for (const s of spans) {
        // Checked first: NaN and fractional offsets are not valid code-unit indices at all.
        if (!Number.isInteger(s.start) || !Number.isInteger(s.end)) {
            throw new Error(`alignRow: span offsets must be integers ${origin}: ${describe(s)}`);
        }
        if (s.start < 0 || s.start >= s.end || s.end > rawLength) {
            throw new Error(`alignRow: span out of bounds ${origin}: ` +
                `${describe(s)} over raw of length ${rawLength}`);
        }
        if (prev !== undefined) {
            if (s.start < prev.start) {
                throw new Error(`alignRow: spans not sorted ${origin}: ` +
                    `${describe(prev)} precedes ${describe(s)}`);
            }
            // Adjacent spans (s.start === prev.end) are allowed; only a true overlap throws.
            if (s.start < prev.end) {
                throw new Error(`alignRow: spans overlap ${origin}: ` +
                    `${describe(prev)} overlaps ${describe(s)}`);
            }
        }
        prev = s;
    }
}
|
|
65
138
|
/**
|
|
66
139
|
* Locate `needle` in `haystack` (both already normalized for case if requested), preferring
|
|
67
140
|
* verbatim substring match. Falls back to a fuzzy window scan when verbatim fails and
|
|
@@ -74,17 +147,28 @@ function locateSpan(args) {
|
|
|
74
147
|
const { haystack, needle, claimed, maxEditDistance } = args;
|
|
75
148
|
if (needle.length === 0)
|
|
76
149
|
return undefined;
|
|
77
|
-
// Pass 1: verbatim substring
|
|
150
|
+
// Pass 1: verbatim substring. Word-boundary-aligned matches are PREFERRED over intra-word ones
|
|
151
|
+
// — leftmost-substring alone let a short value claim the inside of an earlier word (region "AK"
|
|
152
|
+
// matched inside "Umak"/"Lake", scrambling every later span; caught by the v0.5.0 pilot build).
|
|
153
|
+
// Intra-word matches stay allowed as the fallback because they are load-bearing for affix
|
|
154
|
+
// supervision (street_suffix "straße" inside "Hauptstraße" has no boundary-aligned occurrence —
|
|
155
|
+
// sub-word spans are the point of the char-offset format).
|
|
156
|
+
let intraWord;
|
|
78
157
|
let from = 0;
|
|
79
158
|
while (true) {
|
|
80
159
|
const idx = haystack.indexOf(needle, from);
|
|
81
160
|
if (idx < 0)
|
|
82
161
|
break;
|
|
83
162
|
const end = idx + needle.length;
|
|
84
|
-
if (!overlapsClaimed(idx, end, claimed))
|
|
85
|
-
|
|
163
|
+
if (!overlapsClaimed(idx, end, claimed)) {
|
|
164
|
+
if (isBoundaryAligned(haystack, idx, end))
|
|
165
|
+
return { start: idx, end };
|
|
166
|
+
intraWord ??= { start: idx, end };
|
|
167
|
+
}
|
|
86
168
|
from = idx + 1;
|
|
87
169
|
}
|
|
170
|
+
if (intraWord)
|
|
171
|
+
return intraWord;
|
|
88
172
|
if (maxEditDistance <= 0)
|
|
89
173
|
return undefined;
|
|
90
174
|
// Pass 2: fuzzy sliding-window. Walk over candidate windows of length `needle.length`
|
|
@@ -102,6 +186,13 @@ function locateSpan(args) {
|
|
|
102
186
|
}
|
|
103
187
|
return undefined;
|
|
104
188
|
}
|
|
189
|
+
/**
 * A character that belongs to a word: a letter, a number, or a combining mark. Marks are included
 * because a vowel sign or accent that follows a letter is part of the same word, so a match edge
 * that sits between a base letter and its mark is not a word boundary.
 */
const WORD_CHAR = /[\p{L}\p{N}\p{M}]/u;
/**
 * Both needle edges sit on word boundaries of the haystack (string edges count as boundaries).
 *
 * The neighbours on each side are read as whole code points rather than single UTF-16 code units:
 * a supplementary-plane letter is stored as a surrogate pair, and a lone surrogate never matches
 * `\p{L}`, so reading one code unit would misreport such a letter as a boundary.
 *
 * @param haystack The text that was searched.
 * @param start Index (UTF-16 code units) of the first code unit of the match.
 * @param end Index (UTF-16 code units) just past the last code unit of the match.
 * @returns `true` when neither the code point immediately before `start` nor the one at `end` is
 *   a word character; an index at or beyond either end of the string counts as a boundary.
 */
function isBoundaryAligned(haystack, start, end) {
    // Code point that ends right before `start` ("" at the string edge).
    let prevChar = "";
    if (start > 0) {
        prevChar = haystack[start - 1] ?? "";
        const trail = haystack.charCodeAt(start - 1);
        const lead = start > 1 ? haystack.charCodeAt(start - 2) : NaN;
        const isPair = trail >= 0xdc00 && trail <= 0xdfff && lead >= 0xd800 && lead <= 0xdbff;
        if (isPair)
            prevChar = haystack.slice(start - 2, start);
    }
    // Code point that begins at `end` ("" at the string edge); codePointAt joins surrogate pairs.
    const nextCode = end < haystack.length ? haystack.codePointAt(end) : undefined;
    const nextChar = nextCode === undefined ? "" : String.fromCodePoint(nextCode);
    return !WORD_CHAR.test(prevChar) && !WORD_CHAR.test(nextChar);
}
|
|
105
196
|
function overlapsClaimed(start, end, claimed) {
|
|
106
197
|
for (const [a, b] of claimed) {
|
|
107
198
|
if (start < b && a < end)
|
package/out/src/align.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"align.js","sourceRoot":"","sources":["../../src/align.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"align.js","sourceRoot":"","sources":["../../src/align.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAGH,OAAO,EAAE,QAAQ,IAAI,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AACrE,OAAO,EAAE,mBAAmB,EAAkC,MAAM,eAAe,CAAA;AAsCnF,0BAA0B;AAC1B,MAAM,UAAU,QAAQ,CAAC,GAAiB,EAAE,OAAqB,EAAE;IAClE,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,mBAAmB,EAAE,CAAA;IACzD,MAAM,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,CAAC,CAAA;IACjD,MAAM,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,IAAI,CAAA;IAEpD,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,CAAA;IAClE,CAAC;IAED,mGAAmG;IACnG,oGAAoG;IACpG,+FAA+F;IAC/F,qGAAqG;IACrG,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACpC,MAAM,UAAU,GAAG,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IACxC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,CAAC,GAAG,UAAU,CAAC,GAA8B,CAAC,CAAA;QACpD,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,UAAU,CAAC,GAA8B,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IAC3F,CAAC;IAED,MAAM,cAAc,GAAoB,EAAE,CAAA;IAC1C,MAAM,OAAO,GAA4B,EAAE,CAAA;IAE3C,MAAM,QAAQ,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,GAAG,CAAA;IAE1D,4FAA4F;IAC5F,6FAA6F;IAC7F,8FAA8F;IAC9F,4BAA4B;IAC5B,MAAM,OAAO,GAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAA+C,CAAC,IAAI,CAC7F,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,CACnD,CAAA;IACD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,OAAO,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,SAAQ;QAEpB,MAAM,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAA;QAC5D,MAAM,IAAI,GAAG,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAA;QAE5E,IAAI,CAAC,IAAI,EAAE,CAAC;YACX,OAAO;gBACN,IAAI,EAAE,aAAa;gBACnB,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,uBAAuB,GAAG,EAAE,EAAE;aAClD,CAAA;QACF,CAAC;QAED,+FAA+F;QAC/F,8FAA8F;QAC9F,yFAAyF;QACzF,8FAA8F;QAC9F,4FAA4F;QAC5F,oDAAoD;QACpD,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,
CAAC,GAAG,EAAE,CAAC;YACvE,OAAO;gBACN,IAAI,EAAE,aAAa;gBACnB,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,sBAAsB,GAAG,EAAE,EAAE;aACjD,CAAA;QACF,CAAC;QAED,cAAc,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;QAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;IACrC,CAAC;IAED,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IAChD,oBAAoB,CAAC,cAAc,EAAE,EAAE,GAAG,GAAG,EAAE,GAAG,EAAE,CAAC,CAAA;IACrD,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IACtC,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,EAAE,cAAc,CAAC,CAAA;IAElD,MAAM,OAAO,GAAe;QAC3B,GAAG,GAAG;QACN,GAAG;QACH,UAAU;QACV,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACjC,MAAM;QACN,uFAAuF;QACvF,mFAAmF;QACnF,WAAW,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAC/C,SAAS,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC3C,SAAS,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;KAC3C,CAAA;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAA;AACzC,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,oBAAoB,CACnC,KAA+B,EAC/B,GAAuD;IAEvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;QACnB,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACnE,MAAM,IAAI,KAAK,CACd,wCAAwC,GAAG,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,KAAK;gBAClF,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,wBAAwB,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,CACvE,CAAA;QACF,CAAC;QACD,IAAI,CAAC,KAAK,CAAC;YAAE,SAAQ;QACrB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAE,CAAA;QAC1B,IAAI,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CACd,sCAAsC,GAAG,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,KAAK;gBAChF,GAAG,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,KAAK,KAAK,C
AAC,CAAC,GAAG,GAAG,CACpF,CAAA;QACF,CAAC;QACD,IAAI,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CACd,mCAAmC,GAAG,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,KAAK;gBAC7E,GAAG,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,GAAG,CACpF,CAAA;QACF,CAAC;IACF,CAAC;AACF,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,UAAU,CAAC,IAMnB;IACA,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,IAAI,CAAA;IAC3D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAA;IAEzC,+FAA+F;IAC/F,gGAAgG;IAChG,gGAAgG;IAChG,0FAA0F;IAC1F,gGAAgG;IAChG,2DAA2D;IAC3D,IAAI,SAAqD,CAAA;IACzD,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,OAAO,IAAI,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAA;QAC1C,IAAI,GAAG,GAAG,CAAC;YAAE,MAAK;QAClB,MAAM,GAAG,GAAG,GAAG,GAAG,MAAM,CAAC,MAAM,CAAA;QAC/B,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC;YACzC,IAAI,iBAAiB,CAAC,QAAQ,EAAE,GAAG,EAAE,GAAG,CAAC;gBAAE,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,CAAA;YACrE,SAAS,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,CAAA;QAClC,CAAC;QACD,IAAI,GAAG,GAAG,GAAG,CAAC,CAAA;IACf,CAAC;IACD,IAAI,SAAS;QAAE,OAAO,SAAS,CAAA;IAE/B,IAAI,eAAe,IAAI,CAAC;QAAE,OAAO,SAAS,CAAA;IAE1C,sFAAsF;IACtF,sFAAsF;IACtF,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAA;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI,eAAe,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,OAAO,CAAC;YAAE,SAAQ;QAClD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAA;QACzC,IAAI,MAAM,KAAK,MAAM;YAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,GAAG,EAAE,CAAA,CAAC,+BAA+B;QACxF,MAAM,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QAC7C,IAAI,CAAC,IAAI,eAAe;YAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,GAAG,EAAE,CAAA;IAC5D,CAAC;IAED,OAAO,SAAS,CAAA;AACjB,CAAC;AAED,MAAM,SAAS,GAAG,eAAe,CAAA;AAEjC,mGAAmG;AACnG,SAAS,iBAAiB,CAAC,QAAgB,EAAE,KAAa,EAAE,GAAW;IACtE,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAE,CAAC,CAAA;IACnE,MA
AM,KAAK,GAAG,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAE,CAAC,CAAA;IACxE,OAAO,MAAM,IAAI,KAAK,CAAA;AACvB,CAAC;AAED,SAAS,eAAe,CAAC,KAAa,EAAE,GAAW,EAAE,OAAgC;IACpF,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG;YAAE,OAAO,IAAI,CAAA;IACtC,CAAC;IACD,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,MAA4B,EAAE,KAA+B;IACjF,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAA,CAAC,qBAAqB;IAExD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,IAAI,QAAQ,GAAa,GAAG,CAAA;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;YACnB,IAAI,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;gBAClD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACtB,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,EAAc,CAAA;oBACnC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAChB,CAAC;qBAAM,CAAC;oBACP,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,EAAc,CAAA;gBACpC,CAAC;gBACD,MAAK;YACN,CAAC;QACF,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IACnB,CAAC;IAED,OAAO,GAAG,CAAA;AACX,CAAC"}
|
package/out/src/build.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build.d.ts","sourceRoot":"","sources":["../../src/build.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AAQH,OAAO,EAAe,KAAK,aAAa,EAAE,MAAM,cAAc,CAAA;AAC9D,OAAO,EAAc,KAAK,kBAAkB,EAAE,MAAM,aAAa,CAAA;AACjE,OAAO,EAIN,KAAK,aAAa,EAElB,MAAM,YAAY,CAAA;AAEnB,OAAO,KAAK,EAAE,cAAc,EAAgB,aAAa,EAAc,MAAM,YAAY,CAAA;AAEzF,2CAA2C;AAC3C,MAAM,MAAM,UAAU,GAAG,aAAa,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,UAAU,CAAA;AAEjF,gDAAgD;AAChD,MAAM,WAAW,kBAAkB;IAClC,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAA;IAEjB,0FAA0F;IAC1F,aAAa,EAAE,MAAM,CAAA;IAErB;;;OAGG;IACH,QAAQ,CAAC,EAAE,SAAS,aAAa,EAAE,CAAA;IAEnC;;;OAGG;IACH,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAA;IAE7C,yFAAyF;IACzF,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,oDAAoD;IACpD,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAA;CACzD;AAED,qDAAqD;AACrD,MAAM,WAAW,mBAAmB;IACnC,cAAc,EAAE,MAAM,CAAA;IACtB,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,kBAAkB,EAAE,CAAA;IAC9B,gBAAgB,EAAE,MAAM,EAAE,CAAA;IAC1B,MAAM,EAAE;QAAE,MAAM,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;QAAC,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,CAAA;KAAE,CAAA;IAChF,MAAM,EAAE;QAAE,MAAM,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAA;IAC/D,gBAAgB,EAAE,MAAM,CAAA;IACxB,kBAAkB,EAAE,MAAM,CAAA;CAC1B;AAED;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,mBAAmB,CAAC,
|
|
1
|
+
{"version":3,"file":"build.d.ts","sourceRoot":"","sources":["../../src/build.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AAQH,OAAO,EAAe,KAAK,aAAa,EAAE,MAAM,cAAc,CAAA;AAC9D,OAAO,EAAc,KAAK,kBAAkB,EAAE,MAAM,aAAa,CAAA;AACjE,OAAO,EAIN,KAAK,aAAa,EAElB,MAAM,YAAY,CAAA;AAEnB,OAAO,KAAK,EAAE,cAAc,EAAgB,aAAa,EAAc,MAAM,YAAY,CAAA;AAEzF,2CAA2C;AAC3C,MAAM,MAAM,UAAU,GAAG,aAAa,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,UAAU,CAAA;AAEjF,gDAAgD;AAChD,MAAM,WAAW,kBAAkB;IAClC,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAA;IAEjB,0FAA0F;IAC1F,aAAa,EAAE,MAAM,CAAA;IAErB;;;OAGG;IACH,QAAQ,CAAC,EAAE,SAAS,aAAa,EAAE,CAAA;IAEnC;;;OAGG;IACH,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAA;IAE7C,yFAAyF;IACzF,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,oDAAoD;IACpD,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAA;CACzD;AAED,qDAAqD;AACrD,MAAM,WAAW,mBAAmB;IACnC,cAAc,EAAE,MAAM,CAAA;IACtB,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,kBAAkB,EAAE,CAAA;IAC9B,gBAAgB,EAAE,MAAM,EAAE,CAAA;IAC1B,MAAM,EAAE;QAAE,MAAM,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;QAAC,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,CAAA;KAAE,CAAA;IAChF,MAAM,EAAE;QAAE,MAAM,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAA;IAC/D,gBAAgB,EAAE,MAAM,CAAA;IACxB,kBAAkB,EAAE,MAAM,CAAA;CAC1B;AAED;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CA4JxF"}
|
package/out/src/build.js
CHANGED
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
* can `rm -rf intermediate/` after the build if disk is tight; the final `corpus-v<version>/` is
|
|
47
47
|
* self-contained.
|
|
48
48
|
*/
|
|
49
|
-
import { createReadStream, createWriteStream } from "node:fs";
|
|
49
|
+
import { createReadStream, createWriteStream, existsSync, readFileSync } from "node:fs";
|
|
50
50
|
import { mkdir, writeFile } from "node:fs/promises";
|
|
51
51
|
import { join } from "node:path";
|
|
52
52
|
import { createInterface } from "node:readline";
|
|
@@ -82,6 +82,21 @@ export async function buildCorpus(opts) {
|
|
|
82
82
|
opts.onProgress?.("adapter-run", `skipped ${adapter.id} (no input configured)`);
|
|
83
83
|
continue;
|
|
84
84
|
}
|
|
85
|
+
// Opt-in resume (MAILWOMAN_RESUME=1): if a complete per-adapter canonical.jsonl + MANIFEST.json
|
|
86
|
+
// already exist, reuse them instead of re-emitting. The MANIFEST is written only after the
|
|
87
|
+
// canonical is fully flushed, so its presence guarantees completeness; row order is identical,
|
|
88
|
+
// so downstream holdout-split determinism is preserved. Recovers an align-phase crash without
|
|
89
|
+
// redoing the (expensive) emit phase. Default (unset) re-emits, preserving correctness. (2026-06-12.)
|
|
90
|
+
const adapterDir = join(intermediateDir, adapter.id);
|
|
91
|
+
const cachedManifest = join(adapterDir, "MANIFEST.json");
|
|
92
|
+
if (process.env.MAILWOMAN_RESUME === "1" &&
|
|
93
|
+
existsSync(cachedManifest) &&
|
|
94
|
+
existsSync(join(adapterDir, "canonical.jsonl"))) {
|
|
95
|
+
const cached = JSON.parse(readFileSync(cachedManifest, "utf8"));
|
|
96
|
+
opts.onProgress?.("adapter-run", `resumed ${adapter.id} (reused ${cached.yielded} canonical rows)`);
|
|
97
|
+
adapterRuns.push(cached);
|
|
98
|
+
continue;
|
|
99
|
+
}
|
|
85
100
|
opts.onProgress?.("adapter-run", `running ${adapter.id}`);
|
|
86
101
|
const m = await runAdapter({
|
|
87
102
|
adapter,
|
|
@@ -125,7 +140,19 @@ export async function buildCorpus(opts) {
|
|
|
125
140
|
}
|
|
126
141
|
}
|
|
127
142
|
for (const r of fanned) {
|
|
128
|
-
|
|
143
|
+
let result;
|
|
144
|
+
try {
|
|
145
|
+
result = alignRow(r);
|
|
146
|
+
}
|
|
147
|
+
catch (err) {
|
|
148
|
+
// Last-resort robustness (2026-06-12): no single row may crash a multi-hour build.
|
|
149
|
+
// alignRow's targeted paths normalize/quarantine known issues with specific reasons;
|
|
150
|
+
// this catches any UNKNOWN throw (e.g. assertSpanInvariants on an unforeseen span
|
|
151
|
+
// shape) → quarantine + continue. A spike in `align-threw` reasons is a finding.
|
|
152
|
+
writeQuarantine(r, `align-threw:${err.message.slice(0, 160)}`);
|
|
153
|
+
quarantined++;
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
129
156
|
if (result.kind === "labeled") {
|
|
130
157
|
const split = splitForRow(result.row, holdouts);
|
|
131
158
|
labeledStreams[split].write(`${JSON.stringify(result.row)}\n`);
|
package/out/src/build.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build.js","sourceRoot":"","sources":["../../src/build.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AAEH,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAoB,MAAM,SAAS,CAAA;
|
|
1
|
+
{"version":3,"file":"build.js","sourceRoot":"","sources":["../../src/build.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AAEH,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,UAAU,EAAE,YAAY,EAAoB,MAAM,SAAS,CAAA;AACzG,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAA;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AACrC,OAAO,EAAE,WAAW,EAAsB,MAAM,cAAc,CAAA;AAC9D,OAAO,EAAE,UAAU,EAA2B,MAAM,aAAa,CAAA;AACjE,OAAO,EACN,eAAe,EACf,WAAW,EACX,mCAAmC,GAGnC,MAAM,YAAY,CAAA;AACnB,OAAO,EAAE,8BAA8B,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAgD/E;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAwB;IACzD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,sBAAsB,CAAC,IAAI,EAAE,CAAA;IAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,CAAA;IAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAA;IACnD,MAAM,QAAQ,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAEzC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAChD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,cAAc,CAAC,CAAA;IAC5D,MAAM,KAAK,CAAC,eAAe,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAEjD,mBAAmB;IACnB,MAAM,WAAW,GAAyB,EAAE,CAAA;IAC5C,MAAM,OAAO,GAAa,EAAE,CAAA;IAC5B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACrD,IAAI,CAAC,cAAc,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;YACxB,IAAI,CAAC,UAAU,EAAE,CAAC,aAAa,EAAE,WAAW,OAAO,CAAC,EAAE,wBAAwB,CAAC,CAAA;YAC/E,SAAQ;QACT,CAAC;QACD,gGAAgG;QAChG,2FAA2F;QAC3F,+FAA+F;QAC/F,8FAA8F;QAC9F,sGAAsG;QACtG,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,EAAE,OAAO,CAAC,EAAE,CAAC,CAAA;QACpD,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,CAAA;QACxD,IACC,OAAO,CAAC,GAAG,CAAC,gBAAgB,KAAK,GAAG;YACpC,UAAU,CAAC,cAAc,CAAC;YAC1B,UAAU,CAAC,IAAI,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAC,EAC9C,CAAC;YACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAuB,CAAA;YACrF,IAAI,CAAC,UAAU,EAAE,CAAC,aAAa,EAAE,WAAW,OAAO,CAAC,EAAE,YAAY,MAAM,CAAC,OAAO,
kBAAkB,CAAC,CAAA;YACnG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACxB,SAAQ;QACT,CAAC;QACD,IAAI,CAAC,UAAU,EAAE,CAAC,aAAa,EAAE,WAAW,OAAO,CAAC,EAAE,EAAE,CAAC,CAAA;QACzD,MAAM,CAAC,GAAG,MAAM,UAAU,CAAC;YAC1B,OAAO;YACP,cAAc;YACd,SAAS,EAAE,eAAe;YAC1B,aAAa,EAAE,IAAI,CAAC,aAAa;SACjC,CAAC,CAAA;QACF,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACpB,CAAC;IAED,yFAAyF;IACzF,8FAA8F;IAC9F,0FAA0F;IAC1F,0FAA0F;IAC1F,0EAA0E;IAC1E,MAAM,YAAY,GAA8B;QAC/C,KAAK,EAAE,IAAI,CAAC,eAAe,EAAE,qBAAqB,CAAC;QACnD,GAAG,EAAE,IAAI,CAAC,eAAe,EAAE,mBAAmB,CAAC;QAC/C,IAAI,EAAE,IAAI,CAAC,eAAe,EAAE,oBAAoB,CAAC;KACjD,CAAA;IACD,MAAM,cAAc,GAAmC;QACtD,KAAK,EAAE,iBAAiB,CAAC,YAAY,CAAC,KAAK,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAClE,GAAG,EAAE,iBAAiB,CAAC,YAAY,CAAC,GAAG,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAC9D,IAAI,EAAE,iBAAiB,CAAC,YAAY,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;KAChE,CAAA;IACD,MAAM,cAAc,GAAG,IAAI,CAAC,eAAe,EAAE,kBAAkB,CAAC,CAAA;IAChE,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,cAAc,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;IAEhF,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,MAAM,MAAM,GAA8B,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAA;IACvE,MAAM,QAAQ,GAAG,eAAe,EAAE,CAAA;IAElC,MAAM,eAAe,GAAG,CAAC,GAAiB,EAAE,MAAc,EAAQ,EAAE;QACnE,gBAAgB,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,IAAI,CAAC,CAAA;IAC/D,CAAC,CAAA;IAED,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACtC,IAAI,CAAC,UAAU,EAAE,CAAC,OAAO,EAAE,YAAY,UAAU,CAAC,UAAU,EAAE,CAAC,CAAA;QAC/D,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,WAAW,CAAe,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1E,MAAM,MAAM,GAAmB,CAAC,GAAG,CAAC,CAAA;YACpC,IAAI,UAAU,EAAE,CAAC;gBAChB,KAAK,MAAM,GAAG,IAAI,aAAa,CAAC,GAAG,EAAE,8BAA8B,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;oBACnF,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBACjB,CAAC;YACF,CAAC;YACD,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;gBACxB,IAAI,MAAmC,CAAA;gBACvC,IAAI,CAAC;oBACJ,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAA;gBACrB,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACd,mFAAmF;oBACnF,qFAAqF;oBACrF,kFAAkF;oBAClF,iFAAiF;oBACjF,eAAe,CAAC,CAAC,EAAE,eAAgB,GAAa,CA
AC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;oBACzE,WAAW,EAAE,CAAA;oBACb,SAAQ;gBACT,CAAC;gBACD,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;oBAC/B,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;oBAC/C,cAAc,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;oBAC9D,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;oBACf,OAAO,EAAE,CAAA;gBACV,CAAC;qBAAM,CAAC;oBACP,eAAe,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;oBACrC,WAAW,EAAE,CAAA;gBACd,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IAED,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC;QAAE,CAAC,CAAC,GAAG,EAAE,CAAA;IACtD,gBAAgB,CAAC,GAAG,EAAE,CAAA;IACtB,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,SAAS,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAA;IAEjG,sFAAsF;IACtF,uFAAuF;IACvF,yEAAyE;IACzE,IAAI,CAAC,UAAU,EAAE,CAAC,OAAO,EAAE,aAAa,OAAO,eAAe,CAAC,CAAA;IAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAA;IAChD,MAAM,WAAW,GAAG,MAAM,mCAAmC,CAAC;QAC7D,YAAY;QACZ,SAAS,EAAE,SAAS;QACpB,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,MAAM;QACN,QAAQ;KACR,CAAC,CAAA;IAEF,4FAA4F;IAC5F,0FAA0F;IAC1F,IAAI,CAAC,UAAU,EAAE,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAA;IACpD,MAAM,aAAa,GAAG,MAAM,WAAW,CACtC;QACC,KAAK,EAAE,WAAW,CAAa,YAAY,CAAC,KAAK,CAAC;QAClD,GAAG,EAAE,WAAW,CAAa,YAAY,CAAC,GAAG,CAAC;QAC9C,IAAI,EAAE,WAAW,CAAa,YAAY,CAAC,IAAI,CAAC;KAChD,EACD;QACC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,YAAY;KACZ,CACD,CAAA;IAED,yBAAyB;IACzB,IAAI,CAAC,UAAU,EAAE,CAAC,UAAU,EAAE,iCAAiC,CAAC,CAAA;IAChE,MAAM,QAAQ,GAAwB;QACrC,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,QAAQ;QACR,QAAQ,EAAE,WAAW;QACrB,gBAAgB,EAAE,OAAO;QACzB,MAAM,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE;QACzC,MAAM,EAAE,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,UAAU,EAAE,aAAa,CAAC,UAAU,EAAE;QAC9E,gBAAgB,EAAE,WAAW;QAC7B,kBAAkB,EAAE,OAAO;KAC3B,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACxG,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,KAAK,SAAS,CAA
C,CAAC,WAAW,CAAI,IAAY;IAC1C,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;IAC3D,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClE,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;QAC3B,IAAI,CAAC,OAAO;YAAE,SAAQ;QACtB,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAM,CAAA;IAC/B,CAAC;AACF,CAAC;AAED,SAAS,SAAS,CAAC,CAAc;IAChC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACtC,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;QACxB,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;IACxB,CAAC,CAAC,CAAA;AACH,CAAC"}
|
package/out/src/parquet.d.ts
CHANGED
|
@@ -52,6 +52,9 @@ export interface ParquetRow {
|
|
|
52
52
|
raw: string;
|
|
53
53
|
tokens: readonly string[];
|
|
54
54
|
labels: readonly string[];
|
|
55
|
+
span_starts: readonly number[];
|
|
56
|
+
span_ends: readonly number[];
|
|
57
|
+
span_tags: readonly string[];
|
|
55
58
|
country: string;
|
|
56
59
|
locale: string | null;
|
|
57
60
|
source: string;
|
|
@@ -63,7 +66,7 @@ export interface ParquetRow {
|
|
|
63
66
|
[key: string]: unknown;
|
|
64
67
|
}
|
|
65
68
|
/** Column names emitted into every shard. Matches `ParquetRow`. */
|
|
66
|
-
export declare const PARQUET_COLUMNS: readonly ["raw", "tokens", "labels", "country", "locale", "source", "source_id", "corpus_version", "license", "synth_method", "synth_base_id"];
|
|
69
|
+
export declare const PARQUET_COLUMNS: readonly ["raw", "tokens", "labels", "span_starts", "span_ends", "span_tags", "country", "locale", "source", "source_id", "corpus_version", "license", "synth_method", "synth_base_id"];
|
|
67
70
|
/**
|
|
68
71
|
* Parquet schema for `LabeledRow` per #18 §4. Optional fields use `optional: true`; repeated UTF8
|
|
69
72
|
* columns capture tokens/labels arrays. Compression is per-column SNAPPY.
|
|
@@ -106,7 +109,14 @@ export interface WriteShardsOptions {
|
|
|
106
109
|
* Splits with no rows can be omitted (or passed as an empty iterable); `writeShards` skips them.
|
|
107
110
|
*/
|
|
108
111
|
export type PerSplitRows = Partial<Record<SplitName, AsyncIterable<LabeledRow>>>;
|
|
109
|
-
/**
|
|
112
|
+
/**
|
|
113
|
+
* Project a labeled row to the Parquet schema.
|
|
114
|
+
*
|
|
115
|
+
* The span triple is REQUIRED here (#519): `alignRow` emits it on every labeled row, so a row
|
|
116
|
+
* arriving without it came from a producer that hasn't migrated — writing it would silently drop
|
|
117
|
+
* the v0.5.0 labels from the shard (the "builders before parquet = silent loss" hazard). Loud
|
|
118
|
+
* failure, naming the row, instead.
|
|
119
|
+
*/
|
|
110
120
|
export declare function rowToParquet(row: LabeledRow): ParquetRow;
|
|
111
121
|
/**
|
|
112
122
|
* Stream labeled rows into `.parquet` shards, one set of shards per split. Splits are processed
|
package/out/src/parquet.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAMH,OAAO,EAAiB,KAAK,uBAAuB,EAAE,MAAM,4BAA4B,CAAA;AACxF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE5C,sFAAsF;AACtF,eAAO,MAAM,cAAc,QAAS,CAAA;AAEpC,mFAAmF;AACnF,eAAO,MAAM,iBAAiB,EAAG,QAAiB,CAAA;AAElD;;;;GAIG;AACH,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,mEAAmE;AACnE,eAAO,MAAM,eAAe,
|
|
1
|
+
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAMH,OAAO,EAAiB,KAAK,uBAAuB,EAAE,MAAM,4BAA4B,CAAA;AACxF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE5C,sFAAsF;AACtF,eAAO,MAAM,cAAc,QAAS,CAAA;AAEpC,mFAAmF;AACnF,eAAO,MAAM,iBAAiB,EAAG,QAAiB,CAAA;AAElD;;;;GAIG;AACH,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,WAAW,EAAE,SAAS,MAAM,EAAE,CAAA;IAC9B,SAAS,EAAE,SAAS,MAAM,EAAE,CAAA;IAC5B,SAAS,EAAE,SAAS,MAAM,EAAE,CAAA;IAC5B,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,mEAAmE;AACnE,eAAO,MAAM,eAAe,yLAelB,CAAA;AAEV;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,uBAAuB,CAAC,UAAU,CAkBlE,CAAA;AAED,sDAAsD;AACtD,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,SAAS,CAAA;IAChB,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,SAAS,CAAA;IACjB,WAAW,EAAE,OAAO,iBAAiB,CAAA;IACrC,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,eAAe,EAAE,MAAM,CAAA;IACvB,cAAc,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,aAAa;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,cAAc,EAAE,MAAM,CAAA;IACtB,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,eAAe,EAAE,CAAA;IACzB,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACjC,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IAClC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IAEjB,yEAAyE;IACzE,aAAa,EAAE,MAAM,CAAA;IAErB,6EAA6E;IAC7E,YAAY,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,SAAS,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;AAEhF;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,UAAU,GAAG,UAAU,CAkCxD;AA2BD;;;;;;;;GAQG;AACH,wBAAsB,WAAW,CAAC,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,aAAa,CAAC,CAwF1G"}
|
package/out/src/parquet.js
CHANGED
|
@@ -50,6 +50,9 @@ export const PARQUET_COLUMNS = [
|
|
|
50
50
|
"raw",
|
|
51
51
|
"tokens",
|
|
52
52
|
"labels",
|
|
53
|
+
"span_starts",
|
|
54
|
+
"span_ends",
|
|
55
|
+
"span_tags",
|
|
53
56
|
"country",
|
|
54
57
|
"locale",
|
|
55
58
|
"source",
|
|
@@ -67,6 +70,12 @@ export const LABELED_ROW_SCHEMA = {
|
|
|
67
70
|
raw: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
68
71
|
tokens: { type: "UTF8", repeated: true, compression: SHARD_COMPRESSION },
|
|
69
72
|
labels: { type: "UTF8", repeated: true, compression: SHARD_COMPRESSION },
|
|
73
|
+
// v0.5.0 char-offset label spans (#519): parallel arrays over `raw` (UTF-16 code units,
|
|
74
|
+
// [start, end) exclusive-end, sorted, non-overlapping). INT32 — raw is a short address string,
|
|
75
|
+
// and INT32 round-trips as `number` where parquetjs INT64 would surface bigint.
|
|
76
|
+
span_starts: { type: "INT32", repeated: true, compression: SHARD_COMPRESSION },
|
|
77
|
+
span_ends: { type: "INT32", repeated: true, compression: SHARD_COMPRESSION },
|
|
78
|
+
span_tags: { type: "UTF8", repeated: true, compression: SHARD_COMPRESSION },
|
|
70
79
|
country: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
71
80
|
locale: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
72
81
|
source: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
@@ -76,12 +85,35 @@ export const LABELED_ROW_SCHEMA = {
|
|
|
76
85
|
synth_method: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
77
86
|
synth_base_id: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
78
87
|
};
|
|
79
|
-
/**
|
|
88
|
+
/**
|
|
89
|
+
* Project a labeled row to the Parquet schema.
|
|
90
|
+
*
|
|
91
|
+
* The span triple is REQUIRED here (#519): `alignRow` emits it on every labeled row, so a row
|
|
92
|
+
* arriving without it came from a producer that hasn't migrated — writing it would silently drop
|
|
93
|
+
* the v0.5.0 labels from the shard (the "builders before parquet = silent loss" hazard). Loud
|
|
94
|
+
* failure, naming the row, instead.
|
|
95
|
+
*/
|
|
80
96
|
export function rowToParquet(row) {
|
|
97
|
+
const { span_starts, span_ends, span_tags } = row;
|
|
98
|
+
if (span_starts === undefined || span_ends === undefined || span_tags === undefined) {
|
|
99
|
+
throw new Error(`rowToParquet: row is missing the char-offset span triple (#519) — ` +
|
|
100
|
+
`span_starts=${span_starts !== undefined} span_ends=${span_ends !== undefined} span_tags=${span_tags !== undefined} ` +
|
|
101
|
+
`(source=${row.source}, source_id=${row.source_id}). ` +
|
|
102
|
+
`Every parquet-bound row must carry span_starts/span_ends/span_tags; ` +
|
|
103
|
+
`producers that emit tokens/labels only have not migrated to the v0.5.0 format.`);
|
|
104
|
+
}
|
|
105
|
+
if (span_starts.length !== span_ends.length || span_starts.length !== span_tags.length) {
|
|
106
|
+
throw new Error(`rowToParquet: span triple arrays are not parallel — ` +
|
|
107
|
+
`starts=${span_starts.length} ends=${span_ends.length} tags=${span_tags.length} ` +
|
|
108
|
+
`(source=${row.source}, source_id=${row.source_id})`);
|
|
109
|
+
}
|
|
81
110
|
return {
|
|
82
111
|
raw: row.raw,
|
|
83
112
|
tokens: row.tokens,
|
|
84
113
|
labels: row.labels,
|
|
114
|
+
span_starts,
|
|
115
|
+
span_ends,
|
|
116
|
+
span_tags,
|
|
85
117
|
country: row.country,
|
|
86
118
|
locale: row.locale ?? null,
|
|
87
119
|
source: row.source,
|
|
@@ -102,6 +134,9 @@ function appendShape(row) {
|
|
|
102
134
|
raw: row.raw,
|
|
103
135
|
tokens: row.tokens,
|
|
104
136
|
labels: row.labels,
|
|
137
|
+
span_starts: row.span_starts,
|
|
138
|
+
span_ends: row.span_ends,
|
|
139
|
+
span_tags: row.span_tags,
|
|
105
140
|
country: row.country,
|
|
106
141
|
source: row.source,
|
|
107
142
|
source_id: row.source_id,
|
package/out/src/parquet.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACzD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,aAAa,EAAgC,MAAM,4BAA4B,CAAA;AAIxF,sFAAsF;AACtF,MAAM,CAAC,MAAM,cAAc,GAAG,MAAM,CAAA;AAEpC,mFAAmF;AACnF,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAiB,CAAA;
|
|
1
|
+
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACzD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,aAAa,EAAgC,MAAM,4BAA4B,CAAA;AAIxF,sFAAsF;AACtF,MAAM,CAAC,MAAM,cAAc,GAAG,MAAM,CAAA;AAEpC,mFAAmF;AACnF,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAiB,CAAA;AAyBlD,mEAAmE;AACnE,MAAM,CAAC,MAAM,eAAe,GAAG;IAC9B,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,aAAa;IACb,WAAW;IACX,WAAW;IACX,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,WAAW;IACX,gBAAgB;IAChB,SAAS;IACT,cAAc;IACd,eAAe;CACN,CAAA;AAEV;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAwC;IACtE,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACrD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,wFAAwF;IACxF,+FAA+F;IAC/F,gFAAgF;IAChF,WAAW,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC9E,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC5E,SAAS,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC3E,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxD,SAAS,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC3D,cAAc,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAChE,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,YAAY,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC9E,aAAa,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;CAC/E,CAAA;AA6CD;;;;;;;GAOG;AACH,MAAM,UAAU,YAAY,CAAC,GAAe;IAC3C,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,GAAG,CAAA;IACjD,IAAI,WAAW,KAAK,SAAS,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK
,SAAS,EAAE,CAAC;QACrF,MAAM,IAAI,KAAK,CACd,oEAAoE;YACnE,eAAe,WAAW,KAAK,SAAS,cAAc,SAAS,KAAK,SAAS,cAAc,SAAS,KAAK,SAAS,GAAG;YACrH,WAAW,GAAG,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,KAAK;YACtD,sEAAsE;YACtE,gFAAgF,CACjF,CAAA;IACF,CAAC;IACD,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM,EAAE,CAAC;QACxF,MAAM,IAAI,KAAK,CACd,sDAAsD;YACrD,UAAU,WAAW,CAAC,MAAM,SAAS,SAAS,CAAC,MAAM,SAAS,SAAS,CAAC,MAAM,GAAG;YACjF,WAAW,GAAG,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,GAAG,CACrD,CAAA;IACF,CAAC;IACD,OAAO;QACN,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,WAAW;QACX,SAAS;QACT,SAAS;QACT,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI;QAC1B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,YAAY,EAAE,GAAG,CAAC,KAAK,EAAE,MAAM,IAAI,IAAI;QACvC,aAAa,EAAE,GAAG,CAAC,KAAK,EAAE,cAAc,IAAI,IAAI;KAChD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAe;IACnC,MAAM,GAAG,GAA4B;QACpC,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;KACpB,CAAA;IACD,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI;QAAE,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAA;IAChD,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI;QAAE,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,YAAY,CAAA;IAClE,IAAI,GAAG,CAAC,aAAa,KAAK,IAAI;QAAE,GAAG,CAAC,aAAa,GAAG,GAAG,CAAC,aAAa,CAAA;IACrE,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAsB,EAAE,IAAwB;IACjF,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAA;IACnD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;IACvE,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE3C,MAAM,MAAM,GAAsB,EAAE,CAAA;IACpC,MAAM,MAAM,GAA8B,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,E
AAE,CAAC,EAAE,CAAA;IACvE,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,KAAK,MAAM,KAAK,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAU,EAAE,CAAC;QACvD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAA;QAC5B,IAAI,CAAC,IAAI;YAAE,SAAQ;QAEnB,IAAI,UAAU,GAAG,CAAC,CAAA;QAClB,IAAI,MAAM,GAAqC,IAAI,CAAA;QACnD,IAAI,IAAI,GAAG,EAAE,CAAA;QACb,IAAI,SAAS,GAAG,CAAC,CAAA;QACjB,IAAI,aAAa,GAAG,EAAE,CAAA;QACtB,IAAI,YAAY,GAAG,EAAE,CAAA;QAErB,MAAM,SAAS,GAAG,KAAK,IAAmB,EAAE;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;YACvC,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,CAAA;YAC5E,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAa,kBAAkB,EAAE,IAAI,EAAE;gBAC3E,YAAY,EAAE,cAAc;aAC5B,CAAC,CAAA;YACF,MAAM,CAAC,WAAW,CAAC,0BAA0B,EAAE,IAAI,CAAC,aAAa,CAAC,CAAA;YAClE,MAAM,CAAC,WAAW,CAAC,iBAAiB,EAAE,KAAK,CAAC,CAAA;YAC5C,MAAM,CAAC,WAAW,CAAC,uBAAuB,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAA;YAC/D,SAAS,GAAG,CAAC,CAAA;YACb,aAAa,GAAG,EAAE,CAAA;YAClB,YAAY,GAAG,EAAE,CAAA;QAClB,CAAC,CAAA;QAED,MAAM,UAAU,GAAG,KAAK,IAAmB,EAAE;YAC5C,IAAI,CAAC,MAAM;gBAAE,OAAM;YACnB,MAAM,MAAM,CAAC,KAAK,EAAE,CAAA;YACpB,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAA;gBACnC,MAAM,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,IAAI;oBACJ,MAAM,EAAE,SAAS;oBACjB,WAAW,EAAE,iBAAiB;oBAC9B,IAAI,EAAE,SAAS;oBACf,KAAK,EAAE,QAAQ,CAAC,IAAI;oBACpB,MAAM;oBACN,eAAe,EAAE,aAAa;oBAC9B,cAAc,EAAE,YAAY;iBAC5B,CAAC,CAAA;YACH,CAAC;YACD,MAAM,GAAG,IAAI,CAAA;QACd,CAAC,CAAA;QAED,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,MAAM;gBAAE,MAAM,SAAS,EAAE,CAAA;YAC9B,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;YAC5B,MAAM,MAAO,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,CAA0B,CAAC,CAAA;YACjE,IAAI,SAAS,KAAK,CAAC;gBAAE,aAAa,GAAG,GAAG,CAAC,SAAS,CAAA;YAClD,YAAY,GAAG,GAAG,CAAC,SAAS,CAAA;YAC5B,SAAS,EAAE,CAAA;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;YACf,SAAS,EAAE,CAAA;YAEX,IAAI,SAAS,IAAI,YAAY,EAAE,CAAC;gBAC/B,MAAM,UAAU,EAAE,CAAA;gBAClB,UAAU,EAA
E,CAAA;YACb,CAAC;QACF,CAAC;QAED,MAAM,UAAU,EAAE,CAAA;IACnB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAE5G,MAAM,QAAQ,GAAkB;QAC/B,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,MAAM,EAAE,eAAe;QACvB,cAAc,EAAE,YAAY;QAC5B,cAAc,EAAE,cAAc;QAC9B,MAAM;QACN,MAAM;QACN,UAAU,EAAE,SAAS;KACrB,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACnG,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,+FAA+F;AAC/F,KAAK,UAAU,QAAQ,CAAC,IAAY;IACnC,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAA;IACjC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;IACrC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM;QAAE,IAAI,CAAC,MAAM,CAAC,KAAe,CAAC,CAAA;IAC9D,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;AAC1B,CAAC"}
|
package/out/src/split.d.ts
CHANGED
|
@@ -55,6 +55,12 @@ export interface SplitManifest {
|
|
|
55
55
|
*
|
|
56
56
|
* - US: Vermont, Wyoming, North Dakota (low density, easy to identify in WOF/admin sources).
|
|
57
57
|
* - FR: Corse, Lozère, Creuse (small departments / regions).
|
|
58
|
+
* - DE (added 2026-06-11, night-11): Saarland + Mecklenburg-Vorpommern — small Länder so the training
|
|
59
|
+
* cost is low while the slice clears the honest-eval 1000-row trust floor. DE has had NO
|
|
60
|
+
* trustable honest-eval slice since the harness shipped (flagged 2026-06-08); this takes effect
|
|
61
|
+
* at the NEXT base corpus rebuild — existing versioned corpora keep their committed
|
|
62
|
+
* SPLIT_MANIFESTs (a holdout added after a corpus is built is leakage-laundering, not a
|
|
63
|
+
* holdout).
|
|
58
64
|
*/
|
|
59
65
|
export declare function defaultHoldouts(): Record<string, readonly string[]>;
|
|
60
66
|
type SplitInputRow = Pick<CanonicalRow, "source_id" | "country" | "corpus_version" | "components">;
|
package/out/src/split.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../src/split.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAQH,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,KAAK,GAAG,MAAM,CAAA;AAEhD,MAAM,WAAW,YAAY;IAC5B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAA;CAC5C;AAED,kDAAkD;AAClD,MAAM,WAAW,aAAa;IAC7B,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,GAAG,EAAE,MAAM,EAAE,CAAA;IACb,IAAI,EAAE,MAAM,EAAE,CAAA;IACd,oEAAoE;IACpE,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAA;IAC3C,yEAAyE;IACzE,cAAc,EAAE,MAAM,CAAA;IACtB,sCAAsC;IACtC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAA;CACnE;AAED
|
|
1
|
+
{"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../src/split.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAQH,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,KAAK,GAAG,MAAM,CAAA;AAEhD,MAAM,WAAW,YAAY;IAC5B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAA;CAC5C;AAED,kDAAkD;AAClD,MAAM,WAAW,aAAa;IAC7B,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,GAAG,EAAE,MAAM,EAAE,CAAA;IACb,IAAI,EAAE,MAAM,EAAE,CAAA;IACd,oEAAoE;IACpE,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAA;IAC3C,yEAAyE;IACzE,cAAc,EAAE,MAAM,CAAA;IACtB,sCAAsC;IACtC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAA;CACnE;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,eAAe,IAAI,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAMnE;AAED,KAAK,aAAa,GAAG,IAAI,CAAC,YAAY,EAAE,WAAW,GAAG,SAAS,GAAG,gBAAgB,GAAG,YAAY,CAAC,CAAA;AAElG;;;;;GAKG;AACH,wBAAgB,WAAW,CAC1B,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,WAAW,GAAG,SAAS,GAAG,YAAY,CAAC,EAChE,QAAQ,GAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAqB,GAC7D,SAAS,CAOX;AAED;;;;;;;GAOG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,IAAI,GAAE,YAAiB,GAAG,aAAa,CAwB/F;AAED,kEAAkE;AAClE,wBAAgB,UAAU,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAKxD;AAED;;;;;;;GAOG;AACH,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAYnG;AAED,8EAA8E;AAC9E,MAAM,MAAM,oBAAoB,GAAG,IAAI,CAAC,UAAU,EAAE,WAAW,GAAG,SAAS,GAAG,gBAAgB,GAAG,YAAY,CAAC,CAAA;AAE9G;;;;;;;;;GASG;AACH,wBAAsB,mCAAmC,CAAC,IAAI,EAAE;IAC/D,YAAY,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACvC,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAA;CAC5C,GAAG,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAkBnC"}
|
package/out/src/split.js
CHANGED
|
@@ -34,11 +34,18 @@ import { createInterface } from "node:readline";
|
|
|
34
34
|
*
|
|
35
35
|
* - US: Vermont, Wyoming, North Dakota (low density, easy to identify in WOF/admin sources).
|
|
36
36
|
* - FR: Corse, Lozère, Creuse (small departments / regions).
|
|
37
|
+
* - DE (added 2026-06-11, night-11): Saarland + Mecklenburg-Vorpommern — small Länder so the training
|
|
38
|
+
* cost is low while the slice clears the honest-eval 1000-row trust floor. DE has had NO
|
|
39
|
+
* trustable honest-eval slice since the harness shipped (flagged 2026-06-08); this takes effect
|
|
40
|
+
* at the NEXT base corpus rebuild — existing versioned corpora keep their committed
|
|
41
|
+
* SPLIT_MANIFESTs (a holdout added after a corpus is built is leakage-laundering, not a
|
|
42
|
+
* holdout).
|
|
37
43
|
*/
|
|
38
44
|
export function defaultHoldouts() {
|
|
39
45
|
return {
|
|
40
46
|
US: ["Vermont", "VT", "Wyoming", "WY", "North Dakota", "ND"],
|
|
41
47
|
FR: ["Corse", "Lozère", "Lozere", "Creuse"],
|
|
48
|
+
DE: ["Saarland", "SL", "Mecklenburg-Vorpommern", "MV"],
|
|
42
49
|
};
|
|
43
50
|
}
|
|
44
51
|
/**
|
package/out/src/split.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"split.js","sourceRoot":"","sources":["../../src/split.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAA;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAA;AAC7D,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AA2B/C
|
|
1
|
+
{"version":3,"file":"split.js","sourceRoot":"","sources":["../../src/split.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAA;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAA;AAC7D,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AA2B/C;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,eAAe;IAC9B,OAAO;QACN,EAAE,EAAE,CAAC,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,CAAC;QAC5D,EAAE,EAAE,CAAC,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC;QAC3C,EAAE,EAAE,CAAC,UAAU,EAAE,IAAI,EAAE,wBAAwB,EAAE,IAAI,CAAC;KACtD,CAAA;AACF,CAAC;AAID;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAC1B,GAAgE,EAChE,WAA8C,eAAe,EAAE;IAE/D,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,MAAM,eAAe,GAAG,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;IACnD,MAAM,SAAS,GAAG,MAAM,KAAK,SAAS,IAAI,eAAe,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAA;IAC1E,IAAI,CAAC,SAAS;QAAE,OAAO,OAAO,CAAA;IAC9B,oFAAoF;IACpF,OAAO,UAAU,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAA;AAC3D,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,SAAS,CAAC,IAA6B,EAAE,OAAqB,EAAE;IAC/E,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,eAAe,EAAE,CAAA;IACnD,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,GAAG,GAAa,EAAE,CAAA;IACxB,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,IAAI,cAAc,GAAG,EAAE,CAAA;IAEvB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,cAAc,IAAI,GAAG,CAAC,cAAc;YAAE,cAAc,GAAG,GAAG,CAAC,cAAc,CAAA;QAC9E,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAA;QACxC,IAAI,KAAK,KAAK,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;aAC3C,IAAI,KAAK,KAAK,KAAK;YAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;;YAC5C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;IAC9B,CAAC;IAED,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IACrD,OAAO;QACN,KAAK;QACL,GAAG;QACH,IAAI;QACJ,QAAQ;QACR,cAAc;QACd,MAAM,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAA
C,MAAM,EAAE,KAAK,EAAE;KAC1E,CAAA;AACF,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,UAAU,CAAC,EAAU,EAAE,CAAS;IAC/C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAA;IACvD,mDAAmD;IACnD,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,GAAG,aAAa,GAAG,MAAM,CAAC,CAAC,CAAE,GAAG,UAAU,GAAG,MAAM,CAAC,CAAC,CAAE,GAAG,OAAO,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;IAClG,OAAO,CAAC,GAAG,CAAC,CAAA;AACb,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAuB,EAAE,SAAiB;IACnF,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAC3C,KAAK,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAU,EAAE,CAAC;QACtD,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACzC,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,CAAA;IACzG,CAAC;IACD,MAAM,OAAO,GAAG;QACf,cAAc,EAAE,QAAQ,CAAC,cAAc;QACvC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,MAAM,EAAE,QAAQ,CAAC,MAAM;KACvB,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,qBAAqB,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;AACzG,CAAC;AAKD;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,mCAAmC,CAAC,IAMzD;IACA,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,eAAe,EAAE,CAAA;IAEnD,KAAK,MAAM,KAAK,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAU,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAA;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,KAAK,MAAM,CAAC,CAAA;QACpD,MAAM,qBAAqB,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;IAClD,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAA;IACpE,MAAM,OAAO,GAAG;QACf,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,QAAQ;QACR,MAAM,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE;KACjC,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,qBAAqB,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IAC7G,OAAO,OAAO,CAAC,MA
AM,CAAA;AACtB,CAAC;AAED;;;;GAIG;AACH,KAAK,UAAU,qBAAqB,CAAC,gBAAwB,EAAE,OAAe;IAC7E,MAAM,YAAY,GAAG,GAAG,OAAO,WAAW,CAAA;IAC1C,MAAM,GAAG,GAAG,iBAAiB,CAAC,YAAY,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;IACjE,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,gBAAgB,CAAC,gBAAgB,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;IAEpH,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC3C,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACtB,IAAI,CAAC,IAAI;gBAAE,OAAM;YACjB,IAAI,CAAC;gBACJ,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAA2B,CAAA;gBACtD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ;oBAAE,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,SAAS,IAAI,CAAC,CAAA;YACvE,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACd,MAAM,CAAC,GAAY,CAAC,CAAA;YACrB,CAAC;QACF,CAAC,CAAC,CAAA;QACF,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,GAAG,CAAC,GAAG,EAAE,CAAA;QACV,CAAC,CAAC,CAAA;QACF,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;QACtB,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC,CAAA;QAChC,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;IACxB,CAAC,CAAC,CAAA;IAEF,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC3C,wEAAwE;QACxE,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,YAAY,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAA;QACnG,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;QACxB,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,IAAI,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAA;;gBACpB,MAAM,CAAC,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IACF,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;AAC3C,CAAC"}
|
package/out/src/synthesize.d.ts
CHANGED
|
@@ -153,12 +153,22 @@ export type ComposeResult = {
|
|
|
153
153
|
* tokens in the venue stay labeled as `venue`, never as the address's locality / region / etc.,
|
|
154
154
|
* even when they share surface forms.
|
|
155
155
|
*
|
|
156
|
+
* The char-offset span triple (#519) is re-targeted to the composed surface by the same
|
|
157
|
+
* deterministic boundary: one `venue` span over `[0, venue.length)` (no re-search), then the
|
|
158
|
+
* address's own spans shifted by `venue.length + separator.length` — plain offset arithmetic, no
|
|
159
|
+
* token indirection. The separator chars sit outside every span (deliberately unlabeled — now
|
|
160
|
+
* expressible). The composed triple is passed through `assertSpanInvariants` so a composition bug
|
|
161
|
+
* can't ride into a corpus.
|
|
162
|
+
*
|
|
156
163
|
* The address's components are forwarded as-is (alignment ran on them and they survived); `venue`
|
|
157
164
|
* is added on top with the trimmed venue string as its surface form.
|
|
158
165
|
*
|
|
159
166
|
* Returns `{ kind: "quarantined" }` when:
|
|
160
167
|
*
|
|
161
168
|
* - The venue is empty or whitespace-only.
|
|
169
|
+
* - The venue is not NFC-normalized (char offsets over a non-NFC raw are ambiguous — the same
|
|
170
|
+
* discipline `alignRow` enforces on adapter rows, surfaced as quarantine here because the venue
|
|
171
|
+
* is caller-supplied data).
|
|
162
172
|
* - The address row fails alignment in isolation (the underlying failure reason is propagated).
|
|
163
173
|
*/
|
|
164
174
|
export declare function composeAdversarialRow(venue: string, address: CanonicalRow, options: ComposeAdversarialOptions): ComposeResult;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../src/synthesize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAWH,OAAO,EAAuB,KAAK,SAAS,EAAE,MAAM,eAAe,CAAA;AACnE,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAE1E;;;GAGG;AACH,MAAM,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,YAAY,KAAK,YAAY,GAAG,IAAI,CAAA;AAyBrE,iFAAiF;AACjF,eAAO,MAAM,SAAS,EAAE,YAQvB,CAAA;AAED,iFAAiF;AACjF,eAAO,MAAM,SAAS,EAAE,YAQvB,CAAA;AAED,+EAA+E;AAC/E,eAAO,MAAM,UAAU,EAAE,YAIxB,CAAA;AAED;;;;;;GAMG;AACH,eAAO,MAAM,WAAW,EAAE,YAQzB,CAAA;AAED;;;GAGG;AACH,eAAO,MAAM,WAAW,EAAE,YAQzB,CAAA;AAqED,uEAAuE;AACvE,eAAO,MAAM,WAAW,EAAE,YAazB,CAAA;AAED,uEAAuE;AACvE,eAAO,MAAM,eAAe,EAAE,YAW7B,CAAA;AAgBD,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,YAkB/B,CAAA;AAED,yDAAyD;AACzD,eAAO,MAAM,qBAAqB,EAAE,YAqBnC,CAAA;AAED;;;;;;;;;GASG;AACH,eAAO,MAAM,sBAAsB,EAAE,YAkBpC,CAAA;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,kBAAkB,EAAE,YAiBhC,CAAA;AAED;;;;;;;;;;GAUG;AACH,eAAO,MAAM,wBAAwB,EAAE,YAkBtC,CAAA;AAED;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,EAAE,YAiBlC,CAAA;AAED,gEAAgE;AAChE,eAAO,MAAM,gBAAgB,EAAE,YAQ9B,CAAA;AAMD,+FAA+F;AAC/F,eAAO,MAAM,aAAa,EAAE,YAW3B,CAAA;AAMD,sCAAsC;AACtC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAgBtD,CAAA;AAED,kGAAkG;AAClG,wBAAgB,8BAA8B,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,YAAY,EAAE,CAqBvF;AAED;;;GAGG;AACH,wBAAiB,aAAa,CAC7B,GAAG,EAAE,YAAY,EACjB,aAAa,GAAE,SAAS,YAAY,EAAgD,GAClF,SAAS,CAAC,YAAY,CAAC,CAKzB;AAqCD,mDAAmD;AACnD,MAAM,WAAW,yBAAyB;IACzC;;;;;;;;;;;OAWG;IACH,OAAO,EAAE,MAAM,CAAA;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAA;CACrB;AAED,wEAAwE;AACxE,MAAM,MAAM,aAAa,GAAG;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,GAAG,EAAE,UAAU,CAAA;CAAE,GAAG;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,GAAG,EAAE,cAAc,CAAA;CAAE,CAAA;AAE/G
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../src/synthesize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAWH,OAAO,EAAuB,KAAK,SAAS,EAAE,MAAM,eAAe,CAAA;AACnE,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAE1E;;;GAGG;AACH,MAAM,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,YAAY,KAAK,YAAY,GAAG,IAAI,CAAA;AAyBrE,iFAAiF;AACjF,eAAO,MAAM,SAAS,EAAE,YAQvB,CAAA;AAED,iFAAiF;AACjF,eAAO,MAAM,SAAS,EAAE,YAQvB,CAAA;AAED,+EAA+E;AAC/E,eAAO,MAAM,UAAU,EAAE,YAIxB,CAAA;AAED;;;;;;GAMG;AACH,eAAO,MAAM,WAAW,EAAE,YAQzB,CAAA;AAED;;;GAGG;AACH,eAAO,MAAM,WAAW,EAAE,YAQzB,CAAA;AAqED,uEAAuE;AACvE,eAAO,MAAM,WAAW,EAAE,YAazB,CAAA;AAED,uEAAuE;AACvE,eAAO,MAAM,eAAe,EAAE,YAW7B,CAAA;AAgBD,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,YAkB/B,CAAA;AAED,yDAAyD;AACzD,eAAO,MAAM,qBAAqB,EAAE,YAqBnC,CAAA;AAED;;;;;;;;;GASG;AACH,eAAO,MAAM,sBAAsB,EAAE,YAkBpC,CAAA;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,kBAAkB,EAAE,YAiBhC,CAAA;AAED;;;;;;;;;;GAUG;AACH,eAAO,MAAM,wBAAwB,EAAE,YAkBtC,CAAA;AAED;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,EAAE,YAiBlC,CAAA;AAED,gEAAgE;AAChE,eAAO,MAAM,gBAAgB,EAAE,YAQ9B,CAAA;AAMD,+FAA+F;AAC/F,eAAO,MAAM,aAAa,EAAE,YAW3B,CAAA;AAMD,sCAAsC;AACtC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAgBtD,CAAA;AAED,kGAAkG;AAClG,wBAAgB,8BAA8B,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,YAAY,EAAE,CAqBvF;AAED;;;GAGG;AACH,wBAAiB,aAAa,CAC7B,GAAG,EAAE,YAAY,EACjB,aAAa,GAAE,SAAS,YAAY,EAAgD,GAClF,SAAS,CAAC,YAAY,CAAC,CAKzB;AAqCD,mDAAmD;AACnD,MAAM,WAAW,yBAAyB;IACzC;;;;;;;;;;;OAWG;IACH,OAAO,EAAE,MAAM,CAAA;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAA;CACrB;AAED,wEAAwE;AACxE,MAAM,MAAM,aAAa,GAAG;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,GAAG,EAAE,UAAU,CAAA;CAAE,GAAG;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,GAAG,EAAE,cAAc,CAAA;CAAE,CAAA;AAE/G;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,wBAAgB,qBAAqB,CACpC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EACrB,OAAO,EAAE,yBAAyB,GAChC,aAAa,CAgFf"}
|
package/out/src/synthesize.js
CHANGED
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
* most useful at training time, not corpus build time.
|
|
25
25
|
*/
|
|
26
26
|
import { US_STREET_SUFFIX_PREFERRED_ABBR, US_UNIT_DESIGNATOR_PREFERRED_ABBR, matchCase, matchLeadingDesignator, matchTrailingSuffix, } from "@mailwoman/codex/us";
|
|
27
|
-
import { alignRow } from "./align.js";
|
|
27
|
+
import { alignRow, assertSpanInvariants } from "./align.js";
|
|
28
28
|
import { whitespaceTokenizer } from "./tokenize.js";
|
|
29
29
|
/** Helper: build the augmented row with synth marker + chained source_id. */
|
|
30
30
|
function withAugmentation(source, method, newRaw, newComponents) {
|
|
@@ -479,12 +479,22 @@ function escapeRegex(s) {
|
|
|
479
479
|
* tokens in the venue stay labeled as `venue`, never as the address's locality / region / etc.,
|
|
480
480
|
* even when they share surface forms.
|
|
481
481
|
*
|
|
482
|
+
* The char-offset span triple (#519) is re-targeted to the composed surface by the same
|
|
483
|
+
* deterministic boundary: one `venue` span over `[0, venue.length)` (no re-search), then the
|
|
484
|
+
* address's own spans shifted by `venue.length + separator.length` — plain offset arithmetic, no
|
|
485
|
+
* token indirection. The separator chars sit outside every span (deliberately unlabeled — now
|
|
486
|
+
* expressible). The composed triple is passed through `assertSpanInvariants` so a composition bug
|
|
487
|
+
* can't ride into a corpus.
|
|
488
|
+
*
|
|
482
489
|
* The address's components are forwarded as-is (alignment ran on them and they survived); `venue`
|
|
483
490
|
* is added on top with the trimmed venue string as its surface form.
|
|
484
491
|
*
|
|
485
492
|
* Returns `{ kind: "quarantined" }` when:
|
|
486
493
|
*
|
|
487
494
|
* - The venue is empty or whitespace-only.
|
|
495
|
+
* - The venue is not NFC-normalized (char offsets over a non-NFC raw are ambiguous — the same
|
|
496
|
+
* discipline `alignRow` enforces on adapter rows, surfaced as quarantine here because the venue
|
|
497
|
+
* is caller-supplied data).
|
|
488
498
|
* - The address row fails alignment in isolation (the underlying failure reason is propagated).
|
|
489
499
|
*/
|
|
490
500
|
export function composeAdversarialRow(venue, address, options) {
|
|
@@ -494,6 +504,11 @@ export function composeAdversarialRow(venue, address, options) {
|
|
|
494
504
|
if (!venueTrimmed) {
|
|
495
505
|
return { kind: "quarantined", row: { row: address, reason: "venue-empty" } };
|
|
496
506
|
}
|
|
507
|
+
// Char-offset spans over the composed raw are only meaningful under NFC (#519) — the address
|
|
508
|
+
// half is enforced by alignRow; the venue is caller-supplied and checked here.
|
|
509
|
+
if (venueTrimmed.normalize("NFC") !== venueTrimmed) {
|
|
510
|
+
return { kind: "quarantined", row: { row: address, reason: "venue-not-nfc" } };
|
|
511
|
+
}
|
|
497
512
|
const addressAligned = alignRow(address, { tokenizer });
|
|
498
513
|
if (addressAligned.kind !== "labeled") {
|
|
499
514
|
// Surface the address's quarantine reason but tag it with the compose attempt for
|
|
@@ -516,6 +531,20 @@ export function composeAdversarialRow(venue, address, options) {
|
|
|
516
531
|
venue: venueTrimmed,
|
|
517
532
|
...address.components,
|
|
518
533
|
};
|
|
534
|
+
// Re-target the char-offset spans (#519) onto the composed surface: the venue span covers the
|
|
535
|
+
// whole trimmed venue (internal punctuation included — the token path cannot say that), and the
|
|
536
|
+
// address's spans shift right by the venue + separator length. alignRow emits the triple on
|
|
537
|
+
// every labeled row, so absence here is an alignment-contract bug, not data — fail loudly.
|
|
538
|
+
const { span_starts: addrStarts, span_ends: addrEnds, span_tags: addrTags } = addressAligned.row;
|
|
539
|
+
if (addrStarts === undefined || addrEnds === undefined || addrTags === undefined) {
|
|
540
|
+
throw new Error(`composeAdversarialRow: alignRow returned a labeled row without the span triple ` +
|
|
541
|
+
`(source=${address.source}, source_id=${address.source_id}) — alignment contract violation`);
|
|
542
|
+
}
|
|
543
|
+
const offset = venueTrimmed.length + separator.length;
|
|
544
|
+
const spans = [
|
|
545
|
+
{ tag: "venue", start: 0, end: venueTrimmed.length },
|
|
546
|
+
...addrTags.map((tag, i) => ({ tag, start: addrStarts[i] + offset, end: addrEnds[i] + offset })),
|
|
547
|
+
];
|
|
519
548
|
const baseSourceId = address.synth?.base_source_id ?? address.source_id;
|
|
520
549
|
const method = `compose:${options.pattern}`;
|
|
521
550
|
const composed = {
|
|
@@ -530,7 +559,11 @@ export function composeAdversarialRow(venue, address, options) {
|
|
|
530
559
|
synth: { method, base_source_id: baseSourceId },
|
|
531
560
|
tokens,
|
|
532
561
|
labels,
|
|
562
|
+
span_starts: spans.map((s) => s.start),
|
|
563
|
+
span_ends: spans.map((s) => s.end),
|
|
564
|
+
span_tags: spans.map((s) => s.tag),
|
|
533
565
|
};
|
|
566
|
+
assertSpanInvariants(spans, composed);
|
|
534
567
|
return { kind: "labeled", row: composed };
|
|
535
568
|
}
|
|
536
569
|
//# sourceMappingURL=synthesize.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../src/synthesize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EACN,+BAA+B,EAC/B,iCAAiC,EACjC,SAAS,EACT,sBAAsB,EACtB,mBAAmB,GACnB,MAAM,qBAAqB,CAAA;AAE5B,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AACrC,OAAO,EAAE,mBAAmB,EAAkB,MAAM,eAAe,CAAA;AAWnE,6EAA6E;AAC7E,SAAS,gBAAgB,CACxB,MAAoB,EACpB,MAAc,EACd,MAAc,EACd,aAA4B;IAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,cAAc,IAAI,MAAM,CAAC,SAAS,CAAA;IAC/D,OAAO;QACN,GAAG,MAAM;QACT,GAAG,EAAE,MAAM;QACX,UAAU,EAAE,aAAa;QACzB,SAAS,EAAE,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,EAAE;QAC1C,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE;KACzC,CAAA;AACF,CAAC;AAED,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E,iFAAiF;AACjF,MAAM,CAAC,MAAM,SAAS,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC9C,IAAI,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE;QAAE,OAAO,IAAI,CAAA;IAClD,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAA;IACnC,MAAM,YAAY,GAAkB,EAAE,CAAA;IACtC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,YAAY,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;IACzD,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,YAAY,EAAE,KAAK,EAAE,YAAY,CAAC,CAAA;AAChE,CAAC,CAAA;AAED,iFAAiF;AACjF,MAAM,CAAC,MAAM,SAAS,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC9C,IAAI,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE;QAAE,OAAO,IAAI,CAAA;IAClD,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAA;IACrC,MAAM,cAAc,GAAkB,EAAE,CAAA;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,cAAc,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;IAC3D,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;AACpE,CAAC,CAAA;AAED,+EAA+E;AAC/E,MAAM,CAAC,MAAM,UAAU,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC/C,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACvC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACpE,OAAO,gBAAgB,CAAC,GAAG,EAAE,aAAa,EAAE,M
AAM,EAAE,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;AAC3E,CAAC,CAAA;AAED;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACnC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,aAAa,GAAkB,EAAE,CAAA;IACvC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,aAAa,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACpE,CAAC,CAAA;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACtC,IAAI,QAAQ,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,aAAa,GAAkB,EAAE,CAAA;IACvC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,aAAa,CAAC,CAAiB,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAA;IAC1D,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAA;AACtE,CAAC,CAAA;AAED,SAAS,YAAY,CAAC,CAAS;IAC9B,OAAO,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAA;AACjD,CAAC;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,kFAAkF;AAClF,MAAM,kBAAkB,GAA2B;IAClD,OAAO,EAAE,IAAI;IACb,MAAM,EAAE,IAAI;IACZ,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,UAAU,EAAE,IAAI;IAChB,QAAQ,EAAE,IAAI;IACd,WAAW,EAAE,IAAI;IACjB,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,OAAO,EAAE,IAAI;IACb,MAAM,EAAE,IAAI;IACZ,KAAK,EAAE,IAAI;IACX,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,IAAI,EAAE,IAAI;IACV,MAAM,EAAE,IAAI;IACZ,QAAQ,EAAE,IAAI;IACd,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,QAAQ,EAAE,IAAI;IACd,aAAa,EAAE,IAAI;IACnB,QAAQ,EAAE,IAAI;IACd,SAAS,EAAE,IAAI;IACf,WAAW,EAAE,IAAI;IACjB,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,MAAM,EAAE,IAAI;IACZ,eAAe,EAAE,IAAI;IACrB,YAAY,EAAE,IAAI;IAClB,YAAY,EAAE,IAAI;IAClB,UAAU,EAAE,IAAI;IAChB,gBAAgB,EAAE,IAAI;IACtB,cAAc,EAAE,IAAI;IACpB,IAAI,EAAE,IAAI;IACV,QAAQ,EAAE,IAAI;IACd,MAAM
,EAAE,IAAI;IACZ,YAAY,EAAE,IAAI;IAClB,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,IAAI;IACtB,cAAc,EAAE,IAAI;IACpB,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,IAAI,EAAE,IAAI;IACV,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,UAAU,EAAE,IAAI;IAChB,eAAe,EAAE,IAAI;IACrB,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,IAAI;IACb,sBAAsB,EAAE,IAAI;CAC5B,CAAA;AAED,MAAM,kBAAkB,GAA2B,MAAM,CAAC,WAAW,CACpE,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAC1D,CAAA;AAED,uEAAuE;AACvE,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,IAAI,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,wFAAwF;IACxF,wCAAwC;IACxC,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,CAAA;IAC7C,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;IACxE,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACxE,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACpE,CAAC,CAAA;AAED,uEAAuE;AACvE,MAAM,CAAC,MAAM,eAAe,GAAiB,CAAC,GAAG,EAAE,EAAE;IACpD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,IAAI,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,CAAA;IAC7C,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;IACxE,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACxE,OAAO,gBAAgB,CAAC,GAAG,EAAE,kBAAkB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACxE,CAAC,CAAA;AAED,MAAM,wBAAwB,GAA2B;IACxD,KAAK
,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;CACf,CAAA;AACD,MAAM,wBAAwB,GAA2B,MAAM,CAAC,WAAW,CAC1E,MAAM,CAAC,OAAO,CAAC,wBAAwB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAChE,CAAA;AAED,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACtD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,WAAW,GAAmB,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,CAAC,CAAA;IAChF,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,IAAI,MAAM,GAAG,GAAG,CAAC,GAAG,CAAA;IACpB,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;QAC5B,IAAI,CAAC,CAAC;YAAE,SAAQ;QAChB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,4BAA4B,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;QACjG,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpB,aAAa,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAA;YAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAA;YAC7E,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;IACF,CAAC;IACD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,gBAAgB,CAAC,GAAG,EAAE,oBAAoB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC1E,CAAC,CAAA;AAED,yDAAyD;AACzD,MAAM,CAAC,MAAM,qBAAqB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC1D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,WAAW,GAAmB,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,CAAC,CAAA;IAChF,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,IAAI,MAAM,GAAG,GAAG,CAAC,GAAG,CAAA;IACpB,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;QAC5B,IAAI,CAAC,CAAC;YAAE,SAAQ;QAChB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CACzB,sEAAsE,EACtE,CAAC,CAAC,EAAE,EAAE,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI,CAAC,CACvC,CAAA;QACD,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpB,aAAa,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAA;YAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAA;
YAC7E,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;IACF,CAAC;IACD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,gBAAgB,CAAC,GAAG,EAAE,wBAAwB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC9E,CAAC,CAAA;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC3D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,SAAS,GAAG,+BAA+B,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;IAClE,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IAClD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,SAAS,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,MAAM,EAAE,CAAA;IAClF,IAAI,SAAS,KAAK,MAAM;QAAE,OAAO,IAAI,CAAA;IAErC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7E,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,SAAS,CAAC,CAAA;IAC1F,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,6BAA6B,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACnF,CAAC,CAAA;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACvD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IACxD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,SAAS,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,MAAM,EAAE,CAAA;IAClF,IAAI,SAAS,KAAK,MAAM;QAAE,OAAO,IAAI,CAAA;IAErC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7E,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,SAAS,CAAC,CAAA;IAC1F,IAAI
,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,yBAAyB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC/E,CAAC,CAAA;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC7D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAA;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,SAAS,GAAG,iCAAiC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;IACpE,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IAClD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,OAAO,GAAG,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAA;IAC9D,IAAI,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAEjC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAA;IACzE,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;IACtF,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,+BAA+B,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACrF,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACzD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAA;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IACxD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,OAAO,GAAG,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAA;IAC9D,IAAI,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAEjC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAA;IACzE,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;IACtF,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,2BAA2B,EAAE,
MAAM,EAAE,aAAa,CAAC,CAAA;AACjF,CAAC,CAAA;AAED,gEAAgE;AAChE,MAAM,CAAC,MAAM,gBAAgB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACrD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAA;IACxC,IAAI,CAAC,QAAQ,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAA;IAC7D,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;IACxC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;IAChD,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,qBAAqB,EAAE,MAAM,EAAE,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;AACrG,CAAC,CAAA;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,+FAA+F;AAC/F,MAAM,CAAC,MAAM,aAAa,GAAiB,CAAC,GAAG,EAAE,EAAE;IAClD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,sBAAsB,CAAA;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,OAAO,aAAa,CAAC,sBAAsB,CAAA;IAC3C,+DAA+D;IAC/D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,WAAW,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IAC9D,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,OAAO,gBAAgB,CAAC,GAAG,EAAE,gBAAgB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACtE,CAAC,CAAA;AAED,8EAA8E;AAC9E,8BAA8B;AAC9B,8EAA8E;AAE9E,sCAAsC;AACtC,MAAM,CAAC,MAAM,aAAa,GAAiC;IAC1D,YAAY,EAAE,SAAS;IACvB,YAAY,EAAE,SAAS;IACvB,aAAa,EAAE,UAAU;IACzB,cAAc,EAAE,WAAW;IAC3B,cAAc,EAAE,WAAW;IAC3B,cAAc,EAAE,WAAW;IAC3B,kBAAkB,EAAE,eAAe;IACnC,oBAAoB,EAAE,iBAAiB;IACvC,wBAAwB,EAAE,qBAAqB;IAC/C,6BAA6B,EAAE,sBAAsB;IACrD,yBAAyB,EAAE,kBAAkB;IAC7C,+BAA+B,EAAE,wBAAwB;IACzD,2BAA2B,EAAE,oBAAoB;IACjD,qBAAqB,EAAE,gBAAgB;IACvC,gBAAgB,EAAE,aAAa;CAC/B,CAAA;AAED,kGAAkG;AAClG,MAAM,UAAU,8BAA8B,CAAC,OAAe;IAC7D,MAAM,SAAS,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IACjE,QAAQ,OAAO,EAAE,CAAC;QACjB,KAAK,IAAI;YACR,OAAO;gBACN,GAAG,SAAS;gBACZ,WAAW;gBACX,eAAe
;gBACf,iBAAiB;gBACjB,qBAAqB;gBACrB,sBAAsB;gBACtB,kBAAkB;gBAClB,wBAAwB;gBACxB,oBAAoB;gBACpB,gBAAgB;aAChB,CAAA;QACF,KAAK,IAAI;YACR,OAAO,CAAC,GAAG,SAAS,EAAE,WAAW,EAAE,aAAa,CAAC,CAAA;QAClD;YACC,OAAO,SAAS,CAAA;IAClB,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,SAAS,CAAC,CAAC,aAAa,CAC7B,GAAiB,EACjB,gBAAyC,8BAA8B,CAAC,GAAG,CAAC,OAAO,CAAC;IAEpF,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAA;QACpB,IAAI,GAAG;YAAE,MAAM,GAAG,CAAA;IACnB,CAAC;AACF,CAAC;AAED,SAAS,WAAW,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAiED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,qBAAqB,CACpC,KAAa,EACb,OAAqB,EACrB,OAAkC;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAA;IAC3C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,mBAAmB,EAAE,CAAA;IAE5D,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,EAAE,CAAA;IACjC,IAAI,CAAC,YAAY,EAAE,CAAC;QACnB,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,EAAE,CAAA;IAC7E,CAAC;IAED,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;IACvD,IAAI,cAAc,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACvC,kFAAkF;QAClF,kFAAkF;QAClF,+BAA+B;QAC/B,OAAO;YACN,IAAI,EAAE,aAAa;YACnB,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,mBAAmB,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE;SAC7E,CAAA;IACF,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAA;IACpD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,iBAAiB,EAAE,EAAE,CAAA;IACjF,CAAC;IAED,MAAM,WAAW,GAAe,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAA;IAE5F,MAAM,MAAM,GAAa,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAC1F,MAAM,MAAM,GAAe,CAAC,GAAG,WAAW,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAEzE,MAAM,WAAW,GAAG,GAAG,YAAY,GAAG,SAAS,GAAG,OAAO,CAAC,GAAG,EAAE,CAAA;IAC/D,MAAM,kBAAkB,GAAG;QAC1B,KAAK,EAAE,YAAY;QACnB,GAAG,OAAO,CAAC,UAAU;KACrB,CAAA;IAED,MAAM,YAAY,GAAG,O
AAO,CAAC,KAAK,EAAE,cAAc,IAAI,OAAO,CAAC,SAAS,CAAA;IACvE,MAAM,MAAM,GAAG,WAAW,OAAO,CAAC,OAAO,EAAE,CAAA;IAE3C,MAAM,QAAQ,GAAe;QAC5B,GAAG,EAAE,WAAW;QAChB,UAAU,EAAE,kBAAkB;QAC9B,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,SAAS,EAAE,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,EAAE;QAC3C,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,EAAE;QAC/C,MAAM;QACN,MAAM;KACN,CAAA;IAED,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAA;AAC1C,CAAC"}
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../src/synthesize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EACN,+BAA+B,EAC/B,iCAAiC,EACjC,SAAS,EACT,sBAAsB,EACtB,mBAAmB,GACnB,MAAM,qBAAqB,CAAA;AAE5B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAsB,MAAM,YAAY,CAAA;AAC/E,OAAO,EAAE,mBAAmB,EAAkB,MAAM,eAAe,CAAA;AAWnE,6EAA6E;AAC7E,SAAS,gBAAgB,CACxB,MAAoB,EACpB,MAAc,EACd,MAAc,EACd,aAA4B;IAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,cAAc,IAAI,MAAM,CAAC,SAAS,CAAA;IAC/D,OAAO;QACN,GAAG,MAAM;QACT,GAAG,EAAE,MAAM;QACX,UAAU,EAAE,aAAa;QACzB,SAAS,EAAE,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,EAAE;QAC1C,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE;KACzC,CAAA;AACF,CAAC;AAED,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E,iFAAiF;AACjF,MAAM,CAAC,MAAM,SAAS,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC9C,IAAI,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE;QAAE,OAAO,IAAI,CAAA;IAClD,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAA;IACnC,MAAM,YAAY,GAAkB,EAAE,CAAA;IACtC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,YAAY,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;IACzD,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,YAAY,EAAE,KAAK,EAAE,YAAY,CAAC,CAAA;AAChE,CAAC,CAAA;AAED,iFAAiF;AACjF,MAAM,CAAC,MAAM,SAAS,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC9C,IAAI,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE;QAAE,OAAO,IAAI,CAAA;IAClD,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAA;IACrC,MAAM,cAAc,GAAkB,EAAE,CAAA;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,cAAc,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;IAC3D,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;AACpE,CAAC,CAAA;AAED,+EAA+E;AAC/E,MAAM,CAAC,MAAM,UAAU,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC/C,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACvC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACpE,OAAO,gBAAgB,CAAC,GAAG,EAA
E,aAAa,EAAE,MAAM,EAAE,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;AAC3E,CAAC,CAAA;AAED;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACnC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,aAAa,GAAkB,EAAE,CAAA;IACvC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,aAAa,CAAC,CAAiB,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAChE,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACpE,CAAC,CAAA;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACtC,IAAI,QAAQ,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,aAAa,GAAkB,EAAE,CAAA;IACvC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC;YAAE,aAAa,CAAC,CAAiB,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAA;IAC1D,CAAC;IACD,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAA;AACtE,CAAC,CAAA;AAED,SAAS,YAAY,CAAC,CAAS;IAC9B,OAAO,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAA;AACjD,CAAC;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,kFAAkF;AAClF,MAAM,kBAAkB,GAA2B;IAClD,OAAO,EAAE,IAAI;IACb,MAAM,EAAE,IAAI;IACZ,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,UAAU,EAAE,IAAI;IAChB,QAAQ,EAAE,IAAI;IACd,WAAW,EAAE,IAAI;IACjB,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,OAAO,EAAE,IAAI;IACb,MAAM,EAAE,IAAI;IACZ,KAAK,EAAE,IAAI;IACX,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,IAAI,EAAE,IAAI;IACV,MAAM,EAAE,IAAI;IACZ,QAAQ,EAAE,IAAI;IACd,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,QAAQ,EAAE,IAAI;IACd,aAAa,EAAE,IAAI;IACnB,QAAQ,EAAE,IAAI;IACd,SAAS,EAAE,IAAI;IACf,WAAW,EAAE,IAAI;IACjB,QAAQ,EAAE,IAAI;IACd,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,MAAM,EAAE,IAAI;IACZ,eAAe,EAAE,IAAI;IACrB,YAAY,EAAE,IAAI;IAClB,YAAY,EAAE,IAAI;IAClB,UAAU,EAAE,IAAI;IAChB,gBAAgB,EAAE,IAAI;IACtB,cAAc,EAAE,IAAI;IACpB,IAAI,EAAE,IAAI;IACV,QAAQ,EAAE,I
AAI;IACd,MAAM,EAAE,IAAI;IACZ,YAAY,EAAE,IAAI;IAClB,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,IAAI;IACtB,cAAc,EAAE,IAAI;IACpB,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,IAAI,EAAE,IAAI;IACV,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,IAAI;IACd,UAAU,EAAE,IAAI;IAChB,eAAe,EAAE,IAAI;IACrB,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,IAAI;IACb,sBAAsB,EAAE,IAAI;CAC5B,CAAA;AAED,MAAM,kBAAkB,GAA2B,MAAM,CAAC,WAAW,CACpE,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAC1D,CAAA;AAED,uEAAuE;AACvE,MAAM,CAAC,MAAM,WAAW,GAAiB,CAAC,GAAG,EAAE,EAAE;IAChD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,IAAI,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,wFAAwF;IACxF,wCAAwC;IACxC,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,CAAA;IAC7C,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;IACxE,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACxE,OAAO,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACpE,CAAC,CAAA;AAED,uEAAuE;AACvE,MAAM,CAAC,MAAM,eAAe,GAAiB,CAAC,GAAG,EAAE,EAAE;IACpD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,IAAI,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,CAAA;IAC7C,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,MAAM,KAAK,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;IACxE,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACxE,OAAO,gBAAgB,CAAC,GAAG,EAAE,kBAAkB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACxE,CAAC,CAAA;AAED,MAAM,wBAAwB,GAA
2B;IACxD,KAAK,EAAE,GAAG;IACV,KAAK,EAAE,GAAG;IACV,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;CACf,CAAA;AACD,MAAM,wBAAwB,GAA2B,MAAM,CAAC,WAAW,CAC1E,MAAM,CAAC,OAAO,CAAC,wBAAwB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAChE,CAAA;AAED,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACtD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,WAAW,GAAmB,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,CAAC,CAAA;IAChF,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,IAAI,MAAM,GAAG,GAAG,CAAC,GAAG,CAAA;IACpB,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;QAC5B,IAAI,CAAC,CAAC;YAAE,SAAQ;QAChB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,4BAA4B,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;QACjG,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpB,aAAa,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAA;YAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAA;YAC7E,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;IACF,CAAC;IACD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,gBAAgB,CAAC,GAAG,EAAE,oBAAoB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC1E,CAAC,CAAA;AAED,yDAAyD;AACzD,MAAM,CAAC,MAAM,qBAAqB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC1D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,WAAW,GAAmB,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,CAAC,CAAA;IAChF,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,IAAI,MAAM,GAAG,GAAG,CAAC,GAAG,CAAA;IACpB,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;QAC5B,IAAI,CAAC,CAAC;YAAE,SAAQ;QAChB,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CACzB,sEAAsE,EACtE,CAAC,CAAC,EAAE,EAAE,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI,CAAC,CACvC,CAAA;QACD,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;YACpB,aAAa,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAA;YAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,QA
AQ,CAAC,CAAA;YAC7E,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;IACF,CAAC;IACD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,gBAAgB,CAAC,GAAG,EAAE,wBAAwB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC9E,CAAC,CAAA;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC3D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,SAAS,GAAG,+BAA+B,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;IAClE,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IAClD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,SAAS,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,MAAM,EAAE,CAAA;IAClF,IAAI,SAAS,KAAK,MAAM;QAAE,OAAO,IAAI,CAAA;IAErC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7E,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,SAAS,CAAC,CAAA;IAC1F,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,6BAA6B,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACnF,CAAC,CAAA;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACvD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAA;IACpC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IACxD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,SAAS,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,MAAM,EAAE,CAAA;IAClF,IAAI,SAAS,KAAK,MAAM;QAAE,OAAO,IAAI,CAAA;IAErC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7E,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,SAAS,CAAC,CA
AA;IAC1F,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,yBAAyB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AAC/E,CAAC,CAAA;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAiB,CAAC,GAAG,EAAE,EAAE;IAC7D,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAA;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,SAAS,GAAG,iCAAiC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;IACpE,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IAClD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,OAAO,GAAG,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAA;IAC9D,IAAI,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAEjC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAA;IACzE,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;IACtF,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,+BAA+B,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACrF,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACzD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,IAAI,CAAA;IAChC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,KAAK,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IAEvB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;IACxD,IAAI,MAAM,KAAK,KAAK,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IAEzC,MAAM,OAAO,GAAG,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAA;IAC9D,IAAI,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAEjC,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAA;IACzE,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;IACtF,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE
,2BAA2B,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACjF,CAAC,CAAA;AAED,gEAAgE;AAChE,MAAM,CAAC,MAAM,gBAAgB,GAAiB,CAAC,GAAG,EAAE,EAAE;IACrD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAA;IACxC,IAAI,CAAC,QAAQ,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAA;IAC7D,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;IACxC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;IAChD,IAAI,MAAM,KAAK,GAAG,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,gBAAgB,CAAC,GAAG,EAAE,qBAAqB,EAAE,MAAM,EAAE,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;AACrG,CAAC,CAAA;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,+FAA+F;AAC/F,MAAM,CAAC,MAAM,aAAa,GAAiB,CAAC,GAAG,EAAE,EAAE;IAClD,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,sBAAsB,CAAA;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,aAAa,GAAkB,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAA;IAC1D,OAAO,aAAa,CAAC,sBAAsB,CAAA;IAC3C,+DAA+D;IAC/D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,WAAW,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IAC9D,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAClC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,OAAO,gBAAgB,CAAC,GAAG,EAAE,gBAAgB,EAAE,MAAM,EAAE,aAAa,CAAC,CAAA;AACtE,CAAC,CAAA;AAED,8EAA8E;AAC9E,8BAA8B;AAC9B,8EAA8E;AAE9E,sCAAsC;AACtC,MAAM,CAAC,MAAM,aAAa,GAAiC;IAC1D,YAAY,EAAE,SAAS;IACvB,YAAY,EAAE,SAAS;IACvB,aAAa,EAAE,UAAU;IACzB,cAAc,EAAE,WAAW;IAC3B,cAAc,EAAE,WAAW;IAC3B,cAAc,EAAE,WAAW;IAC3B,kBAAkB,EAAE,eAAe;IACnC,oBAAoB,EAAE,iBAAiB;IACvC,wBAAwB,EAAE,qBAAqB;IAC/C,6BAA6B,EAAE,sBAAsB;IACrD,yBAAyB,EAAE,kBAAkB;IAC7C,+BAA+B,EAAE,wBAAwB;IACzD,2BAA2B,EAAE,oBAAoB;IACjD,qBAAqB,EAAE,gBAAgB;IACvC,gBAAgB,EAAE,aAAa;CAC/B,CAAA;AAED,kGAAkG;AAClG,MAAM,UAAU,8BAA8B,CAAC,OAAe;IAC7D,MAAM,SAAS,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IACjE,QAAQ,OAAO,EAAE,CAAC;QACjB,KAAK,IAAI;YACR,OAAO;gBACN,GAAG,SAAS;gBACZ,WA
AW;gBACX,eAAe;gBACf,iBAAiB;gBACjB,qBAAqB;gBACrB,sBAAsB;gBACtB,kBAAkB;gBAClB,wBAAwB;gBACxB,oBAAoB;gBACpB,gBAAgB;aAChB,CAAA;QACF,KAAK,IAAI;YACR,OAAO,CAAC,GAAG,SAAS,EAAE,WAAW,EAAE,aAAa,CAAC,CAAA;QAClD;YACC,OAAO,SAAS,CAAA;IAClB,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,SAAS,CAAC,CAAC,aAAa,CAC7B,GAAiB,EACjB,gBAAyC,8BAA8B,CAAC,GAAG,CAAC,OAAO,CAAC;IAEpF,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAA;QACpB,IAAI,GAAG;YAAE,MAAM,GAAG,CAAA;IACnB,CAAC;AACF,CAAC;AAED,SAAS,WAAW,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAiED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,UAAU,qBAAqB,CACpC,KAAa,EACb,OAAqB,EACrB,OAAkC;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAA;IAC3C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,mBAAmB,EAAE,CAAA;IAE5D,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,EAAE,CAAA;IACjC,IAAI,CAAC,YAAY,EAAE,CAAC;QACnB,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,EAAE,CAAA;IAC7E,CAAC;IACD,6FAA6F;IAC7F,+EAA+E;IAC/E,IAAI,YAAY,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,YAAY,EAAE,CAAC;QACpD,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,EAAE,CAAA;IAC/E,CAAC;IAED,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;IACvD,IAAI,cAAc,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACvC,kFAAkF;QAClF,kFAAkF;QAClF,+BAA+B;QAC/B,OAAO;YACN,IAAI,EAAE,aAAa;YACnB,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,mBAAmB,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE;SAC7E,CAAA;IACF,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAA;IACpD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,iBAAiB,EAAE,EAAE,CAAA;IACjF,CAAC;IAED,MAAM,WAAW,GAAe,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAA;IAE5F,MAAM,MAAM,GAAa,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAC1F,MAAM,MAAM,GAAe,CAAC,GAAG,WA
AW,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAEzE,MAAM,WAAW,GAAG,GAAG,YAAY,GAAG,SAAS,GAAG,OAAO,CAAC,GAAG,EAAE,CAAA;IAC/D,MAAM,kBAAkB,GAAG;QAC1B,KAAK,EAAE,YAAY;QACnB,GAAG,OAAO,CAAC,UAAU;KACrB,CAAA;IAED,8FAA8F;IAC9F,gGAAgG;IAChG,4FAA4F;IAC5F,2FAA2F;IAC3F,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,cAAc,CAAC,GAAG,CAAA;IAChG,IAAI,UAAU,KAAK,SAAS,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAClF,MAAM,IAAI,KAAK,CACd,iFAAiF;YAChF,WAAW,OAAO,CAAC,MAAM,eAAe,OAAO,CAAC,SAAS,kCAAkC,CAC5F,CAAA;IACF,CAAC;IACD,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAA;IACrD,MAAM,KAAK,GAAoB;QAC9B,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,YAAY,CAAC,MAAM,EAAE;QACpD,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC,CAAC;KAClG,CAAA;IAED,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,EAAE,cAAc,IAAI,OAAO,CAAC,SAAS,CAAA;IACvE,MAAM,MAAM,GAAG,WAAW,OAAO,CAAC,OAAO,EAAE,CAAA;IAE3C,MAAM,QAAQ,GAAe;QAC5B,GAAG,EAAE,WAAW;QAChB,UAAU,EAAE,kBAAkB;QAC9B,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,SAAS,EAAE,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,EAAE;QAC3C,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,EAAE;QAC/C,MAAM;QACN,MAAM;QACN,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QACtC,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClC,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;KAClC,CAAA;IACD,oBAAoB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;IAErC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAA;AAC1C,CAAC"}
|
package/out/src/types.d.ts
CHANGED
|
@@ -85,13 +85,34 @@ export interface CanonicalRow extends SourceProvenance {
|
|
|
85
85
|
}
|
|
86
86
|
/**
|
|
87
87
|
* Output of `align.ts`. Carries everything `CanonicalRow` does, plus parallel `tokens` and `labels`
|
|
88
|
-
* arrays of identical length
|
|
88
|
+
* arrays of identical length (`labels[i]` is the BIO tag for `tokens[i]`) and — as of the v0.5.0
|
|
89
|
+
* char-offset migration (#519) — parallel char-span arrays addressing `raw` directly.
|
|
90
|
+
*
|
|
91
|
+
* The span triple is the v0.5.0 source of truth; `tokens`/`labels` remain emitted during the
|
|
92
|
+
* transition (and stay derivable afterwards: whitespace split + span lookup). The reverse
|
|
93
|
+
* derivation — today's token labels — is the lossy direction (punctuation-mute).
|
|
89
94
|
*/
|
|
90
95
|
export interface LabeledRow extends CanonicalRow {
|
|
91
96
|
/** SentencePiece subword tokens for `raw`. */
|
|
92
97
|
tokens: readonly string[];
|
|
93
98
|
/** BIO labels, one per token. Same length as `tokens`. */
|
|
94
99
|
labels: readonly BioLabel[];
|
|
100
|
+
/**
|
|
101
|
+
* Char-offset label spans over `raw` (parallel arrays, per the #519 ruling): `span_starts[i]` is
|
|
102
|
+
* the inclusive start offset (UTF-16 code units) of span `i`, `span_ends[i]` its exclusive end,
|
|
103
|
+
* `span_tags[i]` its component tag. Invariants — enforced loudly by `alignRow`, documented for
|
|
104
|
+
* every other producer: sorted ascending by start, non-overlapping. `raw` must be NFC-normalized
|
|
105
|
+
* or the offsets are ambiguous (also enforced by `alignRow`).
|
|
106
|
+
*
|
|
107
|
+
* Optional during the v0.4.x → v0.5.0 transition only: alignment always emits the triple; frozen
|
|
108
|
+
* historical corpora and not-yet-migrated synthesis paths may lack it. Required once v0.5.0 lands
|
|
109
|
+
* and the token path is deleted.
|
|
110
|
+
*/
|
|
111
|
+
span_starts?: readonly number[];
|
|
112
|
+
/** Exclusive end offsets, parallel to `span_starts`. */
|
|
113
|
+
span_ends?: readonly number[];
|
|
114
|
+
/** Component tags, parallel to `span_starts`. */
|
|
115
|
+
span_tags?: readonly ComponentTag[];
|
|
95
116
|
}
|
|
96
117
|
/**
|
|
97
118
|
* A row that alignment refused to label. Lands in `/data/corpus/quarantine/` for human review.
|
package/out/src/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEnE;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IAEd;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAA;IAEjB;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAA;IAEtB;;;OAGG;IACH,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC3B;;;OAGG;IACH,MAAM,EAAE,MAAM,CAAA;IAEd;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAA;CACtB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,YAAa,SAAQ,gBAAgB;IACrD,wDAAwD;IACxD,GAAG,EAAE,MAAM,CAAA;IAEX;;;OAGG;IACH,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;IAEjD,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAA;IAEf,8DAA8D;IAC9D,MAAM,CAAC,EAAE,MAAM,CAAA;IAEf,sCAAsC;IACtC,KAAK,CAAC,EAAE,WAAW,CAAA;CACnB;AAED
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEnE;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IAEd;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAA;IAEjB;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAA;IAEtB;;;OAGG;IACH,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC3B;;;OAGG;IACH,MAAM,EAAE,MAAM,CAAA;IAEd;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAA;CACtB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,YAAa,SAAQ,gBAAgB;IACrD,wDAAwD;IACxD,GAAG,EAAE,MAAM,CAAA;IAEX;;;OAGG;IACH,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;IAEjD,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAA;IAEf,8DAA8D;IAC9D,MAAM,CAAC,EAAE,MAAM,CAAA;IAEf,sCAAsC;IACtC,KAAK,CAAC,EAAE,WAAW,CAAA;CACnB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC/C,8CAA8C;IAC9C,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IAEzB,0DAA0D;IAC1D,MAAM,EAAE,SAAS,QAAQ,EAAE,CAAA;IAE3B;;;;;;;;;;OAUG;IACH,WAAW,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAE/B,wDAAwD;IACxD,SAAS,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAE7B,iDAAiD;IACjD,SAAS,CAAC,EAAE,SAAS,YAAY,EAAE,CAAA;CACnC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,YAAY,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;CACd;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,cAAc;IAC9B,qFAAqF;IACrF,SAAS,EAAE,MAAM,CAAA;IAEjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,yEAAyE;IACzE,KAAK,CAAC,EAAE,MAAM,CAAA;IAEd,mFAAmF;IACnF,MAAM,CAAC,EAAE,WAAW,CAAA;CACpB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC7B,kFAAkF;IAClF,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;IAEnB,4FAA4F;IAC5F,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAA;IAE/B,qEAAqE;IACrE,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAA;IAE5B;;;;;;;;;;;;;;;OAeG;IACH,IAAI,CAAC,IAAI,EAAE,cAAc,GAAG,aAAa,CAAC,YAAY,CAAC,CAAA;CACvD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/corpus",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.5.1",
|
|
4
4
|
"description": "Mailwoman corpus pipeline: BIO-labeled dataset builder for the neural classifier.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"@dsnp/parquetjs": "1.8.7",
|
|
20
20
|
"@fragaria/address-formatter": "^6.7.1",
|
|
21
|
-
"@mailwoman/codex": "4.
|
|
22
|
-
"@mailwoman/core": "4.
|
|
21
|
+
"@mailwoman/codex": "4.5.1",
|
|
22
|
+
"@mailwoman/core": "4.5.1",
|
|
23
23
|
"csv-parse": "^5.6.0",
|
|
24
24
|
"fastest-levenshtein": "^1.0.16",
|
|
25
25
|
"lru-cache": "^10.4.3"
|