@mailwoman/corpus 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapter.d.ts +96 -0
- package/out/src/adapter.d.ts.map +1 -0
- package/out/src/adapter.js +107 -0
- package/out/src/adapter.js.map +1 -0
- package/out/src/adapters/ban/adapter.d.ts +32 -0
- package/out/src/adapters/ban/adapter.d.ts.map +1 -0
- package/out/src/adapters/ban/adapter.js +133 -0
- package/out/src/adapters/ban/adapter.js.map +1 -0
- package/out/src/adapters/fcc-bdc/adapter.d.ts +61 -0
- package/out/src/adapters/fcc-bdc/adapter.d.ts.map +1 -0
- package/out/src/adapters/fcc-bdc/adapter.js +153 -0
- package/out/src/adapters/fcc-bdc/adapter.js.map +1 -0
- package/out/src/adapters/index.d.ts +42 -0
- package/out/src/adapters/index.d.ts.map +1 -0
- package/out/src/adapters/index.js +76 -0
- package/out/src/adapters/index.js.map +1 -0
- package/out/src/adapters/openaddresses/adapter.d.ts +60 -0
- package/out/src/adapters/openaddresses/adapter.d.ts.map +1 -0
- package/out/src/adapters/openaddresses/adapter.js +174 -0
- package/out/src/adapters/openaddresses/adapter.js.map +1 -0
- package/out/src/adapters/state-ia-contractors/adapter.d.ts +23 -0
- package/out/src/adapters/state-ia-contractors/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-ia-contractors/adapter.js +113 -0
- package/out/src/adapters/state-ia-contractors/adapter.js.map +1 -0
- package/out/src/adapters/state-ny-notaries/adapter.d.ts +21 -0
- package/out/src/adapters/state-ny-notaries/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-ny-notaries/adapter.js +132 -0
- package/out/src/adapters/state-ny-notaries/adapter.js.map +1 -0
- package/out/src/adapters/state-tx-notaries/adapter.d.ts +22 -0
- package/out/src/adapters/state-tx-notaries/adapter.d.ts.map +1 -0
- package/out/src/adapters/state-tx-notaries/adapter.js +125 -0
- package/out/src/adapters/state-tx-notaries/adapter.js.map +1 -0
- package/out/src/adapters/tiger/adapter.d.ts +45 -0
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -0
- package/out/src/adapters/tiger/adapter.js +179 -0
- package/out/src/adapters/tiger/adapter.js.map +1 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts +36 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.js +147 -0
- package/out/src/adapters/usgov-hrsa-fqhc/adapter.js.map +1 -0
- package/out/src/adapters/usgov-imls-pls/adapter.d.ts +25 -0
- package/out/src/adapters/usgov-imls-pls/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-imls-pls/adapter.js +118 -0
- package/out/src/adapters/usgov-imls-pls/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts +37 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.js +227 -0
- package/out/src/adapters/usgov-nad/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nppes/adapter.d.ts +28 -0
- package/out/src/adapters/usgov-nppes/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-nppes/adapter.js +123 -0
- package/out/src/adapters/usgov-nppes/adapter.js.map +1 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts +35 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js +162 -0
- package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js.map +1 -0
- package/out/src/adapters/wof-admin-json/adapter.d.ts +85 -0
- package/out/src/adapters/wof-admin-json/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-admin-json/adapter.js +241 -0
- package/out/src/adapters/wof-admin-json/adapter.js.map +1 -0
- package/out/src/adapters/wof-postalcode-json/adapter.d.ts +63 -0
- package/out/src/adapters/wof-postalcode-json/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-postalcode-json/adapter.js +178 -0
- package/out/src/adapters/wof-postalcode-json/adapter.js.map +1 -0
- package/out/src/align.d.ts +58 -0
- package/out/src/align.d.ts.map +1 -0
- package/out/src/align.js +139 -0
- package/out/src/align.js.map +1 -0
- package/out/src/build.d.ts +104 -0
- package/out/src/build.d.ts.map +1 -0
- package/out/src/build.js +201 -0
- package/out/src/build.js.map +1 -0
- package/out/src/codex/us-fips-state.d.ts +44 -0
- package/out/src/codex/us-fips-state.d.ts.map +1 -0
- package/out/src/codex/us-fips-state.js +105 -0
- package/out/src/codex/us-fips-state.js.map +1 -0
- package/out/src/codex/us-street-suffix.d.ts +259 -0
- package/out/src/codex/us-street-suffix.d.ts.map +1 -0
- package/out/src/codex/us-street-suffix.js +285 -0
- package/out/src/codex/us-street-suffix.js.map +1 -0
- package/out/src/format.d.ts +79 -0
- package/out/src/format.d.ts.map +1 -0
- package/out/src/format.js +151 -0
- package/out/src/format.js.map +1 -0
- package/out/src/golden.d.ts +50 -0
- package/out/src/golden.d.ts.map +1 -0
- package/out/src/golden.js +104 -0
- package/out/src/golden.js.map +1 -0
- package/out/src/index.d.ts +18 -0
- package/out/src/index.d.ts.map +1 -0
- package/out/src/index.js +18 -0
- package/out/src/index.js.map +1 -0
- package/out/src/parquet-wrapper/index.d.ts +12 -0
- package/out/src/parquet-wrapper/index.d.ts.map +1 -0
- package/out/src/parquet-wrapper/index.js +12 -0
- package/out/src/parquet-wrapper/index.js.map +1 -0
- package/out/src/parquet-wrapper/reader.d.ts +31 -0
- package/out/src/parquet-wrapper/reader.d.ts.map +1 -0
- package/out/src/parquet-wrapper/reader.js +54 -0
- package/out/src/parquet-wrapper/reader.js.map +1 -0
- package/out/src/parquet-wrapper/schema.d.ts +45 -0
- package/out/src/parquet-wrapper/schema.d.ts.map +1 -0
- package/out/src/parquet-wrapper/schema.js +55 -0
- package/out/src/parquet-wrapper/schema.js.map +1 -0
- package/out/src/parquet-wrapper/writer.d.ts +41 -0
- package/out/src/parquet-wrapper/writer.d.ts.map +1 -0
- package/out/src/parquet-wrapper/writer.js +71 -0
- package/out/src/parquet-wrapper/writer.js.map +1 -0
- package/out/src/parquet.d.ts +122 -0
- package/out/src/parquet.d.ts.map +1 -0
- package/out/src/parquet.js +220 -0
- package/out/src/parquet.js.map +1 -0
- package/out/src/runner.d.ts +100 -0
- package/out/src/runner.d.ts.map +1 -0
- package/out/src/runner.js +183 -0
- package/out/src/runner.js.map +1 -0
- package/out/src/split.d.ts +108 -0
- package/out/src/split.d.ts.map +1 -0
- package/out/src/split.js +191 -0
- package/out/src/split.js.map +1 -0
- package/out/src/synthesize.d.ts +146 -0
- package/out/src/synthesize.d.ts.map +1 -0
- package/out/src/synthesize.js +472 -0
- package/out/src/synthesize.js.map +1 -0
- package/out/src/tokenize.d.ts +47 -0
- package/out/src/tokenize.d.ts.map +1 -0
- package/out/src/tokenize.js +49 -0
- package/out/src/tokenize.js.map +1 -0
- package/out/src/types.d.ts +168 -0
- package/out/src/types.d.ts.map +1 -0
- package/out/src/types.js +19 -0
- package/out/src/types.js.map +1 -0
- package/out/src/wof-json.d.ts +105 -0
- package/out/src/wof-json.d.ts.map +1 -0
- package/out/src/wof-json.js +174 -0
- package/out/src/wof-json.js.map +1 -0
- package/package.json +36 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed Parquet writer. Two static constructors mirror the base class:
|
|
7
|
+
*
|
|
8
|
+
* - `openStream`: wrap an existing writable stream.
|
|
9
|
+
* - `openFile`: open a path on disk, ensuring the parent directory exists first.
|
|
10
|
+
*
|
|
11
|
+
* Implements `AsyncDisposable` so `await using writer = await ParquetWriter.openFile(...)` flushes
|
|
12
|
+
* and closes cleanly. `close()` internally serializes against any in-flight flush so back-to-back
|
|
13
|
+
* dispose calls don't race.
|
|
14
|
+
*
|
|
15
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
+
* `sdk/parquet/writer.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). One
|
|
17
|
+
* trim relative to the original: dropped the `@isp.nexus/core/polyfills/promises/withResolvers`
|
|
18
|
+
* import — Node 22 (mailwoman's runtime) has `Promise.withResolvers` natively.
|
|
19
|
+
*/
|
|
20
|
+
import { ParquetWriter as BaseParquetWriter } from "@dsnp/parquetjs";
|
|
21
|
+
import type { WriterOptions } from "@dsnp/parquetjs/dist/lib/declare.js";
|
|
22
|
+
import { type WriteStreamMinimal } from "@dsnp/parquetjs/dist/lib/util.js";
|
|
23
|
+
import { type ParquetRecordLike, ParquetSchema, type ParquetSchemaDefinition, ParquetSchemaDefinitionCache } from "./schema.js";
|
|
24
|
+
/** A typed Parquet writer, wrapping the base Parquet writer. */
|
|
25
|
+
export declare class ParquetWriter<T extends ParquetRecordLike> extends BaseParquetWriter implements AsyncDisposable {
|
|
26
|
+
#private;
|
|
27
|
+
schema: ParquetSchema<T>;
|
|
28
|
+
protected static readonly SchemaDefinitionCache: ParquetSchemaDefinitionCache;
|
|
29
|
+
static openStream<T extends ParquetRecordLike>(schemaLike: ParquetSchema<T> | ParquetSchemaDefinition<T>, outputStream: WriteStreamMinimal, opts?: WriterOptions): Promise<ParquetWriter<T>>;
|
|
30
|
+
/** Convenience method to create a new buffered parquet writer that writes to the specified file. */
|
|
31
|
+
static openFile<T extends ParquetRecordLike>(schemaLike: ParquetSchema<T> | ParquetSchemaDefinition<T>, sourcePath: string | Buffer | URL, opts?: WriterOptions): Promise<ParquetWriter<T>>;
|
|
32
|
+
/** Set a metadata key-value pair on the writer. */
|
|
33
|
+
setMetadata(key: string, value: string): void;
|
|
34
|
+
/** Append a row to the buffer. If the buffer is full, the data will be written to disk. */
|
|
35
|
+
appendRow(row: T): Promise<void>;
|
|
36
|
+
/** Flush all buffered data to disk, close the file, and release resources. */
|
|
37
|
+
close(): Promise<void>;
|
|
38
|
+
[Symbol.asyncDispose](): Promise<void>;
|
|
39
|
+
dispose(): Promise<void>;
|
|
40
|
+
}
|
|
41
|
+
//# sourceMappingURL=writer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACpE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qCAAqC,CAAA;AACxE,OAAO,EAAU,KAAK,kBAAkB,EAAE,MAAM,kCAAkC,CAAA;AAIlF,OAAO,EACN,KAAK,iBAAiB,EACtB,aAAa,EACb,KAAK,uBAAuB,EAC5B,4BAA4B,EAC5B,MAAM,aAAa,CAAA;AAEpB,gEAAgE;AAChE,qBAAa,aAAa,CAAC,CAAC,SAAS,iBAAiB,CAAE,SAAQ,iBAAkB,YAAW,eAAe;;IACnG,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAA;IAChC,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,qBAAqB,+BAAqC;WAG9D,UAAU,CAAC,CAAC,SAAS,iBAAiB,EAC3D,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,uBAAuB,CAAC,CAAC,CAAC,EACzD,YAAY,EAAE,kBAAkB,EAChC,IAAI,GAAE,aAAkB,GACtB,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAW5B,oGAAoG;WAC9E,QAAQ,CAAC,CAAC,SAAS,iBAAiB,EACzD,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,uBAAuB,CAAC,CAAC,CAAC,EACzD,UAAU,EAAE,MAAM,GAAG,MAAM,GAAG,GAAG,EACjC,IAAI,CAAC,EAAE,aAAa,GAClB,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAU5B,mDAAmD;IACnC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAI7D,2FAA2F;IACrE,SAAS,CAAC,GAAG,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAItD,8EAA8E;IACxD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAa/B,CAAC,MAAM,CAAC,YAAY,CAAC;IAIrB,OAAO;CAGpB"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed Parquet writer. Two static constructors mirror the base class:
|
|
7
|
+
*
|
|
8
|
+
* - `openStream`: wrap an existing writable stream.
|
|
9
|
+
* - `openFile`: open a path on disk, ensuring the parent directory exists first.
|
|
10
|
+
*
|
|
11
|
+
* Implements `AsyncDisposable` so `await using writer = await ParquetWriter.openFile(...)` flushes
|
|
12
|
+
* and closes cleanly. `close()` internally serializes against any in-flight flush so back-to-back
|
|
13
|
+
* dispose calls don't race.
|
|
14
|
+
*
|
|
15
|
+
* Salvaged 2026-05-17 from `isp-nexus/universe@6eeb7bd99643a6d62a8b8abbd50968a1e492b90b`
|
|
16
|
+
* `sdk/parquet/writer.ts` (originally copyright OpenISP, Inc.; both projects are AGPL-3.0). One
|
|
17
|
+
* trim relative to the original: dropped the `@isp.nexus/core/polyfills/promises/withResolvers`
|
|
18
|
+
* import — Node 22 (mailwoman's runtime) has `Promise.withResolvers` natively.
|
|
19
|
+
*/
|
|
20
|
+
import { ParquetWriter as BaseParquetWriter } from "@dsnp/parquetjs";
|
|
21
|
+
import { osopen } from "@dsnp/parquetjs/dist/lib/util.js";
|
|
22
|
+
import { ParquetEnvelopeWriter } from "@dsnp/parquetjs/dist/lib/writer.js";
|
|
23
|
+
import * as fs from "node:fs/promises";
|
|
24
|
+
import * as path from "node:path";
|
|
25
|
+
import { ParquetSchema, ParquetSchemaDefinitionCache, } from "./schema.js";
|
|
26
|
+
/**
 * A typed Parquet writer, wrapping the base `@dsnp/parquetjs` writer.
 *
 * Create instances through the static factories: `openStream` wraps an existing writable stream,
 * `openFile` opens a path on disk. `close()`, `dispose()` and `Symbol.asyncDispose` all funnel
 * into one underlying close operation, so the writer can be used with `await using`.
 */
export class ParquetWriter extends BaseParquetWriter {
    /** Cache consulted by `openStream` to turn a plain schema definition into a `ParquetSchema`. */
    static SchemaDefinitionCache = new ParquetSchemaDefinitionCache();
    /**
     * Promise of the single base-class `close()` call, or `null` until `close()` is first invoked.
     * Every `close()` / dispose call returns this same promise.
     */
    #closing = null;
    /**
     * Create a writer on top of an existing writable stream.
     *
     * @param schemaLike - A `ParquetSchema` instance, or a schema definition that is resolved
     *   through `SchemaDefinitionCache.findOrCreateSchema`.
     * @param outputStream - Destination stream handed to `ParquetEnvelopeWriter.openStream`.
     * @param opts - Writer options, forwarded to both the envelope writer and the writer itself.
     * @returns The new writer.
     */
    static async openStream(schemaLike, outputStream, opts = {}) {
        const schema = schemaLike instanceof ParquetSchema
            ? schemaLike
            : ParquetWriter.SchemaDefinitionCache.findOrCreateSchema(schemaLike);
        const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
        return new ParquetWriter(schema, envelopeWriter, opts);
    }
    /**
     * Convenience method to create a new buffered parquet writer that writes to the specified file.
     *
     * The parent directory is created (recursively) only when `sourcePath` is a string; `Buffer`
     * and `URL` paths are passed to `osopen` unchanged.
     *
     * @param schemaLike - A `ParquetSchema` instance or a schema definition (see `openStream`).
     * @param sourcePath - Path of the file to write.
     * @param opts - Writer options, forwarded to `osopen` and `openStream`.
     * @returns The new writer.
     */
    static async openFile(schemaLike, sourcePath, opts) {
        if (typeof sourcePath === "string") {
            await fs.mkdir(path.dirname(sourcePath), { recursive: true });
        }
        const outputStream = await osopen(sourcePath, opts);
        return ParquetWriter.openStream(schemaLike, outputStream, opts);
    }
    // @note This fixes invalid Markdown in the base class JSDoc.
    /** Set a metadata key-value pair on the writer. */
    setMetadata(key, value) {
        return super.setMetadata(key, value);
    }
    /** Append a row to the buffer. If the buffer is full, the data will be written to disk. */
    async appendRow(row) {
        return super.appendRow(row);
    }
    /**
     * Flush all buffered data to disk, close the file, and release resources.
     *
     * Safe to call more than once, including concurrently: the first call starts the base-class
     * close, and every other call resolves (or rejects) together with that same operation instead
     * of returning while it is still in flight. If the writer is already flagged `closed` before
     * the first call, nothing is closed again and the call resolves immediately.
     */
    async close() {
        if (this.#closing === null) {
            // Store the promise before anything is awaited, so a call arriving while the close
            // is still running picks it up rather than racing it.
            this.#closing = this.closed ? Promise.resolve() : super.close();
        }
        return this.#closing;
    }
    /** Async-dispose hook used by `await using`; equivalent to `close()`. */
    async [Symbol.asyncDispose]() {
        return this.close();
    }
    /** Named alias for `Symbol.asyncDispose`. */
    async dispose() {
        return this[Symbol.asyncDispose]();
    }
}
|
|
71
|
+
//# sourceMappingURL=writer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"writer.js","sourceRoot":"","sources":["../../../src/parquet-wrapper/writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,aAAa,IAAI,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,MAAM,EAA2B,MAAM,kCAAkC,CAAA;AAClF,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAA;AAC1E,OAAO,KAAK,EAAE,MAAM,kBAAkB,CAAA;AACtC,OAAO,KAAK,IAAI,MAAM,WAAW,CAAA;AACjC,OAAO,EAEN,aAAa,EAEb,4BAA4B,GAC5B,MAAM,aAAa,CAAA;AAEpB,gEAAgE;AAChE,MAAM,OAAO,aAA2C,SAAQ,iBAAiB;IAEtE,MAAM,CAAU,qBAAqB,GAAG,IAAI,4BAA4B,EAAE,CAAA;IACpF,SAAS,GAAkB,OAAO,CAAC,OAAO,EAAE,CAAA;IAE5C,MAAM,CAAU,KAAK,CAAC,UAAU,CAC/B,UAAyD,EACzD,YAAgC,EAChC,OAAsB,EAAE;QAExB,MAAM,MAAM,GACX,UAAU,YAAY,aAAa;YAClC,CAAC,CAAC,UAAU;YACZ,CAAC,CAAC,aAAa,CAAC,qBAAqB,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAA;QAEtE,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,IAAI,CAAC,CAAA;QAEzF,OAAO,IAAI,aAAa,CAAI,MAAM,EAAE,cAAc,EAAE,IAAI,CAAC,CAAA;IAC1D,CAAC;IAED,oGAAoG;IACpG,MAAM,CAAU,KAAK,CAAC,QAAQ,CAC7B,UAAyD,EACzD,UAAiC,EACjC,IAAoB;QAEpB,IAAI,OAAO,UAAU,KAAK,QAAQ,EAAE,CAAC;YACpC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QAC9D,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;QACnD,OAAO,aAAa,CAAC,UAAU,CAAI,UAAU,EAAE,YAAY,EAAE,IAAI,CAAC,CAAA;IACnE,CAAC;IAED,6DAA6D;IAC7D,mDAAmD;IACnC,WAAW,CAAC,GAAW,EAAE,KAAa;QACrD,OAAO,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IACrC,CAAC;IAED,2FAA2F;IAC3E,KAAK,CAAC,SAAS,CAAC,GAAM;QACrC,OAAO,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAA;IAC5B,CAAC;IAED,8EAA8E;IAC9D,KAAK,CAAC,KAAK;QAC1B,MAAM,IAAI,CAAC,SAAS,CAAA;QACpB,IAAI,IAAI,CAAC,MAAM;YAAE,OAAM;QAEvB,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,aAAa,EAAQ,CAAA;QAElE,KAAK,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;QAEnC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAA;QAExB,OAAO,IAAI,CAAC,SAAS,CAAA;IACtB,CAAC;IAEM,KAAK,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACjC,OAAO,IAAI,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC;IAEM,KAAK,CAAC,OAAO;QACnB,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;IACnC,CAAC"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Final output sharder for the corpus pipeline.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1 (#9) shipped JSONL shards + a Python (PyArrow) converter as the path to binary Parquet —
|
|
9
|
+
* bridging until the JS toolchain caught up. Phase 1.5 (#18 §4) replaced that with a native JS
|
|
10
|
+
* writer based on the salvaged `@dsnp/parquetjs` wrapper from isp-nexus (now in
|
|
11
|
+
* `./parquet-wrapper/`). The build pipeline no longer touches Python at all in its hot path; the
|
|
12
|
+
* only remaining Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
13
|
+
*
|
|
14
|
+
* Compression: `SNAPPY`. The plan in #18 §4 specified `zstd`, but `@dsnp/parquetjs` 1.7.0 only
|
|
15
|
+
* supports UNCOMPRESSED / GZIP / SNAPPY / BROTLI (see `node_modules/@dsnp/parquetjs/dist/lib/
|
|
16
|
+
* compression.js`). SNAPPY is the standard ML-corpus default (PyArrow's default too) and is the
|
|
17
|
+
* closest substitute on speed; revisit if @dsnp/parquetjs gains zstd support. Documented in
|
|
18
|
+
* `DECISIONS.md`.
|
|
19
|
+
*
|
|
20
|
+
* Layout under `<outputDir>`:
|
|
21
|
+
*
|
|
22
|
+
* ```
|
|
23
|
+
* corpus-v<version>/
|
|
24
|
+
* MANIFEST.json
|
|
25
|
+
* train/
|
|
26
|
+
* part-0000.parquet
|
|
27
|
+
* part-0001.parquet
|
|
28
|
+
* ...
|
|
29
|
+
* val/
|
|
30
|
+
* part-0000.parquet
|
|
31
|
+
* test/
|
|
32
|
+
* part-0000.parquet
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* Each shard caps at `rowsPerShard` (default 1_000_000); within a shard, parquetjs flushes row
|
|
36
|
+
* groups every `ROW_GROUP_SIZE` (50_000) rows per the issue spec. The MANIFEST captures every
|
|
37
|
+
* shard's path, row count, byte size, and SHA-256 (computed by re-reading the shard once after
|
|
38
|
+
* close — cheap relative to writing it).
|
|
39
|
+
*/
|
|
40
|
+
import { type ParquetSchemaDefinition } from "./parquet-wrapper/index.js";
|
|
41
|
+
import type { SplitName } from "./split.js";
|
|
42
|
+
import type { LabeledRow } from "./types.js";
|
|
43
|
+
/** Row groups flush at this many rows (parquetjs internal cadence within a shard). */
|
|
44
|
+
export declare const ROW_GROUP_SIZE = 50000;
|
|
45
|
+
/** Snappy is the closest substitute for zstd among the codecs @dsnp/parquetjs 1.7.0 supports (UNCOMPRESSED / GZIP / SNAPPY / BROTLI). */
|
|
46
|
+
export declare const SHARD_COMPRESSION: "SNAPPY";
|
|
47
|
+
/**
|
|
48
|
+
* A single Parquet-style row shape. The `[key: string]: unknown` index signature is required for
|
|
49
|
+
* compatibility with `ParquetRecordLike` in the wrapper — parquetjs accepts any string key on
|
|
50
|
+
* rows.
|
|
51
|
+
*/
|
|
52
|
+
export interface ParquetRow {
|
|
53
|
+
raw: string;
|
|
54
|
+
tokens: readonly string[];
|
|
55
|
+
labels: readonly string[];
|
|
56
|
+
country: string;
|
|
57
|
+
locale: string | null;
|
|
58
|
+
source: string;
|
|
59
|
+
source_id: string;
|
|
60
|
+
corpus_version: string;
|
|
61
|
+
license: string;
|
|
62
|
+
synth_method: string | null;
|
|
63
|
+
synth_base_id: string | null;
|
|
64
|
+
[key: string]: unknown;
|
|
65
|
+
}
|
|
66
|
+
/** Column names emitted into every shard. Matches `ParquetRow`. */
|
|
67
|
+
export declare const PARQUET_COLUMNS: readonly ["raw", "tokens", "labels", "country", "locale", "source", "source_id", "corpus_version", "license", "synth_method", "synth_base_id"];
|
|
68
|
+
/**
|
|
69
|
+
* Parquet schema for `LabeledRow` per #18 §4. Optional fields use `optional: true`; repeated UTF8
|
|
70
|
+
* columns capture tokens/labels arrays. Compression is per-column SNAPPY.
|
|
71
|
+
*/
|
|
72
|
+
export declare const LABELED_ROW_SCHEMA: ParquetSchemaDefinition<ParquetRow>;
|
|
73
|
+
/** Per-shard metadata captured in `MANIFEST.json`. */
|
|
74
|
+
export interface ShardDescriptor {
|
|
75
|
+
/** Split whose directory this shard was written under. */
split: SplitName;
|
|
76
|
+
/** Shard file path as built by `writeShards`: `<outputDir>/corpus-v<version>/<split>/part-NNNN.parquet`. */
path: string;
|
|
77
|
+
/** File format of the shard; always `"parquet"`. */
format: "parquet";
|
|
78
|
+
/** Compression codec recorded for the shard; always `SHARD_COMPRESSION`. */
compression: typeof SHARD_COMPRESSION;
|
|
79
|
+
/** Number of rows in the shard. */
rows: number;
|
|
80
|
+
/** Size of the shard file in bytes, measured after the writer has been closed. */
bytes: number;
|
|
81
|
+
/** SHA-256 digest of the shard file. */
sha256: string;
|
|
82
|
+
/** `source_id` of the first row in the shard — presumably in write order; confirm against `writeShards`. */
first_source_id: string;
|
|
83
|
+
/** `source_id` of the last row in the shard — presumably in write order; confirm against `writeShards`. */
last_source_id: string;
|
|
84
|
+
}
|
|
85
|
+
export interface ShardManifest {
|
|
86
|
+
corpus_version: string;
|
|
87
|
+
schema: readonly string[];
|
|
88
|
+
rows_per_shard: number;
|
|
89
|
+
row_group_size: number;
|
|
90
|
+
shards: ShardDescriptor[];
|
|
91
|
+
counts: Record<SplitName, number>;
|
|
92
|
+
total_rows: number;
|
|
93
|
+
}
|
|
94
|
+
export interface WriteShardsOptions {
|
|
95
|
+
/** Root output directory; corpus version dir is created beneath. */
|
|
96
|
+
outputDir: string;
|
|
97
|
+
/** Corpus version stamped onto rows + into the output directory name. */
|
|
98
|
+
corpusVersion: string;
|
|
99
|
+
/** Max rows per `.parquet` shard. Default 1_000_000 per the Phase 1 plan. */
|
|
100
|
+
rowsPerShard?: number;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Pre-partitioned labeled-row streams, one per split. Callers (`buildCorpus`) decide each row's
|
|
104
|
+
* split inline at align time via `splitForRow` and route rows to the matching stream, eliminating
|
|
105
|
+
* the prior `Map<source_id, SplitName>` O(n) lookup table.
|
|
106
|
+
*
|
|
107
|
+
* Splits with no rows can be omitted (or passed as an empty iterable); `writeShards` skips them.
|
|
108
|
+
*/
|
|
109
|
+
export type PerSplitRows = Partial<Record<SplitName, AsyncIterable<LabeledRow>>>;
|
|
110
|
+
/** Project a labeled row to the Parquet schema. */
|
|
111
|
+
export declare function rowToParquet(row: LabeledRow): ParquetRow;
|
|
112
|
+
/**
|
|
113
|
+
* Stream labeled rows into `.parquet` shards, one set of shards per split. Splits are processed
|
|
114
|
+
* sequentially so that only one shard writer is open at a time — memory cost is bounded by the
|
|
115
|
+
* parquetjs row-group buffer (~`ROW_GROUP_SIZE × row_size`), not by the labeled-row count.
|
|
116
|
+
*
|
|
117
|
+
* Callers pass per-split `AsyncIterable<LabeledRow>` (`PerSplitRows`); the prior
|
|
118
|
+
* `splitFor(sourceId)` callback is gone because pre-partitioning at the caller eliminates the O(n)
|
|
119
|
+
* `Map<source_id, SplitName>` it required. See `buildCorpus` for the new wire-up.
|
|
120
|
+
*/
|
|
121
|
+
export declare function writeShards(perSplit: PerSplitRows, opts: WriteShardsOptions): Promise<ShardManifest>;
|
|
122
|
+
//# sourceMappingURL=parquet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAMH,OAAO,EAAiB,KAAK,uBAAuB,EAAE,MAAM,4BAA4B,CAAA;AACxF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE5C,sFAAsF;AACtF,eAAO,MAAM,cAAc,QAAS,CAAA;AAEpC,mFAAmF;AACnF,eAAO,MAAM,iBAAiB,EAAG,QAAiB,CAAA;AAElD;;;;GAIG;AACH,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,OAAO,EAAE,MAAM,CAAA;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,mEAAmE;AACnE,eAAO,MAAM,eAAe,gJAYlB,CAAA;AAEV;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,uBAAuB,CAAC,UAAU,CAYlE,CAAA;AAED,sDAAsD;AACtD,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,SAAS,CAAA;IAChB,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,SAAS,CAAA;IACjB,WAAW,EAAE,OAAO,iBAAiB,CAAA;IACrC,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,eAAe,EAAE,MAAM,CAAA;IACvB,cAAc,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,aAAa;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;IACzB,cAAc,EAAE,MAAM,CAAA;IACtB,cAAc,EAAE,MAAM,CAAA;IACtB,MAAM,EAAE,eAAe,EAAE,CAAA;IACzB,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACjC,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,kBAAkB;IAClC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IAEjB,yEAAyE;IACzE,aAAa,EAAE,MAAM,CAAA;IAErB,6EAA6E;IAC7E,YAAY,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,SAAS,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;AAEhF,mDAAmD;AACnD,wBAAgB,YAAY,CAAC,GAAG,EAAE,UAAU,GAAG,UAAU,CAcxD;AAwBD;;;;;;;;GAQG;AACH,wBAAsB,WAAW,CAAC,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,aAAa,CAAC,CAwF1G"}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Final output sharder for the corpus pipeline.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1 (#9) shipped JSONL shards + a Python (PyArrow) converter as the path to binary Parquet —
|
|
9
|
+
* bridging until the JS toolchain caught up. Phase 1.5 (#18 §4) replaced that with a native JS
|
|
10
|
+
* writer based on the salvaged `@dsnp/parquetjs` wrapper from isp-nexus (now in
|
|
11
|
+
* `./parquet-wrapper/`). The build pipeline no longer touches Python at all in its hot path; the
|
|
12
|
+
* only remaining Python is the one-shot `train_tokenizer.py` SentencePiece step.
|
|
13
|
+
*
|
|
14
|
+
* Compression: `SNAPPY`. The plan in #18 §4 specified `zstd`, but `@dsnp/parquetjs` 1.7.0 only
|
|
15
|
+
* supports UNCOMPRESSED / GZIP / SNAPPY / BROTLI (see `node_modules/@dsnp/parquetjs/dist/lib/
|
|
16
|
+
* compression.js`). SNAPPY is the standard ML-corpus default (PyArrow's default too) and is the
|
|
17
|
+
* closest substitute on speed; revisit if @dsnp/parquetjs gains zstd support. Documented in
|
|
18
|
+
* `DECISIONS.md`.
|
|
19
|
+
*
|
|
20
|
+
* Layout under `<outputDir>`:
|
|
21
|
+
*
|
|
22
|
+
* ```
|
|
23
|
+
* corpus-v<version>/
|
|
24
|
+
* MANIFEST.json
|
|
25
|
+
* train/
|
|
26
|
+
* part-0000.parquet
|
|
27
|
+
* part-0001.parquet
|
|
28
|
+
* ...
|
|
29
|
+
* val/
|
|
30
|
+
* part-0000.parquet
|
|
31
|
+
* test/
|
|
32
|
+
* part-0000.parquet
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* Each shard caps at `rowsPerShard` (default 1_000_000); within a shard, parquetjs flushes row
|
|
36
|
+
* groups every `ROW_GROUP_SIZE` (50_000) rows per the issue spec. The MANIFEST captures every
|
|
37
|
+
* shard's path, row count, byte size, and SHA-256 (computed by re-reading the shard once after
|
|
38
|
+
* close — cheap relative to writing it).
|
|
39
|
+
*/
|
|
40
|
+
import { createHash } from "node:crypto";
|
|
41
|
+
import { createReadStream } from "node:fs";
|
|
42
|
+
import { mkdir, stat, writeFile } from "node:fs/promises";
|
|
43
|
+
import { join } from "node:path";
|
|
44
|
+
import { ParquetWriter } from "./parquet-wrapper/index.js";
|
|
45
|
+
/** Row groups flush at this many rows (parquetjs internal cadence within a shard). */
|
|
46
|
+
export const ROW_GROUP_SIZE = 50_000;
|
|
47
|
+
/** Snappy is the closest substitute for zstd among the codecs @dsnp/parquetjs 1.7.0 supports (UNCOMPRESSED / GZIP / SNAPPY / BROTLI). */
|
|
48
|
+
export const SHARD_COMPRESSION = "SNAPPY";
|
|
49
|
+
/** Column names emitted into every shard. Matches `ParquetRow`. */
|
|
50
|
+
export const PARQUET_COLUMNS = [
|
|
51
|
+
"raw",
|
|
52
|
+
"tokens",
|
|
53
|
+
"labels",
|
|
54
|
+
"country",
|
|
55
|
+
"locale",
|
|
56
|
+
"source",
|
|
57
|
+
"source_id",
|
|
58
|
+
"corpus_version",
|
|
59
|
+
"license",
|
|
60
|
+
"synth_method",
|
|
61
|
+
"synth_base_id",
|
|
62
|
+
];
|
|
63
|
+
/**
|
|
64
|
+
* Parquet schema for `LabeledRow` per #18 §4. Optional fields use `optional: true`; repeated UTF8
|
|
65
|
+
* columns capture tokens/labels arrays. Compression is per-column SNAPPY.
|
|
66
|
+
*/
|
|
67
|
+
export const LABELED_ROW_SCHEMA = {
|
|
68
|
+
raw: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
69
|
+
tokens: { type: "UTF8", repeated: true, compression: SHARD_COMPRESSION },
|
|
70
|
+
labels: { type: "UTF8", repeated: true, compression: SHARD_COMPRESSION },
|
|
71
|
+
country: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
72
|
+
locale: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
73
|
+
source: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
74
|
+
source_id: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
75
|
+
corpus_version: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
76
|
+
license: { type: "UTF8", compression: SHARD_COMPRESSION },
|
|
77
|
+
synth_method: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
78
|
+
synth_base_id: { type: "UTF8", compression: SHARD_COMPRESSION, optional: true },
|
|
79
|
+
};
|
|
80
|
+
/**
 * Flatten a labeled row into the column layout written to every shard.
 *
 * A missing `locale` becomes `null`. The nested `synth` record, when present, is split into
 * `synth_method` / `synth_base_id`; both are `null` otherwise. Fields outside the schema are
 * not carried over.
 */
export function rowToParquet(row) {
    const { raw, tokens, labels, country, locale, source, source_id, corpus_version, license, synth } = row;
    const synthMethod = synth?.method ?? null;
    const synthBaseId = synth?.base_source_id ?? null;
    return {
        raw,
        tokens,
        labels,
        country,
        locale: locale ?? null,
        source,
        source_id,
        corpus_version,
        license,
        synth_method: synthMethod,
        synth_base_id: synthBaseId,
    };
}
|
|
96
|
+
/** Optional columns: copied onto the appended row only when they hold a value. */
const OPTIONAL_PARQUET_COLUMNS = ["locale", "synth_method", "synth_base_id"];
/**
 * Build the object handed to `appendRow` from a `ParquetRow`.
 *
 * Required columns are copied as-is. Optional columns (`locale`, `synth_method`,
 * `synth_base_id`) are copied only when they hold a value: parquetjs treats `null` as "skip" for
 * `optional` columns, but leaving the key off keeps the on-disk Definition Levels in line with
 * what PyArrow / DuckDB / etc. produce for the same logical row. `undefined` is treated the same
 * as `null`, so a row that was not built by `rowToParquet` never carries an explicit `undefined`
 * key.
 *
 * @param row - Row in `ParquetRow` shape.
 * @returns A new object holding the required columns plus any populated optional columns; `row`
 *   itself is not modified.
 */
function appendShape(row) {
    const out = {
        raw: row.raw,
        tokens: row.tokens,
        labels: row.labels,
        country: row.country,
        source: row.source,
        source_id: row.source_id,
        corpus_version: row.corpus_version,
        license: row.license,
    };
    for (const column of OPTIONAL_PARQUET_COLUMNS) {
        const value = row[column];
        // `!= null` deliberately matches both `null` and `undefined`.
        if (value != null)
            out[column] = value;
    }
    return out;
}
|
|
120
|
+
/**
 * Stream labeled rows into `.parquet` shards, one set of shards per split. Splits are processed
 * sequentially so that only one shard writer is open at a time — memory cost is bounded by the
 * parquetjs row-group buffer (~`ROW_GROUP_SIZE × row_size`), not by the labeled-row count.
 *
 * Callers pass per-split `AsyncIterable<LabeledRow>` (`PerSplitRows`); the prior
 * `splitFor(sourceId)` callback is gone because pre-partitioning at the caller eliminates the O(n)
 * `Map<source_id, SplitName>` it required. See `buildCorpus` for the new wire-up.
 *
 * Output layout: `<outputDir>/corpus-v<corpusVersion>/<split>/part-NNNN.parquet` plus a
 * `MANIFEST.json` in the corpus directory. A shard is opened lazily on the first row, so a split
 * that yields no rows produces no files.
 *
 * If writing fails part-way (the row iterator, `appendRow`, or shard finalisation throws), the
 * currently open writer is closed on a best-effort basis before the original error is rethrown,
 * so the file handle is not leaked. No manifest is written in that case.
 *
 * @param perSplit - Rows keyed by `train` / `val` / `test`; missing splits are skipped.
 * @param opts - `outputDir`, `corpusVersion`, and optional `rowsPerShard` (defaults to 1,000,000).
 * @returns The manifest object, identical to what is serialised to `MANIFEST.json`.
 */
export async function writeShards(perSplit, opts) {
    const rowsPerShard = opts.rowsPerShard ?? 1_000_000;
    const corpusDir = join(opts.outputDir, `corpus-v${opts.corpusVersion}`);
    await mkdir(corpusDir, { recursive: true });
    const shards = [];
    const counts = { train: 0, val: 0, test: 0 };
    let totalRows = 0;
    for (const split of ["train", "val", "test"]) {
        const rows = perSplit[split];
        if (!rows)
            continue;
        let shardIndex = 0;
        let writer = null;
        let path = "";
        let shardRows = 0;
        let firstSourceId = "";
        let lastSourceId = "";
        // Open the next shard file for this split and reset the per-shard bookkeeping.
        const openShard = async () => {
            const splitDir = join(corpusDir, split);
            await mkdir(splitDir, { recursive: true });
            path = join(splitDir, `part-${String(shardIndex).padStart(4, "0")}.parquet`);
            writer = await ParquetWriter.openFile(LABELED_ROW_SCHEMA, path, {
                rowGroupSize: ROW_GROUP_SIZE,
            });
            writer.setMetadata("mailwoman.corpus_version", opts.corpusVersion);
            writer.setMetadata("mailwoman.split", split);
            writer.setMetadata("mailwoman.shard_index", String(shardIndex));
            shardRows = 0;
            firstSourceId = "";
            lastSourceId = "";
        };
        // Finalise the current shard (if any) and record it in the manifest's shard list.
        const closeShard = async () => {
            if (!writer)
                return;
            // Detach first: if close() rejects, the error path below must not close it a second time.
            const closing = writer;
            writer = null;
            await closing.close();
            if (shardRows > 0) {
                const fileStat = await stat(path);
                const sha256 = await hashFile(path);
                shards.push({
                    split,
                    path,
                    format: "parquet",
                    compression: SHARD_COMPRESSION,
                    rows: shardRows,
                    bytes: fileStat.size,
                    sha256,
                    first_source_id: firstSourceId,
                    last_source_id: lastSourceId,
                });
            }
        };
        try {
            for await (const row of rows) {
                if (!writer)
                    await openShard();
                const pq = rowToParquet(row);
                await writer.appendRow(appendShape(pq));
                if (shardRows === 0)
                    firstSourceId = row.source_id;
                lastSourceId = row.source_id;
                shardRows++;
                counts[split]++;
                totalRows++;
                // Roll over to a new shard once the current one is full.
                if (shardRows >= rowsPerShard) {
                    await closeShard();
                    shardIndex++;
                }
            }
            await closeShard();
        }
        catch (err) {
            // Release the handle of a half-written shard; the original failure is what callers see.
            if (writer) {
                const dangling = writer;
                writer = null;
                try {
                    await dangling.close();
                }
                catch {
                    // Secondary close failure is deliberately ignored in favour of `err`.
                }
            }
            throw err;
        }
    }
    // Stable manifest order: by split name, then by shard path within a split.
    shards.sort((a, b) => (a.split === b.split ? a.path.localeCompare(b.path) : a.split.localeCompare(b.split)));
    const manifest = {
        corpus_version: opts.corpusVersion,
        schema: PARQUET_COLUMNS,
        rows_per_shard: rowsPerShard,
        row_group_size: ROW_GROUP_SIZE,
        shards,
        counts,
        total_rows: totalRows,
    };
    await writeFile(join(corpusDir, "MANIFEST.json"), `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
    return manifest;
}
|
|
212
|
+
/**
 * Compute the SHA-256 of the file at `path` in one streaming pass and return it as a lowercase
 * hex string. Cheap relative to Parquet write throughput.
 */
async function hashFile(path) {
    const sha = createHash("sha256");
    // Feed the digest chunk by chunk so the file is never held in memory as a whole.
    for await (const piece of createReadStream(path)) {
        sha.update(piece);
    }
    return sha.digest("hex");
}
|
|
220
|
+
//# sourceMappingURL=parquet.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/parquet.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACzD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,aAAa,EAAgC,MAAM,4BAA4B,CAAA;AAIxF,sFAAsF;AACtF,MAAM,CAAC,MAAM,cAAc,GAAG,MAAM,CAAA;AAEpC,mFAAmF;AACnF,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAiB,CAAA;AAsBlD,mEAAmE;AACnE,MAAM,CAAC,MAAM,eAAe,GAAG;IAC9B,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,WAAW;IACX,gBAAgB;IAChB,SAAS;IACT,cAAc;IACd,eAAe;CACN,CAAA;AAEV;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAwC;IACtE,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACrD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxE,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IACxE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACxD,SAAS,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAC3D,cAAc,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IAChE,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE;IACzD,YAAY,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC9E,aAAa,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,IAAI,EAAE;CAC/E,CAAA;AA6CD,mDAAmD;AACnD,MAAM,UAAU,YAAY,CAAC,GAAe;IAC3C,OAAO;QACN,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI;QAC1B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,YAAY,EAAE,GAAG,CAAC,KAAK,EAAE,MAAM,IAAI,IAAI;QACvC,aAAa,EAAE,GAAG,CAAC,KAAK,EAAE,cAAc,IAAI,IAAI;KAChD,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,SA
AS,WAAW,CAAC,GAAe;IACnC,MAAM,GAAG,GAA4B;QACpC,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,OAAO,EAAE,GAAG,CAAC,OAAO;KACpB,CAAA;IACD,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI;QAAE,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAA;IAChD,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI;QAAE,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,YAAY,CAAA;IAClE,IAAI,GAAG,CAAC,aAAa,KAAK,IAAI;QAAE,GAAG,CAAC,aAAa,GAAG,GAAG,CAAC,aAAa,CAAA;IACrE,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAsB,EAAE,IAAwB;IACjF,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAA;IACnD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;IACvE,MAAM,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE3C,MAAM,MAAM,GAAsB,EAAE,CAAA;IACpC,MAAM,MAAM,GAA8B,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAA;IACvE,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,KAAK,MAAM,KAAK,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAU,EAAE,CAAC;QACvD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAA;QAC5B,IAAI,CAAC,IAAI;YAAE,SAAQ;QAEnB,IAAI,UAAU,GAAG,CAAC,CAAA;QAClB,IAAI,MAAM,GAAqC,IAAI,CAAA;QACnD,IAAI,IAAI,GAAG,EAAE,CAAA;QACb,IAAI,SAAS,GAAG,CAAC,CAAA;QACjB,IAAI,aAAa,GAAG,EAAE,CAAA;QACtB,IAAI,YAAY,GAAG,EAAE,CAAA;QAErB,MAAM,SAAS,GAAG,KAAK,IAAmB,EAAE;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;YACvC,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,UAAU,CAAC,CAAA;YAC5E,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAa,kBAAkB,EAAE,IAAI,EAAE;gBAC3E,YAAY,EAAE,cAAc;aAC5B,CAAC,CAAA;YACF,MAAM,CAAC,WAAW,CAAC,0BAA0B,EAAE,IAAI,CAAC,aAAa,CAAC,CAAA;YAClE,MAAM,CAAC,WAAW,CAAC,iBAAiB,EAAE,KAAK,CAAC,CAAA;YAC5C,MAAM,CAAC,WAAW,CAAC,uBAAuB,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAA;YAC/D,SAAS,GAAG,CAAC,CAAA;YACb,aAAa,GAAG,EAAE,CAAA;YAClB,YAAY,GAAG,EAAE,CAAA;QAClB,CAAC,CAAA;QAED,MAAM,UAAU,GAAG,K
AAK,IAAmB,EAAE;YAC5C,IAAI,CAAC,MAAM;gBAAE,OAAM;YACnB,MAAM,MAAM,CAAC,KAAK,EAAE,CAAA;YACpB,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAA;gBACnC,MAAM,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,IAAI;oBACJ,MAAM,EAAE,SAAS;oBACjB,WAAW,EAAE,iBAAiB;oBAC9B,IAAI,EAAE,SAAS;oBACf,KAAK,EAAE,QAAQ,CAAC,IAAI;oBACpB,MAAM;oBACN,eAAe,EAAE,aAAa;oBAC9B,cAAc,EAAE,YAAY;iBAC5B,CAAC,CAAA;YACH,CAAC;YACD,MAAM,GAAG,IAAI,CAAA;QACd,CAAC,CAAA;QAED,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,MAAM;gBAAE,MAAM,SAAS,EAAE,CAAA;YAC9B,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;YAC5B,MAAM,MAAO,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,CAA0B,CAAC,CAAA;YACjE,IAAI,SAAS,KAAK,CAAC;gBAAE,aAAa,GAAG,GAAG,CAAC,SAAS,CAAA;YAClD,YAAY,GAAG,GAAG,CAAC,SAAS,CAAA;YAC5B,SAAS,EAAE,CAAA;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;YACf,SAAS,EAAE,CAAA;YAEX,IAAI,SAAS,IAAI,YAAY,EAAE,CAAC;gBAC/B,MAAM,UAAU,EAAE,CAAA;gBAClB,UAAU,EAAE,CAAA;YACb,CAAC;QACF,CAAC;QAED,MAAM,UAAU,EAAE,CAAA;IACnB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAE5G,MAAM,QAAQ,GAAkB;QAC/B,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,MAAM,EAAE,eAAe;QACvB,cAAc,EAAE,YAAY;QAC5B,cAAc,EAAE,cAAc;QAC9B,MAAM;QACN,MAAM;QACN,UAAU,EAAE,SAAS;KACrB,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACnG,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,+FAA+F;AAC/F,KAAK,UAAU,QAAQ,CAAC,IAAY;IACnC,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAA;IACjC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;IACrC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM;QAAE,IAAI,CAAC,MAAM,CAAC,KAAe,CAAC,CAAA;IAC9D,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;AAC1B,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Adapter runner — drives a `CorpusAdapter` to completion and writes intermediate JSONL + a
|
|
7
|
+
* per-shard manifest.
|
|
8
|
+
*
|
|
9
|
+
* Output layout under `outputDir`:
|
|
10
|
+
*
|
|
11
|
+
* ```
|
|
12
|
+
* <outputDir>/<adapter.id>/
|
|
13
|
+
* canonical.jsonl # one row per line, in emission order
|
|
14
|
+
* MANIFEST.json # adapter id, version, row count, sha256, license, started_at, ended_at
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* The runner is responsible for everything an adapter is **not** responsible for:
|
|
18
|
+
*
|
|
19
|
+
* - Stamping `corpus_version` on every row (adapters must NOT set it).
|
|
20
|
+
* - Applying `canonicalDedupKey` and skipping duplicates.
|
|
21
|
+
* - Streaming sha256 over JSONL bytes so the manifest checksum doesn't require a re-read.
|
|
22
|
+
* - Honoring backpressure on the output write stream.
|
|
23
|
+
* - Counting + emitting periodic progress to an optional callback.
|
|
24
|
+
* - Honoring `signal` (delegates to adapter's iteration boundary).
|
|
25
|
+
*
|
|
26
|
+
* The runner does NOT perform alignment, tokenization, synthesis, or sharding into Parquet. Those
|
|
27
|
+
* steps run later, consuming the JSONL shards this writes.
|
|
28
|
+
*/
|
|
29
|
+
import { type AdapterRegistry } from "./adapter.js";
|
|
30
|
+
import type { AdapterOptions, CorpusAdapter } from "./types.js";
|
|
31
|
+
/**
 * Point-in-time view of a run's counters, delivered to the progress callback on each tick.
 */
export interface RunnerProgress {
    /** Id of the adapter currently being driven. */
    adapterId: string;

    /** How many rows the adapter has yielded so far, counted before dedup. */
    yielded: number;

    /** How many rows have actually been written to JSONL, i.e. after dedup. */
    written: number;

    /** Number of bytes written to JSONL up to this point. */
    bytes: number;

    /** Wall-clock time since the run started, in milliseconds. */
    elapsed_ms: number;
}
|
|
44
|
+
/**
 * Options accepted by a single `runAdapter` invocation.
 */
export interface RunAdapterOptions {
    /** The adapter that will be driven. */
    adapter: CorpusAdapter;

    /** Passed through to the adapter: input path, country filter, limit, signal. */
    adapterOptions: AdapterOptions;

    /** Output root. The runner creates `<outputDir>/<adapter.id>/` beneath it. */
    outputDir: string;

    /** Version stamped onto every row; locked together with the tokenizer version. */
    corpusVersion: string;

    /**
     * Progress hook (optional). Called once per `progressEvery` yielded rows (default 1000) and
     * one final time when the run ends. If the callback throws, the run is aborted.
     */
    onProgress?: (snapshot: RunnerProgress) => void;

    /**
     * Number of yielded rows between `onProgress` calls; 1000 when omitted. The terminal tick is
     * emitted no matter what this is set to.
     */
    progressEvery?: number;
}
|
|
65
|
+
/**
 * What `runAdapter` resolves to. Has the same shape as the `MANIFEST.json` written to disk.
 */
export interface AdapterRunManifest {
    /** Id of the adapter that produced this run. */
    adapter_id: string;

    /** Corpus version the run was stamped with. */
    corpus_version: string;

    /** NOTE(review): presumably the adapter's default license — confirm against the runner. */
    default_license: string;

    /** NOTE(review): presumably the adapter's human-readable description — confirm. */
    description: string;

    /** Rows yielded by the adapter (before dedup). */
    yielded: number;

    /** Rows written to JSONL (after dedup). */
    written: number;

    /** NOTE(review): presumably the number of rows dropped as duplicates — confirm. */
    deduped: number;

    /** Bytes written to JSONL. */
    bytes: number;

    /** SHA-256 checksum over the JSONL bytes. */
    sha256: string;

    /** Location of the JSONL file this manifest describes. */
    jsonl_path: string;

    /** Run start time, as a string (format not visible from this declaration). */
    started_at: string;

    /** Run end time, as a string (format not visible from this declaration). */
    ended_at: string;

    /** Duration of the run in milliseconds. */
    elapsed_ms: number;
}
|
|
81
|
+
/**
 * Run one adapter until it is exhausted.
 *
 * Writes `canonical.jsonl` and `MANIFEST.json` under `outputDir/<adapter.id>/` and resolves with
 * the manifest describing the run.
 *
 * @param opts - Adapter, adapter options, output root, corpus version and progress settings.
 * @returns The run manifest.
 * @throws If the output directory cannot be created, if a row arrives with a missing required
 *   field, or if the abort signal fires.
 */
export declare function runAdapter(opts: RunAdapterOptions): Promise<AdapterRunManifest>;
|
|
89
|
+
/**
 * Run every adapter in `registry`, one after another. The first failure stops the whole sequence;
 * callers that want partial-failure behaviour can filter the registry before calling.
 *
 * @param registry - Adapters to drive.
 * @param common - The `runAdapter` options shared by all adapters (everything except `adapter`),
 *   plus an optional `adapterOptionsFor` hook that takes an adapter and returns `AdapterOptions`.
 * @returns The manifests, in registry insertion order.
 */
export declare function runAllAdapters(
    registry: AdapterRegistry,
    common: Omit<RunAdapterOptions, "adapter"> & { adapterOptionsFor?: (a: CorpusAdapter) => AdapterOptions },
): Promise<AdapterRunManifest[]>;
|
|
98
|
+
/**
 * Convenience helper: make sure the directory that contains `filePath` exists.
 *
 * @param filePath - Path of the file whose parent directory is needed.
 */
export declare function ensureParentDir(filePath: string): Promise<void>;
|
|
100
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAKH,OAAO,EAAsC,KAAK,eAAe,EAAwB,MAAM,cAAc,CAAA;AAC7G,OAAO,KAAK,EAAE,cAAc,EAAgB,aAAa,EAAE,MAAM,YAAY,CAAA;AAE7E,sEAAsE;AACtE,MAAM,WAAW,cAAc;IAC9B,4BAA4B;IAC5B,SAAS,EAAE,MAAM,CAAA;IAEjB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAA;IAEf,oDAAoD;IACpD,OAAO,EAAE,MAAM,CAAA;IAEf,qCAAqC;IACrC,KAAK,EAAE,MAAM,CAAA;IAEb,qDAAqD;IACrD,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,+CAA+C;AAC/C,MAAM,WAAW,iBAAiB;IACjC,wBAAwB;IACxB,OAAO,EAAE,aAAa,CAAA;IAEtB,iFAAiF;IACjF,cAAc,EAAE,cAAc,CAAA;IAE9B,sFAAsF;IACtF,SAAS,EAAE,MAAM,CAAA;IAEjB,yFAAyF;IACzF,aAAa,EAAE,MAAM,CAAA;IAErB;;;OAGG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,cAAc,KAAK,IAAI,CAAA;IAE/C;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,+EAA+E;AAC/E,MAAM,WAAW,kBAAkB;IAClC,UAAU,EAAE,MAAM,CAAA;IAClB,cAAc,EAAE,MAAM,CAAA;IACtB,eAAe,EAAE,MAAM,CAAA;IACvB,WAAW,EAAE,MAAM,CAAA;IACnB,OAAO,EAAE,MAAM,CAAA;IACf,OAAO,EAAE,MAAM,CAAA;IACf,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAkGrF;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,QAAQ,EAAE,eAAe,EACzB,MAAM,EAAE,IAAI,CAAC,iBAAiB,EAAE,SAAS,CAAC,GAAG;IAAE,iBAAiB,CAAC,EAAE,CAAC,CAAC,EAAE,aAAa,KAAK,cAAc,CAAA;CAAE,GACvG,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAa/B;AAwCD,qEAAqE;AACrE,wBAAsB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAErE"}
|