@loaders.gl/parquet 3.4.6 → 4.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +27 -34
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +6 -6
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +58 -42
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +11 -7
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +51 -29
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +6 -6
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +16 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +4 -4
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +7 -7
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +117 -22
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/index.js +5 -5
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js +57 -41
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/esm/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +12 -8
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +14 -3
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/esm/parquet-loader.js +14 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +2 -2
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -1
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +6 -6
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +108 -21
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/index.d.ts +8 -49
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -6
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +2 -2
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +4 -4
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.js +48 -44
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.js +30 -31
- package/dist/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +2 -2
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +13 -7
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +3 -2
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js +16 -19
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +3 -3
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/parquet-loader.d.ts +3 -14
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +14 -2
- package/dist/parquet-worker.js +31 -38
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.d.ts +2 -1
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.d.ts +4 -4
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.d.ts +2 -2
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.d.ts +6 -6
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/parquetjs/schema/declare.d.ts +6 -5
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +3 -3
- package/dist/parquetjs/schema/schema.d.ts +4 -4
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +5 -5
- package/dist/parquetjs/schema/shred.d.ts +17 -111
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +127 -119
- package/package.json +8 -8
- package/src/index.ts +32 -9
- package/src/lib/arrow/convert-row-group-to-columns.ts +2 -2
- package/src/lib/arrow/convert-schema-from-parquet.ts +56 -66
- package/src/lib/arrow/convert-schema-to-parquet.ts +32 -44
- package/src/lib/geo/decode-geo-metadata.ts +17 -8
- package/src/lib/parsers/parse-parquet-to-columns.ts +22 -11
- package/src/lib/parsers/parse-parquet-to-rows.ts +28 -23
- package/src/lib/wasm/parse-parquet-wasm.ts +7 -7
- package/src/parquet-loader.ts +25 -2
- package/src/parquet-writer.ts +4 -1
- package/src/parquetjs/encoder/parquet-encoder.ts +11 -10
- package/src/parquetjs/parser/decoders.ts +3 -3
- package/src/parquetjs/parser/parquet-reader.ts +7 -7
- package/src/parquetjs/schema/declare.ts +6 -5
- package/src/parquetjs/schema/schema.ts +8 -8
- package/src/parquetjs/schema/shred.ts +142 -103
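The hunks that follow come from the rebuilt parquetjs schema modules, where the shredding API is renamed: ParquetBuffer becomes ParquetRowGroup, ParquetRecord becomes ParquetRow, materializeRecords becomes materializeRows, and ParquetSchema.buffer() becomes rowGroup(). The TypeScript sketch below is only an illustration distilled from the new sources embedded in the source maps further down; value and page-header types are simplified to `unknown`, so it is not the package's actual .d.ts.

```typescript
// Sketch only: simplified stand-ins for the renamed declarations.
interface ParquetColumnChunk {
  dlevels: number[];
  rlevels: number[];
  values: unknown[];
  count: number;
}

/** Replaces 3.4.x ParquetBuffer: one row group of shredded column chunks */
declare class ParquetRowGroup {
  rowCount: number;
  columnData: Record<string, ParquetColumnChunk>;
  constructor(rowCount?: number, columnData?: Record<string, ParquetColumnChunk>);
}

/** Replaces 3.4.x ParquetRecord */
type ParquetRow = {[key: string]: unknown};

/** Renamed ParquetSchema methods (3.4.x names in the trailing comments) */
declare class ParquetSchema {
  shredRecord(row: ParquetRow, rowGroup: ParquetRowGroup): void; // was shredRecord(record, buffer)
  materializeRows(rowGroup: ParquetRowGroup): ParquetRow[]; // was materializeRecords(buffer)
  rowGroup(): ParquetRowGroup; // was buffer()
}
```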
package/dist/es5/parquetjs/schema/declare.js.map
@@ -1 +1 @@
-{"version":3,"file":"declare.js","names":["
+
{"version":3,"file":"declare.js","names":["ParquetRowGroup","_createClass2","default","rowCount","arguments","length","undefined","columnData","_classCallCheck2","_defineProperty2","exports"],"sources":["../../../../src/parquetjs/schema/declare.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport Int64 from 'node-int64';\nimport type {PageHeader} from '../parquet-thrift';\n\nexport type ParquetCodec = 'PLAIN' | 'RLE' | 'PLAIN_DICTIONARY';\nexport type ParquetCompression =\n | 'UNCOMPRESSED'\n | 'GZIP'\n | 'SNAPPY'\n | 'LZO'\n | 'BROTLI'\n | 'LZ4'\n | 'LZ4_RAW'\n | 'ZSTD';\nexport type RepetitionType = 'REQUIRED' | 'OPTIONAL' | 'REPEATED';\nexport type ParquetType = PrimitiveType | OriginalType;\n\n/**\n * Physical type\n */\nexport type PrimitiveType =\n // Base Types\n | 'BOOLEAN' // 0\n | 'INT32' // 1\n | 'INT64' // 2\n | 'INT96' // 3\n | 'FLOAT' // 4\n | 'DOUBLE' // 5\n | 'BYTE_ARRAY' // 6,\n | 'FIXED_LEN_BYTE_ARRAY'; // 7\n\n/**\n * Logical type\n */\nexport type OriginalType =\n // Converted Types\n | 'UTF8' // 0\n // | 'MAP' // 1\n // | 'MAP_KEY_VALUE' // 2\n // | 'LIST' // 3\n // | 'ENUM' // 4\n // | 'DECIMAL' // 5\n | 'DECIMAL_INT32' // 5\n | 'DECIMAL_INT64' // 5\n | 'DECIMAL_BYTE_ARRAY' // 5\n | 'DECIMAL_FIXED_LEN_BYTE_ARRAY' // 5\n | 'DATE' // 6\n | 'TIME_MILLIS' // 7\n | 'TIME_MICROS' // 8\n | 'TIMESTAMP_MILLIS' // 9\n | 'TIMESTAMP_MICROS' // 10\n | 'UINT_8' // 11\n | 'UINT_16' // 12\n | 'UINT_32' // 13\n | 'UINT_64' // 14\n | 'INT_8' // 15\n | 'INT_16' // 16\n | 'INT_32' // 17\n | 'INT_64' // 18\n | 'JSON' // 19\n | 'BSON' // 20\n | 'INTERVAL'; // 21\n\nexport type ParquetDictionary = string[];\n\nexport interface SchemaDefinition {\n [string: string]: FieldDefinition;\n}\n\nexport interface FieldDefinition {\n type?: ParquetType;\n typeLength?: number;\n presision?: number;\n scale?: number;\n encoding?: ParquetCodec;\n compression?: ParquetCompression;\n optional?: boolean;\n repeated?: boolean;\n fields?: SchemaDefinition;\n}\n\nexport interface ParquetField {\n name: string;\n path: string[];\n key: string;\n primitiveType?: PrimitiveType;\n originalType?: OriginalType;\n repetitionType: RepetitionType;\n typeLength?: number;\n presision?: number;\n scale?: number;\n encoding?: ParquetCodec;\n compression?: ParquetCompression;\n rLevelMax: number;\n dLevelMax: number;\n isNested?: boolean;\n fieldCount?: number;\n fields?: Record<string, ParquetField>;\n}\n\n/** @todo better name, this is an internal type? 
*/\nexport interface ParquetOptions {\n type: ParquetType;\n rLevelMax: number;\n dLevelMax: number;\n compression: ParquetCompression;\n column: ParquetField;\n numValues?: Int64;\n dictionary?: ParquetDictionary;\n}\n\nexport interface ParquetPageData {\n dlevels: number[];\n rlevels: number[];\n /** Actual column chunks */\n values: any[]; // ArrayLike<any>;\n count: number;\n dictionary?: ParquetDictionary;\n /** The \"raw\" page header from the file */\n pageHeader: PageHeader;\n}\n\nexport interface ParquetRow {\n [key: string]: any;\n}\n\n/** @\n * Holds data for one row group (column chunks) */\nexport class ParquetRowGroup {\n /** Number of rows in this page */\n rowCount: number;\n /** Map of Column chunks */\n columnData: Record<string, ParquetColumnChunk>;\n\n constructor(rowCount: number = 0, columnData: Record<string, ParquetColumnChunk> = {}) {\n this.rowCount = rowCount;\n this.columnData = columnData;\n }\n}\n\n/** Holds the data for one column chunk */\nexport interface ParquetColumnChunk {\n dlevels: number[];\n rlevels: number[];\n values: any[];\n count: number;\n pageHeaders: PageHeader[];\n}\n"],"mappings":";;;;;;;;;;IAgIaA,eAAe,OAAAC,aAAA,CAAAC,OAAA,EAM1B,SAAAF,gBAAA,EAAuF;EAAA,IAA3EG,QAAgB,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC;EAAA,IAAEG,UAA8C,GAAAH,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAAA,IAAAI,gBAAA,CAAAN,OAAA,QAAAF,eAAA;EAAA,IAAAS,gBAAA,CAAAP,OAAA;EAAA,IAAAO,gBAAA,CAAAP,OAAA;EACnF,IAAI,CAACC,QAAQ,GAAGA,QAAQ;EACxB,IAAI,CAACI,UAAU,GAAGA,UAAU;AAC9B,CAAC;AAAAG,OAAA,CAAAV,eAAA,GAAAA,eAAA"}
package/dist/es5/parquetjs/schema/schema.js
@@ -54,13 +54,13 @@ var ParquetSchema = function () {
     }
   }, {
     key: "shredRecord",
-    value: function shredRecord(
-      (0, _shred.shredRecord)(this,
+    value: function shredRecord(row, rowGroup) {
+      (0, _shred.shredRecord)(this, row, rowGroup);
     }
   }, {
-    key: "
-    value: function
-      return (0, _shred.
+    key: "materializeRows",
+    value: function materializeRows(rowGroup) {
+      return (0, _shred.materializeRows)(this, rowGroup);
     }
   }, {
     key: "compress",
@@ -70,8 +70,8 @@ var ParquetSchema = function () {
       return this;
     }
   }, {
-    key: "
-    value: function
+    key: "rowGroup",
+    value: function rowGroup() {
       return (0, _shred.shredBuffer)(this);
     }
   }]);
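The two hunks above show the ES5 build of ParquetSchema delegating to the renamed shred helpers. A hedged usage sketch of the row round-trip under the new names follows; the deep import path is hypothetical (these are internal modules) and the materialized output is approximate:

```typescript
// Hypothetical deep import path, for illustration only.
import {ParquetSchema} from '@loaders.gl/parquet/parquetjs/schema/schema';

const schema = new ParquetSchema({
  id: {type: 'INT32'},
  name: {type: 'UTF8', optional: true}
});

// rowGroup() replaces buffer(); it returns an empty ParquetRowGroup.
const rowGroup = schema.rowGroup();
schema.shredRecord({id: 1, name: 'a'}, rowGroup); // was shredRecord(record, buffer)
schema.shredRecord({id: 2}, rowGroup);

// materializeRows() replaces materializeRecords(); roughly [{id: 1, name: 'a'}, {id: 2}].
const rows = schema.materializeRows(rowGroup);
```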
package/dist/es5/parquetjs/schema/schema.js.map
@@ -1 +1 @@
-
{"version":3,"file":"schema.js","names":["_codecs","require","_compression","_shred","_types","ParquetSchema","schema","_classCallCheck2","default","_defineProperty2","fields","buildFields","fieldList","listFields","_createClass2","key","value","findField","path","split","slice","n","length","shift","findFieldBranch","branch","push","shredRecord","record","buffer","materializeRecords","compress","type","setCompress","shredBuffer","exports","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","join","isNested","fieldCount","Object","keys","typeDef","PARQUET_LOGICAL_TYPES","Error","encoding","PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","primitiveType","originalType","typeLength","presision","scale","list","k"],"sources":["../../../../src/parquetjs/schema/schema.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetBuffer,\n ParquetCompression,\n ParquetField,\n ParquetRecord,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRecords, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {\n shredRecord(this, record, buffer);\n }\n\n materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {\n return materializeRecords(this, buffer);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n buffer(): ParquetBuffer {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: 
SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n presision: opts.presision,\n scale: opts.scale,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return 
list;\n}\n"],"mappings":";;;;;;;;;;AAEA,IAAAA,OAAA,GAAAC,OAAA;AACA,IAAAC,YAAA,GAAAD,OAAA;AAUA,IAAAE,MAAA,GAAAF,OAAA;AACA,IAAAG,MAAA,GAAAH,OAAA;AAA8C,IAKjCI,aAAa;EAQxB,SAAAA,cAAYC,MAAwB,EAAE;IAAA,IAAAC,gBAAA,CAAAC,OAAA,QAAAH,aAAA;IAAA,IAAAI,gBAAA,CAAAD,OAAA;IAAA,IAAAC,gBAAA,CAAAD,OAAA;IAAA,IAAAC,gBAAA,CAAAD,OAAA;IACpC,IAAI,CAACF,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACI,MAAM,GAAGC,WAAW,CAACL,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;IAC3C,IAAI,CAACM,SAAS,GAAGC,UAAU,CAAC,IAAI,CAACH,MAAM,CAAC;EAC1C;EAAC,IAAAI,aAAA,CAAAN,OAAA,EAAAH,aAAA;IAAAU,GAAA;IAAAC,KAAA,EAKD,SAAAC,UAAUC,IAAuB,EAAgB;MAC/C,IAAI,OAAOA,IAAI,KAAK,QAAQ,EAAE;QAE5BA,IAAI,GAAGA,IAAI,CAACC,KAAK,CAAC,GAAG,CAAC;MACxB,CAAC,MAAM;QAELD,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;MACtB;MAEA,IAAIC,CAAC,GAAG,IAAI,CAACX,MAAM;MACnB,OAAOQ,IAAI,CAACI,MAAM,GAAG,CAAC,EAAEJ,IAAI,CAACK,KAAK,CAAC,CAAC,EAAE;QACpCF,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAACR,MAAsC;MACvD;MAEA,OAAOW,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB;EAAC;IAAAH,GAAA;IAAAC,KAAA,EAKD,SAAAQ,gBAAgBN,IAAuB,EAAkB;MACvD,IAAI,OAAOA,IAAI,KAAK,QAAQ,EAAE;QAE5BA,IAAI,GAAGA,IAAI,CAACC,KAAK,CAAC,GAAG,CAAC;MACxB;MACA,IAAMM,MAAsB,GAAG,EAAE;MACjC,IAAIJ,CAAC,GAAG,IAAI,CAACX,MAAM;MACnB,OAAOQ,IAAI,CAACI,MAAM,GAAG,CAAC,EAAEJ,IAAI,CAACK,KAAK,CAAC,CAAC,EAAE;QACpCE,MAAM,CAACC,IAAI,CAACL,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,IAAIA,IAAI,CAACI,MAAM,GAAG,CAAC,EAAE;UACnBD,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAACR,MAAsC;QACvD;MACF;MACA,OAAOe,MAAM;IACf;EAAC;IAAAV,GAAA;IAAAC,KAAA,EAED,SAAAW,YAAYC,MAAqB,EAAEC,MAAqB,EAAQ;MAC9D,IAAAF,kBAAW,EAAC,IAAI,EAAEC,MAAM,EAAEC,MAAM,CAAC;IACnC;EAAC;IAAAd,GAAA;IAAAC,KAAA,EAED,SAAAc,mBAAmBD,MAAqB,EAAmB;MACzD,OAAO,IAAAC,yBAAkB,EAAC,IAAI,EAAED,MAAM,CAAC;IACzC;EAAC;IAAAd,GAAA;IAAAC,KAAA,EAED,SAAAe,SAASC,IAAwB,EAAQ;MACvCC,WAAW,CAAC,IAAI,CAAC3B,MAAM,EAAE0B,IAAI,CAAC;MAC9BC,WAAW,CAAC,IAAI,CAACvB,MAAM,EAAEsB,IAAI,CAAC;MAC9B,OAAO,IAAI;IACb;EAAC;IAAAjB,GAAA;IAAAC,KAAA,EAED,SAAAa,OAAA,EAAwB;MACtB,OAAO,IAAAK,kBAAW,EAAC,IAAI,CAAC;IAC1B;EAAC;EAAA,OAAA7B,aAAA;AAAA;AAAA8B,OAAA,CAAA9B,aAAA,GAAAA,aAAA;AAGH,SAAS4B,WAAWA,CAAC3B,MAAW,EAAE0B,IAAwB,EAAE;EAC1D,KAAK,IAAMI,IAAI,IAAI9B,MAAM,EAAE;IACzB,IAAM+B,IAAI,GAAG/B,MAAM,CAAC8B,IAAI,CAAC;IACzB,IAAIC,IAAI,CAAC3B,MAAM,EAAE;MACfuB,WAAW,CAACI,IAAI,CAAC3B,MAAM,EAAEsB,IAAI,CAAC;IAChC,CAAC,MAAM;MACLK,IAAI,CAACC,WAAW,GAAGN,IAAI;IACzB;EACF;AACF;AAGA,SAASrB,WAAWA,CAClBL,MAAwB,EACxBiC,eAAuB,EACvBC,eAAuB,EACvBtB,IAAc,EACgB;EAC9B,IAAMN,SAAuC,GAAG,CAAC,CAAC;EAElD,KAAK,IAAMwB,IAAI,IAAI9B,MAAM,EAAE;IACzB,IAAMmC,IAAI,GAAGnC,MAAM,CAAC8B,IAAI,CAAC;IAGzB,IAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAQ;IAC/B,IAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAQ,CAAC;IACvC,IAAIE,SAAS,GAAGP,eAAe;IAC/B,IAAIQ,SAAS,GAAGP,eAAe;IAE/B,IAAIQ,cAA8B,GAAG,UAAU;IAC/C,IAAI,CAACN,QAAQ,EAAE;MACbM,cAAc,GAAG,UAAU;MAC3BD,SAAS,EAAE;IACb;IACA,IAAIH,QAAQ,EAAE;MACZI,cAAc,GAAG,UAAU;MAC3BF,SAAS,EAAE;MACX,IAAIJ,QAAQ,EAAEK,SAAS,EAAE;IAC3B;IAGA,IAAIN,IAAI,CAAC/B,MAAM,EAAE;MACf,IAAMuC,MAAK,GAAG/B,IAAI,CAACgC,MAAM,CAAC,CAACd,IAAI,CAAC,CAAC;MACjCxB,SAAS,CAACwB,IAAI,CAAC,GAAG;QAChBA,IAAI,EAAJA,IAAI;QACJlB,IAAI,EAAE+B,MAAK;QACXlC,GAAG,EAAEkC,MAAK,CAACE,IAAI,CAAC,CAAC;QACjBH,cAAc,EAAdA,cAAc;QACdF,SAAS,EAATA,SAAS;QACTC,SAAS,EAATA,SAAS;QACTK,QAAQ,EAAE,IAAI;QACdC,UAAU,EAAEC,MAAM,CAACC,IAAI,CAACd,IAAI,CAAC/B,MAAM,CAAC,CAACY,MAAM;QAC3CZ,MAAM,EAAEC,WAAW,CAAC8B,IAAI,CAAC/B,MAAM,EAAEoC,SAAS,EAAEC,SAAS,EAAEE,MAAK;MAC9D,CAAC;MACD;IACF;IAEA,IAAMO,OAAY,GAAGC,4BAAqB,CAAChB,IAAI,CAACT,IAAI,CAAE;IACtD,IAAI,CAACwB,OAAO,EAAE;MACZ,MAAM,IAAIE,KAAK,0BAAAR,MAAA,CAA0BT,IAAI,CAACT,IAAI,CAAE,CAAC;IACvD;IAEAS,IAAI,CAACkB,QAAQ,GAAGl
B,IAAI,CAACkB,QAAQ,IAAI,OAAO;IACxC,IAAI,EAAElB,IAAI,CAACkB,QAAQ,IAAIC,sBAAc,CAAC,EAAE;MACtC,MAAM,IAAIF,KAAK,kCAAAR,MAAA,CAAkCT,IAAI,CAACkB,QAAQ,CAAE,CAAC;IACnE;IAEAlB,IAAI,CAACH,WAAW,GAAGG,IAAI,CAACH,WAAW,IAAI,cAAc;IACrD,IAAI,EAAEG,IAAI,CAACH,WAAW,IAAIuB,wCAA2B,CAAC,EAAE;MACtD,MAAM,IAAIH,KAAK,oCAAAR,MAAA,CAAoCT,IAAI,CAACH,WAAW,CAAE,CAAC;IACxE;IAGA,IAAMW,KAAK,GAAG/B,IAAI,CAACgC,MAAM,CAAC,CAACd,IAAI,CAAC,CAAC;IACjCxB,SAAS,CAACwB,IAAI,CAAC,GAAG;MAChBA,IAAI,EAAJA,IAAI;MACJ0B,aAAa,EAAEN,OAAO,CAACM,aAAa;MACpCC,YAAY,EAAEP,OAAO,CAACO,YAAY;MAClC7C,IAAI,EAAE+B,KAAK;MACXlC,GAAG,EAAEkC,KAAK,CAACE,IAAI,CAAC,CAAC;MACjBH,cAAc,EAAdA,cAAc;MACdW,QAAQ,EAAElB,IAAI,CAACkB,QAAQ;MACvBrB,WAAW,EAAEG,IAAI,CAACH,WAAW;MAC7B0B,UAAU,EAAEvB,IAAI,CAACuB,UAAU,IAAIR,OAAO,CAACQ,UAAU;MACjDC,SAAS,EAAExB,IAAI,CAACwB,SAAS;MACzBC,KAAK,EAAEzB,IAAI,CAACyB,KAAK;MACjBpB,SAAS,EAATA,SAAS;MACTC,SAAS,EAATA;IACF,CAAC;EACH;EACA,OAAOnC,SAAS;AAClB;AAEA,SAASC,UAAUA,CAACH,MAAoC,EAAkB;EACxE,IAAIyD,IAAoB,GAAG,EAAE;EAC7B,KAAK,IAAMC,CAAC,IAAI1D,MAAM,EAAE;IACtByD,IAAI,CAACzC,IAAI,CAAChB,MAAM,CAAC0D,CAAC,CAAC,CAAC;IACpB,IAAI1D,MAAM,CAAC0D,CAAC,CAAC,CAAChB,QAAQ,EAAE;MACtBe,IAAI,GAAGA,IAAI,CAACjB,MAAM,CAACrC,UAAU,CAACH,MAAM,CAAC0D,CAAC,CAAC,CAAC1D,MAAO,CAAC,CAAC;IACnD;EACF;EACA,OAAOyD,IAAI;AACb"}
+
{"version":3,"file":"schema.js","names":["_codecs","require","_compression","_shred","_types","ParquetSchema","schema","_classCallCheck2","default","_defineProperty2","fields","buildFields","fieldList","listFields","_createClass2","key","value","findField","path","split","slice","n","length","shift","findFieldBranch","branch","push","shredRecord","row","rowGroup","materializeRows","compress","type","setCompress","shredBuffer","exports","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","join","isNested","fieldCount","Object","keys","typeDef","PARQUET_LOGICAL_TYPES","Error","encoding","PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","primitiveType","originalType","typeLength","presision","scale","list","k"],"sources":["../../../../src/parquetjs/schema/schema.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetRowGroup,\n ParquetCompression,\n ParquetField,\n ParquetRow,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRows, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(row: ParquetRow, rowGroup: ParquetRowGroup): void {\n shredRecord(this, row, rowGroup);\n }\n\n materializeRows(rowGroup: ParquetRowGroup): ParquetRow[] {\n return materializeRows(this, rowGroup);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n rowGroup(): ParquetRowGroup {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: 
SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n presision: opts.presision,\n scale: opts.scale,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return 
list;\n}\n"],"mappings":";;;;;;;;;;AAEA,IAAAA,OAAA,GAAAC,OAAA;AACA,IAAAC,YAAA,GAAAD,OAAA;AAUA,IAAAE,MAAA,GAAAF,OAAA;AACA,IAAAG,MAAA,GAAAH,OAAA;AAA8C,IAKjCI,aAAa;EAQxB,SAAAA,cAAYC,MAAwB,EAAE;IAAA,IAAAC,gBAAA,CAAAC,OAAA,QAAAH,aAAA;IAAA,IAAAI,gBAAA,CAAAD,OAAA;IAAA,IAAAC,gBAAA,CAAAD,OAAA;IAAA,IAAAC,gBAAA,CAAAD,OAAA;IACpC,IAAI,CAACF,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACI,MAAM,GAAGC,WAAW,CAACL,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;IAC3C,IAAI,CAACM,SAAS,GAAGC,UAAU,CAAC,IAAI,CAACH,MAAM,CAAC;EAC1C;EAAC,IAAAI,aAAA,CAAAN,OAAA,EAAAH,aAAA;IAAAU,GAAA;IAAAC,KAAA,EAKD,SAAAC,UAAUC,IAAuB,EAAgB;MAC/C,IAAI,OAAOA,IAAI,KAAK,QAAQ,EAAE;QAE5BA,IAAI,GAAGA,IAAI,CAACC,KAAK,CAAC,GAAG,CAAC;MACxB,CAAC,MAAM;QAELD,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;MACtB;MAEA,IAAIC,CAAC,GAAG,IAAI,CAACX,MAAM;MACnB,OAAOQ,IAAI,CAACI,MAAM,GAAG,CAAC,EAAEJ,IAAI,CAACK,KAAK,CAAC,CAAC,EAAE;QACpCF,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAACR,MAAsC;MACvD;MAEA,OAAOW,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB;EAAC;IAAAH,GAAA;IAAAC,KAAA,EAKD,SAAAQ,gBAAgBN,IAAuB,EAAkB;MACvD,IAAI,OAAOA,IAAI,KAAK,QAAQ,EAAE;QAE5BA,IAAI,GAAGA,IAAI,CAACC,KAAK,CAAC,GAAG,CAAC;MACxB;MACA,IAAMM,MAAsB,GAAG,EAAE;MACjC,IAAIJ,CAAC,GAAG,IAAI,CAACX,MAAM;MACnB,OAAOQ,IAAI,CAACI,MAAM,GAAG,CAAC,EAAEJ,IAAI,CAACK,KAAK,CAAC,CAAC,EAAE;QACpCE,MAAM,CAACC,IAAI,CAACL,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACvB,IAAIA,IAAI,CAACI,MAAM,GAAG,CAAC,EAAE;UACnBD,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAC,CAAC,CAAC,CAACR,MAAsC;QACvD;MACF;MACA,OAAOe,MAAM;IACf;EAAC;IAAAV,GAAA;IAAAC,KAAA,EAED,SAAAW,YAAYC,GAAe,EAAEC,QAAyB,EAAQ;MAC5D,IAAAF,kBAAW,EAAC,IAAI,EAAEC,GAAG,EAAEC,QAAQ,CAAC;IAClC;EAAC;IAAAd,GAAA;IAAAC,KAAA,EAED,SAAAc,gBAAgBD,QAAyB,EAAgB;MACvD,OAAO,IAAAC,sBAAe,EAAC,IAAI,EAAED,QAAQ,CAAC;IACxC;EAAC;IAAAd,GAAA;IAAAC,KAAA,EAED,SAAAe,SAASC,IAAwB,EAAQ;MACvCC,WAAW,CAAC,IAAI,CAAC3B,MAAM,EAAE0B,IAAI,CAAC;MAC9BC,WAAW,CAAC,IAAI,CAACvB,MAAM,EAAEsB,IAAI,CAAC;MAC9B,OAAO,IAAI;IACb;EAAC;IAAAjB,GAAA;IAAAC,KAAA,EAED,SAAAa,SAAA,EAA4B;MAC1B,OAAO,IAAAK,kBAAW,EAAC,IAAI,CAAC;IAC1B;EAAC;EAAA,OAAA7B,aAAA;AAAA;AAAA8B,OAAA,CAAA9B,aAAA,GAAAA,aAAA;AAGH,SAAS4B,WAAWA,CAAC3B,MAAW,EAAE0B,IAAwB,EAAE;EAC1D,KAAK,IAAMI,IAAI,IAAI9B,MAAM,EAAE;IACzB,IAAM+B,IAAI,GAAG/B,MAAM,CAAC8B,IAAI,CAAC;IACzB,IAAIC,IAAI,CAAC3B,MAAM,EAAE;MACfuB,WAAW,CAACI,IAAI,CAAC3B,MAAM,EAAEsB,IAAI,CAAC;IAChC,CAAC,MAAM;MACLK,IAAI,CAACC,WAAW,GAAGN,IAAI;IACzB;EACF;AACF;AAGA,SAASrB,WAAWA,CAClBL,MAAwB,EACxBiC,eAAuB,EACvBC,eAAuB,EACvBtB,IAAc,EACgB;EAC9B,IAAMN,SAAuC,GAAG,CAAC,CAAC;EAElD,KAAK,IAAMwB,IAAI,IAAI9B,MAAM,EAAE;IACzB,IAAMmC,IAAI,GAAGnC,MAAM,CAAC8B,IAAI,CAAC;IAGzB,IAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAQ;IAC/B,IAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAQ,CAAC;IACvC,IAAIE,SAAS,GAAGP,eAAe;IAC/B,IAAIQ,SAAS,GAAGP,eAAe;IAE/B,IAAIQ,cAA8B,GAAG,UAAU;IAC/C,IAAI,CAACN,QAAQ,EAAE;MACbM,cAAc,GAAG,UAAU;MAC3BD,SAAS,EAAE;IACb;IACA,IAAIH,QAAQ,EAAE;MACZI,cAAc,GAAG,UAAU;MAC3BF,SAAS,EAAE;MACX,IAAIJ,QAAQ,EAAEK,SAAS,EAAE;IAC3B;IAGA,IAAIN,IAAI,CAAC/B,MAAM,EAAE;MACf,IAAMuC,MAAK,GAAG/B,IAAI,CAACgC,MAAM,CAAC,CAACd,IAAI,CAAC,CAAC;MACjCxB,SAAS,CAACwB,IAAI,CAAC,GAAG;QAChBA,IAAI,EAAJA,IAAI;QACJlB,IAAI,EAAE+B,MAAK;QACXlC,GAAG,EAAEkC,MAAK,CAACE,IAAI,CAAC,CAAC;QACjBH,cAAc,EAAdA,cAAc;QACdF,SAAS,EAATA,SAAS;QACTC,SAAS,EAATA,SAAS;QACTK,QAAQ,EAAE,IAAI;QACdC,UAAU,EAAEC,MAAM,CAACC,IAAI,CAACd,IAAI,CAAC/B,MAAM,CAAC,CAACY,MAAM;QAC3CZ,MAAM,EAAEC,WAAW,CAAC8B,IAAI,CAAC/B,MAAM,EAAEoC,SAAS,EAAEC,SAAS,EAAEE,MAAK;MAC9D,CAAC;MACD;IACF;IAEA,IAAMO,OAAY,GAAGC,4BAAqB,CAAChB,IAAI,CAACT,IAAI,CAAE;IACtD,IAAI,CAACwB,OAAO,EAAE;MACZ,MAAM,IAAIE,KAAK,0BAAAR,MAAA,CAA0BT,IAAI,CAACT,IAAI,CAAE,CAAC;IACvD;IAEAS,IAAI,CAACkB,QAAQ,GAAGlB,
IAAI,CAACkB,QAAQ,IAAI,OAAO;IACxC,IAAI,EAAElB,IAAI,CAACkB,QAAQ,IAAIC,sBAAc,CAAC,EAAE;MACtC,MAAM,IAAIF,KAAK,kCAAAR,MAAA,CAAkCT,IAAI,CAACkB,QAAQ,CAAE,CAAC;IACnE;IAEAlB,IAAI,CAACH,WAAW,GAAGG,IAAI,CAACH,WAAW,IAAI,cAAc;IACrD,IAAI,EAAEG,IAAI,CAACH,WAAW,IAAIuB,wCAA2B,CAAC,EAAE;MACtD,MAAM,IAAIH,KAAK,oCAAAR,MAAA,CAAoCT,IAAI,CAACH,WAAW,CAAE,CAAC;IACxE;IAGA,IAAMW,KAAK,GAAG/B,IAAI,CAACgC,MAAM,CAAC,CAACd,IAAI,CAAC,CAAC;IACjCxB,SAAS,CAACwB,IAAI,CAAC,GAAG;MAChBA,IAAI,EAAJA,IAAI;MACJ0B,aAAa,EAAEN,OAAO,CAACM,aAAa;MACpCC,YAAY,EAAEP,OAAO,CAACO,YAAY;MAClC7C,IAAI,EAAE+B,KAAK;MACXlC,GAAG,EAAEkC,KAAK,CAACE,IAAI,CAAC,CAAC;MACjBH,cAAc,EAAdA,cAAc;MACdW,QAAQ,EAAElB,IAAI,CAACkB,QAAQ;MACvBrB,WAAW,EAAEG,IAAI,CAACH,WAAW;MAC7B0B,UAAU,EAAEvB,IAAI,CAACuB,UAAU,IAAIR,OAAO,CAACQ,UAAU;MACjDC,SAAS,EAAExB,IAAI,CAACwB,SAAS;MACzBC,KAAK,EAAEzB,IAAI,CAACyB,KAAK;MACjBpB,SAAS,EAATA,SAAS;MACTC,SAAS,EAATA;IACF,CAAC;EACH;EACA,OAAOnC,SAAS;AAClB;AAEA,SAASC,UAAUA,CAACH,MAAoC,EAAkB;EACxE,IAAIyD,IAAoB,GAAG,EAAE;EAC7B,KAAK,IAAMC,CAAC,IAAI1D,MAAM,EAAE;IACtByD,IAAI,CAACzC,IAAI,CAAChB,MAAM,CAAC0D,CAAC,CAAC,CAAC;IACpB,IAAI1D,MAAM,CAAC0D,CAAC,CAAC,CAAChB,QAAQ,EAAE;MACtBe,IAAI,GAAGA,IAAI,CAACjB,MAAM,CAACrC,UAAU,CAACH,MAAM,CAAC0D,CAAC,CAAC,CAAC1D,MAAO,CAAC,CAAC;IACnD;EACF;EACA,OAAOyD,IAAI;AACb"}
package/dist/es5/parquetjs/schema/shred.js
@@ -4,13 +4,14 @@ var _typeof = require("@babel/runtime/helpers/typeof");
 Object.defineProperty(exports, "__esModule", {
   value: true
 });
-Object.defineProperty(exports, "
+Object.defineProperty(exports, "ParquetRowGroup", {
   enumerable: true,
   get: function get() {
-    return _declare.
+    return _declare.ParquetRowGroup;
   }
 });
-exports.
+exports.materializeColumns = materializeColumns;
+exports.materializeRows = materializeRows;
 exports.shredBuffer = shredBuffer;
 exports.shredRecord = shredRecord;
 var _declare = require("./declare");
@@ -45,24 +46,24 @@ function shredBuffer(schema) {
     columnData: columnData
   };
 }
-function shredRecord(schema, record,
+function shredRecord(schema, record, rowGroup) {
   var data = shredBuffer(schema).columnData;
   shredRecordFields(schema.fields, record, data, 0, 0);
-  if (
-
-
+  if (rowGroup.rowCount === 0) {
+    rowGroup.rowCount = 1;
+    rowGroup.columnData = data;
     return;
   }
-
+  rowGroup.rowCount += 1;
   var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
     _step2;
   try {
     for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
       var field = _step2.value;
-      Array.prototype.push.apply(
-      Array.prototype.push.apply(
-      Array.prototype.push.apply(
-
+      Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);
+      Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);
+      Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);
+      rowGroup.columnData[field.key].count += data[field.key].count;
     }
   } catch (err) {
     _iterator2.e(err);
@@ -110,20 +111,20 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
     }
   }
 }
-function
-  var
-  for (var i = 0; i <
-
+function materializeRows(schema, rowGroup) {
+  var rows = [];
+  for (var i = 0; i < rowGroup.rowCount; i++) {
+    rows.push({});
   }
-  for (var key in
-    var columnData =
+  for (var key in rowGroup.columnData) {
+    var columnData = rowGroup.columnData[key];
     if (columnData.count) {
-
+      materializeColumnAsRows(schema, columnData, key, rows);
     }
   }
-  return
+  return rows;
 }
-function
+function materializeColumnAsRows(schema, columnData, key, rows) {
   var field = schema.findField(key);
   var branch = schema.findFieldBranch(key);
   var rLevels = new Array(field.rLevelMax + 1).fill(0);
@@ -134,7 +135,7 @@ function materializeColumn(schema, columnData, key, records) {
     rLevels[rLevel]++;
     rLevels.fill(0, rLevel + 1);
     var rIndex = 0;
-    var record =
+    var record = rows[rLevels[rIndex++] - 1];
     var _iterator3 = _createForOfIteratorHelper(branch),
       _step3;
     try {
@@ -184,4 +185,98 @@ function materializeColumn(schema, columnData, key, records) {
     }
   }
 }
+function materializeColumns(schema, rowGroup) {
+  var columns = {};
+  for (var key in rowGroup.columnData) {
+    var columnData = rowGroup.columnData[key];
+    if (columnData.count) {
+      materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);
+    }
+  }
+  return columns;
+}
+function materializeColumnAsColumnarArray(schema, columnData, rowCount, key, columns) {
+  if (columnData.count <= 0) {
+    return;
+  }
+  var field = schema.findField(key);
+  var branch = schema.findFieldBranch(key);
+  var columnName = branch[0].name;
+  var column;
+  var values = columnData.values;
+  if (values.length === rowCount && branch[0].primitiveType) {
+    column = values;
+  }
+  if (column) {
+    columns[columnName] = column;
+    return;
+  }
+  column = new Array(rowCount);
+  for (var i = 0; i < rowCount; i++) {
+    column[i] = {};
+  }
+  columns[columnName] = column;
+  var rLevels = new Array(field.rLevelMax + 1).fill(0);
+  var vIndex = 0;
+  for (var _i = 0; _i < columnData.count; _i++) {
+    var dLevel = columnData.dlevels[_i];
+    var rLevel = columnData.rlevels[_i];
+    rLevels[rLevel]++;
+    rLevels.fill(0, rLevel + 1);
+    var rIndex = 0;
+    var record = column[rLevels[rIndex++] - 1];
+    var _iterator4 = _createForOfIteratorHelper(branch),
+      _step4;
+    try {
+      for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) {
+        var step = _step4.value;
+        if (step === field || dLevel < step.dLevelMax) {
+          break;
+        }
+        switch (step.repetitionType) {
+          case 'REPEATED':
+            if (!(step.name in record)) {
+              record[step.name] = [];
+            }
+            var _ix2 = rLevels[rIndex++];
+            while (record[step.name].length <= _ix2) {
+              record[step.name].push({});
+            }
+            record = record[step.name][_ix2];
+            break;
+          default:
+            record[step.name] = record[step.name] || {};
+            record = record[step.name];
+        }
+      }
+    } catch (err) {
+      _iterator4.e(err);
+    } finally {
+      _iterator4.f();
+    }
+    if (dLevel === field.dLevelMax) {
+      var value = Types.fromPrimitive(field.originalType || field.primitiveType, columnData.values[vIndex], field);
+      vIndex++;
+      switch (field.repetitionType) {
+        case 'REPEATED':
+          if (!(field.name in record)) {
+            record[field.name] = [];
+          }
+          var ix = rLevels[rIndex];
+          while (record[field.name].length <= ix) {
+            record[field.name].push(null);
+          }
+          record[field.name][ix] = value;
+          break;
+        default:
+          record[field.name] = value;
+      }
+    }
+  }
+  for (var _i2 = 0; _i2 < rowCount; ++_i2) {
+    if (columnName in column[_i2]) {
+      column[_i2] = column[_i2][columnName];
+    }
+  }
+}
 //# sourceMappingURL=shred.js.map
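Besides renaming materializeRecords to materializeRows, the hunk above adds a columnar path: materializeColumns walks the same repetition and definition levels but groups values per top-level column instead of per row, and reuses the raw shredded values when a flat column already holds one value per row. The sketch below contrasts the two output shapes; the deep import paths are hypothetical and flat numeric columns are chosen so the fast path leaves values unchanged:

```typescript
// Hypothetical deep import paths, for illustration only.
import {ParquetSchema} from '@loaders.gl/parquet/parquetjs/schema/schema';
import {materializeRows, materializeColumns} from '@loaders.gl/parquet/parquetjs/schema/shred';

const schema = new ParquetSchema({id: {type: 'INT32'}, score: {type: 'DOUBLE'}});
const rowGroup = schema.rowGroup();
schema.shredRecord({id: 1, score: 1.5}, rowGroup);
schema.shredRecord({id: 2, score: 2.5}, rowGroup);

// Row-oriented (existing path, renamed): roughly [{id: 1, score: 1.5}, {id: 2, score: 2.5}]
const rows = materializeRows(schema, rowGroup);

// Column-oriented (new path): roughly {id: [1, 2], score: [1.5, 2.5]}
const columns = materializeColumns(schema, rowGroup);
```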
package/dist/es5/parquetjs/schema/shred.js.map
@@ -1 +1 @@
-
{"version":3,"file":"shred.js","names":["_declare","require","Types","_interopRequireWildcard","_getRequireWildcardCache","nodeInterop","WeakMap","cacheBabelInterop","cacheNodeInterop","obj","__esModule","_typeof","default","cache","has","get","newObj","hasPropertyDescriptor","Object","defineProperty","getOwnPropertyDescriptor","key","prototype","hasOwnProperty","call","desc","set","_createForOfIteratorHelper","o","allowArrayLike","it","Symbol","iterator","Array","isArray","_unsupportedIterableToArray","length","i","F","s","n","done","value","e","_e","f","TypeError","normalCompletion","didErr","err","step","next","_e2","return","minLen","_arrayLikeToArray","toString","slice","constructor","name","from","test","arr","len","arr2","shredBuffer","schema","columnData","_iterator","fieldList","_step","field","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","_iterator2","_step2","push","apply","rLevel","dLevel","undefined","Boolean","repetitionType","Error","concat","isNested","rlvl","rLevelMax","dLevelMax","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","_iterator3","_step3","ix","fromPrimitive"],"sources":["../../../../src/parquetjs/schema/shred.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) {\n records.push({});\n }\n for (const key in buffer.columnData) {\n const columnData = buffer.columnData[key];\n if (columnData.count) {\n materializeColumn(schema, columnData, key, records);\n }\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n columnData: ParquetData,\n key: string,\n records: ParquetRecord[]\n): void {\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < columnData.count; i++) {\n const dLevel = columnData.dlevels[i];\n const rLevel = columnData.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes - Build a nested row object\n for (const step of branch) {\n if (step === field || dLevel < step.dLevelMax) {\n break;\n }\n\n switch (step.repetitionType) {\n case 'REPEATED':\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n break;\n\n default:\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node - Add the value\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n columnData.values[vIndex],\n field\n );\n vIndex++;\n\n switch (field.repetitionType) {\n case 'REPEATED':\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n break;\n\n default:\n record[field.name] = value;\n }\n }\n }\n}\n\n// Columnar export\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n *\nexport function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {\n const columns: ParquetRecord = {};\n for (const key in buffer.columnData) {\n const columnData = buffer.columnData[key];\n if (columnData.count) {\n extractColumn(schema, columnData, key, columns);\n }\n }\n return columns;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction extractColumn(\n schema: ParquetSchema,\n columnData: ParquetData,\n key: string,\n columns: Record<string, unknown> \n) {\n if (columnData.count <= 0) {\n return;\n }\n\n const record = columns;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n\n let i = 0;\n const dLevel = columnData.dlevels[i];\n const rLevel = columnData.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field || dLevel < step.dLevelMax) {\n break;\n }\n\n switch (step.repetitionType) {\n case 'REPEATED':\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n break;\n\n default:\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n columnData.values[vIndex],\n field\n );\n vIndex++;\n\n switch (field.repetitionType) {\n case 'REPEATED':\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n break;\n\n default:\n record[field.name] = value;\n }\n 
}\n}\n*/\n"],"mappings":";;;;;;;;;;;;;;;AAEA,IAAAA,QAAA,GAAAC,OAAA;AAEA,IAAAC,KAAA,GAAAC,uBAAA,CAAAF,OAAA;AAAiC,SAAAG,yBAAAC,WAAA,eAAAC,OAAA,kCAAAC,iBAAA,OAAAD,OAAA,QAAAE,gBAAA,OAAAF,OAAA,YAAAF,wBAAA,YAAAA,yBAAAC,WAAA,WAAAA,WAAA,GAAAG,gBAAA,GAAAD,iBAAA,KAAAF,WAAA;AAAA,SAAAF,wBAAAM,GAAA,EAAAJ,WAAA,SAAAA,WAAA,IAAAI,GAAA,IAAAA,GAAA,CAAAC,UAAA,WAAAD,GAAA,QAAAA,GAAA,aAAAE,OAAA,CAAAF,GAAA,yBAAAA,GAAA,4BAAAG,OAAA,EAAAH,GAAA,UAAAI,KAAA,GAAAT,wBAAA,CAAAC,WAAA,OAAAQ,KAAA,IAAAA,KAAA,CAAAC,GAAA,CAAAL,GAAA,YAAAI,KAAA,CAAAE,GAAA,CAAAN,GAAA,SAAAO,MAAA,WAAAC,qBAAA,GAAAC,MAAA,CAAAC,cAAA,IAAAD,MAAA,CAAAE,wBAAA,WAAAC,GAAA,IAAAZ,GAAA,QAAAY,GAAA,kBAAAH,MAAA,CAAAI,SAAA,CAAAC,cAAA,CAAAC,IAAA,CAAAf,GAAA,EAAAY,GAAA,SAAAI,IAAA,GAAAR,qBAAA,GAAAC,MAAA,CAAAE,wBAAA,CAAAX,GAAA,EAAAY,GAAA,cAAAI,IAAA,KAAAA,IAAA,CAAAV,GAAA,IAAAU,IAAA,CAAAC,GAAA,KAAAR,MAAA,CAAAC,cAAA,CAAAH,MAAA,EAAAK,GAAA,EAAAI,IAAA,YAAAT,MAAA,CAAAK,GAAA,IAAAZ,GAAA,CAAAY,GAAA,SAAAL,MAAA,CAAAJ,OAAA,GAAAH,GAAA,MAAAI,KAAA,IAAAA,KAAA,CAAAa,GAAA,CAAAjB,GAAA,EAAAO,MAAA,YAAAA,MAAA;AAAA,SAAAW,2BAAAC,CAAA,EAAAC,cAAA,QAAAC,EAAA,UAAAC,MAAA,oBAAAH,CAAA,CAAAG,MAAA,CAAAC,QAAA,KAAAJ,CAAA,qBAAAE,EAAA,QAAAG,KAAA,CAAAC,OAAA,CAAAN,CAAA,MAAAE,EAAA,GAAAK,2BAAA,CAAAP,CAAA,MAAAC,cAAA,IAAAD,CAAA,WAAAA,CAAA,CAAAQ,MAAA,qBAAAN,EAAA,EAAAF,CAAA,GAAAE,EAAA,MAAAO,CAAA,UAAAC,CAAA,YAAAA,EAAA,eAAAC,CAAA,EAAAD,CAAA,EAAAE,CAAA,WAAAA,EAAA,QAAAH,CAAA,IAAAT,CAAA,CAAAQ,MAAA,WAAAK,IAAA,mBAAAA,IAAA,SAAAC,KAAA,EAAAd,CAAA,CAAAS,CAAA,UAAAM,CAAA,WAAAA,EAAAC,EAAA,UAAAA,EAAA,KAAAC,CAAA,EAAAP,CAAA,gBAAAQ,SAAA,iJAAAC,gBAAA,SAAAC,MAAA,UAAAC,GAAA,WAAAV,CAAA,WAAAA,EAAA,IAAAT,EAAA,GAAAA,EAAA,CAAAN,IAAA,CAAAI,CAAA,MAAAY,CAAA,WAAAA,EAAA,QAAAU,IAAA,GAAApB,EAAA,CAAAqB,IAAA,IAAAJ,gBAAA,GAAAG,IAAA,CAAAT,IAAA,SAAAS,IAAA,KAAAP,CAAA,WAAAA,EAAAS,GAAA,IAAAJ,MAAA,SAAAC,GAAA,GAAAG,GAAA,KAAAP,CAAA,WAAAA,EAAA,eAAAE,gBAAA,IAAAjB,EAAA,CAAAuB,MAAA,UAAAvB,EAAA,CAAAuB,MAAA,oBAAAL,MAAA,QAAAC,GAAA;AAAA,SAAAd,4BAAAP,CAAA,EAAA0B,MAAA,SAAA1B,CAAA,qBAAAA,CAAA,sBAAA2B,iBAAA,CAAA3B,CAAA,EAAA0B,MAAA,OAAAd,CAAA,GAAAtB,MAAA,CAAAI,SAAA,CAAAkC,QAAA,CAAAhC,IAAA,CAAAI,CAAA,EAAA6B,KAAA,aAAAjB,CAAA,iBAAAZ,CAAA,CAAA8B,WAAA,EAAAlB,CAAA,GAAAZ,CAAA,CAAA8B,WAAA,CAAAC,IAAA,MAAAnB,CAAA,cAAAA,CAAA,mBAAAP,KAAA,CAAA2B,IAAA,CAAAhC,CAAA,OAAAY,CAAA,+DAAAqB,IAAA,CAAArB,CAAA,UAAAe,iBAAA,CAAA3B,CAAA,EAAA0B,MAAA;AAAA,SAAAC,kBAAAO,GAAA,EAAAC,GAAA,QAAAA,GAAA,YAAAA,GAAA,GAAAD,GAAA,CAAA1B,MAAA,EAAA2B,GAAA,GAAAD,GAAA,CAAA1B,MAAA,WAAAC,CAAA,MAAA2B,IAAA,OAAA/B,KAAA,CAAA8B,GAAA,GAAA1B,CAAA,GAAA0B,GAAA,EAAA1B,CAAA,IAAA2B,IAAA,CAAA3B,CAAA,IAAAyB,GAAA,CAAAzB,CAAA,UAAA2B,IAAA;AAI1B,SAASC,WAAWA,CAACC,MAAqB,EAAiB;EAChE,IAAMC,UAAuC,GAAG,CAAC,CAAC;EAAC,IAAAC,SAAA,GAAAzC,0BAAA,CAC/BuC,MAAM,CAACG,SAAS;IAAAC,KAAA;EAAA;IAApC,KAAAF,SAAA,CAAA7B,CAAA,MAAA+B,KAAA,GAAAF,SAAA,CAAA5B,CAAA,IAAAC,IAAA,GAAsC;MAAA,IAA3B8B,KAAK,GAAAD,KAAA,CAAA5B,KAAA;MACdyB,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,GAAG;QACtBmD,OAAO,EAAE,EAAE;QACXC,OAAO,EAAE,EAAE;QACXC,MAAM,EAAE,EAAE;QACVC,WAAW,EAAE,EAAE;QACfC,KAAK,EAAE;MACT,CAAC;IACH;EAAC,SAAA3B,GAAA;IAAAmB,SAAA,CAAAzB,CAAA,CAAAM,GAAA;EAAA;IAAAmB,SAAA,CAAAvB,CAAA;EAAA;EACD,OAAO;IAACgC,QAAQ,EAAE,CAAC;IAAEV,UAAU,EAAVA;EAAU,CAAC;AAClC;AAwBO,SAASW,WAAWA,CAACZ,MAAqB,EAAEa,MAAW,EAAEC,MAAqB,EAAQ;EAE3F,IAAMC,IAAI,GAAGhB,WAAW,CAACC,MAAM,CAAC,CAACC,UAAU;EAE3Ce,iBAAiB,CAAChB,MAAM,CAACiB,MAAM,EAAEJ,MAAM,EAAEE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;EAGpD,IAAID,MAAM,CAACH,QAAQ,KAAK,CAAC,EAAE;IACzBG,MAAM,CAACH,QAAQ,GAAG,CAAC;IACnBG,MAAM,CAACb,UAAU,GAAGc,IAAI;IACxB;EACF;EACAD,MAAM,CAACH,QAAQ,IAAI,CAAC;EAAC,IAAAO,UAAA,GAAAzD,0BAAA,CACDuC,MAAM,CAACG,SAAS;IAAAgB,MAAA;EAAA;IAApC,KAAAD,UAAA,CAAA7C,C
AAA,MAAA8C,MAAA,GAAAD,UAAA,CAAA5C,CAAA,IAAAC,IAAA,GAAsC;MAAA,IAA3B8B,KAAK,GAAAc,MAAA,CAAA3C,KAAA;MACdT,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,MAAM,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,EAAEQ,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAAC;MACzFxC,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,MAAM,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,EAAES,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAAC;MACzFvC,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,MAAM,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,EAAEO,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,CAAC;MACvFM,MAAM,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAIK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK;IAC7D;EAAC,SAAA3B,GAAA;IAAAmC,UAAA,CAAAzC,CAAA,CAAAM,GAAA;EAAA;IAAAmC,UAAA,CAAAvC,CAAA;EAAA;AACH;AAGA,SAASqC,iBAAiBA,CACxBC,MAAoC,EACpCJ,MAAW,EACXE,IAAiC,EACjCO,MAAc,EACdC,MAAc,EACd;EACA,KAAK,IAAM9B,IAAI,IAAIwB,MAAM,EAAE;IACzB,IAAMZ,KAAK,GAAGY,MAAM,CAACxB,IAAI,CAAC;IAG1B,IAAIe,MAAa,GAAG,EAAE;IACtB,IACEK,MAAM,IACNR,KAAK,CAACZ,IAAI,IAAIoB,MAAM,IACpBA,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,KAAK+B,SAAS,IAChCX,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,KAAK,IAAI,EAC3B;MACA,IAAIoB,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACD,WAAW,KAAKzB,KAAK,EAAE;QAC5CyC,MAAM,GAAGK,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC;MAC7B,CAAC,MAAM;QACLe,MAAM,CAACY,IAAI,CAACP,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAAC;MACjC;IACF;IAEA,IAAIe,MAAM,CAACtC,MAAM,KAAK,CAAC,IAAIuD,OAAO,CAACZ,MAAM,CAAC,IAAIR,KAAK,CAACqB,cAAc,KAAK,UAAU,EAAE;MACjF,MAAM,IAAIC,KAAK,4BAAAC,MAAA,CAA4BvB,KAAK,CAACZ,IAAI,CAAE,CAAC;IAC1D;IACA,IAAIe,MAAM,CAACtC,MAAM,GAAG,CAAC,IAAImC,KAAK,CAACqB,cAAc,KAAK,UAAU,EAAE;MAC5D,MAAM,IAAIC,KAAK,+BAAAC,MAAA,CAA+BvB,KAAK,CAACZ,IAAI,CAAE,CAAC;IAC7D;IAGA,IAAIe,MAAM,CAACtC,MAAM,KAAK,CAAC,EAAE;MACvB,IAAImC,KAAK,CAACwB,QAAQ,EAAE;QAClBb,iBAAiB,CAACX,KAAK,CAACY,MAAM,EAAG,IAAI,EAAEF,IAAI,EAAEO,MAAM,EAAEC,MAAM,CAAC;MAC9D,CAAC,MAAM;QACLR,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAI,CAAC;QAC1BK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAACa,IAAI,CAACE,MAAM,CAAC;QACpCP,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAACc,IAAI,CAACG,MAAM,CAAC;MACtC;MACA;IACF;IAGA,KAAK,IAAIpD,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGqC,MAAM,CAACtC,MAAM,EAAEC,CAAC,EAAE,EAAE;MACtC,IAAM2D,IAAI,GAAG3D,CAAC,KAAK,CAAC,GAAGmD,MAAM,GAAGjB,KAAK,CAAC0B,SAAS;MAC/C,IAAI1B,KAAK,CAACwB,QAAQ,EAAE;QAClBb,iBAAiB,CAACX,KAAK,CAACY,MAAM,EAAGT,MAAM,CAACrC,CAAC,CAAC,EAAE4C,IAAI,EAAEe,IAAI,EAAEzB,KAAK,CAAC2B,SAAS,CAAC;MAC1E,CAAC,MAAM;QACLjB,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAI,CAAC;QAC1BK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAACa,IAAI,CAACU,IAAI,CAAC;QAClCf,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAACc,IAAI,CAACf,KAAK,CAAC2B,SAAS,CAAC;QAC7CjB,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,CAACY,IAAI,CACzBpF,KAAK,CAACiG,WAAW,CAAE5B,KAAK,CAAC6B,YAAY,IAAI7B,KAAK,CAAC8B,aAAa,EAAI3B,MAAM,CAACrC,CAAC,CAAC,CAC3E,CAAC;MACH;IACF;EACF;AACF;AAqBO,SAASiE,kBAAkBA,CAACpC,MAAqB,EAAEc,MAAqB,EAAmB;EAChG,IAAMuB,OAAwB,GAAG,EAAE;EACnC,KAAK,IAAIlE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG2C,MAAM,CAACH,QAAQ,EAAExC,CAAC,EAAE,EAAE;IACxCkE,OAAO,CAACjB,IAAI,CAAC,CAAC,CAAC,CAAC;EAClB;EACA,KAAK,IAAMjE,GAAG,IAAI2D,MAAM,CAACb,UAAU,EAAE;IACnC,IAAMA,UAAU,GAAGa,MAAM,CAACb,UAAU,CAAC9C,GAAG,CAAC;IACzC,IAAI8C,UAAU,CAACS,KAAK,EAAE;MACpB4B,iBAAiB,CAACtC,MAAM,EAAEC,UAAU,EAAE9C,GAAG,EAAEkF,OAAO,CAAC;IACrD;EACF;EACA,OAAOA,OAAO;AAChB;AAGA,SAASC,iBAAiBA,CACxBtC,MAAqB,EACrBC,UAAuB,EACvB9C,GAAW,EACXkF,OAAwB,EAClB;EACN,IAAMhC,KAAK,GAAGL,MAAM,CAACuC,SAAS,CAACpF,GAAG,CAAC;EACnC,IAAMqF,MAAM,GAAGxC,MAAM,CAACyC,eAAe,CAACtF,GAAG,CAAC;EAG1C,IAAMuF,OAAiB,GAAG,IAAI3
E,KAAK,CAACsC,KAAK,CAAC0B,SAAS,GAAG,CAAC,CAAC,CAACY,IAAI,CAAC,CAAC,CAAC;EAChE,IAAIC,MAAM,GAAG,CAAC;EACd,KAAK,IAAIzE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG8B,UAAU,CAACS,KAAK,EAAEvC,CAAC,EAAE,EAAE;IACzC,IAAMoD,MAAM,GAAGtB,UAAU,CAACK,OAAO,CAACnC,CAAC,CAAC;IACpC,IAAMmD,MAAM,GAAGrB,UAAU,CAACM,OAAO,CAACpC,CAAC,CAAC;IACpCuE,OAAO,CAACpB,MAAM,CAAC,EAAE;IACjBoB,OAAO,CAACC,IAAI,CAAC,CAAC,EAAErB,MAAM,GAAG,CAAC,CAAC;IAE3B,IAAIuB,MAAM,GAAG,CAAC;IACd,IAAIhC,MAAM,GAAGwB,OAAO,CAACK,OAAO,CAACG,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC;IAAC,IAAAC,UAAA,GAAArF,0BAAA,CAGzB+E,MAAM;MAAAO,MAAA;IAAA;MAAzB,KAAAD,UAAA,CAAAzE,CAAA,MAAA0E,MAAA,GAAAD,UAAA,CAAAxE,CAAA,IAAAC,IAAA,GAA2B;QAAA,IAAhBS,IAAI,GAAA+D,MAAA,CAAAvE,KAAA;QACb,IAAIQ,IAAI,KAAKqB,KAAK,IAAIkB,MAAM,GAAGvC,IAAI,CAACgD,SAAS,EAAE;UAC7C;QACF;QAEA,QAAQhD,IAAI,CAAC0C,cAAc;UACzB,KAAK,UAAU;YACb,IAAI,EAAE1C,IAAI,CAACS,IAAI,IAAIoB,MAAM,CAAC,EAAE;cAE1BA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAG,EAAE;YACxB;YACA,IAAMuD,GAAE,GAAGN,OAAO,CAACG,MAAM,EAAE,CAAC;YAC5B,OAAOhC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACvB,MAAM,IAAI8E,GAAE,EAAE;cAErCnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAAC2B,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B;YACAP,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACuD,GAAE,CAAC;YAC9B;UAEF;YACEnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAGoB,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3CoB,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC;QAC9B;MACF;IAAC,SAAAV,GAAA;MAAA+D,UAAA,CAAArE,CAAA,CAAAM,GAAA;IAAA;MAAA+D,UAAA,CAAAnE,CAAA;IAAA;IAGD,IAAI4C,MAAM,KAAKlB,KAAK,CAAC2B,SAAS,EAAE;MAC9B,IAAMxD,KAAK,GAAGxC,KAAK,CAACiH,aAAa,CAE/B5C,KAAK,CAAC6B,YAAY,IAAI7B,KAAK,CAAC8B,aAAa,EACzClC,UAAU,CAACO,MAAM,CAACoC,MAAM,CAAC,EACzBvC,KACF,CAAC;MACDuC,MAAM,EAAE;MAER,QAAQvC,KAAK,CAACqB,cAAc;QAC1B,KAAK,UAAU;UACb,IAAI,EAAErB,KAAK,CAACZ,IAAI,IAAIoB,MAAM,CAAC,EAAE;YAE3BA,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAG,EAAE;UACzB;UACA,IAAMuD,EAAE,GAAGN,OAAO,CAACG,MAAM,CAAC;UAC1B,OAAOhC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACvB,MAAM,IAAI8E,EAAE,EAAE;YAEtCnC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAAC2B,IAAI,CAAC,IAAI,CAAC;UAC/B;UACAP,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACuD,EAAE,CAAC,GAAGxE,KAAK;UAC9B;QAEF;UACEqC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAGjB,KAAK;MAC9B;IACF;EACF;AACF"}
+
{"version":3,"file":"shred.js","names":["_declare","require","Types","_interopRequireWildcard","_getRequireWildcardCache","nodeInterop","WeakMap","cacheBabelInterop","cacheNodeInterop","obj","__esModule","_typeof","default","cache","has","get","newObj","hasPropertyDescriptor","Object","defineProperty","getOwnPropertyDescriptor","key","prototype","hasOwnProperty","call","desc","set","_createForOfIteratorHelper","o","allowArrayLike","it","Symbol","iterator","Array","isArray","_unsupportedIterableToArray","length","i","F","s","n","done","value","e","_e","f","TypeError","normalCompletion","didErr","err","step","next","_e2","return","minLen","_arrayLikeToArray","toString","slice","constructor","name","from","test","arr","len","arr2","shredBuffer","schema","columnData","_iterator","fieldList","_step","field","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","rowGroup","data","shredRecordFields","fields","_iterator2","_step2","push","apply","rLevel","dLevel","undefined","Boolean","repetitionType","Error","concat","isNested","rlvl","rLevelMax","dLevelMax","toPrimitive","originalType","primitiveType","materializeRows","rows","materializeColumnAsRows","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","_iterator3","_step3","ix","fromPrimitive","materializeColumns","columns","materializeColumnAsColumnarArray","columnName","column","_iterator4","_step4"],"sources":["../../../../src/parquetjs/schema/shred.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ArrayType} from '@loaders.gl/schema';\nimport {ParquetRowGroup, ParquetColumnChunk, ParquetField, ParquetRow} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetRowGroup};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetRowGroup {\n const columnData: Record<string, ParquetColumnChunk> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The rowGroup argument must point to an object into which the shredded record\n * will be returned. You may re-use the rowGroup for repeated calls to this function\n * to append to an existing rowGroup, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the rowGroup is as\n * follows:\n *\n * rowGroup = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(\n schema: ParquetSchema,\n record: ParquetRow,\n rowGroup: ParquetRowGroup\n): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the rowGroup */\n if (rowGroup.rowCount === 0) {\n rowGroup.rowCount = 1;\n rowGroup.columnData = data;\n return;\n }\n rowGroup.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);\n rowGroup.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: ParquetRow,\n data: Record<string, ParquetColumnChunk>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null!, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The rowGroup argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * rowGroup = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRows(schema: ParquetSchema, rowGroup: ParquetRowGroup): ParquetRow[] {\n const rows: ParquetRow[] = [];\n // rows = new Array(rowGroup.rowCount).fill({})'\n for (let i = 0; i < rowGroup.rowCount; i++) {\n rows.push({});\n }\n for (const key in rowGroup.columnData) {\n const columnData = rowGroup.columnData[key];\n if (columnData.count) {\n materializeColumnAsRows(schema, columnData, key, rows);\n }\n }\n return rows;\n}\n\n/** Populate record fields for one column */\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumnAsRows(\n schema: ParquetSchema,\n columnData: ParquetColumnChunk,\n key: string,\n rows: ParquetRow[]\n): void {\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < columnData.count; i++) {\n const dLevel = columnData.dlevels[i];\n const rLevel = columnData.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = rows[rLevels[rIndex++] - 1];\n\n // Internal nodes - Build a nested row object\n for (const step of branch) {\n if (step === field || dLevel < step.dLevelMax) {\n break;\n }\n\n switch (step.repetitionType) {\n case 'REPEATED':\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n break;\n\n default:\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node - Add the value\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n columnData.values[vIndex],\n field\n );\n vIndex++;\n\n switch (field.repetitionType) {\n case 'REPEATED':\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n break;\n\n default:\n record[field.name] = value;\n }\n }\n }\n}\n\n// Columnar export\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The rowGroup argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * rowGroup = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. 
vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeColumns(\n schema: ParquetSchema,\n rowGroup: ParquetRowGroup\n): Record<string, ArrayType> {\n const columns: Record<string, ArrayType> = {};\n for (const key in rowGroup.columnData) {\n const columnData = rowGroup.columnData[key];\n if (columnData.count) {\n materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);\n }\n }\n return columns;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumnAsColumnarArray(\n schema: ParquetSchema,\n columnData: ParquetColumnChunk,\n rowCount: number,\n key: string,\n columns: Record<string, ArrayType<any>>\n) {\n if (columnData.count <= 0) {\n return;\n }\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n const columnName = branch[0].name;\n\n let column: ArrayType | undefined;\n const {values} = columnData;\n if (values.length === rowCount && branch[0].primitiveType) {\n // if (branch[0].repetitionType === `REQUIRED`) {\n // switch (branch[0].primitiveType) {\n // case 'INT32': return values instanceof Int32Array ? values : new Int32Array(values);\n // }\n // }\n column = values;\n }\n\n if (column) {\n columns[columnName] = column;\n return;\n }\n\n column = new Array(rowCount);\n for (let i = 0; i < rowCount; i++) {\n column[i] = {};\n }\n columns[columnName] = column;\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < columnData.count; i++) {\n const dLevel = columnData.dlevels[i];\n const rLevel = columnData.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = column[rLevels[rIndex++] - 1] as ParquetRow;\n\n // Internal nodes - Build a nested row object\n for (const step of branch) {\n if (step === field || dLevel < step.dLevelMax) {\n break;\n }\n\n switch (step.repetitionType) {\n case 'REPEATED':\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n break;\n\n default:\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node - Add the value\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n columnData.values[vIndex],\n field\n );\n vIndex++;\n\n switch (field.repetitionType) {\n case 'REPEATED':\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n break;\n\n default:\n record[field.name] = value;\n }\n }\n }\n\n // Remove one level of nesting\n for (let i = 0; i < rowCount; ++i) {\n if (columnName in (column[i] as object)) {\n column[i] = (column[i] as object)[columnName];\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;AAGA,IAAAA,QAAA,GAAAC,OAAA;AAEA,IAAAC,KAAA,GAAAC,uBAAA,CAAAF,OAAA;AAAiC,SAAAG,yBAAAC,WAAA,eAAAC,OAAA,kCAAAC,iBAAA,OAAAD,OAAA,QAAAE,gBAAA,OAAAF,OAAA,YAAAF,wBAAA,YAAAA,yBAAAC,WAAA,WAAAA,WAAA,GAAAG,gBAAA,GAAAD,iBAAA,KAAAF,WAAA;AAAA,SAAAF,wBAAAM,GAAA,EAAAJ,WAAA,SAAAA,WAAA,IAAAI,GAAA,IAAAA,GAAA,CAAAC,UAAA,WAAAD,GAAA,QAAAA,GAAA,aAAAE,OAAA,CAAAF,GAAA,yBAAAA,GAAA,4BAAAG,OAAA,EAAAH,GAAA,UAAAI,KAAA,GAAAT,wBAAA,CAAAC,WAAA,OAAAQ,KAAA,IAAAA,KAAA,CAAAC,GAAA,CAAAL,GAAA,YAAAI,KAAA,CAAAE,GAAA,CAAAN,GAAA,SAAAO,MAAA,WAAAC,qBAAA,GAAAC,MAAA,CAAAC,cAAA,IAAAD,MAAA,CAAAE,wBAAA,WAAAC,GAAA,IAAAZ,GAAA,QAAAY,GAAA,kBAAAH,MAAA,CAAAI,SAAA,CAAAC,cAAA,CAAAC,IAAA,CAAAf,GAAA,EAAAY,GAAA,SAAAI,IAAA,GAAAR,qBAAA,GAAAC,MAAA,CAAAE,wBAAA,CAAAX,GAAA,EAAAY,GAAA,cAAAI,IAAA,KAAAA,IAAA,CAAAV,GAAA,IAAAU,IAAA,CAAAC,GAAA,KAAAR,MAAA,CAAAC,cAAA,CAAAH,MAAA,EAAAK,GAAA,EAAAI,IAAA,YAAAT,MAAA,CAAAK,GAAA,IAAAZ,GAAA,CAAAY,GAAA,SAAAL,MAAA,CAAAJ,OAAA,GAAAH,GAAA,MAAAI,KAAA,IAAAA,KAAA,CAAAa,GAAA,CAAAjB,GAAA,EAAAO,MAAA,YAAAA,MAAA;AAAA,SAAAW,2BAAAC,CAAA,EAAAC,cAAA,QAAAC,EAAA,UAAAC,MAAA,oBAAAH,CAAA,CAAAG,MAAA,CAAAC,QAAA,KAAAJ,CAAA,qBAAAE,EAAA,QAAAG,KAAA,CAAAC,OAAA,CAAAN,CAAA,MAAAE,EAAA,GAAAK,2BAAA,CAAAP,CAAA,MAAAC,cAAA,IAAAD,CAAA,WAAAA,CAAA,CAAAQ,MAAA,qBAAAN,EAAA,EAAAF,CAAA,GAAAE,EAAA,MAAAO,CAAA,UAAAC,CAAA,YAAAA,EAAA,eAAAC,CAAA,EAAAD,CAAA,EAAAE,CAAA,WAAAA,EAAA,QAAAH,CAAA,IAAAT,CAAA,CAAAQ,MAAA,WAAAK,IAAA,mBAAAA,IAAA,SAAAC,KAAA,EAAAd,CAAA,CAAAS,CAAA,UAAAM,CAAA,WAAAA,EAAAC,EAAA,UAAAA,EAAA,KAAAC,CAAA,EAAAP,CAAA,gBAAAQ,SAAA,iJAAAC,gBAAA,SAAAC,MAAA,UAAAC,GAAA,WAAAV,CAAA,WAAAA,EAAA,IAAAT,EAAA,GAAAA,EAAA,CAAAN,IAAA,CAAAI,CAAA,MAAAY,CAAA,WAAAA,EAAA,QAAAU,IAAA,GAAApB,EAAA,CAAAqB,IAAA,IAAAJ,gBAAA,GAAAG,IAAA,CAAAT,IAAA,SAAAS,IAAA,KAAAP,CAAA,WAAAA,EAAAS,GAAA,IAAAJ,MAAA,SAAAC,GAAA,GAAAG,GAAA,KAAAP,CAAA,WAAAA,EAAA,eAAAE,gBAAA,IAAAjB,EAAA,CAAAuB,MAAA,UAAAvB,EAAA,CAAAuB,MAAA,oBAAAL,MAAA,QAAAC,GAAA;AAAA,SAAAd,4BAAAP,CAAA,EAAA0B,MAAA,SAAA1B,CAAA,qBAAAA,CAAA,sBAAA2B,iBAAA,CAAA3B,CAAA,EAAA0B,MAAA,OAAAd,CAAA,GAAAtB,MAAA,CAAAI,SAAA,CAAAkC,QAAA,CAAAhC,IAAA,CAAAI,CAAA,EAAA6B,KAAA,aAAAjB,CAAA,iBAAAZ,CAAA,CAAA8B,WAAA,EAAAlB,CAAA,GAAAZ,CAAA,CAAA8B,WAAA,CAAAC,IAAA,MAAAnB,CAAA,cAAAA,CAAA,mBAAAP,KAAA,CAAA2B,IAAA,CAAAhC,CAAA,OAAAY,CAAA,+DAAAqB,IAAA,CAAArB,CAAA,UAAAe,iBAAA,CAAA3B,CAAA,EAAA0B,MAAA;AAAA,SAAAC,kBAAAO,GAAA,EAAAC,GAAA,QAAAA,GAAA,YAAAA,GAAA,GAAAD,GAAA,CAAA1B,MAAA,EAAA2B,GAAA,GAAAD,GAAA,CAAA1B,MAAA,WAAAC,CAAA,MAAA2B,IAAA,OAAA/B,KAAA,CAAA8B,GAAA,GAAA1B,CAAA,GAAA0B,GAAA,EAAA1B,CAAA,IAAA2B,IAAA,CAAA3B,CAAA,IAAAyB,GAAA,CAAAzB,CAAA,UAAA2B,IAAA;AAI1B,SAASC,WAAWA,CAACC,MAAqB,EAAmB;EAClE,IAAMC,UAA8C,GAAG,CAAC,CAAC;EAAC,IAAAC,SAAA,GAAAzC,0BAAA,CACtCuC,MAAM,CAACG,SAAS;IAAAC,KAAA;EAAA;IAApC,KAAAF,SAAA,CAAA7B,CAAA,MAAA+B,KAAA,GAAAF,SAAA,CAAA5B,CAAA,IAAAC,IAAA,GAAsC;MAAA,IAA3B8B,KAAK,GAAAD,KAAA,CAAA5B,KAAA;MACdyB,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,GAAG;QACtBmD,OAAO,EAAE,EAAE;QACXC,OAAO,EAAE,EAAE;QACXC,MAAM,EAAE,EAAE;QACVC,WAAW,EAAE,EAAE;QACfC,KAAK,EAAE;MACT,CAAC;IACH;EAAC,SAAA3B,GAAA;IAAAmB,SAAA,CAAAzB,CAAA,CAAAM,GAAA;EAAA;IAAAmB,SAAA,CAAAvB,CAAA;EAAA;EACD,OAAO;IAACgC,QAAQ,EAAE,CAAC;IAAEV,UAAU,EAAVA;EAAU,CAAC;AAClC;AAwBO,SAASW,WAAWA,CACzBZ,MAAqB,EACrBa,MAAkB,EAClBC,QAAyB,EACnB;EAEN,IAAMC,IAAI,GAAGhB,WAAW,CAACC,MAAM,CAAC,CAACC,UAAU;EAE3Ce,iBAAiB,CAAChB,MAAM,CAACiB,MAAM,EAAEJ,MAAM,EAAEE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;EAGpD,IAAID,QAAQ,CAACH,QAAQ,KAAK,CAAC,EAAE;IAC3BG,QAAQ,CAACH,QAAQ,GAAG,CAAC;IACrBG,QAAQ,CAACb,UAAU,GAAGc,IAAI;IAC1B;EACF;EACAD,QAAQ,CAACH,QAAQ,IAAI,CAAC;EAAC,IAAAO,UAAA,GAAAzD,0BAAA,CACHuC,MAAM,CAACG,SAAS;IAAAgB,MAAA;EAAA;IAApC,KAAAD,UAAA,CAAA7C,
CAAA,MAAA8C,MAAA,GAAAD,UAAA,CAAA5C,CAAA,IAAAC,IAAA,GAAsC;MAAA,IAA3B8B,KAAK,GAAAc,MAAA,CAAA3C,KAAA;MACdT,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,QAAQ,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,EAAEQ,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAAC;MAC3FxC,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,QAAQ,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,EAAES,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAAC;MAC3FvC,KAAK,CAACX,SAAS,CAACgE,IAAI,CAACC,KAAK,CAACP,QAAQ,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,EAAEO,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,CAAC;MACzFM,QAAQ,CAACb,UAAU,CAACI,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAIK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK;IAC/D;EAAC,SAAA3B,GAAA;IAAAmC,UAAA,CAAAzC,CAAA,CAAAM,GAAA;EAAA;IAAAmC,UAAA,CAAAvC,CAAA;EAAA;AACH;AAGA,SAASqC,iBAAiBA,CACxBC,MAAoC,EACpCJ,MAAkB,EAClBE,IAAwC,EACxCO,MAAc,EACdC,MAAc,EACd;EACA,KAAK,IAAM9B,IAAI,IAAIwB,MAAM,EAAE;IACzB,IAAMZ,KAAK,GAAGY,MAAM,CAACxB,IAAI,CAAC;IAG1B,IAAIe,MAAa,GAAG,EAAE;IACtB,IACEK,MAAM,IACNR,KAAK,CAACZ,IAAI,IAAIoB,MAAM,IACpBA,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,KAAK+B,SAAS,IAChCX,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,KAAK,IAAI,EAC3B;MACA,IAAIoB,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACD,WAAW,KAAKzB,KAAK,EAAE;QAC5CyC,MAAM,GAAGK,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC;MAC7B,CAAC,MAAM;QACLe,MAAM,CAACY,IAAI,CAACP,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAAC;MACjC;IACF;IAEA,IAAIe,MAAM,CAACtC,MAAM,KAAK,CAAC,IAAIuD,OAAO,CAACZ,MAAM,CAAC,IAAIR,KAAK,CAACqB,cAAc,KAAK,UAAU,EAAE;MACjF,MAAM,IAAIC,KAAK,4BAAAC,MAAA,CAA4BvB,KAAK,CAACZ,IAAI,CAAE,CAAC;IAC1D;IACA,IAAIe,MAAM,CAACtC,MAAM,GAAG,CAAC,IAAImC,KAAK,CAACqB,cAAc,KAAK,UAAU,EAAE;MAC5D,MAAM,IAAIC,KAAK,+BAAAC,MAAA,CAA+BvB,KAAK,CAACZ,IAAI,CAAE,CAAC;IAC7D;IAGA,IAAIe,MAAM,CAACtC,MAAM,KAAK,CAAC,EAAE;MACvB,IAAImC,KAAK,CAACwB,QAAQ,EAAE;QAClBb,iBAAiB,CAACX,KAAK,CAACY,MAAM,EAAG,IAAI,EAAGF,IAAI,EAAEO,MAAM,EAAEC,MAAM,CAAC;MAC/D,CAAC,MAAM;QACLR,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAI,CAAC;QAC1BK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAACa,IAAI,CAACE,MAAM,CAAC;QACpCP,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAACc,IAAI,CAACG,MAAM,CAAC;MACtC;MACA;IACF;IAGA,KAAK,IAAIpD,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGqC,MAAM,CAACtC,MAAM,EAAEC,CAAC,EAAE,EAAE;MACtC,IAAM2D,IAAI,GAAG3D,CAAC,KAAK,CAAC,GAAGmD,MAAM,GAAGjB,KAAK,CAAC0B,SAAS;MAC/C,IAAI1B,KAAK,CAACwB,QAAQ,EAAE;QAClBb,iBAAiB,CAACX,KAAK,CAACY,MAAM,EAAGT,MAAM,CAACrC,CAAC,CAAC,EAAE4C,IAAI,EAAEe,IAAI,EAAEzB,KAAK,CAAC2B,SAAS,CAAC;MAC1E,CAAC,MAAM;QACLjB,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACuD,KAAK,IAAI,CAAC;QAC1BK,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACoD,OAAO,CAACa,IAAI,CAACU,IAAI,CAAC;QAClCf,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACmD,OAAO,CAACc,IAAI,CAACf,KAAK,CAAC2B,SAAS,CAAC;QAC7CjB,IAAI,CAACV,KAAK,CAAClD,GAAG,CAAC,CAACqD,MAAM,CAACY,IAAI,CACzBpF,KAAK,CAACiG,WAAW,CAAE5B,KAAK,CAAC6B,YAAY,IAAI7B,KAAK,CAAC8B,aAAa,EAAI3B,MAAM,CAACrC,CAAC,CAAC,CAC3E,CAAC;MACH;IACF;EACF;AACF;AAqBO,SAASiE,eAAeA,CAACpC,MAAqB,EAAEc,QAAyB,EAAgB;EAC9F,IAAMuB,IAAkB,GAAG,EAAE;EAE7B,KAAK,IAAIlE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG2C,QAAQ,CAACH,QAAQ,EAAExC,CAAC,EAAE,EAAE;IAC1CkE,IAAI,CAACjB,IAAI,CAAC,CAAC,CAAC,CAAC;EACf;EACA,KAAK,IAAMjE,GAAG,IAAI2D,QAAQ,CAACb,UAAU,EAAE;IACrC,IAAMA,UAAU,GAAGa,QAAQ,CAACb,UAAU,CAAC9C,GAAG,CAAC;IAC3C,IAAI8C,UAAU,CAACS,KAAK,EAAE;MACpB4B,uBAAuB,CAACtC,MAAM,EAAEC,UAAU,EAAE9C,GAAG,EAAEkF,IAAI,CAAC;IACxD;EACF;EACA,OAAOA,IAAI;AACb;AAIA,SAASC,uBAAuBA,CAC9BtC,MAAqB,EACrBC,UAA8B,EAC9B9C,GAAW,EACXkF,IAAkB,EACZ;EACN,IAAMhC,KAAK,GAAGL,MAAM,CAACuC,SAAS,CAACpF,GAAG,CAAC;EACnC,IAAMqF,MAAM,GAAGxC,MAAM,CAACyC,eAAe,CAACtF,GAAG,CAAC;EAG1C,IAAMuF,OAAiB,GAAG,IAAI3E,
KAAK,CAACsC,KAAK,CAAC0B,SAAS,GAAG,CAAC,CAAC,CAACY,IAAI,CAAC,CAAC,CAAC;EAChE,IAAIC,MAAM,GAAG,CAAC;EACd,KAAK,IAAIzE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG8B,UAAU,CAACS,KAAK,EAAEvC,CAAC,EAAE,EAAE;IACzC,IAAMoD,MAAM,GAAGtB,UAAU,CAACK,OAAO,CAACnC,CAAC,CAAC;IACpC,IAAMmD,MAAM,GAAGrB,UAAU,CAACM,OAAO,CAACpC,CAAC,CAAC;IACpCuE,OAAO,CAACpB,MAAM,CAAC,EAAE;IACjBoB,OAAO,CAACC,IAAI,CAAC,CAAC,EAAErB,MAAM,GAAG,CAAC,CAAC;IAE3B,IAAIuB,MAAM,GAAG,CAAC;IACd,IAAIhC,MAAM,GAAGwB,IAAI,CAACK,OAAO,CAACG,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC;IAAC,IAAAC,UAAA,GAAArF,0BAAA,CAGtB+E,MAAM;MAAAO,MAAA;IAAA;MAAzB,KAAAD,UAAA,CAAAzE,CAAA,MAAA0E,MAAA,GAAAD,UAAA,CAAAxE,CAAA,IAAAC,IAAA,GAA2B;QAAA,IAAhBS,IAAI,GAAA+D,MAAA,CAAAvE,KAAA;QACb,IAAIQ,IAAI,KAAKqB,KAAK,IAAIkB,MAAM,GAAGvC,IAAI,CAACgD,SAAS,EAAE;UAC7C;QACF;QAEA,QAAQhD,IAAI,CAAC0C,cAAc;UACzB,KAAK,UAAU;YACb,IAAI,EAAE1C,IAAI,CAACS,IAAI,IAAIoB,MAAM,CAAC,EAAE;cAE1BA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAG,EAAE;YACxB;YACA,IAAMuD,GAAE,GAAGN,OAAO,CAACG,MAAM,EAAE,CAAC;YAC5B,OAAOhC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACvB,MAAM,IAAI8E,GAAE,EAAE;cAErCnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAAC2B,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B;YACAP,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACuD,GAAE,CAAC;YAC9B;UAEF;YACEnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAGoB,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3CoB,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC;QAC9B;MACF;IAAC,SAAAV,GAAA;MAAA+D,UAAA,CAAArE,CAAA,CAAAM,GAAA;IAAA;MAAA+D,UAAA,CAAAnE,CAAA;IAAA;IAGD,IAAI4C,MAAM,KAAKlB,KAAK,CAAC2B,SAAS,EAAE;MAC9B,IAAMxD,KAAK,GAAGxC,KAAK,CAACiH,aAAa,CAE/B5C,KAAK,CAAC6B,YAAY,IAAI7B,KAAK,CAAC8B,aAAa,EACzClC,UAAU,CAACO,MAAM,CAACoC,MAAM,CAAC,EACzBvC,KACF,CAAC;MACDuC,MAAM,EAAE;MAER,QAAQvC,KAAK,CAACqB,cAAc;QAC1B,KAAK,UAAU;UACb,IAAI,EAAErB,KAAK,CAACZ,IAAI,IAAIoB,MAAM,CAAC,EAAE;YAE3BA,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAG,EAAE;UACzB;UACA,IAAMuD,EAAE,GAAGN,OAAO,CAACG,MAAM,CAAC;UAC1B,OAAOhC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACvB,MAAM,IAAI8E,EAAE,EAAE;YAEtCnC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAAC2B,IAAI,CAAC,IAAI,CAAC;UAC/B;UACAP,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACuD,EAAE,CAAC,GAAGxE,KAAK;UAC9B;QAEF;UACEqC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAGjB,KAAK;MAC9B;IACF;EACF;AACF;AAuBO,SAAS0E,kBAAkBA,CAChClD,MAAqB,EACrBc,QAAyB,EACE;EAC3B,IAAMqC,OAAkC,GAAG,CAAC,CAAC;EAC7C,KAAK,IAAMhG,GAAG,IAAI2D,QAAQ,CAACb,UAAU,EAAE;IACrC,IAAMA,UAAU,GAAGa,QAAQ,CAACb,UAAU,CAAC9C,GAAG,CAAC;IAC3C,IAAI8C,UAAU,CAACS,KAAK,EAAE;MACpB0C,gCAAgC,CAACpD,MAAM,EAAEC,UAAU,EAAEa,QAAQ,CAACH,QAAQ,EAAExD,GAAG,EAAEgG,OAAO,CAAC;IACvF;EACF;EACA,OAAOA,OAAO;AAChB;AAGA,SAASC,gCAAgCA,CACvCpD,MAAqB,EACrBC,UAA8B,EAC9BU,QAAgB,EAChBxD,GAAW,EACXgG,OAAuC,EACvC;EACA,IAAIlD,UAAU,CAACS,KAAK,IAAI,CAAC,EAAE;IACzB;EACF;EAEA,IAAML,KAAK,GAAGL,MAAM,CAACuC,SAAS,CAACpF,GAAG,CAAC;EACnC,IAAMqF,MAAM,GAAGxC,MAAM,CAACyC,eAAe,CAACtF,GAAG,CAAC;EAE1C,IAAMkG,UAAU,GAAGb,MAAM,CAAC,CAAC,CAAC,CAAC/C,IAAI;EAEjC,IAAI6D,MAA6B;EACjC,IAAO9C,MAAM,GAAIP,UAAU,CAApBO,MAAM;EACb,IAAIA,MAAM,CAACtC,MAAM,KAAKyC,QAAQ,IAAI6B,MAAM,CAAC,CAAC,CAAC,CAACL,aAAa,EAAE;IAMzDmB,MAAM,GAAG9C,MAAM;EACjB;EAEA,IAAI8C,MAAM,EAAE;IACVH,OAAO,CAACE,UAAU,CAAC,GAAGC,MAAM;IAC5B;EACF;EAEAA,MAAM,GAAG,IAAIvF,KAAK,CAAC4C,QAAQ,CAAC;EAC5B,KAAK,IAAIxC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGwC,QAAQ,EAAExC,CAAC,EAAE,EAAE;IACjCmF,MAAM,CAACnF,CAAC,CAAC,GAAG,CAAC,CAAC;EAChB;EACAgF,OAAO,CAACE,UAAU,CAAC,GAAGC,MAAM;EAG5B,IAAMZ,OAAiB,GAAG,IAAI3E,KAAK,CAACsC,KAAK,CAAC0B,SAAS,GAAG,CAAC,CAAC,CAACY,IAAI,CAAC,CAAC,CAAC;EAChE,IAAIC,MAAM,GAAG,CAAC;EACd,KAAK,IAAIzE,EAAC,GAAG,CAAC,EAAEA,EAAC,GAAG8B,UAAU,CAACS,KAAK,EAAEvC,EAAC,EAAE,EAAE;IACzC,IAAMoD,MAAM,GAAGtB,UAAU,CAACK,OAAO,CAACnC,EAAC,CAAC;IACpC,IA
AMmD,MAAM,GAAGrB,UAAU,CAACM,OAAO,CAACpC,EAAC,CAAC;IACpCuE,OAAO,CAACpB,MAAM,CAAC,EAAE;IACjBoB,OAAO,CAACC,IAAI,CAAC,CAAC,EAAErB,MAAM,GAAG,CAAC,CAAC;IAE3B,IAAIuB,MAAM,GAAG,CAAC;IACd,IAAIhC,MAAM,GAAGyC,MAAM,CAACZ,OAAO,CAACG,MAAM,EAAE,CAAC,GAAG,CAAC,CAAe;IAAC,IAAAU,UAAA,GAAA9F,0BAAA,CAGtC+E,MAAM;MAAAgB,MAAA;IAAA;MAAzB,KAAAD,UAAA,CAAAlF,CAAA,MAAAmF,MAAA,GAAAD,UAAA,CAAAjF,CAAA,IAAAC,IAAA,GAA2B;QAAA,IAAhBS,IAAI,GAAAwE,MAAA,CAAAhF,KAAA;QACb,IAAIQ,IAAI,KAAKqB,KAAK,IAAIkB,MAAM,GAAGvC,IAAI,CAACgD,SAAS,EAAE;UAC7C;QACF;QAEA,QAAQhD,IAAI,CAAC0C,cAAc;UACzB,KAAK,UAAU;YACb,IAAI,EAAE1C,IAAI,CAACS,IAAI,IAAIoB,MAAM,CAAC,EAAE;cAE1BA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAG,EAAE;YACxB;YACA,IAAMuD,IAAE,GAAGN,OAAO,CAACG,MAAM,EAAE,CAAC;YAC5B,OAAOhC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACvB,MAAM,IAAI8E,IAAE,EAAE;cAErCnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAAC2B,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B;YACAP,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,CAACuD,IAAE,CAAC;YAC9B;UAEF;YACEnC,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,GAAGoB,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3CoB,MAAM,GAAGA,MAAM,CAAC7B,IAAI,CAACS,IAAI,CAAC;QAC9B;MACF;IAAC,SAAAV,GAAA;MAAAwE,UAAA,CAAA9E,CAAA,CAAAM,GAAA;IAAA;MAAAwE,UAAA,CAAA5E,CAAA;IAAA;IAGD,IAAI4C,MAAM,KAAKlB,KAAK,CAAC2B,SAAS,EAAE;MAC9B,IAAMxD,KAAK,GAAGxC,KAAK,CAACiH,aAAa,CAE/B5C,KAAK,CAAC6B,YAAY,IAAI7B,KAAK,CAAC8B,aAAa,EACzClC,UAAU,CAACO,MAAM,CAACoC,MAAM,CAAC,EACzBvC,KACF,CAAC;MACDuC,MAAM,EAAE;MAER,QAAQvC,KAAK,CAACqB,cAAc;QAC1B,KAAK,UAAU;UACb,IAAI,EAAErB,KAAK,CAACZ,IAAI,IAAIoB,MAAM,CAAC,EAAE;YAE3BA,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAG,EAAE;UACzB;UACA,IAAMuD,EAAE,GAAGN,OAAO,CAACG,MAAM,CAAC;UAC1B,OAAOhC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACvB,MAAM,IAAI8E,EAAE,EAAE;YAEtCnC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAAC2B,IAAI,CAAC,IAAI,CAAC;UAC/B;UACAP,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,CAACuD,EAAE,CAAC,GAAGxE,KAAK;UAC9B;QAEF;UACEqC,MAAM,CAACR,KAAK,CAACZ,IAAI,CAAC,GAAGjB,KAAK;MAC9B;IACF;EACF;EAGA,KAAK,IAAIL,GAAC,GAAG,CAAC,EAAEA,GAAC,GAAGwC,QAAQ,EAAE,EAAExC,GAAC,EAAE;IACjC,IAAIkF,UAAU,IAAKC,MAAM,CAACnF,GAAC,CAAY,EAAE;MACvCmF,MAAM,CAACnF,GAAC,CAAC,GAAImF,MAAM,CAACnF,GAAC,CAAC,CAAYkF,UAAU,CAAC;IAC/C;EACF;AACF"}
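The reworked shred API captured in the sourcemap above (shredBuffer/shredRecord filling a ParquetRowGroup, materializeRows/materializeColumns reading it back) can be exercised roughly as below. This is a minimal TypeScript sketch, assuming parquetjs-style field definitions; the deep import path for the internal shred helpers is illustrative only.

import {ParquetSchema} from '@loaders.gl/parquet';
// Internal helpers from the bundled parquetjs fork; this deep import path is an assumption.
import {shredBuffer, shredRecord, materializeRows} from '@loaders.gl/parquet/parquetjs/schema/shred';

// Two-column schema using parquetjs-style field definitions (assumed shape).
const schema = new ParquetSchema({
  name: {type: 'UTF8'},
  quantity: {type: 'INT64', optional: true}
});

// Shred two records into one row group, then materialize them back into row objects.
const rowGroup = shredBuffer(schema);
shredRecord(schema, {name: 'apples', quantity: 10}, rowGroup);
shredRecord(schema, {name: 'oranges'}, rowGroup);
const rows = materializeRows(schema, rowGroup); // two rows, in insertion order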
package/dist/esm/index.js
CHANGED
@@ -1,8 +1,8 @@
-import {
-import { ParquetLoader as ParquetWorkerLoader } from './parquet-loader';
+import { ParquetLoader as ParquetWorkerLoader, ParquetLoader as ParquetColumnarWorkerLoader } from './parquet-loader';
 import { parseParquet, parseParquetFileInBatches } from './lib/parsers/parse-parquet-to-rows';
 import { parseParquetInColumns, parseParquetFileInColumnarBatches } from './lib/parsers/parse-parquet-to-columns';
-import {
+import { parseParquetWasm } from './lib/wasm/parse-parquet-wasm';
+import { ParquetWasmLoader as ParquetWasmWorkerLoader } from './parquet-wasm-loader';
 export { ParquetWorkerLoader, ParquetWasmWorkerLoader };
 export const ParquetLoader = {
   ...ParquetWorkerLoader,
@@ -10,7 +10,7 @@ export const ParquetLoader = {
   parseFileInBatches: parseParquetFileInBatches
 };
 export const ParquetColumnarLoader = {
-  ...
+  ...ParquetColumnarWorkerLoader,
   parse: parseParquetInColumns,
   parseFileInBatches: parseParquetFileInColumnarBatches
 };
@@ -24,7 +24,7 @@ export { preloadCompressions } from './parquetjs/compression';
 export { ParquetSchema } from './parquetjs/schema/schema';
 export { ParquetReader } from './parquetjs/parser/parquet-reader';
 export { ParquetEncoder } from './parquetjs/encoder/parquet-encoder';
-export {
+export { convertParquetSchema, convertParquetSchema as convertParquetToArrowSchema } from './lib/arrow/convert-schema-from-parquet';
 export const _typecheckParquetLoader = ParquetLoader;
 export { default as geoJSONSchema } from './lib/geo/geoparquet-schema';
 export { getGeoMetadata, setGeoMetadata, unpackGeoMetadata } from './lib/geo/decode-geo-metadata';
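The rebuilt index.js above wires the new ParquetColumnarLoader to parseParquetInColumns / parseParquetFileInColumnarBatches alongside the row-based ParquetLoader. A hedged usage sketch with the standard @loaders.gl/core load() helper follows; the file name is a placeholder.

import {load} from '@loaders.gl/core';
import {ParquetLoader, ParquetColumnarLoader} from '@loaders.gl/parquet';

// Row-oriented result: one plain object per row.
const rowTable = await load('example.parquet', ParquetLoader);

// Column-oriented result: one array of values per column.
const columnarTable = await load('example.parquet', ParquetColumnarLoader);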
package/dist/esm/index.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"index.js","names":["
+
{"version":3,"file":"index.js","names":["ParquetLoader","ParquetWorkerLoader","ParquetColumnarWorkerLoader","parseParquet","parseParquetFileInBatches","parseParquetInColumns","parseParquetFileInColumnarBatches","parseParquetWasm","ParquetWasmLoader","ParquetWasmWorkerLoader","parse","parseFileInBatches","ParquetColumnarLoader","ParquetWriter","_ParquetWriter","ParquetWasmWriter","preloadCompressions","ParquetSchema","ParquetReader","ParquetEncoder","convertParquetSchema","convertParquetToArrowSchema","_typecheckParquetLoader","default","geoJSONSchema","getGeoMetadata","setGeoMetadata","unpackGeoMetadata"],"sources":["../../src/index.ts"],"sourcesContent":["// loaders.gl, MIT license\n\nimport type {LoaderWithParser} from '@loaders.gl/loader-utils';\nimport type {\n ObjectRowTable,\n ObjectRowTableBatch,\n ColumnarTable,\n ColumnarTableBatch\n} from '@loaders.gl/schema';\nimport type {Table as ArrowTable} from 'apache-arrow';\n\n// ParquetLoader\n\nimport {\n ParquetLoader as ParquetWorkerLoader,\n ParquetLoader as ParquetColumnarWorkerLoader,\n ParquetLoaderOptions\n} from './parquet-loader';\nimport {parseParquet, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';\nimport {\n parseParquetInColumns,\n parseParquetFileInColumnarBatches\n} from './lib/parsers/parse-parquet-to-columns';\n\nimport {parseParquetWasm, ParquetWasmLoaderOptions} from './lib/wasm/parse-parquet-wasm';\nimport {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';\n\nexport {ParquetWorkerLoader, ParquetWasmWorkerLoader};\n\n/** ParquetJS table loader */\nexport const ParquetLoader: LoaderWithParser<\n ObjectRowTable,\n ObjectRowTableBatch,\n ParquetLoaderOptions\n> = {\n ...ParquetWorkerLoader,\n parse: parseParquet,\n parseFileInBatches: parseParquetFileInBatches\n};\n\n/** ParquetJS table loader */\n// @ts-expect-error\nexport const ParquetColumnarLoader: LoaderWithParser<\n ColumnarTable,\n ColumnarTableBatch,\n ParquetLoaderOptions\n> = {\n ...ParquetColumnarWorkerLoader,\n parse: parseParquetInColumns,\n parseFileInBatches: parseParquetFileInColumnarBatches\n};\n\nexport const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {\n ...ParquetWasmWorkerLoader,\n parse: parseParquetWasm\n};\n\n// ParquetWriter\n\nexport {ParquetWriter as _ParquetWriter} from './parquet-writer';\nexport {ParquetWasmWriter} from './parquet-wasm-writer';\n\n// EXPERIMENTAL - expose the internal parquetjs API\n\nexport {preloadCompressions} from './parquetjs/compression';\n\nexport {ParquetSchema} from './parquetjs/schema/schema';\nexport {ParquetReader} from './parquetjs/parser/parquet-reader';\nexport {ParquetEncoder} from './parquetjs/encoder/parquet-encoder';\n\nexport {\n convertParquetSchema,\n convertParquetSchema as convertParquetToArrowSchema\n} from './lib/arrow/convert-schema-from-parquet';\n\n// TESTS\nexport const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;\n\n// Geo Metadata\nexport {default as geoJSONSchema} from './lib/geo/geoparquet-schema';\n\nexport type {GeoMetadata} from './lib/geo/decode-geo-metadata';\nexport {getGeoMetadata, setGeoMetadata, unpackGeoMetadata} from 
'./lib/geo/decode-geo-metadata';\n"],"mappings":"AAaA,SACEA,aAAa,IAAIC,mBAAmB,EACpCD,aAAa,IAAIE,2BAA2B,QAEvC,kBAAkB;AACzB,SAAQC,YAAY,EAAEC,yBAAyB,QAAO,qCAAqC;AAC3F,SACEC,qBAAqB,EACrBC,iCAAiC,QAC5B,wCAAwC;AAE/C,SAAQC,gBAAgB,QAAiC,+BAA+B;AACxF,SAAQC,iBAAiB,IAAIC,uBAAuB,QAAO,uBAAuB;AAElF,SAAQR,mBAAmB,EAAEQ,uBAAuB;AAGpD,OAAO,MAAMT,aAIZ,GAAG;EACF,GAAGC,mBAAmB;EACtBS,KAAK,EAAEP,YAAY;EACnBQ,kBAAkB,EAAEP;AACtB,CAAC;AAID,OAAO,MAAMQ,qBAIZ,GAAG;EACF,GAAGV,2BAA2B;EAC9BQ,KAAK,EAAEL,qBAAqB;EAC5BM,kBAAkB,EAAEL;AACtB,CAAC;AAED,OAAO,MAAME,iBAAgF,GAAG;EAC9F,GAAGC,uBAAuB;EAC1BC,KAAK,EAAEH;AACT,CAAC;AAID,SAAQM,aAAa,IAAIC,cAAc,QAAO,kBAAkB;AAChE,SAAQC,iBAAiB,QAAO,uBAAuB;AAIvD,SAAQC,mBAAmB,QAAO,yBAAyB;AAE3D,SAAQC,aAAa,QAAO,2BAA2B;AACvD,SAAQC,aAAa,QAAO,mCAAmC;AAC/D,SAAQC,cAAc,QAAO,qCAAqC;AAElE,SACEC,oBAAoB,EACpBA,oBAAoB,IAAIC,2BAA2B,QAC9C,yCAAyC;AAGhD,OAAO,MAAMC,uBAAyC,GAAGtB,aAAa;AAGtE,SAAQuB,OAAO,IAAIC,aAAa,QAAO,6BAA6B;AAGpE,SAAQC,cAAc,EAAEC,cAAc,EAAEC,iBAAiB,QAAO,+BAA+B"}
package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"convert-row-group-to-columns.js","names":["convertParquetRowGroupToColumns","schema","rowGroup","columns","columnName","data","Object","entries","columnData","values"],"sources":["../../../../src/lib/arrow/convert-row-group-to-columns.ts"],"sourcesContent":["// loaders.gl, MIT license\n\nimport {Schema} from '@loaders.gl/schema';\nimport {
+
{"version":3,"file":"convert-row-group-to-columns.js","names":["convertParquetRowGroupToColumns","schema","rowGroup","columns","columnName","data","Object","entries","columnData","values"],"sources":["../../../../src/lib/arrow/convert-row-group-to-columns.ts"],"sourcesContent":["// loaders.gl, MIT license\n\nimport {Schema} from '@loaders.gl/schema';\nimport {ParquetRowGroup} from '@loaders.gl/parquet/parquetjs/schema/declare';\n\nexport function convertParquetRowGroupToColumns(\n schema: Schema,\n rowGroup: ParquetRowGroup\n): Record<string, any[]> {\n const columns: Record<string, any[]> = {};\n for (const [columnName, data] of Object.entries(rowGroup.columnData)) {\n columns[columnName] = columns[columnName] || data.values;\n }\n return columns;\n}\n"],"mappings":"AAKA,OAAO,SAASA,+BAA+BA,CAC7CC,MAAc,EACdC,QAAyB,EACF;EACvB,MAAMC,OAA8B,GAAG,CAAC,CAAC;EACzC,KAAK,MAAM,CAACC,UAAU,EAAEC,IAAI,CAAC,IAAIC,MAAM,CAACC,OAAO,CAACL,QAAQ,CAACM,UAAU,CAAC,EAAE;IACpEL,OAAO,CAACC,UAAU,CAAC,GAAGD,OAAO,CAACC,UAAU,CAAC,IAAIC,IAAI,CAACI,MAAM;EAC1D;EACA,OAAON,OAAO;AAChB"}
package/dist/esm/lib/arrow/convert-schema-from-parquet.js
CHANGED
@@ -1,77 +1,93 @@
-import { Schema, Struct, Field, Bool, Float64, Int32, Float32, Binary, Utf8, Int64, Uint16, Uint32, Uint64, Int8, Int16 } from '@loaders.gl/schema';
 export const PARQUET_TYPE_MAPPING = {
-  BOOLEAN:
-  INT32:
-  INT64:
-  INT96:
-  FLOAT:
-  DOUBLE:
-  BYTE_ARRAY:
-  FIXED_LEN_BYTE_ARRAY:
-  UTF8:
-  DATE:
-  TIME_MILLIS:
-  TIME_MICROS:
-  TIMESTAMP_MILLIS:
-  TIMESTAMP_MICROS:
-  UINT_8:
-  UINT_16:
-  UINT_32:
-  UINT_64:
-  INT_8:
-  INT_16:
-  INT_32:
-  INT_64:
-  JSON:
-  BSON:
-  INTERVAL:
-  DECIMAL_INT32:
-  DECIMAL_INT64:
-  DECIMAL_BYTE_ARRAY:
-  DECIMAL_FIXED_LEN_BYTE_ARRAY:
+  BOOLEAN: 'bool',
+  INT32: 'int32',
+  INT64: 'float64',
+  INT96: 'float64',
+  FLOAT: 'float32',
+  DOUBLE: 'float64',
+  BYTE_ARRAY: 'binary',
+  FIXED_LEN_BYTE_ARRAY: 'binary',
+  UTF8: 'utf8',
+  DATE: 'int32',
+  TIME_MILLIS: 'int64',
+  TIME_MICROS: 'int64',
+  TIMESTAMP_MILLIS: 'int64',
+  TIMESTAMP_MICROS: 'int64',
+  UINT_8: 'int32',
+  UINT_16: 'uint16',
+  UINT_32: 'uint32',
+  UINT_64: 'uint64',
+  INT_8: 'int8',
+  INT_16: 'int16',
+  INT_32: 'int32',
+  INT_64: 'int64',
+  JSON: 'binary',
+  BSON: 'binary',
+  INTERVAL: 'binary',
+  DECIMAL_INT32: 'float32',
+  DECIMAL_INT64: 'float64',
+  DECIMAL_BYTE_ARRAY: 'float64',
+  DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
 };
-export function
+export function convertParquetSchema(parquetSchema, parquetMetadata) {
   const fields = getFields(parquetSchema.schema);
   const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
-
+  const schema = {
+    fields,
+    metadata: metadata || {}
+  };
+  return schema;
 }
 function getFields(schema) {
   const fields = [];
   for (const name in schema) {
     const field = schema[name];
     if (field.fields) {
-      const
-
-
+      const children = getFields(field.fields);
+      fields.push({
+        name,
+        type: {
+          type: 'struct',
+          children
+        },
+        nullable: field.optional
+      });
     } else {
-      const
+      const type = PARQUET_TYPE_MAPPING[field.type];
       const metadata = getFieldMetadata(field);
-      const arrowField =
+      const arrowField = {
+        name,
+        type,
+        nullable: field.optional,
+        metadata
+      };
       fields.push(arrowField);
     }
   }
   return fields;
 }
 function getFieldMetadata(field) {
-
+  let metadata;
   for (const key in field) {
     if (key !== 'name') {
       let value = field[key] || '';
       value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
-      metadata
+      metadata = metadata || {};
+      metadata[key] = value;
     }
   }
   return metadata;
 }
 function getSchemaMetadata(parquetMetadata) {
-
+  let metadata;
   const keyValueList = parquetMetadata.key_value_metadata || [];
   for (const {
     key,
     value
   } of keyValueList) {
     if (typeof value === 'string') {
-      metadata
+      metadata = metadata || {};
+      metadata[key] = value;
     }
   }
   return metadata;
|