@loaders.gl/parquet 3.4.6 → 4.0.0-alpha.10
This diff compares the published contents of two package versions as they appear in their public registries. It is provided for informational purposes only.
- package/dist/dist.min.js +27 -34
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +6 -6
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +58 -42
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +11 -7
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +51 -29
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +6 -6
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +16 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +4 -4
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +7 -7
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +117 -22
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/index.js +5 -5
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js +57 -41
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/esm/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +12 -8
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +14 -3
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/esm/parquet-loader.js +14 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +2 -2
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -1
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +6 -6
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +108 -21
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/index.d.ts +8 -49
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -6
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +2 -2
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +4 -4
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.js +48 -44
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.js +30 -31
- package/dist/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +2 -2
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +13 -7
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +3 -2
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js +16 -19
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +3 -3
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/parquet-loader.d.ts +3 -14
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +14 -2
- package/dist/parquet-worker.js +31 -38
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.d.ts +2 -1
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.d.ts +4 -4
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.d.ts +2 -2
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.d.ts +6 -6
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/parquetjs/schema/declare.d.ts +6 -5
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +3 -3
- package/dist/parquetjs/schema/schema.d.ts +4 -4
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +5 -5
- package/dist/parquetjs/schema/shred.d.ts +17 -111
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +127 -119
- package/package.json +8 -8
- package/src/index.ts +32 -9
- package/src/lib/arrow/convert-row-group-to-columns.ts +2 -2
- package/src/lib/arrow/convert-schema-from-parquet.ts +56 -66
- package/src/lib/arrow/convert-schema-to-parquet.ts +32 -44
- package/src/lib/geo/decode-geo-metadata.ts +17 -8
- package/src/lib/parsers/parse-parquet-to-columns.ts +22 -11
- package/src/lib/parsers/parse-parquet-to-rows.ts +28 -23
- package/src/lib/wasm/parse-parquet-wasm.ts +7 -7
- package/src/parquet-loader.ts +25 -2
- package/src/parquet-writer.ts +4 -1
- package/src/parquetjs/encoder/parquet-encoder.ts +11 -10
- package/src/parquetjs/parser/decoders.ts +3 -3
- package/src/parquetjs/parser/parquet-reader.ts +7 -7
- package/src/parquetjs/schema/declare.ts +6 -5
- package/src/parquetjs/schema/schema.ts +8 -8
- package/src/parquetjs/schema/shred.ts +142 -103

package/dist/esm/parquetjs/schema/shred.js
CHANGED

@@ -1,6 +1,6 @@
-import { ParquetBuffer } from './declare';
+import { ParquetRowGroup } from './declare';
 import * as Types from './types';
-export { ParquetBuffer };
+export { ParquetRowGroup };
 export function shredBuffer(schema) {
   const columnData = {};
   for (const field of schema.fieldList) {
@@ -17,20 +17,20 @@ export function shredBuffer(schema) {
     columnData
   };
 }
-export function shredRecord(schema, record, buffer) {
+export function shredRecord(schema, record, rowGroup) {
   const data = shredBuffer(schema).columnData;
   shredRecordFields(schema.fields, record, data, 0, 0);
-  if (buffer.rowCount === 0) {
-    buffer.rowCount = 1;
-    buffer.columnData = data;
+  if (rowGroup.rowCount === 0) {
+    rowGroup.rowCount = 1;
+    rowGroup.columnData = data;
     return;
   }
-  buffer.rowCount += 1;
+  rowGroup.rowCount += 1;
   for (const field of schema.fieldList) {
-    Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
-    Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
-    Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
-    buffer.columnData[field.key].count += data[field.key].count;
+    Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);
+    Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);
+    Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);
+    rowGroup.columnData[field.key].count += data[field.key].count;
   }
 }
 function shredRecordFields(fields, record, data, rLevel, dLevel) {
@@ -73,20 +73,20 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
     }
   }
 }
-export function materializeRecords(schema, buffer) {
-  const records = [];
-  for (let i = 0; i < buffer.rowCount; i++) {
-    records.push({});
+export function materializeRows(schema, rowGroup) {
+  const rows = [];
+  for (let i = 0; i < rowGroup.rowCount; i++) {
+    rows.push({});
   }
-  for (const key in buffer.columnData) {
-    const columnData = buffer.columnData[key];
+  for (const key in rowGroup.columnData) {
+    const columnData = rowGroup.columnData[key];
     if (columnData.count) {
-      materializeColumn(schema, columnData, key, records);
+      materializeColumnAsRows(schema, columnData, key, rows);
     }
   }
-  return records;
+  return rows;
 }
-function materializeColumn(schema, columnData, key, records) {
+function materializeColumnAsRows(schema, columnData, key, rows) {
   const field = schema.findField(key);
   const branch = schema.findFieldBranch(key);
   const rLevels = new Array(field.rLevelMax + 1).fill(0);
@@ -97,7 +97,7 @@ function materializeColumn(schema, columnData, key, records) {
     rLevels[rLevel]++;
     rLevels.fill(0, rLevel + 1);
     let rIndex = 0;
-    let record = records[rLevels[rIndex++] - 1];
+    let record = rows[rLevels[rIndex++] - 1];
     for (const step of branch) {
       if (step === field || dLevel < step.dLevelMax) {
         break;
@@ -138,4 +138,91 @@ function materializeColumn(schema, columnData, key, records) {
     }
   }
 }
+export function materializeColumns(schema, rowGroup) {
+  const columns = {};
+  for (const key in rowGroup.columnData) {
+    const columnData = rowGroup.columnData[key];
+    if (columnData.count) {
+      materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);
+    }
+  }
+  return columns;
+}
+function materializeColumnAsColumnarArray(schema, columnData, rowCount, key, columns) {
+  if (columnData.count <= 0) {
+    return;
+  }
+  const field = schema.findField(key);
+  const branch = schema.findFieldBranch(key);
+  const columnName = branch[0].name;
+  let column;
+  const {
+    values
+  } = columnData;
+  if (values.length === rowCount && branch[0].primitiveType) {
+    column = values;
+  }
+  if (column) {
+    columns[columnName] = column;
+    return;
+  }
+  column = new Array(rowCount);
+  for (let i = 0; i < rowCount; i++) {
+    column[i] = {};
+  }
+  columns[columnName] = column;
+  const rLevels = new Array(field.rLevelMax + 1).fill(0);
+  let vIndex = 0;
+  for (let i = 0; i < columnData.count; i++) {
+    const dLevel = columnData.dlevels[i];
+    const rLevel = columnData.rlevels[i];
+    rLevels[rLevel]++;
+    rLevels.fill(0, rLevel + 1);
+    let rIndex = 0;
+    let record = column[rLevels[rIndex++] - 1];
+    for (const step of branch) {
+      if (step === field || dLevel < step.dLevelMax) {
+        break;
+      }
+      switch (step.repetitionType) {
+        case 'REPEATED':
+          if (!(step.name in record)) {
+            record[step.name] = [];
+          }
+          const ix = rLevels[rIndex++];
+          while (record[step.name].length <= ix) {
+            record[step.name].push({});
+          }
+          record = record[step.name][ix];
+          break;
+        default:
+          record[step.name] = record[step.name] || {};
+          record = record[step.name];
+      }
+    }
+    if (dLevel === field.dLevelMax) {
+      const value = Types.fromPrimitive(field.originalType || field.primitiveType, columnData.values[vIndex], field);
+      vIndex++;
+      switch (field.repetitionType) {
+        case 'REPEATED':
+          if (!(field.name in record)) {
+            record[field.name] = [];
+          }
+          const ix = rLevels[rIndex];
+          while (record[field.name].length <= ix) {
+            record[field.name].push(null);
+          }
+          record[field.name][ix] = value;
+          break;
+        default:
+          record[field.name] = value;
+      }
+    }
+  }
+  for (let i = 0; i < rowCount; ++i) {
+    if (columnName in column[i]) {
+      column[i] = column[i][columnName];
+    }
+  }
+}
 //# sourceMappingURL=shred.js.map
package/dist/esm/parquetjs/schema/shred.js.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"shred.js","names":["ParquetBuffer","Types",…]} (old generated sourcemap, full contents omitted)
+{"version":3,"file":"shred.js","names":["ParquetRowGroup","Types",…]} (regenerated sourcemap, full contents omitted)
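Taken together, the shred.js change is a mechanical rename (ParquetBuffer → ParquetRowGroup, materializeRecords → materializeRows, materializeColumn → materializeColumnAsRows) plus a new columnar materialization path, materializeColumns. A minimal sketch of how the renamed API fits together; the deep import path below is an assumption for illustration only, since the shred helpers are internal modules rather than part of the documented package surface:

import {ParquetSchema} from '@loaders.gl/parquet';
// Hypothetical deep import, for illustration only: the shred helpers
// are internal and not re-exported from the package root.
import {
  shredBuffer,
  shredRecord,
  materializeRows,
  materializeColumns
} from '@loaders.gl/parquet/dist/esm/parquetjs/schema/shred';

const schema = new ParquetSchema({
  name: {type: 'UTF8'},
  count: {type: 'INT32'}
});

// A ParquetRowGroup accumulates <value, rlevel, dlevel> triples per column.
const rowGroup = shredBuffer(schema);
shredRecord(schema, {name: 'a', count: 1}, rowGroup);
shredRecord(schema, {name: 'b', count: 2}, rowGroup);

// Row-wise materialization (renamed from materializeRecords).
const rows = materializeRows(schema, rowGroup); // two ParquetRow objects

// New in 4.0: columnar materialization, one array per top-level column,
// roughly {name: [...], count: [...]} for this flat schema.
const columns = materializeColumns(schema, rowGroup);
console.log(rows, columns);

Note the fast path in materializeColumnAsColumnarArray: when the values array already holds one entry per row and the column is primitive, it is returned as-is; nested or repeated columns fall back to the row-assembly loop and then unwrap one level of nesting.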
package/dist/index.d.ts
CHANGED

@@ -1,63 +1,22 @@
 import type { LoaderWithParser } from '@loaders.gl/loader-utils';
+import type { ObjectRowTable, ObjectRowTableBatch, ColumnarTable, ColumnarTableBatch } from '@loaders.gl/schema';
+import type { Table as ArrowTable } from 'apache-arrow';
+import { ParquetLoader as ParquetWorkerLoader, ParquetLoaderOptions } from './parquet-loader';
+import { ParquetWasmLoaderOptions } from './lib/wasm/parse-parquet-wasm';
 import { ParquetWasmLoader as ParquetWasmWorkerLoader } from './parquet-wasm-loader';
-import { ParquetLoader as ParquetWorkerLoader } from './parquet-loader';
-import { parseParquet, parseParquetFileInBatches } from './lib/parsers/parse-parquet-to-rows';
-import { parseParquetInColumns, parseParquetFileInColumnarBatches } from './lib/parsers/parse-parquet-to-columns';
-import { parseParquet as parseParquetWasm } from './lib/wasm/parse-parquet-wasm';
 export { ParquetWorkerLoader, ParquetWasmWorkerLoader };
 /** ParquetJS table loader */
-export declare const ParquetLoader: {
-    parse: typeof parseParquet;
-    parseFileInBatches: typeof parseParquetFileInBatches;
-    name: string;
-    id: string;
-    module: string; /** ParquetJS table loader */
-    version: any;
-    worker: boolean;
-    category: string;
-    extensions: string[];
-    mimeTypes: string[];
-    binary: boolean;
-    tests: string[];
-    options: import("./parquet-loader").ParquetLoaderOptions;
-};
+export declare const ParquetLoader: LoaderWithParser<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>;
 /** ParquetJS table loader */
-export declare const ParquetColumnarLoader: {
-    parse: typeof parseParquetInColumns;
-    parseFileInBatches: typeof parseParquetFileInColumnarBatches;
-    name: string;
-    id: string;
-    module: string; /** ParquetJS table loader */
-    version: any;
-    worker: boolean;
-    category: string;
-    extensions: string[];
-    mimeTypes: string[];
-    binary: boolean;
-    tests: string[];
-    options: import("./parquet-loader").ParquetLoaderOptions;
-};
-export declare const ParquetWasmLoader: {
-    parse: typeof parseParquetWasm;
-    name: string;
-    id: string;
-    module: string;
-    version: any;
-    worker: boolean;
-    category: string;
-    extensions: string[];
-    mimeTypes: string[];
-    binary: boolean;
-    tests: string[];
-    options: import("./parquet-wasm-loader").ParquetLoaderOptions;
-};
+export declare const ParquetColumnarLoader: LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
+export declare const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions>;
 export { ParquetWriter as _ParquetWriter } from './parquet-writer';
 export { ParquetWasmWriter } from './parquet-wasm-writer';
 export { preloadCompressions } from './parquetjs/compression';
 export { ParquetSchema } from './parquetjs/schema/schema';
 export { ParquetReader } from './parquetjs/parser/parquet-reader';
 export { ParquetEncoder } from './parquetjs/encoder/parquet-encoder';
-export { … } from './lib/arrow/convert-schema-from-parquet';
+export { convertParquetSchema, convertParquetSchema as convertParquetToArrowSchema } from './lib/arrow/convert-schema-from-parquet';
 export declare const _typecheckParquetLoader: LoaderWithParser;
 export { default as geoJSONSchema } from './lib/geo/geoparquet-schema';
 export type { GeoMetadata } from './lib/geo/decode-geo-metadata';
package/dist/index.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"…
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,…"} (regenerated sourcemap, full mappings omitted)
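The net effect of the index.d.ts change: instead of publishing structurally inferred loader object types, 4.0 types each loader as an explicit LoaderWithParser instantiation over table and options types. A sketch of what that means for a consumer, assuming the usual load() entry point from @loaders.gl/core; the file name is hypothetical and the casts are illustrative, since load() itself is not fully generic over loader result types in these versions:

import {load} from '@loaders.gl/core';
import {ParquetLoader, ParquetColumnarLoader} from '@loaders.gl/parquet';
import type {ObjectRowTable, ColumnarTable} from '@loaders.gl/schema';

// ParquetLoader: LoaderWithParser<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>
const rows = (await load('example.parquet', ParquetLoader)) as ObjectRowTable;
console.log(rows.shape); // 'object-row-table'

// ParquetColumnarLoader: LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>
const columnar = (await load('example.parquet', ParquetColumnarLoader)) as ColumnarTable;
console.log(columnar.shape); // 'columnar-table'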
package/dist/index.js
CHANGED
@@ -1,17 +1,18 @@
 "use strict";
+// loaders.gl, MIT license
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = exports.geoJSONSchema = exports._typecheckParquetLoader = exports.convertParquetToArrowSchema = exports.
+exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = exports.geoJSONSchema = exports._typecheckParquetLoader = exports.convertParquetToArrowSchema = exports.convertParquetSchema = exports.ParquetEncoder = exports.ParquetReader = exports.ParquetSchema = exports.preloadCompressions = exports.ParquetWasmWriter = exports._ParquetWriter = exports.ParquetWasmLoader = exports.ParquetColumnarLoader = exports.ParquetLoader = exports.ParquetWasmWorkerLoader = exports.ParquetWorkerLoader = void 0;
 // ParquetLoader
-const parquet_wasm_loader_1 = require("./parquet-wasm-loader");
-Object.defineProperty(exports, "ParquetWasmWorkerLoader", { enumerable: true, get: function () { return parquet_wasm_loader_1.ParquetWasmLoader; } });
 const parquet_loader_1 = require("./parquet-loader");
 Object.defineProperty(exports, "ParquetWorkerLoader", { enumerable: true, get: function () { return parquet_loader_1.ParquetLoader; } });
 const parse_parquet_to_rows_1 = require("./lib/parsers/parse-parquet-to-rows");
 const parse_parquet_to_columns_1 = require("./lib/parsers/parse-parquet-to-columns");
 const parse_parquet_wasm_1 = require("./lib/wasm/parse-parquet-wasm");
+const parquet_wasm_loader_1 = require("./parquet-wasm-loader");
+Object.defineProperty(exports, "ParquetWasmWorkerLoader", { enumerable: true, get: function () { return parquet_wasm_loader_1.ParquetWasmLoader; } });
 /** ParquetJS table loader */
 exports.ParquetLoader = {
     ...parquet_loader_1.ParquetLoader,
@@ -19,6 +20,7 @@ exports.ParquetLoader = {
     parseFileInBatches: parse_parquet_to_rows_1.parseParquetFileInBatches
 };
 /** ParquetJS table loader */
+// @ts-expect-error
 exports.ParquetColumnarLoader = {
     ...parquet_loader_1.ParquetLoader,
     parse: parse_parquet_to_columns_1.parseParquetInColumns,
@@ -26,7 +28,7 @@ exports.ParquetColumnarLoader = {
 };
 exports.ParquetWasmLoader = {
     ...parquet_wasm_loader_1.ParquetWasmLoader,
-    parse: parse_parquet_wasm_1.
+    parse: parse_parquet_wasm_1.parseParquetWasm
 };
 // ParquetWriter
 var parquet_writer_1 = require("./parquet-writer");
@@ -43,8 +45,8 @@ Object.defineProperty(exports, "ParquetReader", { enumerable: true, get: functio
 var parquet_encoder_1 = require("./parquetjs/encoder/parquet-encoder");
 Object.defineProperty(exports, "ParquetEncoder", { enumerable: true, get: function () { return parquet_encoder_1.ParquetEncoder; } });
 var convert_schema_from_parquet_1 = require("./lib/arrow/convert-schema-from-parquet");
-Object.defineProperty(exports, "
-Object.defineProperty(exports, "convertParquetToArrowSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.
+Object.defineProperty(exports, "convertParquetSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
+Object.defineProperty(exports, "convertParquetToArrowSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
 // TESTS
 exports._typecheckParquetLoader = exports.ParquetLoader;
 // Geo Metadata
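Note: this entry point builds each loader by spreading a metadata-only "worker loader" object and attaching parse functions, and the newly added `// @ts-expect-error` suggests the columnar spread does not typecheck cleanly against the row-oriented base. A sketch of the composition pattern with simplified, stand-in shapes (the real Loader/LoaderWithParser types live in @loaders.gl/loader-utils):

```ts
// Simplified stand-in types; not the package's actual definitions.
type WorkerLoader = {name: string; extensions: string[]};
type LoaderWithParser<T = unknown> = WorkerLoader & {
  parse(data: ArrayBuffer, options?: Record<string, unknown>): Promise<T>;
};

declare const ParquetWorkerLoader: WorkerLoader;
declare function parseParquet(data: ArrayBuffer): Promise<unknown[]>;

// Spread the metadata-only worker loader, then attach the parser —
// the same `{...parquet_loader_1.ParquetLoader, parse: ...}` shape as above.
const ParquetLoader: LoaderWithParser<unknown[]> = {
  ...ParquetWorkerLoader,
  parse: (data) => parseParquet(data)
};
```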
package/dist/lib/arrow/convert-row-group-to-columns.d.ts
CHANGED
@@ -1,4 +1,4 @@
 import { Schema } from '@loaders.gl/schema';
-import {
-export declare function convertParquetRowGroupToColumns(schema: Schema, rowGroup:
+import { ParquetRowGroup } from '@loaders.gl/parquet/parquetjs/schema/declare';
+export declare function convertParquetRowGroupToColumns(schema: Schema, rowGroup: ParquetRowGroup): Record<string, any[]>;
 //# sourceMappingURL=convert-row-group-to-columns.d.ts.map
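Note: the tightened declaration makes explicit that `convertParquetRowGroupToColumns` turns a shredded row group into a `Record<string, any[]>` of column arrays. A conceptual sketch of that row-to-column pivot (not the package's implementation, which operates on the internal `ParquetRowGroup` structure):

```ts
// Conceptual pivot from row objects to column arrays.
function rowsToColumns(rows: Record<string, unknown>[]): Record<string, unknown[]> {
  const columns: Record<string, unknown[]> = {};
  for (const row of rows) {
    for (const [name, value] of Object.entries(row)) {
      (columns[name] ??= []).push(value); // create the column on first sight
    }
  }
  return columns;
}

// rowsToColumns([{a: 1, b: 'x'}, {a: 2, b: 'y'}])
// => {a: [1, 2], b: ['x', 'y']}
```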
package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"convert-row-group-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-row-group-to-columns.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,MAAM,EAAC,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAC,
+{"version":3,"file":"convert-row-group-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-row-group-to-columns.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,MAAM,EAAC,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAC,eAAe,EAAC,MAAM,8CAA8C,CAAC;AAE7E,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,eAAe,GACxB,MAAM,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAMvB"}
package/dist/lib/arrow/convert-schema-from-parquet.d.ts
CHANGED
@@ -1,9 +1,9 @@
+import { Schema, DataType } from '@loaders.gl/schema';
 import type { ParquetSchema } from '../../parquetjs/schema/schema';
 import type { ParquetType } from '../../parquetjs/schema/declare';
-import { FileMetaData } from '
-import { Schema, DataType } from '@loaders.gl/schema';
+import { FileMetaData } from '../../parquetjs/parquet-thrift';
 export declare const PARQUET_TYPE_MAPPING: {
-    [type in ParquetType]:
+    [type in ParquetType]: DataType;
 };
-export declare function
+export declare function convertParquetSchema(parquetSchema: ParquetSchema, parquetMetadata: FileMetaData | null): Schema;
 //# sourceMappingURL=convert-schema-from-parquet.d.ts.map
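Note: the completed declaration uses a TypeScript mapped type, `{[type in ParquetType]: DataType}`, which forces the mapping table to cover every member of the `ParquetType` union. A reduced sketch with stand-in unions (the real unions come from the parquetjs and schema modules):

```ts
// Abbreviated stand-in unions, for illustration only.
type ParquetType = 'BOOLEAN' | 'INT32' | 'UTF8';
type DataType = 'bool' | 'int32' | 'utf8';

// The compiler rejects this object if any ParquetType key is missing
// or if a value falls outside DataType.
const PARQUET_TYPE_MAPPING: {[type in ParquetType]: DataType} = {
  BOOLEAN: 'bool',
  INT32: 'int32',
  UTF8: 'utf8'
};
```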
package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"convert-schema-from-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-from-parquet.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,+BAA+B,CAAC;AACjE,OAAO,KAAK,EAAgC,WAAW,EAAC,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAC,YAAY,EAAC,MAAM,
+{"version":3,"file":"convert-schema-from-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-from-parquet.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,MAAM,EAAS,QAAQ,EAAC,MAAM,oBAAoB,CAAC;AAE3D,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,+BAA+B,CAAC;AACjE,OAAO,KAAK,EAAgC,WAAW,EAAC,MAAM,gCAAgC,CAAC;AAC/F,OAAO,EAAC,YAAY,EAAC,MAAM,gCAAgC,CAAC;AAE5D,eAAO,MAAM,oBAAoB,EAAE;KAAE,IAAI,IAAI,WAAW,GAAG,QAAQ;CA+BlE,CAAC;AAEF,wBAAgB,oBAAoB,CAClC,aAAa,EAAE,aAAa,EAC5B,eAAe,EAAE,YAAY,GAAG,IAAI,GACnC,MAAM,CAUR"}
package/dist/lib/arrow/convert-schema-from-parquet.js
CHANGED
@@ -1,81 +1,85 @@
 "use strict";
 // loaders.gl, MIT license
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.
-const schema_1 = require("@loaders.gl/schema");
+exports.convertParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
 exports.PARQUET_TYPE_MAPPING = {
-    BOOLEAN:
-    INT32:
-    INT64:
-    INT96:
-    FLOAT:
-    DOUBLE:
-    BYTE_ARRAY:
-    FIXED_LEN_BYTE_ARRAY:
-    UTF8:
-    DATE:
-    TIME_MILLIS:
-    TIME_MICROS:
-    TIMESTAMP_MILLIS:
-    TIMESTAMP_MICROS:
-    UINT_8:
-    UINT_16:
-    UINT_32:
-    UINT_64:
-    INT_8:
-    INT_16:
-    INT_32:
-    INT_64:
-    JSON:
-    BSON:
-    // TODO check
-    INTERVAL:
-    DECIMAL_INT32:
-    DECIMAL_INT64:
-    DECIMAL_BYTE_ARRAY:
-    DECIMAL_FIXED_LEN_BYTE_ARRAY:
+    BOOLEAN: 'bool',
+    INT32: 'int32',
+    INT64: 'float64',
+    INT96: 'float64',
+    FLOAT: 'float32',
+    DOUBLE: 'float64',
+    BYTE_ARRAY: 'binary',
+    FIXED_LEN_BYTE_ARRAY: 'binary',
+    UTF8: 'utf8',
+    DATE: 'int32',
+    TIME_MILLIS: 'int64',
+    TIME_MICROS: 'int64',
+    TIMESTAMP_MILLIS: 'int64',
+    TIMESTAMP_MICROS: 'int64',
+    UINT_8: 'int32',
+    UINT_16: 'uint16',
+    UINT_32: 'uint32',
+    UINT_64: 'uint64',
+    INT_8: 'int8',
+    INT_16: 'int16',
+    INT_32: 'int32',
+    INT_64: 'int64',
+    JSON: 'binary',
+    BSON: 'binary',
+    // TODO check interal type
+    INTERVAL: 'binary',
+    DECIMAL_INT32: 'float32',
+    DECIMAL_INT64: 'float64',
+    DECIMAL_BYTE_ARRAY: 'float64',
+    DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
 };
-function
+function convertParquetSchema(parquetSchema, parquetMetadata) {
     const fields = getFields(parquetSchema.schema);
     const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
-
+    const schema = {
+        fields,
+        metadata: metadata || {}
+    };
+    return schema;
 }
-exports.
+exports.convertParquetSchema = convertParquetSchema;
 function getFields(schema) {
     const fields = [];
     for (const name in schema) {
         const field = schema[name];
         if (field.fields) {
-            const
-
-            fields.push(nestedField);
+            const children = getFields(field.fields);
+            fields.push({ name, type: { type: 'struct', children }, nullable: field.optional });
         }
         else {
-            const
+            const type = exports.PARQUET_TYPE_MAPPING[field.type];
             const metadata = getFieldMetadata(field);
-            const arrowField =
+            const arrowField = { name, type, nullable: field.optional, metadata };
             fields.push(arrowField);
         }
     }
     return fields;
 }
 function getFieldMetadata(field) {
-
+    let metadata;
     for (const key in field) {
         if (key !== 'name') {
            let value = field[key] || '';
            value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
-            metadata
+            metadata = metadata || {};
+            metadata[key] = value;
        }
    }
    return metadata;
 }
 function getSchemaMetadata(parquetMetadata) {
-
+    let metadata;
     const keyValueList = parquetMetadata.key_value_metadata || [];
     for (const { key, value } of keyValueList) {
         if (typeof value === 'string') {
-            metadata
+            metadata = metadata || {};
+            metadata[key] = value;
        }
    }
    return metadata;
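Note: the filled-in table maps `INT64`, `INT96`, and the DECIMAL variants to `'float64'`, which is lossy: an IEEE-754 double is exact only up to 2^53 − 1 (`Number.MAX_SAFE_INTEGER`). A quick illustration of the consequence (generic JS/TS behavior, not package code; needs an ES2020+ target for bigint):

```ts
// Why an INT64 -> 'float64' mapping can silently lose precision.
const big = 2n ** 53n + 1n;       // a value an INT64 column can hold exactly
console.log(Number(big));         // 9007199254740992 — rounded, off by one
console.log(Number.isSafeInteger(Number(big))); // false
```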
package/dist/lib/arrow/convert-schema-to-parquet.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import type { ParquetType } from '../../parquetjs/schema/declare';
 import { Schema, DataType } from '@loaders.gl/schema';
 export declare const PARQUET_TYPE_MAPPING: {
-    [type in ParquetType]:
+    [type in ParquetType]: DataType;
 };
 export declare function convertToParquetSchema(schema: Schema): Schema;
 //# sourceMappingURL=convert-schema-to-parquet.d.ts.map
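Note: taken together with `convertParquetSchema` above, this file declares the opposite direction of the conversion; `convertToParquetSchema` is declared to both accept and return a `@loaders.gl/schema` `Schema`. A signature-level sketch mirroring the two .d.ts files (declarations only; the `unknown` parameters stand in for the internal ParquetSchema and FileMetaData types):

```ts
import type {Schema} from '@loaders.gl/schema';

// Signatures as declared above; implementations elided.
declare function convertParquetSchema(parquetSchema: unknown, parquetMetadata: unknown): Schema;
declare function convertToParquetSchema(schema: Schema): Schema;

declare const parquetSchema: unknown;
const schema: Schema = convertParquetSchema(parquetSchema, null);
const parquetStyleSchema: Schema = convertToParquetSchema(schema);
```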
package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"convert-schema-to-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-to-parquet.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAEV,WAAW,EACZ,MAAM,gCAAgC,CAAC;AAExC,OAAO,EACL,MAAM,EAGN,QAAQ,
+{"version":3,"file":"convert-schema-to-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-to-parquet.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAEV,WAAW,EACZ,MAAM,gCAAgC,CAAC;AAExC,OAAO,EACL,MAAM,EAGN,QAAQ,EACT,MAAM,oBAAoB,CAAC;AAE5B,eAAO,MAAM,oBAAoB,EAAE;KAAE,IAAI,IAAI,WAAW,GAAG,QAAQ;CA+BlE,CAAC;AAEF,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAK7D"}