@loaders.gl/parquet 4.0.0-alpha.23 → 4.0.0-alpha.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/dist.min.js +17 -17
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/parquet-loader.js +1 -1
  4. package/dist/es5/parquet-wasm-loader.js +1 -1
  5. package/dist/es5/parquet-wasm-writer.js +1 -1
  6. package/dist/es5/parquet-writer.js +1 -1
  7. package/dist/esm/parquet-loader.js +1 -1
  8. package/dist/esm/parquet-wasm-loader.js +1 -1
  9. package/dist/esm/parquet-wasm-writer.js +1 -1
  10. package/dist/esm/parquet-writer.js +1 -1
  11. package/dist/parquet-worker.js +17 -17
  12. package/dist/parquet-worker.js.map +3 -3
  13. package/package.json +11 -9
  14. package/dist/bundle.js +0 -5
  15. package/dist/constants.js +0 -18
  16. package/dist/index.js +0 -56
  17. package/dist/lib/arrow/convert-columns-to-row-group.js +0 -1
  18. package/dist/lib/arrow/convert-row-group-to-columns.js +0 -12
  19. package/dist/lib/arrow/convert-schema-from-parquet.js +0 -86
  20. package/dist/lib/arrow/convert-schema-to-parquet.js +0 -71
  21. package/dist/lib/geo/decode-geo-metadata.js +0 -77
  22. package/dist/lib/geo/geoparquet-schema.js +0 -69
  23. package/dist/lib/parsers/parse-parquet-to-columns.js +0 -46
  24. package/dist/lib/parsers/parse-parquet-to-rows.js +0 -37
  25. package/dist/lib/wasm/encode-parquet-wasm.js +0 -30
  26. package/dist/lib/wasm/load-wasm/index.js +0 -5
  27. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +0 -38
  28. package/dist/lib/wasm/load-wasm/load-wasm-node.js +0 -31
  29. package/dist/lib/wasm/parse-parquet-wasm.js +0 -27
  30. package/dist/parquet-loader.js +0 -41
  31. package/dist/parquet-wasm-loader.js +0 -26
  32. package/dist/parquet-wasm-writer.js +0 -24
  33. package/dist/parquet-writer.js +0 -21
  34. package/dist/parquetjs/codecs/declare.js +0 -2
  35. package/dist/parquetjs/codecs/dictionary.js +0 -14
  36. package/dist/parquetjs/codecs/index.js +0 -55
  37. package/dist/parquetjs/codecs/plain.js +0 -211
  38. package/dist/parquetjs/codecs/rle.js +0 -145
  39. package/dist/parquetjs/compression.js +0 -183
  40. package/dist/parquetjs/encoder/parquet-encoder.js +0 -484
  41. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
  42. package/dist/parquetjs/parquet-thrift/BsonType.js +0 -62
  43. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +0 -211
  44. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +0 -217
  45. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +0 -402
  46. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +0 -108
  47. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
  48. package/dist/parquetjs/parquet-thrift/ConvertedType.js +0 -34
  49. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +0 -170
  50. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -230
  51. package/dist/parquetjs/parquet-thrift/DateType.js +0 -62
  52. package/dist/parquetjs/parquet-thrift/DecimalType.js +0 -109
  53. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -126
  54. package/dist/parquetjs/parquet-thrift/Encoding.js +0 -20
  55. package/dist/parquetjs/parquet-thrift/EnumType.js +0 -62
  56. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
  57. package/dist/parquetjs/parquet-thrift/FileMetaData.js +0 -260
  58. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +0 -62
  59. package/dist/parquetjs/parquet-thrift/IntType.js +0 -109
  60. package/dist/parquetjs/parquet-thrift/JsonType.js +0 -62
  61. package/dist/parquetjs/parquet-thrift/KeyValue.js +0 -106
  62. package/dist/parquetjs/parquet-thrift/ListType.js +0 -62
  63. package/dist/parquetjs/parquet-thrift/LogicalType.js +0 -384
  64. package/dist/parquetjs/parquet-thrift/MapType.js +0 -62
  65. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +0 -62
  66. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +0 -62
  67. package/dist/parquetjs/parquet-thrift/NullType.js +0 -62
  68. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +0 -101
  69. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +0 -131
  70. package/dist/parquetjs/parquet-thrift/PageHeader.js +0 -220
  71. package/dist/parquetjs/parquet-thrift/PageLocation.js +0 -145
  72. package/dist/parquetjs/parquet-thrift/PageType.js +0 -16
  73. package/dist/parquetjs/parquet-thrift/RowGroup.js +0 -186
  74. package/dist/parquetjs/parquet-thrift/SchemaElement.js +0 -243
  75. package/dist/parquetjs/parquet-thrift/SortingColumn.js +0 -131
  76. package/dist/parquetjs/parquet-thrift/Statistics.js +0 -180
  77. package/dist/parquetjs/parquet-thrift/StringType.js +0 -62
  78. package/dist/parquetjs/parquet-thrift/TimeType.js +0 -110
  79. package/dist/parquetjs/parquet-thrift/TimeUnit.js +0 -131
  80. package/dist/parquetjs/parquet-thrift/TimestampType.js +0 -110
  81. package/dist/parquetjs/parquet-thrift/Type.js +0 -20
  82. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -62
  83. package/dist/parquetjs/parquet-thrift/UUIDType.js +0 -62
  84. package/dist/parquetjs/parquet-thrift/index.js +0 -65
  85. package/dist/parquetjs/parser/decoders.js +0 -318
  86. package/dist/parquetjs/parser/parquet-reader.js +0 -200
  87. package/dist/parquetjs/schema/declare.js +0 -12
  88. package/dist/parquetjs/schema/schema.js +0 -162
  89. package/dist/parquetjs/schema/shred.js +0 -355
  90. package/dist/parquetjs/schema/types.js +0 -416
  91. package/dist/parquetjs/utils/file-utils.js +0 -43
  92. package/dist/parquetjs/utils/read-utils.js +0 -109
  93. package/dist/workers/parquet-worker.js +0 -5
package/dist/parquetjs/schema/shred.js
@@ -1,355 +0,0 @@
- "use strict";
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- var desc = Object.getOwnPropertyDescriptor(m, k);
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
- desc = { enumerable: true, get: function() { return m[k]; } };
- }
- Object.defineProperty(o, k2, desc);
- }) : (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- o[k2] = m[k];
- }));
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
- Object.defineProperty(o, "default", { enumerable: true, value: v });
- }) : function(o, v) {
- o["default"] = v;
- });
- var __importStar = (this && this.__importStar) || function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
- __setModuleDefault(result, mod);
- return result;
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.materializeColumns = exports.materializeRows = exports.shredRecord = exports.shredBuffer = exports.ParquetRowGroup = void 0;
- const declare_1 = require("./declare");
- Object.defineProperty(exports, "ParquetRowGroup", { enumerable: true, get: function () { return declare_1.ParquetRowGroup; } });
- const Types = __importStar(require("./types"));
- function shredBuffer(schema) {
- const columnData = {};
- for (const field of schema.fieldList) {
- columnData[field.key] = {
- dlevels: [],
- rlevels: [],
- values: [],
- pageHeaders: [],
- count: 0
- };
- }
- return { rowCount: 0, columnData };
- }
- exports.shredBuffer = shredBuffer;
- /**
- * 'Shred' a record into a list of <value, repetition_level, definition_level>
- * tuples per column using the Google Dremel Algorithm..
- *
- * The rowGroup argument must point to an object into which the shredded record
- * will be returned. You may re-use the rowGroup for repeated calls to this function
- * to append to an existing rowGroup, as long as the schema is unchanged.
- *
- * The format in which the shredded records will be stored in the rowGroup is as
- * follows:
- *
- * rowGroup = {
- * columnData: [
- * 'my_col': {
- * dlevels: [d1, d2, .. dN],
- * rlevels: [r1, r2, .. rN],
- * values: [v1, v2, .. vN],
- * }, ...
- * ],
- * rowCount: X,
- * }
- */
- function shredRecord(schema, record, rowGroup) {
- /* shred the record, this may raise an exception */
- const data = shredBuffer(schema).columnData;
- shredRecordFields(schema.fields, record, data, 0, 0);
- /* if no error during shredding, add the shredded record to the rowGroup */
- if (rowGroup.rowCount === 0) {
- rowGroup.rowCount = 1;
- rowGroup.columnData = data;
- return;
- }
- rowGroup.rowCount += 1;
- for (const field of schema.fieldList) {
- Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);
- Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);
- Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);
- rowGroup.columnData[field.key].count += data[field.key].count;
- }
- }
- exports.shredRecord = shredRecord;
- // eslint-disable-next-line max-statements, complexity
- function shredRecordFields(fields, record, data, rLevel, dLevel) {
- for (const name in fields) {
- const field = fields[name];
- // fetch values
- let values = [];
- if (record &&
- field.name in record &&
- record[field.name] !== undefined &&
- record[field.name] !== null) {
- if (record[field.name].constructor === Array) {
- values = record[field.name];
- }
- else {
- values.push(record[field.name]);
- }
- }
- // check values
- if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {
- throw new Error(`missing required field: ${field.name}`);
- }
- if (values.length > 1 && field.repetitionType !== 'REPEATED') {
- throw new Error(`too many values for field: ${field.name}`);
- }
- // push null
- if (values.length === 0) {
- if (field.isNested) {
- shredRecordFields(field.fields, null, data, rLevel, dLevel);
- }
- else {
- data[field.key].count += 1;
- data[field.key].rlevels.push(rLevel);
- data[field.key].dlevels.push(dLevel);
- }
- continue; // eslint-disable-line no-continue
- }
- // push values
- for (let i = 0; i < values.length; i++) {
- const rlvl = i === 0 ? rLevel : field.rLevelMax;
- if (field.isNested) {
- shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
- }
- else {
- data[field.key].count += 1;
- data[field.key].rlevels.push(rlvl);
- data[field.key].dlevels.push(field.dLevelMax);
- data[field.key].values.push(Types.toPrimitive((field.originalType || field.primitiveType), values[i]));
- }
- }
- }
- }
- /**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The rowGroup argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * rowGroup = {
- * columnData: [
- * 'my_col': {
- * dlevels: [d1, d2, .. dN],
- * rlevels: [r1, r2, .. rN],
- * values: [v1, v2, .. vN],
- * }, ...
- * ],
- * rowCount: X,
- * }
- */
- function materializeRows(schema, rowGroup) {
- const rows = [];
- // rows = new Array(rowGroup.rowCount).fill({})'
- for (let i = 0; i < rowGroup.rowCount; i++) {
- rows.push({});
- }
- for (const key in rowGroup.columnData) {
- const columnData = rowGroup.columnData[key];
- if (columnData.count) {
- materializeColumnAsRows(schema, columnData, key, rows);
- }
- }
- return rows;
- }
- exports.materializeRows = materializeRows;
- /** Populate record fields for one column */
- // eslint-disable-next-line max-statements, complexity
- function materializeColumnAsRows(schema, columnData, key, rows) {
- const field = schema.findField(key);
- const branch = schema.findFieldBranch(key);
- // tslint:disable-next-line:prefer-array-literal
- const rLevels = new Array(field.rLevelMax + 1).fill(0);
- let vIndex = 0;
- for (let i = 0; i < columnData.count; i++) {
- const dLevel = columnData.dlevels[i];
- const rLevel = columnData.rlevels[i];
- rLevels[rLevel]++;
- rLevels.fill(0, rLevel + 1);
- let rIndex = 0;
- let record = rows[rLevels[rIndex++] - 1];
- // Internal nodes - Build a nested row object
- for (const step of branch) {
- if (step === field || dLevel < step.dLevelMax) {
- break;
- }
- switch (step.repetitionType) {
- case 'REPEATED':
- if (!(step.name in record)) {
- // eslint-disable max-depth
- record[step.name] = [];
- }
- const ix = rLevels[rIndex++];
- while (record[step.name].length <= ix) {
- // eslint-disable max-depth
- record[step.name].push({});
- }
- record = record[step.name][ix];
- break;
- default:
- record[step.name] = record[step.name] || {};
- record = record[step.name];
- }
- }
- // Leaf node - Add the value
- if (dLevel === field.dLevelMax) {
- const value = Types.fromPrimitive(
- // @ts-ignore
- field.originalType || field.primitiveType, columnData.values[vIndex], field);
- vIndex++;
- switch (field.repetitionType) {
- case 'REPEATED':
- if (!(field.name in record)) {
- // eslint-disable max-depth
- record[field.name] = [];
- }
- const ix = rLevels[rIndex];
- while (record[field.name].length <= ix) {
- // eslint-disable max-depth
- record[field.name].push(null);
- }
- record[field.name][ix] = value;
- break;
- default:
- record[field.name] = value;
- }
- }
- }
- }
- // Columnar export
- /**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The rowGroup argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * rowGroup = {
- * columnData: [
- * 'my_col': {
- * dlevels: [d1, d2, .. dN],
- * rlevels: [r1, r2, .. rN],
- * values: [v1, v2, .. vN],
- * }, ...
- * ],
- * rowCount: X,
- * }
- */
- function materializeColumns(schema, rowGroup) {
- const columns = {};
- for (const key in rowGroup.columnData) {
- const columnData = rowGroup.columnData[key];
- if (columnData.count) {
- materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);
- }
- }
- return columns;
- }
- exports.materializeColumns = materializeColumns;
- // eslint-disable-next-line max-statements, complexity
- function materializeColumnAsColumnarArray(schema, columnData, rowCount, key, columns) {
- if (columnData.count <= 0) {
- return;
- }
- const field = schema.findField(key);
- const branch = schema.findFieldBranch(key);
- const columnName = branch[0].name;
- let column;
- const { values } = columnData;
- if (values.length === rowCount && branch[0].primitiveType) {
- // if (branch[0].repetitionType === `REQUIRED`) {
- // switch (branch[0].primitiveType) {
- // case 'INT32': return values instanceof Int32Array ? values : new Int32Array(values);
- // }
- // }
- column = values;
- }
- if (column) {
- columns[columnName] = column;
- return;
- }
- column = new Array(rowCount);
- for (let i = 0; i < rowCount; i++) {
- column[i] = {};
- }
- columns[columnName] = column;
- // tslint:disable-next-line:prefer-array-literal
- const rLevels = new Array(field.rLevelMax + 1).fill(0);
- let vIndex = 0;
- for (let i = 0; i < columnData.count; i++) {
- const dLevel = columnData.dlevels[i];
- const rLevel = columnData.rlevels[i];
- rLevels[rLevel]++;
- rLevels.fill(0, rLevel + 1);
- let rIndex = 0;
- let record = column[rLevels[rIndex++] - 1];
- // Internal nodes - Build a nested row object
- for (const step of branch) {
- if (step === field || dLevel < step.dLevelMax) {
- break;
- }
- switch (step.repetitionType) {
- case 'REPEATED':
- if (!(step.name in record)) {
- // eslint-disable max-depth
- record[step.name] = [];
- }
- const ix = rLevels[rIndex++];
- while (record[step.name].length <= ix) {
- // eslint-disable max-depth
- record[step.name].push({});
- }
- record = record[step.name][ix];
- break;
- default:
- record[step.name] = record[step.name] || {};
- record = record[step.name];
- }
- }
- // Leaf node - Add the value
- if (dLevel === field.dLevelMax) {
- const value = Types.fromPrimitive(
- // @ts-ignore
- field.originalType || field.primitiveType, columnData.values[vIndex], field);
- vIndex++;
- switch (field.repetitionType) {
- case 'REPEATED':
- if (!(field.name in record)) {
- // eslint-disable max-depth
- record[field.name] = [];
- }
- const ix = rLevels[rIndex];
- while (record[field.name].length <= ix) {
- // eslint-disable max-depth
- record[field.name].push(null);
- }
- record[field.name][ix] = value;
- break;
- default:
- record[field.name] = value;
- }
- }
- }
- // Remove one level of nesting
- for (let i = 0; i < rowCount; ++i) {
- if (columnName in column[i]) {
- column[i] = column[i][columnName];
- }
- }
- }
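
For context on the removed shredder: its doc comments describe turning records into per-column <value, repetition_level, definition_level> tuples (rowGroup.columnData) and materializing them back into rows. The sketch below is a minimal, self-contained illustration of that row-group shape for the simplest case only: a flat schema whose fields are all OPTIONAL scalars, so the definition level is 1 when a value is present and 0 when it is missing, and the repetition level is always 0. It does not call the loaders.gl/parquetjs API; shredFlat and materializeFlat are hypothetical helper names used here for illustration.

// Minimal sketch (not the package API): flat OPTIONAL-only schema,
// so dlevel is 1 (present) or 0 (null/missing), rlevel is always 0.
function shredFlat(fieldNames, records) {
  const rowGroup = {rowCount: 0, columnData: {}};
  for (const name of fieldNames) {
    rowGroup.columnData[name] = {dlevels: [], rlevels: [], values: [], count: 0};
  }
  for (const record of records) {
    rowGroup.rowCount += 1;
    for (const name of fieldNames) {
      const column = rowGroup.columnData[name];
      const value = record[name];
      column.rlevels.push(0);
      if (value === undefined || value === null) {
        column.dlevels.push(0);   // null: below max definition level, no value stored
      } else {
        column.dlevels.push(1);   // present: max definition level, value stored
        column.values.push(value);
      }
      column.count += 1;
    }
  }
  return rowGroup;
}

function materializeFlat(fieldNames, rowGroup) {
  const rows = Array.from({length: rowGroup.rowCount}, () => ({}));
  for (const name of fieldNames) {
    const {dlevels, values} = rowGroup.columnData[name];
    let vIndex = 0;
    dlevels.forEach((dLevel, row) => {
      if (dLevel === 1) rows[row][name] = values[vIndex++];
    });
  }
  return rows;
}

const records = [{id: 1, tag: 'a'}, {id: 2}];
const rowGroup = shredFlat(['id', 'tag'], records);
// rowGroup.columnData.tag -> {dlevels: [1, 0], rlevels: [0, 0], values: ['a'], count: 2}
console.log(materializeFlat(['id', 'tag'], rowGroup)); // [{id: 1, tag: 'a'}, {id: 2}]

The removed module handles the general case on top of this idea: nested and REPEATED fields raise the maximum repetition and definition levels, and the schema's fieldList/findFieldBranch metadata drives how the per-column level arrays are walked to rebuild nested objects and arrays.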