@loaders.gl/parquet 3.4.10 → 3.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/dist/dist.min.js +16 -16
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/parquet-loader.js +1 -1
  4. package/dist/es5/parquet-wasm-loader.js +1 -1
  5. package/dist/es5/parquet-wasm-writer.js +1 -1
  6. package/dist/es5/parquet-writer.js +1 -1
  7. package/dist/esm/parquet-loader.js +1 -1
  8. package/dist/esm/parquet-wasm-loader.js +1 -1
  9. package/dist/esm/parquet-wasm-writer.js +1 -1
  10. package/dist/esm/parquet-writer.js +1 -1
  11. package/dist/parquet-worker.js +2 -2
  12. package/package.json +9 -7
  13. package/dist/bundle.js +0 -5
  14. package/dist/constants.js +0 -18
  15. package/dist/index.js +0 -56
  16. package/dist/lib/arrow/convert-columns-to-row-group.js +0 -1
  17. package/dist/lib/arrow/convert-row-group-to-columns.js +0 -12
  18. package/dist/lib/arrow/convert-schema-from-parquet.js +0 -82
  19. package/dist/lib/arrow/convert-schema-to-parquet.js +0 -72
  20. package/dist/lib/geo/decode-geo-metadata.js +0 -73
  21. package/dist/lib/geo/geoparquet-schema.js +0 -69
  22. package/dist/lib/parsers/parse-parquet-to-columns.js +0 -40
  23. package/dist/lib/parsers/parse-parquet-to-rows.js +0 -40
  24. package/dist/lib/wasm/encode-parquet-wasm.js +0 -30
  25. package/dist/lib/wasm/load-wasm/index.js +0 -5
  26. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +0 -38
  27. package/dist/lib/wasm/load-wasm/load-wasm-node.js +0 -31
  28. package/dist/lib/wasm/parse-parquet-wasm.js +0 -27
  29. package/dist/parquet-loader.js +0 -29
  30. package/dist/parquet-wasm-loader.js +0 -27
  31. package/dist/parquet-wasm-writer.js +0 -23
  32. package/dist/parquet-writer.js +0 -21
  33. package/dist/parquetjs/codecs/declare.js +0 -2
  34. package/dist/parquetjs/codecs/dictionary.js +0 -14
  35. package/dist/parquetjs/codecs/index.js +0 -55
  36. package/dist/parquetjs/codecs/plain.js +0 -211
  37. package/dist/parquetjs/codecs/rle.js +0 -145
  38. package/dist/parquetjs/compression.js +0 -183
  39. package/dist/parquetjs/encoder/parquet-encoder.js +0 -484
  40. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
  41. package/dist/parquetjs/parquet-thrift/BsonType.js +0 -62
  42. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +0 -211
  43. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +0 -217
  44. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +0 -402
  45. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +0 -108
  46. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
  47. package/dist/parquetjs/parquet-thrift/ConvertedType.js +0 -34
  48. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +0 -170
  49. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -230
  50. package/dist/parquetjs/parquet-thrift/DateType.js +0 -62
  51. package/dist/parquetjs/parquet-thrift/DecimalType.js +0 -109
  52. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -126
  53. package/dist/parquetjs/parquet-thrift/Encoding.js +0 -20
  54. package/dist/parquetjs/parquet-thrift/EnumType.js +0 -62
  55. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
  56. package/dist/parquetjs/parquet-thrift/FileMetaData.js +0 -260
  57. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +0 -62
  58. package/dist/parquetjs/parquet-thrift/IntType.js +0 -109
  59. package/dist/parquetjs/parquet-thrift/JsonType.js +0 -62
  60. package/dist/parquetjs/parquet-thrift/KeyValue.js +0 -106
  61. package/dist/parquetjs/parquet-thrift/ListType.js +0 -62
  62. package/dist/parquetjs/parquet-thrift/LogicalType.js +0 -384
  63. package/dist/parquetjs/parquet-thrift/MapType.js +0 -62
  64. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +0 -62
  65. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +0 -62
  66. package/dist/parquetjs/parquet-thrift/NullType.js +0 -62
  67. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +0 -101
  68. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +0 -131
  69. package/dist/parquetjs/parquet-thrift/PageHeader.js +0 -220
  70. package/dist/parquetjs/parquet-thrift/PageLocation.js +0 -145
  71. package/dist/parquetjs/parquet-thrift/PageType.js +0 -16
  72. package/dist/parquetjs/parquet-thrift/RowGroup.js +0 -186
  73. package/dist/parquetjs/parquet-thrift/SchemaElement.js +0 -243
  74. package/dist/parquetjs/parquet-thrift/SortingColumn.js +0 -131
  75. package/dist/parquetjs/parquet-thrift/Statistics.js +0 -180
  76. package/dist/parquetjs/parquet-thrift/StringType.js +0 -62
  77. package/dist/parquetjs/parquet-thrift/TimeType.js +0 -110
  78. package/dist/parquetjs/parquet-thrift/TimeUnit.js +0 -131
  79. package/dist/parquetjs/parquet-thrift/TimestampType.js +0 -110
  80. package/dist/parquetjs/parquet-thrift/Type.js +0 -20
  81. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -62
  82. package/dist/parquetjs/parquet-thrift/UUIDType.js +0 -62
  83. package/dist/parquetjs/parquet-thrift/index.js +0 -65
  84. package/dist/parquetjs/parser/decoders.js +0 -318
  85. package/dist/parquetjs/parser/parquet-reader.js +0 -200
  86. package/dist/parquetjs/schema/declare.js +0 -12
  87. package/dist/parquetjs/schema/schema.js +0 -162
  88. package/dist/parquetjs/schema/shred.js +0 -347
  89. package/dist/parquetjs/schema/types.js +0 -416
  90. package/dist/parquetjs/utils/file-utils.js +0 -43
  91. package/dist/parquetjs/utils/read-utils.js +0 -109
  92. package/dist/workers/parquet-worker.js +0 -5
@@ -1,347 +0,0 @@
1
- "use strict";
2
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
3
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
- if (k2 === undefined) k2 = k;
5
- var desc = Object.getOwnPropertyDescriptor(m, k);
6
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
- desc = { enumerable: true, get: function() { return m[k]; } };
8
- }
9
- Object.defineProperty(o, k2, desc);
10
- }) : (function(o, m, k, k2) {
11
- if (k2 === undefined) k2 = k;
12
- o[k2] = m[k];
13
- }));
14
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
15
- Object.defineProperty(o, "default", { enumerable: true, value: v });
16
- }) : function(o, v) {
17
- o["default"] = v;
18
- });
19
- var __importStar = (this && this.__importStar) || function (mod) {
20
- if (mod && mod.__esModule) return mod;
21
- var result = {};
22
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
23
- __setModuleDefault(result, mod);
24
- return result;
25
- };
26
- Object.defineProperty(exports, "__esModule", { value: true });
27
- exports.materializeRecords = exports.shredRecord = exports.shredBuffer = exports.ParquetBuffer = void 0;
28
- const declare_1 = require("./declare");
29
- Object.defineProperty(exports, "ParquetBuffer", { enumerable: true, get: function () { return declare_1.ParquetBuffer; } });
30
- const Types = __importStar(require("./types"));
31
- function shredBuffer(schema) {
32
- const columnData = {};
33
- for (const field of schema.fieldList) {
34
- columnData[field.key] = {
35
- dlevels: [],
36
- rlevels: [],
37
- values: [],
38
- pageHeaders: [],
39
- count: 0
40
- };
41
- }
42
- return { rowCount: 0, columnData };
43
- }
44
- exports.shredBuffer = shredBuffer;
45
- /**
46
- * 'Shred' a record into a list of <value, repetition_level, definition_level>
47
- * tuples per column using the Google Dremel Algorithm..
48
- *
49
- * The buffer argument must point to an object into which the shredded record
50
- * will be returned. You may re-use the buffer for repeated calls to this function
51
- * to append to an existing buffer, as long as the schema is unchanged.
52
- *
53
- * The format in which the shredded records will be stored in the buffer is as
54
- * follows:
55
- *
56
- * buffer = {
57
- * columnData: [
58
- * 'my_col': {
59
- * dlevels: [d1, d2, .. dN],
60
- * rlevels: [r1, r2, .. rN],
61
- * values: [v1, v2, .. vN],
62
- * }, ...
63
- * ],
64
- * rowCount: X,
65
- * }
66
- */
67
- function shredRecord(schema, record, buffer) {
68
- /* shred the record, this may raise an exception */
69
- const data = shredBuffer(schema).columnData;
70
- shredRecordFields(schema.fields, record, data, 0, 0);
71
- /* if no error during shredding, add the shredded record to the buffer */
72
- if (buffer.rowCount === 0) {
73
- buffer.rowCount = 1;
74
- buffer.columnData = data;
75
- return;
76
- }
77
- buffer.rowCount += 1;
78
- for (const field of schema.fieldList) {
79
- Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
80
- Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
81
- Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
82
- buffer.columnData[field.key].count += data[field.key].count;
83
- }
84
- }
85
- exports.shredRecord = shredRecord;
86
- // eslint-disable-next-line max-statements, complexity
87
- function shredRecordFields(fields, record, data, rLevel, dLevel) {
88
- for (const name in fields) {
89
- const field = fields[name];
90
- // fetch values
91
- let values = [];
92
- if (record &&
93
- field.name in record &&
94
- record[field.name] !== undefined &&
95
- record[field.name] !== null) {
96
- if (record[field.name].constructor === Array) {
97
- values = record[field.name];
98
- }
99
- else {
100
- values.push(record[field.name]);
101
- }
102
- }
103
- // check values
104
- if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {
105
- throw new Error(`missing required field: ${field.name}`);
106
- }
107
- if (values.length > 1 && field.repetitionType !== 'REPEATED') {
108
- throw new Error(`too many values for field: ${field.name}`);
109
- }
110
- // push null
111
- if (values.length === 0) {
112
- if (field.isNested) {
113
- shredRecordFields(field.fields, null, data, rLevel, dLevel);
114
- }
115
- else {
116
- data[field.key].count += 1;
117
- data[field.key].rlevels.push(rLevel);
118
- data[field.key].dlevels.push(dLevel);
119
- }
120
- continue; // eslint-disable-line no-continue
121
- }
122
- // push values
123
- for (let i = 0; i < values.length; i++) {
124
- const rlvl = i === 0 ? rLevel : field.rLevelMax;
125
- if (field.isNested) {
126
- shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
127
- }
128
- else {
129
- data[field.key].count += 1;
130
- data[field.key].rlevels.push(rlvl);
131
- data[field.key].dlevels.push(field.dLevelMax);
132
- data[field.key].values.push(Types.toPrimitive((field.originalType || field.primitiveType), values[i]));
133
- }
134
- }
135
- }
136
- }
137
- /**
138
- * 'Materialize' a list of <value, repetition_level, definition_level>
139
- * tuples back to nested records (objects/arrays) using the Google Dremel
140
- * Algorithm..
141
- *
142
- * The buffer argument must point to an object with the following structure (i.e.
143
- * the same structure that is returned by shredRecords):
144
- *
145
- * buffer = {
146
- * columnData: [
147
- * 'my_col': {
148
- * dlevels: [d1, d2, .. dN],
149
- * rlevels: [r1, r2, .. rN],
150
- * values: [v1, v2, .. vN],
151
- * }, ...
152
- * ],
153
- * rowCount: X,
154
- * }
155
- */
156
- function materializeRecords(schema, buffer) {
157
- const records = [];
158
- for (let i = 0; i < buffer.rowCount; i++) {
159
- records.push({});
160
- }
161
- for (const key in buffer.columnData) {
162
- const columnData = buffer.columnData[key];
163
- if (columnData.count) {
164
- materializeColumn(schema, columnData, key, records);
165
- }
166
- }
167
- return records;
168
- }
169
- exports.materializeRecords = materializeRecords;
170
- // eslint-disable-next-line max-statements, complexity
171
- function materializeColumn(schema, columnData, key, records) {
172
- const field = schema.findField(key);
173
- const branch = schema.findFieldBranch(key);
174
- // tslint:disable-next-line:prefer-array-literal
175
- const rLevels = new Array(field.rLevelMax + 1).fill(0);
176
- let vIndex = 0;
177
- for (let i = 0; i < columnData.count; i++) {
178
- const dLevel = columnData.dlevels[i];
179
- const rLevel = columnData.rlevels[i];
180
- rLevels[rLevel]++;
181
- rLevels.fill(0, rLevel + 1);
182
- let rIndex = 0;
183
- let record = records[rLevels[rIndex++] - 1];
184
- // Internal nodes - Build a nested row object
185
- for (const step of branch) {
186
- if (step === field || dLevel < step.dLevelMax) {
187
- break;
188
- }
189
- switch (step.repetitionType) {
190
- case 'REPEATED':
191
- if (!(step.name in record)) {
192
- // eslint-disable max-depth
193
- record[step.name] = [];
194
- }
195
- const ix = rLevels[rIndex++];
196
- while (record[step.name].length <= ix) {
197
- // eslint-disable max-depth
198
- record[step.name].push({});
199
- }
200
- record = record[step.name][ix];
201
- break;
202
- default:
203
- record[step.name] = record[step.name] || {};
204
- record = record[step.name];
205
- }
206
- }
207
- // Leaf node - Add the value
208
- if (dLevel === field.dLevelMax) {
209
- const value = Types.fromPrimitive(
210
- // @ts-ignore
211
- field.originalType || field.primitiveType, columnData.values[vIndex], field);
212
- vIndex++;
213
- switch (field.repetitionType) {
214
- case 'REPEATED':
215
- if (!(field.name in record)) {
216
- // eslint-disable max-depth
217
- record[field.name] = [];
218
- }
219
- const ix = rLevels[rIndex];
220
- while (record[field.name].length <= ix) {
221
- // eslint-disable max-depth
222
- record[field.name].push(null);
223
- }
224
- record[field.name][ix] = value;
225
- break;
226
- default:
227
- record[field.name] = value;
228
- }
229
- }
230
- }
231
- }
232
- // Columnar export
233
- /**
234
- * 'Materialize' a list of <value, repetition_level, definition_level>
235
- * tuples back to nested records (objects/arrays) using the Google Dremel
236
- * Algorithm..
237
- *
238
- * The buffer argument must point to an object with the following structure (i.e.
239
- * the same structure that is returned by shredRecords):
240
- *
241
- * buffer = {
242
- * columnData: [
243
- * 'my_col': {
244
- * dlevels: [d1, d2, .. dN],
245
- * rlevels: [r1, r2, .. rN],
246
- * values: [v1, v2, .. vN],
247
- * }, ...
248
- * ],
249
- * rowCount: X,
250
- * }
251
- *
252
- export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
253
- const columns: ParquetRecord = {};
254
- for (const key in buffer.columnData) {
255
- const columnData = buffer.columnData[key];
256
- if (columnData.count) {
257
- extractColumn(schema, columnData, key, columns);
258
- }
259
- }
260
- return columns;
261
- }
262
-
263
- // eslint-disable-next-line max-statements, complexity
264
- function extractColumn(
265
- schema: ParquetSchema,
266
- columnData: ParquetData,
267
- key: string,
268
- columns: Record<string, unknown>
269
- ) {
270
- if (columnData.count <= 0) {
271
- return;
272
- }
273
-
274
- const record = columns;
275
-
276
- const field = schema.findField(key);
277
- const branch = schema.findFieldBranch(key);
278
-
279
- // tslint:disable-next-line:prefer-array-literal
280
- const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
281
- let vIndex = 0;
282
-
283
- let i = 0;
284
- const dLevel = columnData.dlevels[i];
285
- const rLevel = columnData.rlevels[i];
286
- rLevels[rLevel]++;
287
- rLevels.fill(0, rLevel + 1);
288
-
289
- let rIndex = 0;
290
- let record = records[rLevels[rIndex++] - 1];
291
-
292
- // Internal nodes
293
- for (const step of branch) {
294
- if (step === field || dLevel < step.dLevelMax) {
295
- break;
296
- }
297
-
298
- switch (step.repetitionType) {
299
- case 'REPEATED':
300
- if (!(step.name in record)) {
301
- // eslint-disable max-depth
302
- record[step.name] = [];
303
- }
304
- const ix = rLevels[rIndex++];
305
- while (record[step.name].length <= ix) {
306
- // eslint-disable max-depth
307
- record[step.name].push({});
308
- }
309
- record = record[step.name][ix];
310
- break;
311
-
312
- default:
313
- record[step.name] = record[step.name] || {};
314
- record = record[step.name];
315
- }
316
- }
317
-
318
- // Leaf node
319
- if (dLevel === field.dLevelMax) {
320
- const value = Types.fromPrimitive(
321
- // @ts-ignore
322
- field.originalType || field.primitiveType,
323
- columnData.values[vIndex],
324
- field
325
- );
326
- vIndex++;
327
-
328
- switch (field.repetitionType) {
329
- case 'REPEATED':
330
- if (!(field.name in record)) {
331
- // eslint-disable max-depth
332
- record[field.name] = [];
333
- }
334
- const ix = rLevels[rIndex];
335
- while (record[field.name].length <= ix) {
336
- // eslint-disable max-depth
337
- record[field.name].push(null);
338
- }
339
- record[field.name][ix] = value;
340
- break;
341
-
342
- default:
343
- record[field.name] = value;
344
- }
345
- }
346
- }
347
- */