@gscdump/engine 0.31.3 → 0.31.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2720 @@
1
/*
 * Enum lookup tables. Each array is indexed by a numeric code and yields the
 * symbolic name used throughout the reader (e.g. `ParquetTypes$1[field.field_1]`),
 * so the position of every entry is significant and must not be reordered.
 */

/** Physical type names, indexed by the schema element / column type code. */
const ParquetTypes$1 = [
  "BOOLEAN", "INT32", "INT64", "INT96",
  "FLOAT", "DOUBLE", "BYTE_ARRAY", "FIXED_LEN_BYTE_ARRAY"
];

/** Encoding names, indexed by encoding code. */
const Encodings$1 = [
  "PLAIN", "GROUP_VAR_INT", "PLAIN_DICTIONARY", "RLE", "BIT_PACKED",
  "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY",
  "RLE_DICTIONARY", "BYTE_STREAM_SPLIT"
];

/** Repetition type names, indexed by repetition code. */
const FieldRepetitionTypes$1 = ["REQUIRED", "OPTIONAL", "REPEATED"];

/** Converted (legacy annotation) type names, indexed by converted-type code. */
const ConvertedTypes$1 = [
  "UTF8", "MAP", "MAP_KEY_VALUE", "LIST", "ENUM", "DECIMAL", "DATE",
  "TIME_MILLIS", "TIME_MICROS", "TIMESTAMP_MILLIS", "TIMESTAMP_MICROS",
  "UINT_8", "UINT_16", "UINT_32", "UINT_64",
  "INT_8", "INT_16", "INT_32", "INT_64",
  "JSON", "BSON", "INTERVAL"
];

/** Compression codec names, indexed by codec code. */
const CompressionCodecs$1 = [
  "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO", "BROTLI", "LZ4", "ZSTD", "LZ4_RAW"
];

/** Page type names, indexed by page-type code. */
const PageTypes$1 = ["DATA_PAGE", "INDEX_PAGE", "DICTIONARY_PAGE", "DATA_PAGE_V2"];

/** Boundary order names, indexed by boundary-order code. */
const BoundaryOrders$1 = ["UNORDERED", "ASCENDING", "DESCENDING"];

/** Edge interpolation algorithm names used for GEOGRAPHY logical types. */
const EdgeInterpolationAlgorithms$1 = [
  "SPHERICAL", "VINCENTY", "THOMAS", "ANDOYER", "KARNEY"
];
80
/**
 * Decode one WKB geometry, starting at `reader.offset`, into a GeoJSON-shaped
 * object. The reader's offset is advanced past the bytes that were consumed.
 *
 * @param {{view: DataView, offset: number}} reader cursor over the WKB bytes
 * @returns {object} `{type, coordinates}` or, for collections, `{type, geometries}`
 * @throws {Error} when the geometry type code is not in the range 1..7
 */
function wkbToGeojson(reader) {
  const header = getFlags(reader);

  // Collect `header.count` items, each produced by one call of `readOne`.
  const repeat = (readOne) => {
    const items = [];
    for (let n = 0; n < header.count; n++) items.push(readOne());
    return items;
  };

  switch (header.type) {
    case 1:
      return { type: "Point", coordinates: readPosition(reader, header) };
    case 2:
      return { type: "LineString", coordinates: readLine(reader, header) };
    case 3:
      return { type: "Polygon", coordinates: readPolygon(reader, header) };
    case 4:
      // Every member of a multi-geometry carries its own header.
      return {
        type: "MultiPoint",
        coordinates: repeat(() => readPosition(reader, getFlags(reader)))
      };
    case 5:
      return {
        type: "MultiLineString",
        coordinates: repeat(() => readLine(reader, getFlags(reader)))
      };
    case 6:
      return {
        type: "MultiPolygon",
        coordinates: repeat(() => readPolygon(reader, getFlags(reader)))
      };
    case 7:
      return {
        type: "GeometryCollection",
        geometries: repeat(() => wkbToGeojson(reader))
      };
    default:
      throw new Error(`Unsupported geometry type: ${header.type}`);
  }
}
124
/**
 * Read a WKB geometry header at `reader.offset` and advance the offset.
 *
 * The header is a byte-order byte (1 means little-endian) followed by a
 * uint32 type code. The code modulo 1000 is the geometry type; the thousands
 * digit selects extra coordinate dimensions (any non-zero value adds one,
 * the value 3 adds a second). For types 2..7 a uint32 element count follows.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {{littleEndian: boolean, type: number, dim: number, count: number}}
 */
function getFlags(reader) {
  const { view } = reader;

  const littleEndian = view.getUint8(reader.offset) === 1;
  reader.offset += 1;

  const code = view.getUint32(reader.offset, littleEndian);
  reader.offset += 4;

  const type = code % 1e3;
  const dimensionFlag = Math.floor(code / 1e3);

  let count = 0;
  const hasCount = type > 1 && type <= 7;
  if (hasCount) {
    count = view.getUint32(reader.offset, littleEndian);
    reader.offset += 4;
  }

  const dim = 2 + (dimensionFlag ? 1 : 0) + (dimensionFlag === 3 ? 1 : 0);

  return { littleEndian, type, dim, count };
}
146
/**
 * Read one coordinate tuple of `flags.dim` float64 values and advance the
 * reader by 8 bytes per value.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {{dim: number, littleEndian: boolean}} flags
 * @returns {number[]}
 */
function readPosition(reader, flags) {
  const coords = [];
  while (coords.length < flags.dim) {
    coords.push(reader.view.getFloat64(reader.offset, flags.littleEndian));
    reader.offset += 8;
  }
  return coords;
}
155
/**
 * Read `flags.count` consecutive positions.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {{count: number, dim: number, littleEndian: boolean}} flags
 * @returns {number[][]}
 */
function readLine(reader, flags) {
  const positions = [];
  for (let remaining = flags.count; remaining > 0; remaining--) {
    positions.push(readPosition(reader, flags));
  }
  return positions;
}
160
/**
 * Read `flags.count` rings. Each ring is a uint32 point count followed by
 * that many positions.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {{count: number, dim: number, littleEndian: boolean}} flags
 * @returns {number[][][]}
 */
function readPolygon(reader, flags) {
  const rings = [];
  for (let ring = 0; ring < flags.count; ring++) {
    const pointCount = reader.view.getUint32(reader.offset, flags.littleEndian);
    reader.offset += 4;
    // Reuse the polygon's byte order and dimension, but with this ring's own count.
    const ringFlags = { ...flags, count: pointCount };
    rings.push(readLine(reader, ringFlags));
  }
  return rings;
}
173
const decoder$5 = new TextDecoder();

/**
 * Default value parsers. `parquetMetadata` merges caller-supplied parsers over
 * these, and `convert` invokes them through the column decoder.
 */
const DEFAULT_PARSERS = {
  /** @param {bigint|number} millis @returns {Date} */
  timestampFromMilliseconds(millis) {
    const ms = Number(millis);
    return new Date(ms);
  },
  /** @param {bigint} micros (divided with BigInt arithmetic) @returns {Date} */
  timestampFromMicroseconds(micros) {
    const ms = Number(micros / 1000n);
    return new Date(ms);
  },
  /** @param {bigint} nanos (divided with BigInt arithmetic) @returns {Date} */
  timestampFromNanoseconds(nanos) {
    const ms = Number(nanos / 1000000n);
    return new Date(ms);
  },
  /** @param {number} days multiplied by 86,400,000 ms @returns {Date} */
  dateFromDays(days) {
    return /* @__PURE__ */ new Date(days * 864e5);
  },
  /** Decode bytes with TextDecoder; falsy input is returned unchanged. */
  stringFromBytes(bytes) {
    if (!bytes) return bytes;
    return decoder$5.decode(bytes);
  },
  /** Decode WKB bytes into a GeoJSON-shaped object; falsy input is returned unchanged. */
  geometryFromBytes(bytes) {
    if (!bytes) return bytes;
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    return wkbToGeojson({ view, offset: 0 });
  },
  /** Same decoding as `geometryFromBytes`. */
  geographyFromBytes(bytes) {
    if (!bytes) return bytes;
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    return wkbToGeojson({ view, offset: 0 });
  },
  /** Format bytes as lowercase hex grouped 8-4-4-4-12; falsy input yields undefined. */
  uuidFromBytes(bytes) {
    if (!bytes) return undefined;
    let hex = "";
    for (const b of bytes) hex += b.toString(16).padStart(2, "0");
    return [
      hex.slice(0, 8),
      hex.slice(8, 12),
      hex.slice(12, 16),
      hex.slice(16, 20),
      hex.slice(20, 32)
    ].join("-");
  }
};
208
/**
 * Resolve dictionary-encoded values, or fall back to `convert` for any other
 * encoding.
 *
 * When a dictionary is present and the encoding name ends in "_DICTIONARY",
 * each entry of `data` is treated as an index into `dictionary`. The lookup is
 * written back into `data` itself, except when `data` is a Uint8Array and the
 * dictionary is not: then a new container of the dictionary's own type is
 * allocated so the looked-up values are not squeezed into bytes.
 *
 * @param {ArrayLike<any>} data decoded values or dictionary indexes
 * @param {ArrayLike<any>|undefined} dictionary
 * @param {string} encoding
 * @param {object} columnDecoder forwarded to `convert`
 * @returns {ArrayLike<any>}
 */
function convertWithDictionary(data, dictionary, encoding, columnDecoder) {
  const isDictionaryEncoded = Boolean(dictionary) && encoding.endsWith("_DICTIONARY");
  if (!isDictionaryEncoded) return convert(data, columnDecoder);

  const needsNewContainer = data instanceof Uint8Array && !(dictionary instanceof Uint8Array);
  const resolved = needsNewContainer ? new dictionary.constructor(data.length) : data;
  for (let idx = 0; idx < data.length; idx++) {
    resolved[idx] = dictionary[data[idx]];
  }
  return resolved;
}
216
/**
 * Convert raw decoded column values into their JavaScript representation,
 * driven by the column's physical type, converted type and logical type.
 * The checks run in a fixed order and the first match wins; data that matches
 * none of them is returned unchanged.
 *
 * @param {ArrayLike<any>} data raw decoded values
 * @param {object} columnDecoder
 * @param {object} columnDecoder.element schema element of the column
 * @param {object} columnDecoder.parsers value parsers (see DEFAULT_PARSERS)
 * @param {boolean} [columnDecoder.utf8=true] decode un-annotated BYTE_ARRAY as strings
 * @param {object[]} [columnDecoder.schemaPath] schema nodes from root to column
 * @returns {ArrayLike<any>}
 * @throws {Error} for BSON and INTERVAL converted types
 */
function convert(data, columnDecoder) {
  const { element, parsers, utf8 = true, schemaPath } = columnDecoder;
  const { type, converted_type: ctype, logical_type: ltype } = element;
  const nullable = element.repetition_type !== "REQUIRED";
  const logical = ltype?.type;

  // Binary leaves under a VARIANT group stay as raw bytes unless marked as strings.
  const underVariant = schemaPath?.some((s) => s.element.logical_type?.type === "VARIANT");
  if (underVariant && type === "BYTE_ARRAY" && ctype !== "UTF8" && logical !== "STRING") {
    return data;
  }

  if (ctype === "DECIMAL") {
    const factor = 10 ** -(element.scale || 0);
    const decimals = new Array(data.length);
    for (let i = 0; i < decimals.length; i++) {
      const raw = data[i];
      const unscaled = raw instanceof Uint8Array ? parseDecimal$1(raw) : Number(raw);
      decimals[i] = unscaled * factor;
    }
    return decimals;
  }

  if (!ctype && type === "INT96") {
    return Array.from(data, (v) => parsers.timestampFromNanoseconds(parseInt96Nanos(v)));
  }

  switch (ctype) {
    case "DATE":
      return Array.from(data, (v) => parsers.dateFromDays(v));
    case "TIMESTAMP_MILLIS":
      return Array.from(data, (v) => parsers.timestampFromMilliseconds(v));
    case "TIMESTAMP_MICROS":
      return Array.from(data, (v) => parsers.timestampFromMicroseconds(v));
    case "JSON":
      return data.map((v) => JSON.parse(decoder$5.decode(v)));
    case "BSON":
      throw new Error("parquet bson not supported");
    case "INTERVAL":
      throw new Error("parquet interval not supported");
  }

  if (logical === "GEOMETRY") return data.map((v) => parsers.geometryFromBytes(v));
  if (logical === "GEOGRAPHY") return data.map((v) => parsers.geographyFromBytes(v));
  if (logical === "UUID") return data.map((v) => parsers.uuidFromBytes(v));

  const isString = ctype === "UTF8" || logical === "STRING" || (utf8 && type === "BYTE_ARRAY");
  if (isString) return data.map((v) => parsers.stringFromBytes(v));

  const isUnsigned = (bits) =>
    ctype === `UINT_${bits}` ||
    (logical === "INTEGER" && ltype.bitWidth === bits && !ltype.isSigned);

  if (isUnsigned(64)) {
    // Reinterpret the same bytes as unsigned when possible instead of copying.
    if (data instanceof BigInt64Array) {
      return new BigUint64Array(data.buffer, data.byteOffset, data.length);
    }
    const unsigned = nullable ? new Array(data.length) : new BigUint64Array(data.length);
    for (let i = 0; i < unsigned.length; i++) unsigned[i] = data[i];
    return unsigned;
  }

  if (isUnsigned(32)) {
    if (data instanceof Int32Array) {
      return new Uint32Array(data.buffer, data.byteOffset, data.length);
    }
    const unsigned = nullable ? new Array(data.length) : new Uint32Array(data.length);
    for (let i = 0; i < unsigned.length; i++) {
      const v = data[i];
      unsigned[i] = v < 0 ? 4294967296 + v : v;
    }
    return unsigned;
  }

  if (logical === "FLOAT16") return Array.from(data).map(parseFloat16);

  if (logical === "TIMESTAMP") {
    const { unit } = ltype;
    const parser =
      unit === "NANOS" ? parsers.timestampFromNanoseconds
        : unit === "MICROS" ? parsers.timestampFromMicroseconds
          : parsers.timestampFromMilliseconds;
    const stamps = new Array(data.length);
    for (let i = 0; i < stamps.length; i++) stamps[i] = parser(data[i]);
    return stamps;
  }

  return data;
}
263
/**
 * Interpret a byte sequence as a big-endian two's-complement integer.
 *
 * @param {Uint8Array} bytes most significant byte first
 * @returns {number} the integer as a Number (0 for empty input); values beyond
 *   Number's exact integer range lose precision in the final conversion
 */
function parseDecimal$1(bytes) {
  if (!bytes.length) return 0;

  let unsigned = 0n;
  for (const b of bytes) {
    unsigned = unsigned * 256n + BigInt(b);
  }

  // A set top bit means the value is negative: subtract 2^bits.
  const bitCount = BigInt(bytes.length * 8);
  const signBit = 1n << (bitCount - 1n);
  const signed = unsigned >= signBit ? unsigned - (1n << bitCount) : unsigned;
  return Number(signed);
}
271
/**
 * Turn a 96-bit timestamp, held as a single BigInt, into a nanosecond count.
 * Bits 64 and above hold a day number; the low 64 bits hold nanoseconds
 * within that day.
 *
 * @param {bigint} value
 * @returns {bigint} (day - 2440588) * 86,400,000,000,000 + nanoseconds
 */
function parseInt96Nanos(value) {
  // NOTE(review): 2440588 is presumably the Julian day number of 1970-01-01 — confirm.
  const DAY_OFFSET = 2440588n;
  const NANOS_PER_DAY = 86400000000000n;
  const LOW_64_BITS = 0xffffffffffffffffn;

  const dayNumber = value >> 64n;
  const nanosOfDay = value & LOW_64_BITS;
  return (dayNumber - DAY_OFFSET) * NANOS_PER_DAY + nanosOfDay;
}
276
/**
 * Decode a little-endian 16-bit float (1 sign bit, 5 exponent bits,
 * 10 fraction bits) from two bytes.
 *
 * @param {ArrayLike<number>|undefined} bytes low byte first
 * @returns {number|undefined} undefined when `bytes` is falsy
 */
function parseFloat16(bytes) {
  if (!bytes) return undefined;

  const bits = (bytes[1] << 8) | bytes[0];
  const sign = bits >> 15 ? -1 : 1;
  const exponent = (bits >> 10) & 0x1f;
  const fraction = bits & 0x3ff;

  // All-ones exponent: infinity, or NaN when any fraction bit is set.
  if (exponent === 31) return fraction ? NaN : sign * Infinity;
  // Zero exponent: subnormal range (also covers signed zero).
  if (exponent === 0) return sign * 2 ** -14 * (fraction / 1024);
  return sign * 2 ** (exponent - 15) * (1 + fraction / 1024);
}
286
/**
 * Build a tree node for the schema element at `rootIndex`. The schema is a
 * flat, depth-first list in which each element's `num_children` says how many
 * direct children follow it.
 *
 * @param {object[]} schema flat list of schema elements
 * @param {number} rootIndex index of the element to build the node for
 * @param {string[]} path names leading to this element
 * @returns {{count: number, element: object, children: object[], path: string[]}}
 *   `count` is the number of schema entries covered by this node, itself included
 * @throws {Error} when the schema ends before all declared children were found
 */
function schemaTree$1(schema, rootIndex, path) {
  const element = schema[rootIndex];
  if (!element) {
    throw new Error(`parquet schema invalid: missing element at index ${rootIndex}`);
  }
  const children = [];
  let count = 1;
  if (element.num_children) {
    while (children.length < element.num_children) {
      const childIndex = rootIndex + count;
      const childElement = schema[childIndex];
      // Fail with a clear message instead of a TypeError on `childElement.name`.
      if (!childElement) {
        throw new Error(`parquet schema invalid: "${element.name}" declares ${element.num_children} children but the schema ends at index ${childIndex}`);
      }
      const child = schemaTree$1(schema, childIndex, [...path, childElement.name]);
      // Step over the child's whole subtree before looking for the next sibling.
      count += child.count;
      children.push(child);
    }
  }
  return {
    count,
    element,
    children,
    path
  };
}
303
/**
 * Walk the schema tree from the root along `name`, collecting every node
 * visited.
 *
 * @param {object[]} schema flat list of schema elements
 * @param {string[]} name element names from the root's children down to the target
 * @returns {object[]} tree nodes starting with the root and ending at the target
 * @throws {Error} when a name along the way has no matching child
 */
function getSchemaPath$1(schema, name) {
  const root = schemaTree$1(schema, 0, []);
  const visited = [root];
  let current = root;
  for (const part of name) {
    const next = current.children.find(({ element }) => element.name === part);
    if (!next) throw new Error(`parquet schema element not found: ${name}`);
    visited.push(next);
    current = next;
  }
  return visited;
}
314
/**
 * List the dotted paths of all leaf nodes of a schema tree, in depth-first,
 * left-to-right order.
 *
 * @param {{children: object[], path: string[]}} schemaTree root node
 * @returns {string[]}
 */
function getPhysicalColumns(schemaTree) {
  const columns = [];
  const pending = [schemaTree];
  while (pending.length) {
    const node = pending.pop();
    if (node.children.length) {
      // Push in reverse so the first child is popped, and therefore visited, first.
      for (let i = node.children.length - 1; i >= 0; i--) pending.push(node.children[i]);
    } else {
      columns.push(node.path.join("."));
    }
  }
  return columns;
}
323
/**
 * Count the REPEATED elements along a schema path.
 *
 * @param {Iterable<{element: {repetition_type?: string}}>} schemaPath
 * @returns {number}
 */
function getMaxRepetitionLevel(schemaPath) {
  let level = 0;
  for (const node of schemaPath) {
    level += node.element.repetition_type === "REPEATED" ? 1 : 0;
  }
  return level;
}
328
/**
 * Count the elements along a schema path, excluding the first entry, whose
 * repetition type is anything other than REQUIRED.
 *
 * @param {{element: {repetition_type?: string}}[]} schemaPath
 * @returns {number}
 */
function getMaxDefinitionLevel$1(schemaPath) {
  return schemaPath
    .slice(1)
    .reduce((level, { element }) => (element.repetition_type !== "REQUIRED" ? level + 1 : level), 0);
}
333
/**
 * Check whether a schema node has the shape of a list: converted type LIST,
 * exactly one child, that child REPEATED and holding at most one child itself.
 *
 * @param {{element: object, children: object[]}|undefined} schema
 * @returns {boolean}
 */
function isListLike$1(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "LIST") return false;
  // Require exactly one child. A LIST-annotated node without children used to
  // crash below on `children[0]` being undefined.
  if (schema.children.length !== 1) return false;
  const [firstChild] = schema.children;
  if (firstChild.children.length > 1) return false;
  return firstChild.element.repetition_type === "REPEATED";
}
342
/**
 * Check whether a schema node has the shape of a map: converted type MAP,
 * exactly one REPEATED child with exactly two children, where neither the
 * child named "key" nor the child named "value" is REPEATED.
 *
 * @param {{element: object, children: object[]}|undefined} schema
 * @returns {boolean}
 */
function isMapLike$1(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "MAP") return false;
  // Require exactly one child. A MAP-annotated node without children used to
  // crash below on `children[0]` being undefined.
  if (schema.children.length !== 1) return false;
  const [firstChild] = schema.children;
  if (firstChild.children.length !== 2) return false;
  if (firstChild.element.repetition_type !== "REPEATED") return false;
  const isRepeated = (childName) =>
    firstChild.children.find((child) => child.element.name === childName)?.element.repetition_type === "REPEATED";
  if (isRepeated("key")) return false;
  if (isRepeated("value")) return false;
  return true;
}
353
/**
 * Check whether a schema path describes a flat column: the path has exactly
 * two entries and the second is a non-REPEATED node without children.
 *
 * @param {{element: object, children: object[]}[]} schemaPath
 * @returns {boolean}
 */
function isFlatColumn(schemaPath) {
  if (schemaPath.length !== 2) return false;
  const column = schemaPath[1];
  const isRepeated = column.element.repetition_type === "REPEATED";
  const hasChildren = Boolean(column.children.length);
  return !isRepeated && !hasChildren;
}
360
/*
 * Type codes stored in the low four bits of a compact-protocol field header
 * (and of a list header). They are consumed by `deserializeTCompactProtocol`
 * and `readElement`. The codes 10 and 11 have no constant here.
 */
const STOP = 0; // ends the enclosing struct
const TRUE = 1; // boolean true, value carried by the type code itself
const FALSE = 2; // boolean false, value carried by the type code itself
const BYTE = 3; // one signed byte
const I16 = 4; // zigzag varint
const I32 = 5; // zigzag varint
const I64 = 6; // zigzag varint, decoded as BigInt
const DOUBLE = 7; // 8-byte little-endian float
const BINARY = 8; // varint length followed by raw bytes
const LIST = 9; // header byte (size and element type) followed by elements
const STRUCT = 12; // nested struct
371
/**
 * Decode one compact-protocol struct starting at `reader.offset`.
 *
 * Each field begins with a header byte: the low nibble is the type code and
 * the high nibble is the field-id delta from the previous field. A zero delta
 * means the field id follows as a zigzag varint. Decoding stops at a STOP
 * type code or at the end of the view.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the struct
 * @returns {Object<string, any>} decoded values keyed as `field_<id>`
 */
function deserializeTCompactProtocol(reader) {
  const struct = {};
  let fieldId = 0;
  while (reader.offset < reader.view.byteLength) {
    const header = reader.view.getUint8(reader.offset);
    reader.offset += 1;

    const typeCode = header & 0x0f;
    if (typeCode === STOP) break;

    const idDelta = header >> 4;
    if (idDelta) {
      fieldId += idDelta;
    } else {
      fieldId = readZigZag(reader);
    }
    struct[`field_${fieldId}`] = readElement(reader, typeCode);
  }
  return struct;
}
384
/**
 * Decode a single compact-protocol value of the given type code and advance
 * the reader past it.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} type compact-protocol type code
 * @returns {any} boolean, number, bigint, Uint8Array (a view over the reader's
 *   buffer, not a copy), array, or nested struct object
 * @throws {Error} for a type code that is not handled
 */
function readElement(reader, type) {
  const { view } = reader;
  switch (type) {
    case TRUE:
      return true;
    case FALSE:
      return false;
    case BYTE: {
      const signedByte = view.getInt8(reader.offset);
      reader.offset += 1;
      return signedByte;
    }
    case I16:
    case I32:
      return readZigZag(reader);
    case I64:
      return readZigZagBigInt(reader);
    case DOUBLE: {
      const number = view.getFloat64(reader.offset, true);
      reader.offset += 8;
      return number;
    }
    case BINARY: {
      const byteCount = readVarInt(reader);
      const start = view.byteOffset + reader.offset;
      reader.offset += byteCount;
      return new Uint8Array(view.buffer, start, byteCount);
    }
    case LIST: {
      const header = view.getUint8(reader.offset);
      reader.offset += 1;
      const elemType = header & 0x0f;
      // A size nibble of 15 means the real size follows as a varint.
      const sizeNibble = header >> 4;
      const size = sizeNibble === 15 ? readVarInt(reader) : sizeNibble;
      // Booleans inside a list are stored as one byte each; 1 means true.
      const isBoolList = elemType === TRUE || elemType === FALSE;
      const items = new Array(size);
      for (let i = 0; i < size; i++) {
        items[i] = isBoolList ? readElement(reader, BYTE) === 1 : readElement(reader, elemType);
      }
      return items;
    }
    case STRUCT:
      return deserializeTCompactProtocol(reader);
    default:
      throw new Error(`thrift unhandled type: ${type}`);
  }
}
417
/**
 * Read a variable-length integer: 7 payload bits per byte, least significant
 * group first, with the high bit of each byte marking continuation.
 *
 * The result is accumulated with 32-bit bitwise operators, so a value that
 * uses bit 31 comes back as a negative number.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the varint
 * @returns {number}
 */
function readVarInt(reader) {
  let value = 0;
  for (let shift = 0; ; shift += 7) {
    const byte = reader.view.getUint8(reader.offset++);
    value |= (byte & 0x7f) << shift;
    if ((byte & 0x80) === 0) return value;
  }
}
427
/**
 * Read a variable-length integer of arbitrary size as a BigInt: 7 payload
 * bits per byte, least significant group first, with the high bit of each
 * byte marking continuation.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the varint
 * @returns {bigint}
 */
function readVarBigInt(reader) {
  let value = 0n;
  for (let shift = 0n; ; shift += 7n) {
    const byte = reader.view.getUint8(reader.offset++);
    value |= BigInt(byte & 0x7f) << shift;
    if ((byte & 0x80) === 0) return value;
  }
}
437
/**
 * Read a varint and undo its zigzag encoding: the lowest bit carries the
 * sign and the remaining bits carry the magnitude.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {number} signed 32-bit integer
 */
function readZigZag(reader) {
  const encoded = readVarInt(reader);
  const half = encoded >>> 1;
  // An odd encoding maps to the bitwise complement of the halved value.
  return encoded & 1 ? ~half : half;
}
441
/**
 * BigInt counterpart of `readZigZag`: read a varint and undo its zigzag
 * encoding.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {bigint}
 */
function readZigZagBigInt(reader) {
  const encoded = readVarBigInt(reader);
  const half = encoded >> 1n;
  // An odd encoding maps to the bitwise complement of the halved value.
  return encoded & 1n ? ~half : half;
}
445
/**
 * Tag root-level binary columns with a GEOMETRY or GEOGRAPHY logical type,
 * based on the JSON stored under the "geo" key of the file's key/value
 * metadata. The schema elements are modified in place.
 *
 * Only entries of the "geo" columns object whose `encoding` is "WKB" are
 * used. A column with `edges === "spherical"` becomes GEOGRAPHY, any other
 * becomes GEOMETRY. A CRS string "authority:code" is taken from `crs.id` or,
 * failing that, the first entry of `crs.ids`.
 *
 * Root-level BYTE_ARRAY leaves without a logical type are assigned the
 * matching entry, or `undefined` when no entry has that name. Groups are
 * skipped together with their entire subtree, so a nested leaf that happens
 * to share a geo column's name is never tagged.
 *
 * @param {object[]} schema flat depth-first schema; index 0 is the root
 * @param {{key: string, value?: string}[]|undefined} key_value_metadata
 * @returns {void}
 * @throws {SyntaxError} when the "geo" value is not valid JSON
 */
function markGeoColumns(schema, key_value_metadata) {
  const columns = /* @__PURE__ */ new Map();
  const geo = key_value_metadata?.find(({ key }) => key === "geo")?.value;
  const decodedColumns = (geo && JSON.parse(geo)?.columns) ?? {};
  for (const [name, column] of Object.entries(decodedColumns)) {
    if (column.encoding !== "WKB") continue;
    const type = column.edges === "spherical" ? "GEOGRAPHY" : "GEOMETRY";
    const id = column.crs?.id ?? column.crs?.ids?.[0];
    const crs = id ? `${id.authority}:${id.code.toString()}` : void 0;
    columns.set(name, { type, crs });
  }

  // Start at 1 to skip the root element.
  let i = 1;
  while (i < schema.length) {
    const element = schema[i];
    if (element.num_children) {
      // Skip the group and ALL of its descendants. `pending` counts elements
      // that still have to be consumed: each visited element removes itself
      // and adds its own children.
      let pending = 1;
      while (pending > 0 && i < schema.length) {
        pending += (schema[i].num_children || 0) - 1;
        i++;
      }
      continue;
    }
    if (element.type === "BYTE_ARRAY" && !element.logical_type) {
      element.logical_type = columns.get(element.name);
    }
    i++;
  }
}
468
/** Default number of trailing bytes fetched when looking for the footer: 2^19 (512 KiB). */
const defaultInitialFetchSize$1 = 1 << 19;

const decoder$4 = new TextDecoder();

/**
 * Decode bytes into a string with TextDecoder.
 *
 * @param {Uint8Array|undefined} value
 * @returns {string|undefined} the decoded text; a falsy input is returned as is
 */
function decode(value) {
  if (!value) return value;
  return decoder$4.decode(value);
}
473
/**
 * Read and parse the footer metadata of a parquet file through an
 * asynchronous buffer.
 *
 * The last `initialFetchSize` bytes are fetched first. If the metadata does
 * not fit in that window, the missing leading part is fetched with a second
 * request and both parts are stitched together before parsing.
 *
 * @param {{byteLength: number, slice: (start: number, end: number) => (ArrayBuffer|Promise<ArrayBuffer>)}} asyncBuffer
 * @param {object} [options]
 * @param {object} [options.parsers] custom value parsers, forwarded to `parquetMetadata`
 * @param {number} [options.initialFetchSize] bytes to fetch from the end of
 *   the file; values below 8 are raised to 8 so the footer is always covered
 * @param {boolean} [options.geoparquet=true] forwarded to `parquetMetadata`
 * @returns {Promise<object>} parsed file metadata
 * @throws {Error} on a missing buffer, a file shorter than 8 bytes, a bad
 *   magic number, or a metadata length larger than the file
 */
async function parquetMetadataAsync(asyncBuffer, { parsers, initialFetchSize = defaultInitialFetchSize$1, geoparquet = true } = {}) {
  if (!asyncBuffer || !(asyncBuffer.byteLength >= 0)) throw new Error("parquet expected AsyncBuffer");
  // The footer is a 4-byte metadata length plus a 4-byte magic number. Check
  // up front, like parquetMetadata does, rather than failing with a RangeError
  // from DataView on a negative offset.
  if (asyncBuffer.byteLength < 8) throw new Error("parquet file is too short");
  // The first fetch must always cover the 8 footer bytes.
  const fetchSize = Math.max(8, initialFetchSize);
  const footerOffset = Math.max(0, asyncBuffer.byteLength - fetchSize);
  const footerBuffer = await asyncBuffer.slice(footerOffset, asyncBuffer.byteLength);
  const footerView = new DataView(footerBuffer);
  // 827474256 is "PAR1" read as a little-endian uint32.
  if (footerView.getUint32(footerBuffer.byteLength - 4, true) !== 827474256) throw new Error("parquet file invalid (footer != PAR1)");
  const metadataLength = footerView.getUint32(footerBuffer.byteLength - 8, true);
  if (metadataLength > asyncBuffer.byteLength - 8) throw new Error(`parquet metadata length ${metadataLength} exceeds available buffer ${asyncBuffer.byteLength - 8}`);
  if (metadataLength + 8 > fetchSize) {
    // Metadata starts before the fetched window: fetch the missing head and
    // place the already-fetched tail right behind it.
    const metadataOffset = asyncBuffer.byteLength - metadataLength - 8;
    const metadataBuffer = await asyncBuffer.slice(metadataOffset, footerOffset);
    const combinedBuffer = new ArrayBuffer(metadataLength + 8);
    const combinedView = new Uint8Array(combinedBuffer);
    combinedView.set(new Uint8Array(metadataBuffer));
    combinedView.set(new Uint8Array(footerBuffer), footerOffset - metadataOffset);
    return parquetMetadata(combinedBuffer, {
      parsers,
      geoparquet
    });
  }
  return parquetMetadata(footerBuffer, {
    parsers,
    geoparquet
  });
}
497
/**
 * Parse the footer metadata of a parquet file from an ArrayBuffer whose last
 * bytes are the file's last bytes: the metadata, a 4-byte little-endian
 * metadata length, and the 4-byte magic "PAR1".
 *
 * The metadata is decoded into generic `field_N` structs and then mapped to
 * named properties by the local converters below.
 *
 * @param {ArrayBuffer} arrayBuffer buffer ending with the file footer
 * @param {object} [options]
 * @param {object} [options.parsers] custom value parsers, merged over DEFAULT_PARSERS
 * @param {boolean} [options.geoparquet=true] when true, tag geo columns via `markGeoColumns`
 * @returns {{version: any, schema: object[], num_rows: any, row_groups: object[],
 *   key_value_metadata: object[]|undefined, created_by: string|undefined,
 *   metadata_length: number}}
 * @throws {Error} when the input is not an ArrayBuffer, is shorter than
 *   8 bytes, lacks the magic number, or declares a metadata length larger
 *   than the buffer
 */
function parquetMetadata(arrayBuffer, { parsers, geoparquet = true } = {}) {
  if (!(arrayBuffer instanceof ArrayBuffer)) throw new Error("parquet expected ArrayBuffer");
  const view = new DataView(arrayBuffer);
  const activeParsers = {
    ...DEFAULT_PARSERS,
    ...parsers
  };

  if (view.byteLength < 8) throw new Error("parquet file is too short");
  // "PAR1" read as a little-endian uint32.
  const MAGIC_PAR1 = 827474256;
  if (view.getUint32(view.byteLength - 4, true) !== MAGIC_PAR1) {
    throw new Error("parquet file invalid (footer != PAR1)");
  }
  const metadataLengthOffset = view.byteLength - 8;
  const metadataLength = view.getUint32(metadataLengthOffset, true);
  if (metadataLength > view.byteLength - 8) {
    throw new Error(`parquet metadata length ${metadataLength} exceeds available buffer ${view.byteLength - 8}`);
  }

  const thrift = deserializeTCompactProtocol({
    view,
    offset: metadataLengthOffset - metadataLength
  });

  const toKeyValue = (kv) => ({
    key: decode(kv.field_1),
    value: decode(kv.field_2)
  });

  const toSchemaElement = (field) => ({
    type: ParquetTypes$1[field.field_1],
    type_length: field.field_2,
    repetition_type: FieldRepetitionTypes$1[field.field_3],
    name: decode(field.field_4),
    num_children: field.field_5,
    converted_type: ConvertedTypes$1[field.field_6],
    scale: field.field_7,
    precision: field.field_8,
    field_id: field.field_9,
    logical_type: logicalType(field.field_10)
  });

  const version = thrift.field_1;
  const schema = thrift.field_2.map(toSchemaElement);
  // Elements that have a physical type; column statistics are matched to
  // these by the column's position within its row group.
  const columnSchema = schema.filter((e) => e.type);
  const num_rows = thrift.field_3;

  const toEncodingStat = (stat) => ({
    page_type: PageTypes$1[stat.field_1],
    encoding: Encodings$1[stat.field_2],
    count: stat.field_3
  });

  const toSizeStatistics = (size) => size && {
    unencoded_byte_array_data_bytes: size.field_1,
    repetition_level_histogram: size.field_2,
    definition_level_histogram: size.field_3
  };

  const toBoundingBox = (box) => box && {
    xmin: box.field_1,
    xmax: box.field_2,
    ymin: box.field_3,
    ymax: box.field_4,
    zmin: box.field_5,
    zmax: box.field_6,
    mmin: box.field_7,
    mmax: box.field_8
  };

  const toGeospatialStatistics = (geo) => geo && {
    bbox: toBoundingBox(geo.field_1),
    geospatial_types: geo.field_2
  };

  const toColumnMetaData = (md, columnIndex) => md && {
    type: ParquetTypes$1[md.field_1],
    encodings: md.field_2?.map((e) => Encodings$1[e]),
    path_in_schema: md.field_3.map(decode),
    codec: CompressionCodecs$1[md.field_4],
    num_values: md.field_5,
    total_uncompressed_size: md.field_6,
    total_compressed_size: md.field_7,
    key_value_metadata: md.field_8?.map(toKeyValue),
    data_page_offset: md.field_9,
    index_page_offset: md.field_10,
    dictionary_page_offset: md.field_11,
    statistics: convertStats(md.field_12, columnSchema[columnIndex], activeParsers),
    encoding_stats: md.field_13?.map(toEncodingStat),
    bloom_filter_offset: md.field_14,
    bloom_filter_length: md.field_15,
    size_statistics: toSizeStatistics(md.field_16),
    geospatial_statistics: toGeospatialStatistics(md.field_17)
  };

  const toColumnChunk = (column, columnIndex) => ({
    file_path: decode(column.field_1),
    file_offset: column.field_2,
    meta_data: toColumnMetaData(column.field_3, columnIndex),
    offset_index_offset: column.field_4,
    offset_index_length: column.field_5,
    column_index_offset: column.field_6,
    column_index_length: column.field_7,
    crypto_metadata: column.field_8,
    encrypted_column_metadata: column.field_9
  });

  const toSortingColumn = (sortingColumn) => ({
    column_idx: sortingColumn.field_1,
    descending: sortingColumn.field_2,
    nulls_first: sortingColumn.field_3
  });

  const toRowGroup = (rowGroup) => ({
    columns: rowGroup.field_1.map(toColumnChunk),
    total_byte_size: rowGroup.field_2,
    num_rows: rowGroup.field_3,
    sorting_columns: rowGroup.field_4?.map(toSortingColumn),
    file_offset: rowGroup.field_5,
    total_compressed_size: rowGroup.field_6,
    ordinal: rowGroup.field_7
  });

  const row_groups = thrift.field_4.map(toRowGroup);
  const key_value_metadata = thrift.field_5?.map(toKeyValue);
  const created_by = decode(thrift.field_6);

  if (geoparquet) markGeoColumns(schema, key_value_metadata);

  return {
    version,
    schema,
    num_rows,
    row_groups,
    key_value_metadata,
    created_by,
    metadata_length: metadataLength
  };
}
608
/**
 * Build the schema tree for parsed file metadata and return its root node.
 *
 * @param {{schema: object[]}} metadata parsed file metadata
 * @returns {object} root tree node
 */
function parquetSchema({ schema }) {
  // An empty name yields a path that contains only the root.
  const [root] = getSchemaPath$1(schema, []);
  return root;
}
611
/**
 * Map a decoded logical-type struct (exactly one `field_N` is expected to be
 * set) to a named description object. Input that matches none of the known
 * fields, including `undefined`, is returned unchanged.
 *
 * @param {object|undefined} logicalType decoded struct with `field_N` keys
 * @returns {object|undefined} `{type, ...details}` or the input itself
 * @throws {Error} from `timeUnit` when a TIME/TIMESTAMP entry has no unit set
 */
function logicalType(logicalType) {
  const raw = logicalType;

  if (raw?.field_1) return { type: "STRING" };
  if (raw?.field_2) return { type: "MAP" };
  if (raw?.field_3) return { type: "LIST" };
  if (raw?.field_4) return { type: "ENUM" };

  const decimal = raw?.field_5;
  if (decimal) {
    return { type: "DECIMAL", scale: decimal.field_1, precision: decimal.field_2 };
  }

  if (raw?.field_6) return { type: "DATE" };

  const time = raw?.field_7;
  if (time) {
    return { type: "TIME", isAdjustedToUTC: time.field_1, unit: timeUnit(time.field_2) };
  }

  const timestamp = raw?.field_8;
  if (timestamp) {
    return { type: "TIMESTAMP", isAdjustedToUTC: timestamp.field_1, unit: timeUnit(timestamp.field_2) };
  }

  // field_9 has no mapping here.
  const integer = raw?.field_10;
  if (integer) {
    return { type: "INTEGER", bitWidth: integer.field_1, isSigned: integer.field_2 };
  }

  if (raw?.field_11) return { type: "NULL" };
  if (raw?.field_12) return { type: "JSON" };
  if (raw?.field_13) return { type: "BSON" };
  if (raw?.field_14) return { type: "UUID" };
  if (raw?.field_15) return { type: "FLOAT16" };

  const variant = raw?.field_16;
  if (variant) {
    return { type: "VARIANT", specification_version: variant.field_1 };
  }

  const geometry = raw?.field_17;
  if (geometry) {
    return { type: "GEOMETRY", crs: decode(geometry.field_1) };
  }

  const geography = raw?.field_18;
  if (geography) {
    return {
      type: "GEOGRAPHY",
      crs: decode(geography.field_1),
      algorithm: EdgeInterpolationAlgorithms$1[geography.field_2]
    };
  }

  return raw;
}
657
/**
 * Map a decoded time-unit struct to its name, chosen by which `field_N` is set.
 *
 * @param {{field_1?: any, field_2?: any, field_3?: any}} unit
 * @returns {"MILLIS"|"MICROS"|"NANOS"}
 * @throws {Error} when none of the three fields is set
 */
function timeUnit(unit) {
  const candidates = [
    ["field_1", "MILLIS"],
    ["field_2", "MICROS"],
    ["field_3", "NANOS"]
  ];
  for (const [field, label] of candidates) {
    if (unit[field]) return label;
  }
  throw new Error("parquet time unit required");
}
663
/**
 * Map a decoded column-statistics struct to named properties, converting the
 * min/max byte values according to the column's schema element.
 *
 * @param {object|undefined} stats decoded struct with `field_N` keys
 * @param {object} schema schema element of the column
 * @param {object} parsers value parsers
 * @returns {object|undefined} named statistics; a falsy input is returned as is
 */
function convertStats(stats, schema, parsers) {
  if (!stats) return stats;
  const toValue = (raw) => convertMetadata(raw, schema, parsers);
  return {
    max: toValue(stats.field_1),
    min: toValue(stats.field_2),
    null_count: stats.field_3,
    distinct_count: stats.field_4,
    max_value: toValue(stats.field_5),
    min_value: toValue(stats.field_6),
    is_max_value_exact: stats.field_7,
    is_min_value_exact: stats.field_8
  };
}
675
/**
 * Convert one raw statistics value (bytes) into a JavaScript value, based on
 * the column's physical, converted and logical type. Numeric values are read
 * little-endian. A value that matches no rule is returned unchanged.
 *
 * @param {Uint8Array|undefined} value raw bytes
 * @param {object} schema schema element of the column
 * @param {object} parsers value parsers
 * @returns {any} undefined when `value` is undefined
 */
function convertMetadata(value, schema, parsers) {
  const { type, converted_type, logical_type } = schema;
  if (value === undefined) return value;
  if (type === "BOOLEAN") return value[0] === 1;
  if (type === "BYTE_ARRAY") return parsers.stringFromBytes(value);

  const view = new DataView(value.buffer, value.byteOffset, value.byteLength);
  const size = view.byteLength;

  // The physical types are mutually exclusive, so the checks are grouped by type.
  switch (type) {
    case "FLOAT":
      if (size === 4) return view.getFloat32(0, true);
      break;
    case "DOUBLE":
      if (size === 8) return view.getFloat64(0, true);
      break;
    case "INT32":
      if (converted_type === "DATE") return parsers.dateFromDays(view.getInt32(0, true));
      if (size === 4) return view.getInt32(0, true);
      break;
    case "INT64": {
      const readInt64 = () => view.getBigInt64(0, true);
      if (converted_type === "TIMESTAMP_MILLIS") return parsers.timestampFromMilliseconds(readInt64());
      if (converted_type === "TIMESTAMP_MICROS") return parsers.timestampFromMicroseconds(readInt64());
      if (logical_type?.type === "TIMESTAMP") {
        const unit = logical_type?.unit;
        if (unit === "NANOS") return parsers.timestampFromNanoseconds(readInt64());
        if (unit === "MICROS") return parsers.timestampFromMicroseconds(readInt64());
        // Any other unit is treated as milliseconds.
        return parsers.timestampFromMilliseconds(readInt64());
      }
      if (size === 8) return readInt64();
      break;
    }
  }

  if (converted_type === "DECIMAL") return parseDecimal$1(value) * 10 ** -(schema.scale || 0);
  if (logical_type?.type === "FLOAT16") return parseFloat16(value);
  if (logical_type?.type === "UUID") return parsers.uuidFromBytes(value);
  // Everything else, including plain FIXED_LEN_BYTE_ARRAY, stays as raw bytes.
  return value;
}
697
/**
 * Decode an offset-index struct at the reader's position and map it to named
 * properties.
 *
 * @param {{view: DataView, offset: number}} reader
 * @returns {{page_locations: {offset: any, compressed_page_size: any, first_row_index: any}[],
 *   unencoded_byte_array_data_bytes: any}}
 */
function readOffsetIndex(reader) {
  const { field_1: locations, field_2: unencodedBytes } = deserializeTCompactProtocol(reader);
  const toPageLocation = (loc) => ({
    offset: loc.field_1,
    compressed_page_size: loc.field_2,
    first_row_index: loc.field_3
  });
  return {
    page_locations: locations.map(toPageLocation),
    unencoded_byte_array_data_bytes: unencodedBytes
  };
}
708
/* 64-bit hashing with BigInt arithmetic. Every intermediate result is masked back to 64 bits. */

/** All 64 bits set; used to wrap results to unsigned 64-bit. */
const MASK$1 = 0xffffffffffffffffn;
const PRIME1$1 = 11400714785074694791n;
const PRIME2$1 = 14029467366897019727n;
const PRIME3$1 = 1609587929392839161n;
const PRIME4$1 = 9650029242287828579n;
const PRIME5$1 = 2870177450012600261n;

/** Rotate the 64-bit value `x` left by `r` bits. */
function rotl64$1(x, r) {
  const high = x << r;
  const low = x >> (64n - r);
  return (high | low) & MASK$1;
}

/** Mix one 64-bit input word `val` into the accumulator `acc`. */
function round$1(acc, val) {
  const mixed = (acc + val * PRIME2$1) & MASK$1;
  return (rotl64$1(mixed, 31n) * PRIME1$1) & MASK$1;
}

/** Fold one lane accumulator `val` into the combined hash `acc`. */
function mergeRound$1(acc, val) {
  const folded = acc ^ round$1(0n, val);
  return (folded * PRIME1$1 + PRIME4$1) & MASK$1;
}

/**
 * Compute the 64-bit xxHash of a byte sequence. Multi-byte words are read
 * little-endian.
 *
 * @param {Uint8Array} input bytes to hash (any typed-array view works)
 * @param {bigint} [seed=0n]
 * @returns {bigint} unsigned 64-bit hash
 */
function xxhash64$1(input, seed = 0n) {
  const view = new DataView(input.buffer, input.byteOffset, input.byteLength);
  const len = input.byteLength;
  let pos = 0;
  let hash;

  if (len >= 32) {
    // Four lanes, each consuming 8 bytes of every 32-byte stripe.
    const lanes = [
      (seed + PRIME1$1 + PRIME2$1) & MASK$1,
      (seed + PRIME2$1) & MASK$1,
      seed,
      (seed - PRIME1$1) & MASK$1
    ];
    for (; pos + 32 <= len; pos += 32) {
      for (let lane = 0; lane < 4; lane++) {
        lanes[lane] = round$1(lanes[lane], view.getBigUint64(pos + lane * 8, true));
      }
    }
    hash = (rotl64$1(lanes[0], 1n) + rotl64$1(lanes[1], 7n) + rotl64$1(lanes[2], 12n) + rotl64$1(lanes[3], 18n)) & MASK$1;
    for (const lane of lanes) hash = mergeRound$1(hash, lane);
  } else {
    hash = (seed + PRIME5$1) & MASK$1;
  }

  hash = (hash + BigInt(len)) & MASK$1;

  // Remaining whole 8-byte words.
  for (; pos + 8 <= len; pos += 8) {
    hash ^= round$1(0n, view.getBigUint64(pos, true));
    hash = (rotl64$1(hash, 27n) * PRIME1$1 + PRIME4$1) & MASK$1;
  }

  // At most one remaining 4-byte word.
  if (pos + 4 <= len) {
    hash ^= (BigInt(view.getUint32(pos, true)) * PRIME1$1) & MASK$1;
    hash = (rotl64$1(hash, 23n) * PRIME2$1 + PRIME3$1) & MASK$1;
    pos += 4;
  }

  // Remaining single bytes.
  for (; pos < len; pos++) {
    hash ^= (BigInt(view.getUint8(pos)) * PRIME5$1) & MASK$1;
    hash = (rotl64$1(hash, 11n) * PRIME1$1) & MASK$1;
  }

  // Final scrambling of the bits.
  hash ^= hash >> 33n;
  hash = (hash * PRIME2$1) & MASK$1;
  hash ^= hash >> 29n;
  hash = (hash * PRIME3$1) & MASK$1;
  hash ^= hash >> 32n;
  return hash;
}
775
const textEncoder$1 = new TextEncoder();

/**
 * Eight 32-bit multipliers used by `blockMask`, one per 32-bit word of a
 * bloom-filter block.
 */
const SALT$1 = Uint32Array.of(
  1203114875, 1150766481, 2284105051, 2729912477,
  1884591559, 770785867, 2667333959, 1550580529
);
786
/**
 * Pick a block for a hash: scale the upper 32 bits of the hash into the
 * range [0, numBlocks) by multiplying and shifting, without a modulo.
 *
 * @param {bigint} hash unsigned 64-bit hash
 * @param {number} numBlocks number of blocks in the filter
 * @returns {number} block index
 */
function blockIndex(hash, numBlocks) {
  const upper32 = hash >> 32n;
  const scaled = (upper32 * BigInt(numBlocks)) >> 32n;
  return Number(scaled);
}
789
/**
 * Build the 8-word bit mask for a hash. For each salt, the low 32 bits of the
 * hash are multiplied by the salt (32-bit wrap-around), and the top 5 bits of
 * the product select which single bit is set in that word.
 *
 * @param {bigint} hash unsigned 64-bit hash
 * @returns {Uint32Array} eight words, each with exactly one bit set
 */
function blockMask(hash) {
  const low32 = Number(hash & 0xffffffffn) | 0;
  return Uint32Array.from(SALT$1, (salt) => 1 << (Math.imul(low32, salt) >>> 27));
}
795
/**
 * Test whether a hash may be present in a split-block bloom filter.
 *
 * @param {Uint32Array} blocks filter words, eight per block
 * @param {bigint} hash unsigned 64-bit hash of the probed value
 * @returns {boolean} false when at least one mask bit is missing from the
 *   selected block, true when all eight are set
 */
function sbbfContains(blocks, hash) {
  const blockCount = blocks.length >> 3;
  const firstWord = blockIndex(hash, blockCount) << 3;
  const mask = blockMask(hash);
  for (let word = 0; word < 8; word++) {
    const bitPresent = (blocks[firstWord + word] & mask[word]) !== 0;
    if (!bitPresent) return false;
  }
  return true;
}
801
/**
 * Read a bloom filter: a header struct followed by the filter's bit set,
 * which is loaded as little-endian 32-bit words.
 *
 * Returns undefined when the header does not describe a filter this reader
 * accepts: the byte count (field_1) must be a positive multiple of 32, and
 * `field_2.field_1`, `field_3.field_1` and `field_4.field_1` must all be set.
 * NOTE(review): those three presumably select the algorithm, hash and
 * compression variants — confirm against the format definition.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the filter on success
 * @returns {{numBytes: number, blocks: Uint32Array}|undefined}
 * @throws {Error} when fewer than `numBytes` bytes remain after the header
 */
function readBloomFilter(reader) {
  const header = deserializeTCompactProtocol(reader);
  const numBytes = header.field_1;

  const sizeIsValid = typeof numBytes === "number" && numBytes > 0 && numBytes % 32 === 0;
  if (!sizeIsValid) return undefined;

  for (const variant of [header.field_2, header.field_3, header.field_4]) {
    if (!variant?.field_1) return undefined;
  }

  const { view, offset } = reader;
  const available = view.byteLength - offset;
  if (numBytes > available) {
    throw new Error(`parquet bloom filter truncated: need ${numBytes} bytes, have ${view.byteLength - offset}`);
  }

  const blocks = new Uint32Array(numBytes >> 2);
  blocks.forEach((_, word) => {
    blocks[word] = view.getUint32(offset + word * 4, true);
  });
  reader.offset = offset + numBytes;

  return { numBytes, blocks };
}
818
/**
 * Hash a filter value the way a column of the given schema element stores it,
 * so the result can be probed against that column's bloom filter.
 *
 * Returns undefined whenever the value cannot be hashed reliably: null or
 * undefined input, a JS type that does not fit the physical type, an
 * unhandled physical type, or a converted/logical type that this function
 * deliberately excludes.
 *
 * @param {any} value filter operand
 * @param {{type: string, converted_type?: string, logical_type?: {type: string}}} element
 * @returns {bigint | undefined} result of xxhash64$1 over the little-endian
 *   (or raw) bytes, or undefined when hashing is not possible
 */
function hashParquetValue$1(value, element) {
  if (value === null || value === void 0) return void 0;
  const { type, converted_type, logical_type } = element;
  const logical = logical_type?.type;
  // Serialise a fixed-width value into a fresh buffer, then hash the bytes.
  const hashFixed = (byteLength, write) => {
    const buf = new ArrayBuffer(byteLength);
    write(new DataView(buf));
    return xxhash64$1(new Uint8Array(buf));
  };
  switch (type) {
    case "BOOLEAN":
      if (typeof value !== "boolean") return void 0;
      return xxhash64$1(new Uint8Array([value ? 1 : 0]));
    case "FLOAT":
      if (typeof value !== "number") return void 0;
      return hashFixed(4, (dv) => dv.setFloat32(0, value, true));
    case "DOUBLE":
      if (typeof value !== "number") return void 0;
      return hashFixed(8, (dv) => dv.setFloat64(0, value, true));
    case "INT32":
      if (["DATE", "DECIMAL", "TIME_MILLIS"].includes(converted_type)) return void 0;
      if (["DATE", "TIME", "DECIMAL"].includes(logical)) return void 0;
      if (typeof value !== "number" || !Number.isInteger(value)) return void 0;
      return hashFixed(4, (dv) => dv.setInt32(0, value | 0, true));
    case "INT64": {
      if (["TIMESTAMP_MILLIS", "TIMESTAMP_MICROS", "TIME_MICROS", "DECIMAL"].includes(converted_type)) return void 0;
      if (["TIMESTAMP", "TIME", "DECIMAL"].includes(logical)) return void 0;
      let wide;
      if (typeof value === "bigint") wide = value;
      else if (typeof value === "number" && Number.isSafeInteger(value)) wide = BigInt(value);
      else return void 0;
      // Negative values are stored as their 64-bit two's-complement pattern.
      return hashFixed(8, (dv) => dv.setBigUint64(0, BigInt.asUintN(64, wide), true));
    }
    case "BYTE_ARRAY":
      if (["JSON", "BSON", "DECIMAL"].includes(converted_type)) return void 0;
      if (["JSON", "BSON", "VARIANT", "GEOMETRY", "GEOGRAPHY"].includes(logical)) return void 0;
      if (typeof value === "string") return xxhash64$1(textEncoder$1.encode(value));
      if (value instanceof Uint8Array) return xxhash64$1(value);
      return void 0;
    case "FIXED_LEN_BYTE_ARRAY":
      if (["DECIMAL", "INTERVAL"].includes(converted_type)) return void 0;
      if (["DECIMAL", "UUID", "FLOAT16", "GEOMETRY", "GEOGRAPHY"].includes(logical)) return void 0;
      if (value instanceof Uint8Array) return xxhash64$1(value);
      return void 0;
    default:
      return void 0;
  }
}
873
/**
 * Collect the names of filter fields for which a bloom filter lookup could
 * help, as decided by walkBloomEligible.
 *
 * @param {object | undefined} filter query filter
 * @returns {Set<string>} field names; empty when nothing qualifies
 */
function bloomEligibleColumns(filter) {
  const eligible = new Set();
  walkBloomEligible(filter, eligible);
  return eligible;
}
878
/**
 * Recursively add to `out` every field that is tested for equality.
 *
 * `$and` / `$or` arrays are descended into and `$nor` filters are ignored
 * entirely. A field qualifies when its condition is a bare value (including
 * null or an array) or an operator object containing `$eq` or `$in`.
 *
 * @param {object | undefined} filter query filter or sub-filter
 * @param {Set<string>} out accumulator, mutated in place
 * @returns {void}
 */
function walkBloomEligible(filter, out) {
  if (!filter) return;
  for (const combinator of ["$and", "$or"]) {
    if (combinator in filter && Array.isArray(filter[combinator])) {
      filter[combinator].forEach((sub) => walkBloomEligible(sub, out));
      return;
    }
  }
  if ("$nor" in filter) return;
  for (const field of Object.keys(filter)) {
    if (field.startsWith("$")) continue;
    const condition = filter[field];
    const isOperatorObject = typeof condition === "object" && condition !== null && !Array.isArray(condition);
    if (!isOperatorObject || "$eq" in condition || "$in" in condition) out.add(field);
  }
}
896
/**
 * Convert a decoded value into something JSON.stringify can handle.
 *
 * undefined becomes null, bigint becomes number, -0 becomes 0, Uint8Array
 * becomes a plain number array, Date becomes an ISO string; arrays and
 * objects are converted recursively and object properties whose value is
 * undefined are dropped.
 *
 * @param {any} obj value to convert
 * @returns {any} JSON-friendly copy (primitives are returned as-is)
 */
function toJson$1(obj) {
  if (obj === void 0) return null;
  if (typeof obj === "bigint") return Number(obj);
  if (Object.is(obj, -0)) return 0;
  if (Array.isArray(obj)) return obj.map(toJson$1);
  if (obj instanceof Uint8Array) return Array.from(obj);
  if (obj instanceof Date) return obj.toISOString();
  if (!(obj instanceof Object)) return obj;
  const plain = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value !== void 0) plain[key] = toJson$1(value);
  }
  return plain;
}
913
/**
 * Append every element of `bbb` to `aaa` in place.
 *
 * Elements are pushed in slices of 10,000 so that spreading a large source
 * never passes more than that many arguments to a single push call.
 *
 * @param {any[]} aaa destination array, mutated
 * @param {ArrayLike<any> & {slice: Function}} bbb source array
 * @returns {void}
 */
function concat(aaa, bbb) {
  const chunk = 1e4;
  let start = 0;
  while (start < bbb.length) {
    aaa.push(...bbb.slice(start, start + chunk));
    start += chunk;
  }
}
917
/**
 * Deep equality used by filter matching.
 *
 * Primitives are compared with `===` (or `==` when `strict` is false).
 * Dates are compared by timestamp, Uint8Arrays byte by byte, arrays element
 * by element, and other objects by their own enumerable keys.
 *
 * @param {any} a left operand
 * @param {any} b right operand
 * @param {boolean} [strict=true] use strict equality for primitive leaves
 * @returns {boolean} true when the two values are considered equal
 */
function equals(a, b, strict = true) {
  if (strict ? a === b : a == b) return true;
  if (!a || !b || typeof a !== "object" || typeof b !== "object") return false;
  // Dates have no own enumerable keys, so the generic key walk below would
  // report any two Dates (or a Date and an empty object) as equal.
  if (a instanceof Date || b instanceof Date) {
    return a instanceof Date && b instanceof Date && a.getTime() === b.getTime();
  }
  if (a instanceof Uint8Array && b instanceof Uint8Array) {
    if (a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false;
    return true;
  }
  if (Array.isArray(a) && Array.isArray(b)) {
    if (a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) if (!equals(a[i], b[i], strict)) return false;
    return true;
  }
  const aKeys = Object.keys(a);
  if (aKeys.length !== Object.keys(b).length) return false;
  for (const k of aKeys) {
    // A key missing from b reads as undefined and would otherwise "equal"
    // an explicit undefined in a.
    if (!(k in b) || !equals(a[k], b[k], strict)) return false;
  }
  return true;
}
935
/**
 * Merge a list of chunks into one flat array.
 *
 * @param {any[][] | undefined} chunks arrays to merge
 * @returns {any[]} a new empty array when `chunks` is missing, the single
 *   chunk itself (not a copy) when there is exactly one, otherwise a new
 *   array holding all elements in order
 */
function flatten(chunks) {
  if (!chunks) return [];
  if (chunks.length === 1) return chunks[0];
  const merged = [];
  for (const piece of chunks) {
    concat(merged, piece);
  }
  return merged;
}
942
/**
 * List the top-level columns a filter reads.
 *
 * `$and` / `$or` / `$nor` arrays are expanded recursively (the first of them
 * that is present as an array wins); otherwise every key of the filter
 * contributes the part of its name before the first dot.
 *
 * @param {object | undefined} filter query filter
 * @returns {string[]} de-duplicated column names in first-seen order
 */
function columnsNeededForFilter(filter) {
  if (!filter) return [];
  const seen = new Set();
  const combinator = ["$and", "$or", "$nor"].find((op) => op in filter && Array.isArray(filter[op]));
  if (combinator) {
    for (const sub of filter[combinator]) {
      for (const column of columnsNeededForFilter(sub)) seen.add(column);
    }
  } else {
    for (const key of Object.keys(filter)) seen.add(key.split(".")[0]);
  }
  return [...seen];
}
951
/**
 * Evaluate a filter against a single record.
 *
 * Supports the `$and`, `$or` and `$nor` combinators, bare-value equality and
 * the operators `$gt`, `$gte`, `$lt`, `$lte`, `$eq`, `$ne`, `$in`, `$nin`
 * and `$not`. Operators that are not recognised are treated as satisfied.
 *
 * @param {object} record row to test; fields are looked up with resolve()
 * @param {object} filter query filter
 * @param {boolean} [strict=true] forwarded to equals() for equality tests
 * @returns {boolean} true when the record satisfies the filter
 */
function matchFilter(record, filter, strict = true) {
  const matchesSub = (subQuery) => matchFilter(record, subQuery, strict);
  if ("$and" in filter && Array.isArray(filter.$and)) return filter.$and.every(matchesSub);
  if ("$or" in filter && Array.isArray(filter.$or)) return filter.$or.some(matchesSub);
  if ("$nor" in filter && Array.isArray(filter.$nor)) return !filter.$nor.some(matchesSub);
  for (const [field, condition] of Object.entries(filter)) {
    const value = resolve(record, field);
    const isOperatorObject = typeof condition === "object" && condition !== null && !Array.isArray(condition);
    if (!isOperatorObject) {
      // Bare value (primitive, null or array): plain equality.
      if (!equals(value, condition, strict)) return false;
      continue;
    }
    for (const [operator, target] of Object.entries(condition)) {
      let satisfied;
      switch (operator) {
        case "$gt":
          satisfied = value > target;
          break;
        case "$gte":
          satisfied = value >= target;
          break;
        case "$lt":
          satisfied = value < target;
          break;
        case "$lte":
          satisfied = value <= target;
          break;
        case "$eq":
          satisfied = equals(value, target, strict);
          break;
        case "$ne":
          satisfied = !equals(value, target, strict);
          break;
        case "$in":
          satisfied = Array.isArray(target) && target.includes(value);
          break;
        case "$nin":
          satisfied = Array.isArray(target) && !target.includes(value);
          break;
        case "$not":
          satisfied = !matchFilter({ [field]: value }, { [field]: target }, strict);
          break;
        default:
          satisfied = true;
      }
      if (!satisfied) return false;
    }
  }
  return true;
}
972
/**
 * Decide whether a row group can be skipped because its column statistics
 * or bloom filters prove that no row can satisfy the filter.
 *
 * The answer is conservative: `true` is only returned when a condition is
 * provably unsatisfiable; anything unknown yields `false`.
 *
 * - `$and` skips when any branch can be skipped, `$or` when all can.
 * - `$nor` is never used for skipping.
 * - A bare scalar condition (`{ col: 5 }`) is treated like `{ col: { $eq: 5 } }`.
 * - A null/undefined equality operand is never compared against min/max:
 *   Parquet min/max statistics describe non-null values only, and `null`
 *   would otherwise be coerced to 0 by the relational operators.
 *
 * @param {object} options
 * @param {object} options.rowGroup row group metadata (`columns[i].meta_data.statistics`)
 * @param {string[]} options.physicalColumns column names, index-aligned with `rowGroup.columns`
 * @param {object} [options.filter] query filter
 * @param {boolean} [options.strict=true] forwarded to equals() for `$ne` / `$nin`
 * @param {Record<string, {blocks: Uint32Array}>} [options.bloomFilters] bloom filters of this row group by column name
 * @param {Record<string, object>} [options.schemaElements] schema element by column name, needed to hash values
 * @returns {boolean} true when the row group cannot contain a match
 */
function canSkipRowGroup({ rowGroup, physicalColumns, filter, strict = true, bloomFilters, schemaElements }) {
  if (!filter) return false;
  const canSkipSub = (subFilter) => canSkipRowGroup({
    rowGroup,
    physicalColumns,
    filter: subFilter,
    strict,
    bloomFilters,
    schemaElements
  });
  if ("$and" in filter && Array.isArray(filter.$and)) return filter.$and.some(canSkipSub);
  if ("$or" in filter && Array.isArray(filter.$or)) return filter.$or.every(canSkipSub);
  if ("$nor" in filter && Array.isArray(filter.$nor)) return false;
  for (const [field, condition] of Object.entries(filter)) {
    const columnIndex = physicalColumns.indexOf(field);
    if (columnIndex === -1) continue;
    const { min, max, min_value, max_value } = rowGroup.columns[columnIndex].meta_data?.statistics || {};
    // Prefer min_value / max_value and fall back to the older min / max pair.
    const minVal = min_value !== void 0 ? min_value : min;
    const maxVal = max_value !== void 0 ? max_value : max;
    const haveStats = minVal !== void 0 && maxVal !== void 0;
    const bloom = bloomFilters?.[field];
    const element = schemaElements?.[field];
    // Shorthand equality is matched as equality by matchFilter and is
    // bloom-eligible, so prune on it exactly like an explicit $eq.
    const isScalar = ["string", "number", "bigint", "boolean"].includes(typeof condition);
    const operators = isScalar ? { $eq: condition } : condition || {};
    for (const [operator, target] of Object.entries(operators)) {
      if (haveStats) {
        if (operator === "$gt" && maxVal <= target) return true;
        if (operator === "$gte" && maxVal < target) return true;
        if (operator === "$lt" && minVal >= target) return true;
        if (operator === "$lte" && minVal > target) return true;
        // null/undefined operands can match null rows, which min/max ignore.
        if (operator === "$eq" && target !== null && target !== void 0 && (target < minVal || target > maxVal)) return true;
        if (operator === "$ne" && equals(minVal, maxVal, strict) && equals(minVal, target, strict)) return true;
        if (operator === "$in" && Array.isArray(target) && target.every((v) => v !== null && v !== void 0 && (v < minVal || v > maxVal))) return true;
        if (operator === "$nin" && Array.isArray(target) && equals(minVal, maxVal, strict) && target.includes(minVal)) return true;
      }
      if (bloom && element) {
        if (operator === "$eq") {
          const hash = hashParquetValue$1(target, element);
          if (hash !== void 0 && !sbbfContains(bloom.blocks, hash)) return true;
        }
        if (operator === "$in" && Array.isArray(target) && target.length > 0) {
          // Skip only if every candidate is hashable and definitely absent.
          const allAbsent = target.every((v) => {
            const h = hashParquetValue$1(v, element);
            return h !== void 0 && !sbbfContains(bloom.blocks, h);
          });
          if (allAbsent) return true;
        }
      }
    }
  }
  return false;
}
1032
/**
 * Look up a dot-separated path inside a record.
 *
 * @param {any} record object to read from
 * @param {string} path property path such as "a.b.c"
 * @returns {any} the value at the path, or undefined as soon as a segment
 *   is reached on a null/undefined parent
 */
function resolve(record, path) {
  return path.split(".").reduce((current, part) => current?.[part], record);
}
1037
/** Largest byte span (2 MiB) that adjacent column chunks are merged into one fetch. */
const runLimit = 1 << 21;

/**
 * Work out which row groups and byte ranges are needed to answer a query.
 *
 * Row groups that are empty, fall outside [rowStart, rowEnd) or are ruled
 * out by canSkipRowGroup are left out. For every remaining group the
 * selected column chunks are recorded together with the byte ranges to
 * fetch. Offset-index ranges are appended after all chunk ranges.
 *
 * @param {object} options
 * @param {object} options.metadata file metadata with `row_groups`
 * @param {number} [options.rowStart=0] first row wanted (inclusive)
 * @param {number} [options.rowEnd=Infinity] last row wanted (exclusive)
 * @param {string[]} [options.columns] top-level columns to read; all when omitted
 * @param {object} [options.filter] query filter used for row-group pruning
 * @param {boolean} [options.filterStrict=true] strictness passed to canSkipRowGroup
 * @param {boolean} [options.useOffsetIndex=false] plan offset-index reads for partially selected groups
 * @param {object[]} [options.bloomFiltersByGroup] bloom filters indexed by row-group position
 * @param {object} [options.schemaElements] schema elements by column name
 * @returns {{metadata: object, rowStart: number, rowEnd: number, columns: string[] | undefined, fetches: object[], groups: object[]}}
 *   the plan; a non-finite `rowEnd` is replaced by the total row count
 * @throws {Error} when metadata is missing, a chunk has a `file_path`, or a
 *   chunk lacks `meta_data`
 */
function parquetPlan({ metadata, rowStart = 0, rowEnd = Infinity, columns, filter, filterStrict = true, useOffsetIndex = false, bloomFiltersByGroup, schemaElements }) {
  if (!metadata) throw new Error("parquetPlan requires metadata");
  const groups = [];
  const fetches = [];
  const indexes = [];
  const physicalColumns = getPhysicalColumns(parquetSchema(metadata));
  let groupStart = 0;
  for (const [rgIdx, rowGroup] of metadata.row_groups.entries()) {
    const groupRows = Number(rowGroup.num_rows);
    const groupEnd = groupStart + groupRows;
    const bloomFilters = bloomFiltersByGroup?.[rgIdx];
    const overlapsSelection = groupRows > 0 && groupEnd > rowStart && groupStart < rowEnd;
    const wanted = overlapsSelection && !canSkipRowGroup({
      rowGroup,
      physicalColumns,
      filter,
      strict: filterStrict,
      bloomFilters,
      schemaElements
    });
    if (wanted) {
      // Offset indexes only pay off when part of the group is selected.
      const partiallySelected = rowStart > groupStart || rowEnd < groupEnd;
      const chunks = [];
      for (const chunk of rowGroup.columns) {
        const meta = chunk.meta_data;
        if (chunk.file_path) throw new Error("parquet file_path not supported");
        if (!meta) throw new Error("parquet column metadata is undefined");
        if (columns && !columns.includes(meta.path_in_schema[0])) continue;
        // The chunk starts at its dictionary page when it has one.
        const columnOffset = meta.dictionary_page_offset || meta.data_page_offset;
        const range = {
          startByte: Number(columnOffset),
          endByte: Number(columnOffset + meta.total_compressed_size)
        };
        if (useOffsetIndex && chunk.offset_index_offset && chunk.offset_index_length && partiallySelected) {
          const offsetIndexStart = Number(chunk.offset_index_offset);
          chunks.push({
            columnMetadata: meta,
            offsetIndex: {
              startByte: offsetIndexStart,
              endByte: offsetIndexStart + chunk.offset_index_length
            },
            range
          });
        } else {
          chunks.push({ columnMetadata: meta, range });
        }
      }
      groups.push({
        chunks,
        rowGroup,
        groupStart,
        groupRows,
        selectStart: Math.max(rowStart - groupStart, 0),
        selectEnd: Math.min(rowEnd - groupStart, groupRows)
      });
      // Without a column selection, merge neighbouring chunks into runs of
      // at most runLimit bytes; with one, fetch each chunk on its own.
      let run;
      for (const chunk of chunks) {
        if ("offsetIndex" in chunk) {
          indexes.push(chunk.offsetIndex);
          continue;
        }
        const { range } = chunk;
        if (columns) {
          fetches.push(range);
        } else if (run && range.endByte - run.startByte <= runLimit) {
          run.endByte = range.endByte;
        } else {
          if (run) fetches.push(run);
          run = { ...range };
        }
      }
      if (run) fetches.push(run);
    }
    groupStart = groupEnd;
  }
  // groupStart now equals the total number of rows in the file.
  if (!isFinite(rowEnd)) rowEnd = groupStart;
  fetches.push(...indexes);
  return {
    metadata,
    rowStart,
    rowEnd,
    columns,
    fetches,
    groups
  };
}
1130
/**
 * Fetch and decode the bloom filters that could help prune row groups for
 * the given filter.
 *
 * Only columns reported by bloomEligibleColumns are considered, and row
 * groups that statistics alone already rule out are not fetched. All reads
 * are started before any of them is awaited.
 *
 * @param {object} options
 * @param {{slice: Function}} options.file source of bytes; `slice(start, end)` may return a promise
 * @param {object} options.metadata file metadata with `row_groups`
 * @param {object} [options.filter] query filter
 * @param {boolean} [options.filterStrict=true] strictness passed to canSkipRowGroup
 * @returns {Promise<Array<Record<string, object>>>} one object per row group
 *   mapping column name to its decoded bloom filter (empty when none loaded)
 */
async function prefetchBloomFilters({ file, metadata, filter, filterStrict = true }) {
  const result = metadata.row_groups.map(() => ({}));
  const eligibleCols = bloomEligibleColumns(filter);
  if (eligibleCols.size === 0) return result;
  const physicalColumns = getPhysicalColumns(parquetSchema(metadata));
  // Load one filter and record it under its row group and column.
  const loadBloom = async (rgIdx, colName, start, end) => {
    const buffer = await file.slice(start, end);
    const bloom = readBloomFilter({
      view: new DataView(buffer),
      offset: 0
    });
    if (bloom) result[rgIdx][colName] = bloom;
  };
  const tasks = [];
  for (const [rgIdx, rowGroup] of metadata.row_groups.entries()) {
    const prunedByStats = canSkipRowGroup({
      rowGroup,
      physicalColumns,
      filter,
      strict: filterStrict
    });
    if (prunedByStats) continue;
    for (const colName of eligibleCols) {
      const columnIdx = physicalColumns.indexOf(colName);
      if (columnIdx === -1) continue;
      const meta = rowGroup.columns[columnIdx]?.meta_data;
      if (!meta?.bloom_filter_offset || !meta.bloom_filter_length) continue;
      const start = Number(meta.bloom_filter_offset);
      tasks.push(loadBloom(rgIdx, colName, start, start + meta.bloom_filter_length));
    }
  }
  if (tasks.length) await Promise.all(tasks);
  return result;
}
1163
/**
 * Wrap a file so that the planned byte ranges are requested up front and
 * later `slice` calls are served from those requests.
 *
 * @param {{byteLength: number, slice: Function}} file underlying buffer
 * @param {{fetches: Array<{startByte: number, endByte: number}>}} plan ranges to request immediately
 * @returns {{byteLength: number, slice: Function}} buffer with the same
 *   interface; a slice inside a planned range is cut out of that range's
 *   result, anything else goes straight to `file.slice`
 */
function prefetchAsyncBuffer(file, { fetches }) {
  const promises = fetches.map(({ startByte, endByte }) => file.slice(startByte, endByte));
  return {
    byteLength: file.byteLength,
    slice(start, end = file.byteLength) {
      const index = fetches.findIndex((fetch) => fetch.startByte <= start && end <= fetch.endByte);
      if (index < 0) return file.slice(start, end);
      const covering = fetches[index];
      const pending = promises[index];
      // Exact match: hand back the prefetched result untouched.
      if (covering.startByte === start && covering.endByte === end) return pending;
      const startOffset = start - covering.startByte;
      const endOffset = end - covering.startByte;
      return pending instanceof Promise
        ? pending.then((buffer) => buffer.slice(startOffset, endOffset))
        : pending.slice(startOffset, endOffset);
    }
  };
}
1179
/** Shared UTF-8 decoder for variant strings and dictionary keys. */
const decoder$3 = new TextDecoder("utf-8");

/**
 * Parsed variant metadata, cached per underlying ArrayBuffer. Each entry is
 * a Map keyed by "byteOffset:byteLength" of the metadata view.
 */
const metadataCache = /* @__PURE__ */ new WeakMap();
1181
/**
 * Decode assembled VARIANT column data into plain JavaScript values.
 *
 * Arrays are decoded element by element. An object carrying a `metadata`
 * member is treated as a variant: its shredded `typed_value` and/or binary
 * `value` are decoded, and when both yield a result the shredded fields
 * override the binary ones. Everything else (primitives, null, objects
 * without `metadata`) is returned unchanged.
 *
 * @param {any} value assembled column data
 * @param {object} [parsers=DEFAULT_PARSERS] value parsers passed on to the variant readers
 * @returns {any} decoded data with the same array nesting as the input
 */
function decodeVariantColumn(value, parsers = DEFAULT_PARSERS) {
  if (Array.isArray(value)) return value.map((entry) => decodeVariantColumn(entry, parsers));
  // typeof null is "object"; without this guard the `in` test below throws.
  if (value === null || typeof value !== "object") return value;
  if (!("metadata" in value)) return value;
  const metadata = parseVariantMetadata(value.metadata);
  const shreddedFields = value.typed_value && decodeTypedValue(value.typed_value, metadata, parsers);
  const binaryValue = value.value && readVariant(makeReader(value.value), metadata, parsers);
  if (shreddedFields && binaryValue) return {
    ...binaryValue,
    ...shreddedFields
  };
  return shreddedFields ?? binaryValue;
}
1196
/**
 * Decode the shredded (`typed_value`) part of a variant.
 *
 * - Date: returned as is.
 * - Uint8Array: decoded as a binary variant.
 * - Array: each element decoded recursively.
 * - Object with a non-null `typed_value`: that member is decoded.
 * - Object with a Uint8Array `value`: decoded as a binary variant.
 * - Object with either member but neither usable: null.
 * - Any other object: a new object holding the decoded members whose key
 *   appears in `metadata.dictionary`.
 * - Anything else: returned unchanged.
 *
 * @param {any} typedValue shredded value
 * @param {{dictionary: string[]}} metadata parsed variant metadata
 * @param {object} parsers value parsers passed on to readVariant
 * @returns {any} decoded value
 */
function decodeTypedValue(typedValue, metadata, parsers) {
  if (typedValue instanceof Date) return typedValue;
  if (typedValue instanceof Uint8Array) return readVariant(makeReader(typedValue), metadata, parsers);
  if (Array.isArray(typedValue)) return typedValue.map((element) => decodeTypedValue(element, metadata, parsers));
  if (!typedValue || typeof typedValue !== "object") return typedValue;

  const hasTyped = "typed_value" in typedValue;
  const hasBinary = "value" in typedValue;
  if (hasTyped && typedValue.typed_value !== null && typedValue.typed_value !== void 0) {
    return decodeTypedValue(typedValue.typed_value, metadata, parsers);
  }
  if (hasBinary && typedValue.value instanceof Uint8Array) {
    return readVariant(makeReader(typedValue.value), metadata, parsers);
  }
  if (hasTyped || hasBinary) return null;

  // Shredded object: keep only fields named in the variant dictionary.
  const result = {};
  for (const key of Object.keys(typedValue)) {
    if (metadata.dictionary.includes(key)) {
      result[key] = decodeTypedValue(typedValue[key], metadata, parsers);
    }
  }
  return result;
}
1213
/**
 * Create a reader positioned at the start of a byte array.
 *
 * @param {Uint8Array} bytes bytes to read; the view covers exactly this
 *   window of the underlying buffer
 * @returns {{view: DataView, offset: number}} reader with offset 0
 */
function makeReader(bytes) {
  const { buffer, byteOffset, byteLength } = bytes;
  return { view: new DataView(buffer, byteOffset, byteLength), offset: 0 };
}
1219
/**
 * Parse variant metadata (the key dictionary) from its binary form.
 *
 * Header byte: bits 0-3 version (must be 1), bit 4 "sorted" flag, bits 6-7
 * offset width minus one. It is followed by the dictionary size, then
 * size + 1 offsets, then the string bytes. Results are cached per buffer
 * and byte window, so parsing the same bytes again returns the same object.
 *
 * @param {Uint8Array} bytes encoded metadata
 * @returns {{dictionary: string[], sorted: boolean}} parsed metadata
 * @throws {Error} when the version is not 1
 */
function parseVariantMetadata(bytes) {
  const { buffer, byteOffset, byteLength } = bytes;
  let perBuffer = metadataCache.get(buffer);
  if (!perBuffer) {
    perBuffer = new Map();
    metadataCache.set(buffer, perBuffer);
  }
  const cacheKey = `${byteOffset}:${byteLength}`;
  const hit = perBuffer.get(cacheKey);
  if (hit) return hit;

  const reader = makeReader(bytes);
  const header = reader.view.getUint8(reader.offset++);
  const version = header & 15;
  if (version !== 1) throw new Error(`parquet unsupported variant metadata version: ${version}`);
  const sorted = (header >> 4 & 1) === 1;
  const offsetSize = (header >> 6 & 3) + 1;
  const dictionarySize = readUnsigned(reader, offsetSize);
  const offsets = new Array(dictionarySize + 1);
  for (let i = 0; i < offsets.length; i++) {
    offsets[i] = readUnsigned(reader, offsetSize);
  }
  // String offsets are relative to the first byte after the offset table.
  const stringsStart = byteOffset + reader.offset;
  const dictionary = new Array(dictionarySize);
  for (let i = 0; i < dictionarySize; i++) {
    const from = offsets[i];
    const to = offsets[i + 1];
    dictionary[i] = decoder$3.decode(new Uint8Array(buffer, stringsStart + from, to - from));
  }
  const metadata = { dictionary, sorted };
  perBuffer.set(cacheKey, metadata);
  return metadata;
}
1252
/**
 * Read a little-endian unsigned integer and advance the reader.
 *
 * @param {{view: DataView, offset: number}} reader source; its offset is
 *   moved forward by `byteWidth`
 * @param {number} byteWidth number of bytes to read; intended for 1-4
 *   (the accumulation is 32-bit, so wider values would wrap)
 * @returns {number} value in the range 0 .. 2^32 - 1
 */
function readUnsigned(reader, byteWidth) {
  let value = 0;
  for (let i = 0; i < byteWidth; i++) {
    value |= reader.view.getUint8(reader.offset + i) << (i * 8);
  }
  reader.offset += byteWidth;
  // Bitwise operators work on signed 32-bit integers; reinterpret the result
  // as unsigned so 4-byte values with the top bit set do not come out negative.
  return value >>> 0;
}
1258
/**
 * Decode one variant value at the reader's position.
 *
 * The low two bits of the first byte select the basic type (0 primitive,
 * 2 object, 3 array, otherwise a short string); the remaining six bits are
 * the type-specific header. For short strings that header is the byte
 * length.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced past the value
 * @param {{dictionary: string[]}} metadata parsed variant metadata
 * @param {object} parsers value parsers for dates and timestamps
 * @returns {any} decoded value
 */
function readVariant(reader, metadata, parsers) {
  const typeByte = reader.view.getUint8(reader.offset++);
  const header = typeByte >> 2;
  switch (typeByte & 3) {
    case 0:
      return readVariantPrimitive(reader, header, parsers);
    case 2:
      return readVariantObject(reader, header, metadata, parsers);
    case 3:
      return readVariantArray(reader, header, metadata, parsers);
    default: {
      // Short string: `header` bytes of UTF-8 follow the type byte.
      const { view } = reader;
      const text = new Uint8Array(view.buffer, view.byteOffset + reader.offset, header);
      reader.offset += header;
      return decoder$3.decode(text);
    }
  }
}
1269
/**
 * Decode a variant primitive whose type id has already been read.
 *
 * Type ids: 0 null, 1 true, 2 false, 3/4/5 signed 8/16/32-bit integers,
 * 6 and 17 signed 64-bit integers (bigint), 7 float64, 8/9/10 decimals of
 * 4/8/16 bytes, 11 date (via `parsers.dateFromDays`), 12/13 microsecond
 * timestamps, 14 float32, 15 binary, 16 length-prefixed string, 18/19
 * nanosecond timestamps, 20 UUID (formatted as 8-4-4-4-12 lowercase hex).
 * All multi-byte numbers are little-endian.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced past the value
 * @param {number} typeId primitive type id from the value header
 * @param {object} parsers provides dateFromDays, timestampFromMicroseconds
 *   and timestampFromNanoseconds
 * @returns {any} decoded value
 * @throws {Error} for an unknown type id
 */
function readVariantPrimitive(reader, typeId, parsers) {
  const { view } = reader;
  // Read a fixed-width little-endian number, then step over it.
  const take = (width, getter) => {
    const value = view[getter](reader.offset, true);
    reader.offset += width;
    return value;
  };
  switch (typeId) {
    case 0:
      return null;
    case 1:
      return true;
    case 2:
      return false;
    case 3:
      return take(1, "getInt8");
    case 4:
      return take(2, "getInt16");
    case 5:
      return take(4, "getInt32");
    case 6:
    case 17:
      return take(8, "getBigInt64");
    case 7:
      return take(8, "getFloat64");
    case 8:
      return readVariantDecimal(reader, 4);
    case 9:
      return readVariantDecimal(reader, 8);
    case 10:
      return readVariantDecimal(reader, 16);
    case 11:
      return parsers.dateFromDays(take(4, "getInt32"));
    case 12:
    case 13:
      return parsers.timestampFromMicroseconds(take(8, "getBigInt64"));
    case 14:
      return take(4, "getFloat32");
    case 15:
      return readVariantBinary(reader);
    case 16:
      return decoder$3.decode(readVariantBinary(reader));
    case 18:
    case 19:
      return parsers.timestampFromNanoseconds(take(8, "getBigInt64"));
    case 20: {
      const raw = new Uint8Array(view.buffer, view.byteOffset + reader.offset, 16);
      reader.offset += 16;
      const hex = Array.from(raw, (b) => b.toString(16).padStart(2, "0")).join("");
      return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
    }
    default:
      throw new Error(`parquet unsupported variant primitive type: ${typeId}`);
  }
}
1343
/**
 * Decode a variant object whose type byte has already been consumed.
 *
 * Header bits: 0-1 offset width minus one, 2-3 field-id width minus one,
 * bit 4 set when the element count is stored in four bytes instead of one.
 * The layout is: element count, field ids, count + 1 value offsets, values.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced past the object
 * @param {number} header six-bit header taken from the type byte
 * @param {{dictionary: string[]}} metadata key dictionary indexed by field id
 * @param {object} parsers value parsers passed on to readVariant
 * @returns {object} decoded key/value pairs
 */
function readVariantObject(reader, header, metadata, parsers) {
  const offsetWidth = (header & 3) + 1;
  const idWidth = (header >> 2 & 3) + 1;
  const isLarge = header >> 4 & 1;
  const numElements = isLarge ? readUnsigned(reader, 4) : reader.view.getUint8(reader.offset++);
  const fieldIds = new Array(numElements);
  for (let i = 0; i < numElements; i++) {
    fieldIds[i] = readUnsigned(reader, idWidth);
  }
  const offsets = new Array(numElements + 1);
  for (let i = 0; i < offsets.length; i++) {
    offsets[i] = readUnsigned(reader, offsetWidth);
  }
  // Value offsets are relative to the first byte after the offset table.
  const valuesStart = reader.offset;
  const out = {};
  for (let i = 0; i < numElements; i++) {
    const valueReader = { view: reader.view, offset: valuesStart + offsets[i] };
    out[metadata.dictionary[fieldIds[i]]] = readVariant(valueReader, metadata, parsers);
  }
  // The final offset is the total size of the value area.
  reader.offset = valuesStart + offsets[offsets.length - 1];
  return out;
}
1362
/**
 * Decode a variant array whose type byte has already been consumed.
 *
 * Header bits: 0-1 offset width minus one, bit 2 set when the element count
 * is stored in four bytes instead of one. The layout is: element count,
 * count + 1 value offsets, values.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced past the array
 * @param {number} header six-bit header taken from the type byte
 * @param {{dictionary: string[]}} metadata parsed variant metadata
 * @param {object} parsers value parsers passed on to readVariant
 * @returns {any[]} decoded elements in order
 */
function readVariantArray(reader, header, metadata, parsers) {
  const offsetWidth = (header & 3) + 1;
  const countWidth = header >> 2 & 1 ? 4 : 1;
  const numElements = readUnsigned(reader, countWidth);
  const offsets = new Array(numElements + 1);
  for (let i = 0; i < offsets.length; i++) {
    offsets[i] = readUnsigned(reader, offsetWidth);
  }
  // Value offsets are relative to the first byte after the offset table.
  const valuesStart = reader.offset;
  const { view } = reader;
  const result = new Array(numElements);
  for (let i = 0; i < numElements; i++) {
    result[i] = readVariant({ view, offset: valuesStart + offsets[i] }, metadata, parsers);
  }
  reader.offset = valuesStart + offsets[offsets.length - 1];
  return result;
}
1378
/**
 * Decode a variant decimal: one scale byte followed by a little-endian
 * two's-complement unscaled integer of `width` bytes.
 *
 * The result is a JavaScript number, so values beyond double precision are
 * rounded.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced by 1 + width bytes
 * @param {number} width size of the unscaled integer: 4, 8, or (any other value) 16
 * @returns {number} unscaled / 10^scale
 */
function readVariantDecimal(reader, width) {
  const { view } = reader;
  const scale = view.getUint8(reader.offset);
  reader.offset += 1;
  let unscaled;
  if (width === 4) {
    unscaled = BigInt(view.getInt32(reader.offset, true));
    reader.offset += 4;
  } else if (width === 8) {
    unscaled = view.getBigInt64(reader.offset, true);
    reader.offset += 8;
  } else {
    // 128-bit value: unsigned low half, signed high half.
    const low = view.getBigUint64(reader.offset, true);
    unscaled = view.getBigInt64(reader.offset + 8, true) << 64n | low;
    reader.offset += 16;
  }
  // Divide by 10^scale rather than multiply by 10^-scale: negative powers of
  // ten are not exactly representable and add a rounding error
  // (3 * 10 ** -1 is 0.30000000000000004, 3 / 10 is 0.3).
  return Number(unscaled) / 10 ** scale;
}
1395
/**
 * Read a length-prefixed byte string: a little-endian uint32 length followed
 * by that many bytes.
 *
 * @param {{view: DataView, offset: number}} reader source, advanced past the bytes
 * @returns {Uint8Array} view onto the reader's buffer (no copy is made)
 */
function readVariantBinary(reader) {
  const { view } = reader;
  const length = view.getUint32(reader.offset, true);
  const dataStart = reader.offset + 4;
  reader.offset = dataStart;
  const bytes = new Uint8Array(view.buffer, view.byteOffset + dataStart, length);
  reader.offset = dataStart + length;
  return bytes;
}
1402
/**
 * Rebuild nested lists from a column's flat values using its definition and
 * repetition levels, appending the rows to `output`.
 *
 * When neither level array has entries and the column has no optional or
 * repeated ancestors (or no values), `values` is returned untouched.
 *
 * @param {any[]} output array to append to; may already hold rows from a
 *   previous page, in which case a leading non-zero repetition level
 *   continues its last row
 * @param {number[] | undefined} definitionLevels definition level per entry
 * @param {number[]} repetitionLevels repetition level per entry
 * @param {ArrayLike<any>} values non-null leaf values in order
 * @param {Array<{element: {repetition_type?: string}}>} schemaPath schema nodes from root to leaf
 * @returns {any[]} `output`, or `values` in the fast path described above
 */
function assembleLists(output, definitionLevels, repetitionLevels, values, schemaPath) {
  const maxDefinitionLevel = getMaxDefinitionLevel$1(schemaPath);
  let defs = definitionLevels;
  if (!defs?.length && !repetitionLevels.length) {
    if (!maxDefinitionLevel || !values.length) return values;
    // No levels recorded: every value is fully defined.
    defs = new Array(values.length).fill(maxDefinitionLevel);
  }
  const entryCount = defs?.length || repetitionLevels.length;
  const kinds = schemaPath.map(({ element }) => element.repetition_type);
  // Depth of the node that directly holds leaf values.
  const leafParentDepth = kinds.length - 2;
  const stack = [output];
  let container = output;
  let depth = 0;
  let defLevel = 0;
  let repLevel = 0;
  let nextValue = 0;

  // A page that starts mid-row: re-enter the last containers already in output.
  if (repetitionLevels[0]) {
    while (depth < leafParentDepth && repLevel < repetitionLevels[0]) {
      depth++;
      if (kinds[depth] !== "REQUIRED") {
        container = container.at(-1);
        stack.push(container);
        defLevel++;
      }
      if (kinds[depth] === "REPEATED") repLevel++;
    }
  }

  for (let i = 0; i < entryCount; i++) {
    const def = defs?.length ? defs[i] : maxDefinitionLevel;
    const rep = repetitionLevels[i];
    // Climb back up to the list this entry continues.
    while (depth && (rep < repLevel || kinds[depth] !== "REPEATED")) {
      if (kinds[depth] !== "REQUIRED") {
        stack.pop();
        defLevel--;
      }
      if (kinds[depth] === "REPEATED") repLevel--;
      depth--;
    }
    container = stack.at(-1);
    // Descend, opening a new list for each defined non-required level.
    while ((depth < leafParentDepth || kinds[depth + 1] === "REPEATED") && (defLevel < def || kinds[depth + 1] === "REQUIRED")) {
      depth++;
      if (kinds[depth] !== "REQUIRED") {
        const nested = [];
        container.push(nested);
        container = nested;
        stack.push(nested);
        defLevel++;
      }
      if (kinds[depth] === "REPEATED") repLevel++;
    }
    if (def === maxDefinitionLevel) {
      container.push(values[nextValue++]);
    } else if (depth === leafParentDepth) {
      container.push(null);
    } else {
      container.push([]);
    }
  }

  if (!output.length) {
    // Nothing was produced: emit one chain of nested empty lists.
    for (let i = 0; i < maxDefinitionLevel; i++) {
      const nested = [];
      container.push(nested);
      container = nested;
    }
  }
  return output;
}
1459
/**
 * Combine the per-leaf column data of a nested schema node into a single
 * entry keyed by the node's dotted path, working bottom-up.
 *
 * Lists take over their element column, maps zip their key and value
 * columns into objects, and structs turn their child columns into an array
 * of objects (decoded further when the node's logical type is VARIANT).
 * Consumed child entries are removed from the map. Nodes without children
 * are left untouched.
 *
 * @param {Map<string, any[]>} subcolumnData column data by dotted path, mutated in place
 * @param {{path: string[], element: object, children: object[]}} schema node to assemble
 * @param {object} parsers value parsers passed on to decodeVariantColumn
 * @param {number} [depth=0] list nesting depth of this node
 * @returns {void}
 * @throws {Error} when expected child data is missing or map keys and
 *   values differ in length
 */
function assembleNested(subcolumnData, schema, parsers, depth = 0) {
  const path = schema.path.join(".");
  const { repetition_type: repetition } = schema.element;
  const optional = repetition === "OPTIONAL";
  const nextDepth = optional ? depth + 1 : depth;

  if (isListLike$1(schema)) {
    let sublist = schema.children[0];
    let subDepth = nextDepth;
    // Step through the intermediate node when it wraps a single child.
    if (sublist.children.length === 1) {
      sublist = sublist.children[0];
      subDepth++;
    }
    assembleNested(subcolumnData, sublist, parsers, subDepth);
    const subcolumn = sublist.path.join(".");
    const values = subcolumnData.get(subcolumn);
    if (!values) throw new Error("parquet list column missing values");
    if (optional) flattenAtDepth(values, depth);
    subcolumnData.set(path, values);
    subcolumnData.delete(subcolumn);
    return;
  }

  if (isMapLike$1(schema)) {
    const [entries] = schema.children;
    const mapName = entries.element.name;
    const keyPath = `${path}.${mapName}.key`;
    const valuePath = `${path}.${mapName}.value`;
    assembleNested(subcolumnData, entries.children[0], parsers, nextDepth + 1);
    assembleNested(subcolumnData, entries.children[1], parsers, nextDepth + 1);
    const keys = subcolumnData.get(keyPath);
    const values = subcolumnData.get(valuePath);
    if (!keys) throw new Error("parquet map column missing keys");
    if (!values) throw new Error("parquet map column missing values");
    if (keys.length !== values.length) throw new Error("parquet map column key/value length mismatch");
    const out = assembleMaps(keys, values, nextDepth);
    if (optional) flattenAtDepth(out, depth);
    subcolumnData.delete(keyPath);
    subcolumnData.delete(valuePath);
    subcolumnData.set(path, out);
    return;
  }

  if (!schema.children.length) return;

  // Struct: gather the children, then turn columns into rows.
  const invertDepth = repetition === "REQUIRED" ? depth : depth + 1;
  const struct = {};
  for (const child of schema.children) {
    assembleNested(subcolumnData, child, parsers, invertDepth);
    const childData = subcolumnData.get(child.path.join("."));
    if (!childData) throw new Error("parquet struct missing child data");
    struct[child.element.name] = childData;
  }
  for (const child of schema.children) {
    subcolumnData.delete(child.path.join("."));
  }
  let inverted = invertStruct(struct, invertDepth);
  if (schema.element.logical_type?.type === "VARIANT") {
    inverted = decodeVariantColumn(inverted, parsers);
  }
  if (optional) flattenAtDepth(inverted, depth);
  subcolumnData.set(path, inverted);
}
1511
/**
 * Replace, in place, every array found `depth` levels down with its first
 * element (undefined when that array is empty).
 *
 * @param {any[]} arr nested arrays, mutated
 * @param {number} depth number of array levels to descend before unwrapping
 * @returns {void}
 */
function flattenAtDepth(arr, depth) {
  if (depth) {
    for (let i = 0; i < arr.length; i++) flattenAtDepth(arr[i], depth - 1);
    return;
  }
  for (let i = 0; i < arr.length; i++) {
    arr[i] = arr[i][0];
  }
}
1515
/**
 * Zip parallel key and value arrays into objects, `depth` array levels down.
 *
 * At the innermost level each row's keys and values become one object
 * (undefined values are stored as null); a row with no key list becomes
 * undefined.
 *
 * @param {any[]} keys nested key arrays
 * @param {any[]} values nested value arrays with the same shape as `keys`
 * @param {number} depth number of array levels above the rows
 * @returns {any[]} array with the same outer nesting holding objects
 */
function assembleMaps(keys, values, depth) {
  const out = [];
  for (let i = 0; i < keys.length; i++) {
    const rowKeys = keys[i];
    if (depth) {
      out.push(assembleMaps(rowKeys, values[i], depth - 1));
      continue;
    }
    if (!rowKeys) {
      out.push(void 0);
      continue;
    }
    const entry = {};
    for (let j = 0; j < rowKeys.length; j++) {
      const value = values[i][j];
      entry[rowKeys[j]] = value === void 0 ? null : value;
    }
    out.push(entry);
  }
  return out;
}
1528
/**
 * Turn a struct of parallel arrays into an array of structs.
 *
 * The row count is taken from the first key's array. While building each row,
 * every key's array must have that same length. With a non-zero `depth` each
 * row (itself a struct of arrays) is inverted again one level down.
 *
 * @param {Record<string, any[]>} struct
 * @param {number} depth additional levels to invert
 * @returns {any[]}
 * @throws {Error} when the column lengths disagree
 */
function invertStruct(struct, depth) {
  const fields = Object.keys(struct);
  const rowCount = struct[fields[0]]?.length;
  const rows = [];
  for (let row = 0; row < rowCount; row++) {
    const record = {};
    for (const field of fields) {
      const column = struct[field];
      if (column.length !== rowCount) throw new Error("parquet struct parsing error");
      record[field] = column[row];
    }
    rows.push(depth ? invertStruct(record, depth - 1) : record);
  }
  return rows;
}
1543
/**
 * Decode a delta-binary-packed run of integers into `output`.
 *
 * The header holds the block size, the number of miniblocks per block, a third
 * varint that is read and discarded, and the zig-zag encoded first value.
 * Each block then carries a minimum delta, one bit width per miniblock, and
 * the bit-packed deltas.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the consumed bytes
 * @param {number} count number of values to produce
 * @param {Int32Array|BigInt64Array} output receives Numbers when it is an Int32Array, BigInts otherwise
 */
function deltaBinaryUnpack(reader, count, output) {
  const narrow = output instanceof Int32Array;
  const { view } = reader;
  const blockSize = readVarInt(reader);
  const miniblocksPerBlock = readVarInt(reader);
  readVarInt(reader); // third header field is not needed here
  let current = readZigZagBigInt(reader);
  let written = 0;
  output[written++] = narrow ? Number(current) : current;
  const perMiniblock = blockSize / miniblocksPerBlock;

  while (written < count) {
    const minDelta = readZigZagBigInt(reader);
    const widths = new Uint8Array(miniblocksPerBlock);
    for (let m = 0; m < miniblocksPerBlock; m++) {
      widths[m] = view.getUint8(reader.offset++);
    }

    for (let m = 0; m < miniblocksPerBlock && written < count; m++) {
      const width = BigInt(widths[m]);
      if (!width) {
        // Zero-width miniblock: every delta equals minDelta, no packed bytes.
        for (let k = 0; k < perMiniblock && written < count; k++) {
          current += minDelta;
          output[written++] = narrow ? Number(current) : current;
        }
        continue;
      }

      let bitPos = 0n;
      let remaining = perMiniblock;
      const mask = (1n << width) - 1n;
      while (remaining && written < count) {
        let bits = (BigInt(view.getUint8(reader.offset)) >> bitPos) & mask;
        bitPos += width;
        // Pull in following bytes while the value spans byte boundaries.
        while (bitPos >= 8) {
          bitPos -= 8n;
          reader.offset++;
          if (bitPos) {
            bits |= (BigInt(view.getUint8(reader.offset)) << (width - bitPos)) & mask;
          }
        }
        current += minDelta + bits;
        output[written++] = narrow ? Number(current) : current;
        remaining--;
      }
      // Skip the unread remainder of a partially consumed miniblock.
      if (remaining) {
        reader.offset += Math.ceil((remaining * Number(width) + Number(bitPos)) / 8);
      }
    }
  }
}
1583
/**
 * Decode delta-length byte arrays: a delta-binary-packed list of lengths
 * followed by the concatenated bytes. Each output entry is a view onto the
 * reader's buffer, not a copy.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} count number of byte arrays
 * @param {Uint8Array[]} output filled at indexes 0..count-1
 */
function deltaLengthByteArray(reader, count, output) {
  const byteLengths = new Int32Array(count);
  deltaBinaryUnpack(reader, count, byteLengths);
  for (let i = 0; i < count; i++) {
    const size = byteLengths[i];
    const start = reader.view.byteOffset + reader.offset;
    output[i] = new Uint8Array(reader.view.buffer, start, size);
    reader.offset += size;
  }
}
1591
/**
 * Decode delta byte arrays (prefix + suffix encoding).
 *
 * Two delta-binary-packed lists give the prefix and suffix length of each
 * value. A value with a non-zero prefix length is a fresh array built from the
 * first bytes of the previous output plus its suffix; otherwise the value is a
 * view of the suffix bytes in the reader's buffer.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} count number of values
 * @param {Uint8Array[]} output filled at indexes 0..count-1
 */
function deltaByteArray(reader, count, output) {
  const prefixLengths = new Int32Array(count);
  deltaBinaryUnpack(reader, count, prefixLengths);
  const suffixLengths = new Int32Array(count);
  deltaBinaryUnpack(reader, count, suffixLengths);

  for (let i = 0; i < count; i++) {
    const prefixLength = prefixLengths[i];
    const suffixLength = suffixLengths[i];
    const suffix = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, suffixLength);
    if (prefixLength) {
      output[i] = new Uint8Array(prefixLength + suffixLength);
      output[i].set(output[i - 1].subarray(0, prefixLength));
      output[i].set(suffix, prefixLength);
    } else {
      output[i] = suffix;
    }
    reader.offset += suffixLength;
  }
}
1606
/**
 * Decode an RLE / bit-packed hybrid run until `output` is full.
 *
 * When `length` is omitted, a 4-byte little-endian byte length is read first.
 * Whatever was actually consumed, the reader ends positioned `length` bytes
 * after the start of the encoded data.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} width bit width of each value
 * @param {number[]} output its length determines how many values are decoded
 * @param {number} [length] byte length of the encoded run
 */
function readRleBitPackedHybrid(reader, width, output, length) {
  let encodedLength = length;
  if (encodedLength === undefined) {
    encodedLength = reader.view.getUint32(reader.offset, true);
    reader.offset += 4;
  }
  const start = reader.offset;
  let filled = 0;
  while (filled < output.length) {
    const header = readVarInt(reader);
    if (header & 1) {
      // Low bit set: a bit-packed group follows.
      filled = readBitPacked(reader, header, width, output, filled);
      continue;
    }
    const runLength = header >>> 1;
    readRle(reader, runLength, width, output, filled);
    filled += runLength;
  }
  reader.offset = start + encodedLength;
}
1624
/**
 * Read one run-length-encoded value and write it `count` times.
 *
 * The value occupies ceil(bitWidth / 8) bytes, least significant byte first.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the value bytes
 * @param {number} count repetitions to write
 * @param {number} bitWidth bit width of the value
 * @param {number[]} output destination
 * @param {number} seen index of the first slot to write
 */
function readRle(reader, count, bitWidth, output, seen) {
  const byteCount = (bitWidth + 7) >> 3;
  let repeated = 0;
  for (let b = 0; b < byteCount; b++) {
    repeated |= reader.view.getUint8(reader.offset++) << (b << 3);
  }
  const end = seen + count;
  for (let slot = seen; slot < end; slot++) {
    output[slot] = repeated;
  }
}
1630
/**
 * Decode one bit-packed group of the RLE / bit-packed hybrid encoding.
 *
 * The group holds (header >> 1) * 8 values, packed from the least significant
 * bit of each byte. Values beyond `output.length` are consumed but not stored.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {number} header run header whose low bit marked it as bit-packed
 * @param {number} bitWidth bits per value
 * @param {number[]} output destination
 * @param {number} seen index of the first slot to write
 * @returns {number} the next free index in `output`
 * @throws {Error} when no byte is available and the bit width is non-zero
 */
function readBitPacked(reader, header, bitWidth, output, seen) {
  let remaining = (header >> 1) << 3;
  const valueMask = (1 << bitWidth) - 1;

  let bitBuffer = 0;
  if (reader.offset < reader.view.byteLength) {
    bitBuffer = reader.view.getUint8(reader.offset++);
  } else if (valueMask) {
    throw new Error(`parquet bitpack offset ${reader.offset} out of range`);
  }

  let loadedBits = 8; // bits currently held in bitBuffer
  let usedBits = 0; // bits of bitBuffer already consumed
  while (remaining) {
    if (usedBits > 8) {
      // Drop a fully consumed low byte.
      usedBits -= 8;
      loadedBits -= 8;
      bitBuffer >>>= 8;
    } else if (loadedBits - usedBits < bitWidth) {
      // Not enough unread bits for a value: append the next byte.
      bitBuffer |= reader.view.getUint8(reader.offset) << loadedBits;
      reader.offset++;
      loadedBits += 8;
    } else {
      if (seen < output.length) {
        output[seen++] = (bitBuffer >> usedBits) & valueMask;
      }
      remaining--;
      usedBits += bitWidth;
    }
  }
  return seen;
}
1653
/**
 * Decode BYTE_STREAM_SPLIT data: the k-th bytes of all values are stored
 * together, so the streams are interleaved back into whole values.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * width bytes
 * @param {number} count number of values
 * @param {string} type parquet physical type
 * @param {number} [typeLength] byte length for FIXED_LEN_BYTE_ARRAY
 * @returns {Float32Array|Float64Array|Int32Array|BigInt64Array|Uint8Array[]}
 * @throws {Error} for types that have a byte width but no decoder here
 */
function byteStreamSplit(reader, count, type, typeLength) {
  const width = byteWidth(type, typeLength);
  const interleaved = new Uint8Array(count * width);
  for (let stream = 0; stream < width; stream++) {
    for (let value = 0; value < count; value++) {
      interleaved[value * width + stream] = reader.view.getUint8(reader.offset++);
    }
  }

  switch (type) {
    case "FLOAT":
      return new Float32Array(interleaved.buffer);
    case "DOUBLE":
      return new Float64Array(interleaved.buffer);
    case "INT32":
      return new Int32Array(interleaved.buffer);
    case "INT64":
      return new BigInt64Array(interleaved.buffer);
    case "FIXED_LEN_BYTE_ARRAY": {
      const split = new Array(count);
      for (let value = 0; value < count; value++) {
        split[value] = interleaved.subarray(value * width, (value + 1) * width);
      }
      return split;
    }
    default:
      throw new Error(`parquet byte_stream_split unsupported type: ${type}`);
  }
}
1668
/**
 * Byte width of one value of a fixed-width parquet physical type.
 *
 * @param {string} type parquet physical type
 * @param {number} [typeLength] required for FIXED_LEN_BYTE_ARRAY
 * @returns {number}
 * @throws {Error} for other types, or when typeLength is missing
 */
function byteWidth(type, typeLength) {
  if (type === "INT32" || type === "FLOAT") return 4;
  if (type === "INT64" || type === "DOUBLE") return 8;
  if (type === "FIXED_LEN_BYTE_ARRAY") {
    if (!typeLength) throw new Error("parquet byteWidth missing type_length");
    return typeLength;
  }
  throw new Error(`parquet unsupported type: ${type}`);
}
1680
/**
 * Read `count` PLAIN-encoded values of the given physical type by dispatching
 * to the type-specific reader. A count of zero returns an empty array without
 * looking at the type.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {string} type parquet physical type
 * @param {number} count number of values
 * @param {number} [fixedLength] byte length for FIXED_LEN_BYTE_ARRAY
 * @returns {ArrayLike<any>}
 * @throws {Error} for an unknown type or a missing fixed length
 */
function readPlain(reader, type, count, fixedLength) {
  if (count === 0) return [];
  switch (type) {
    case "BOOLEAN":
      return readPlainBoolean(reader, count);
    case "INT32":
      return readPlainInt32(reader, count);
    case "INT64":
      return readPlainInt64(reader, count);
    case "INT96":
      return readPlainInt96(reader, count);
    case "FLOAT":
      return readPlainFloat(reader, count);
    case "DOUBLE":
      return readPlainDouble(reader, count);
    case "BYTE_ARRAY":
      return readPlainByteArray(reader, count);
    case "FIXED_LEN_BYTE_ARRAY":
      if (!fixedLength) throw new Error("parquet missing fixed length");
      return readPlainByteArrayFixed(reader, count, fixedLength);
    default:
      throw new Error(`parquet unhandled type: ${type}`);
  }
}
1694
/**
 * Read `count` PLAIN booleans, one bit each, least significant bit first.
 *
 * @param {{view: DataView, offset: number}} reader advanced by ceil(count / 8) bytes
 * @param {number} count number of booleans
 * @returns {boolean[]}
 */
function readPlainBoolean(reader, count) {
  const flags = new Array(count);
  const base = reader.offset;
  for (let bit = 0; bit < count; bit++) {
    const packed = reader.view.getUint8(base + Math.floor(bit / 8));
    const bitMask = 1 << (bit % 8);
    flags[bit] = (packed & bitMask) !== 0;
  }
  reader.offset += Math.ceil(count / 8);
  return flags;
}
1704
/**
 * Read `count` PLAIN 32-bit integers.
 *
 * When the data starts on a 4-byte boundary of the underlying buffer the
 * result is a view onto that buffer; otherwise the bytes are copied first.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * 4 bytes
 * @param {number} count number of values
 * @returns {Int32Array}
 */
function readPlainInt32(reader, count) {
  const { buffer, byteOffset } = reader.view;
  const start = byteOffset + reader.offset;
  let values;
  if (start % 4) {
    values = new Int32Array(align(buffer, start, count * 4));
  } else {
    values = new Int32Array(buffer, start, count);
  }
  reader.offset += count * 4;
  return values;
}
1709
/**
 * Read `count` PLAIN 64-bit integers.
 *
 * When the data starts on an 8-byte boundary of the underlying buffer the
 * result is a view onto that buffer; otherwise the bytes are copied first.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * 8 bytes
 * @param {number} count number of values
 * @returns {BigInt64Array}
 */
function readPlainInt64(reader, count) {
  const { buffer, byteOffset } = reader.view;
  const start = byteOffset + reader.offset;
  let values;
  if (start % 8) {
    values = new BigInt64Array(align(buffer, start, count * 8));
  } else {
    values = new BigInt64Array(buffer, start, count);
  }
  reader.offset += count * 8;
  return values;
}
1714
/**
 * Read `count` PLAIN INT96 values as BigInts.
 *
 * Each value is 12 little-endian bytes: the low 64 bits followed by the high
 * 32 bits. The low word is read as unsigned so that a set bit 63 does not
 * sign-extend through the OR and erase the high word; only the high word
 * carries the sign of the 96-bit value.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * 12 bytes
 * @param {number} count number of values
 * @returns {bigint[]}
 */
function readPlainInt96(reader, count) {
  const values = new Array(count);
  for (let i = 0; i < count; i++) {
    const base = reader.offset + i * 12;
    const low = reader.view.getBigUint64(base, true);
    const high = reader.view.getInt32(base + 8, true);
    values[i] = (BigInt(high) << 64n) | low;
  }
  reader.offset += count * 12;
  return values;
}
1724
/**
 * Read `count` PLAIN 32-bit floats.
 *
 * When the data starts on a 4-byte boundary of the underlying buffer the
 * result is a view onto that buffer; otherwise the bytes are copied first.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * 4 bytes
 * @param {number} count number of values
 * @returns {Float32Array}
 */
function readPlainFloat(reader, count) {
  const { buffer, byteOffset } = reader.view;
  const start = byteOffset + reader.offset;
  let values;
  if (start % 4) {
    values = new Float32Array(align(buffer, start, count * 4));
  } else {
    values = new Float32Array(buffer, start, count);
  }
  reader.offset += count * 4;
  return values;
}
1729
/**
 * Read `count` PLAIN 64-bit floats.
 *
 * When the data starts on an 8-byte boundary of the underlying buffer the
 * result is a view onto that buffer; otherwise the bytes are copied first.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * 8 bytes
 * @param {number} count number of values
 * @returns {Float64Array}
 */
function readPlainDouble(reader, count) {
  const { buffer, byteOffset } = reader.view;
  const start = byteOffset + reader.offset;
  let values;
  if (start % 8) {
    values = new Float64Array(align(buffer, start, count * 8));
  } else {
    values = new Float64Array(buffer, start, count);
  }
  reader.offset += count * 8;
  return values;
}
1734
/**
 * Read `count` PLAIN byte arrays, each prefixed by a 4-byte little-endian
 * length. The returned arrays are views onto the reader's buffer.
 *
 * @param {{view: DataView, offset: number}} reader advanced past all values
 * @param {number} count number of byte arrays
 * @returns {Uint8Array[]}
 */
function readPlainByteArray(reader, count) {
  const items = new Array(count);
  for (let n = 0; n < count; n++) {
    const size = reader.view.getUint32(reader.offset, true);
    reader.offset += 4;
    const start = reader.view.byteOffset + reader.offset;
    items[n] = new Uint8Array(reader.view.buffer, start, size);
    reader.offset += size;
  }
  return items;
}
1744
/**
 * Read `count` PLAIN fixed-length byte arrays as views onto the reader's
 * buffer.
 *
 * @param {{view: DataView, offset: number}} reader advanced by count * fixedLength bytes
 * @param {number} count number of byte arrays
 * @param {number} fixedLength byte length of every value
 * @returns {Uint8Array[]}
 */
function readPlainByteArrayFixed(reader, count, fixedLength) {
  const items = new Array(count);
  for (let n = 0; n < count; n++) {
    const start = reader.view.byteOffset + reader.offset;
    items[n] = new Uint8Array(reader.view.buffer, start, fixedLength);
    reader.offset += fixedLength;
  }
  return items;
}
1752
/**
 * Copy `size` bytes starting at `offset` into a fresh ArrayBuffer, so the
 * data begins at byte 0 of its own buffer.
 *
 * @param {ArrayBufferLike} buffer source buffer
 * @param {number} offset first byte to copy
 * @param {number} size number of bytes to copy
 * @returns {ArrayBuffer}
 */
function align(buffer, offset, size) {
  const copy = new Uint8Array(size);
  copy.set(new Uint8Array(buffer, offset, size));
  return copy.buffer;
}
1757
// Bit masks keeping the low 0, 1, 2, 3 or 4 bytes of a 32-bit word,
// indexed by the number of bytes to keep.
const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff];
1764
/**
 * Copy `length` elements one at a time, front to back.
 *
 * The element-wise forward order matters when source and destination are the
 * same array and the ranges overlap: already-copied elements are re-read,
 * which repeats the pattern.
 *
 * @param {ArrayLike<number>} fromArray source
 * @param {number} fromPos first source index
 * @param {any} toArray destination
 * @param {number} toPos first destination index
 * @param {number} length number of elements
 */
function copyBytes(fromArray, fromPos, toArray, toPos, length) {
  let src = fromPos;
  let dst = toPos;
  for (let remaining = length; remaining > 0; remaining--) {
    toArray[dst++] = fromArray[src++];
  }
}
1767
/**
 * Decompress a raw snappy block from `input` into the pre-sized `output`.
 *
 * The leading varint (uncompressed length) is skipped; the caller supplies an
 * output buffer of the right size instead. The body is a sequence of literal
 * and copy (back-reference) elements.
 *
 * Multi-byte lengths and offsets are coerced to unsigned 32-bit values. With
 * signed arithmetic a 4-byte field whose top bit is set turns negative, which
 * slips past the range checks: a negative copy offset reads beyond the output
 * and writes zeros, and a negative literal length moves the cursor backwards.
 *
 * @param {Uint8Array} input compressed bytes
 * @param {Uint8Array} output destination, sized to the uncompressed length
 * @throws {Error} on malformed input or when the output is not filled exactly
 */
function snappyUncompress(input, output) {
  const inputLength = input.byteLength;
  const outputLength = output.byteLength;
  let pos = 0;
  let outPos = 0;

  // Skip the varint length preamble: bytes with the high bit set continue it.
  while (pos < inputLength) {
    const c = input[pos];
    pos++;
    if (c < 128) break;
  }
  if (outputLength && pos >= inputLength) throw new Error("invalid snappy length header");

  while (pos < inputLength) {
    const c = input[pos];
    let len = 0;
    pos++;
    if (pos >= inputLength) throw new Error("missing eof marker");

    if ((c & 3) === 0) {
      // Literal: raw bytes copied straight from the input.
      len = (c >>> 2) + 1;
      if (len > 60) {
        // Lengths above 60 are stored in the following 1-4 bytes.
        if (pos + 3 >= inputLength) throw new Error("snappy error literal pos + 3 >= inputLength");
        const lengthSize = len - 60;
        const word = input[pos] + (input[pos + 1] << 8) + (input[pos + 2] << 16) + (input[pos + 3] << 24);
        len = ((word & WORD_MASK[lengthSize]) >>> 0) + 1;
        pos += lengthSize;
      }
      if (pos + len > inputLength) throw new Error("snappy error literal exceeds input length");
      copyBytes(input, pos, output, outPos, len);
      pos += len;
      outPos += len;
    } else {
      // Copy: repeat `len` bytes found `offset` bytes back in the output.
      let offset = 0;
      switch (c & 3) {
        case 1:
          len = (c >>> 2 & 7) + 4;
          offset = input[pos] + (c >>> 5 << 8);
          pos++;
          break;
        case 2:
          if (inputLength <= pos + 1) throw new Error("snappy error end of input");
          len = (c >>> 2) + 1;
          offset = input[pos] + (input[pos + 1] << 8);
          pos += 2;
          break;
        case 3:
          if (inputLength <= pos + 3) throw new Error("snappy error end of input");
          len = (c >>> 2) + 1;
          offset = (input[pos] + (input[pos + 1] << 8) + (input[pos + 2] << 16) + (input[pos + 3] << 24)) >>> 0;
          pos += 4;
          break;
        default:
          break;
      }
      if (offset === 0 || Number.isNaN(offset)) throw new Error(`invalid offset ${offset} pos ${pos} inputLength ${inputLength}`);
      if (offset > outPos) throw new Error("cannot copy from before start of buffer");
      copyBytes(output, outPos - offset, output, outPos, len);
      outPos += len;
    }
  }
  if (outPos !== outputLength) throw new Error("premature end of input");
}
1826
/**
 * Decode a (decompressed) V1 data page.
 *
 * Repetition and definition levels are read first; the number of encoded
 * values is num_values minus the nulls counted from the definition levels.
 * The values are then decoded according to the page encoding. For dictionary
 * and RLE encodings the result holds indexes / raw values that the caller
 * converts further.
 *
 * DELTA_BYTE_ARRAY is handled here as well, matching the V2 page reader.
 *
 * @param {Uint8Array} bytes decompressed page bytes
 * @param {object} daph data page header (num_values, encoding)
 * @param {object} columnDecoder provides type, element and schemaPath
 * @returns {{definitionLevels: number[], repetitionLevels: number[], dataPage: ArrayLike<any>}}
 * @throws {Error} for an unsupported encoding
 */
function readDataPage(bytes, daph, { type, element, schemaPath }) {
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const reader = { view, offset: 0 };
  const repetitionLevels = readRepetitionLevels(reader, daph, schemaPath);
  const { definitionLevels, numNulls } = readDefinitionLevels(reader, daph, schemaPath);
  const nValues = daph.num_values - numNulls;
  const { encoding } = daph;

  let dataPage;
  if (encoding === "PLAIN") {
    dataPage = readPlain(reader, type, nValues, element.type_length);
  } else if (encoding === "PLAIN_DICTIONARY" || encoding === "RLE_DICTIONARY" || encoding === "RLE") {
    // Booleans always use 1 bit; otherwise the width is stored in the first byte.
    const valueBitWidth = type === "BOOLEAN" ? 1 : view.getUint8(reader.offset++);
    if (valueBitWidth) {
      dataPage = new Array(nValues);
      if (type === "BOOLEAN") {
        readRleBitPackedHybrid(reader, valueBitWidth, dataPage);
        dataPage = dataPage.map((x) => !!x);
      } else {
        readRleBitPackedHybrid(reader, valueBitWidth, dataPage, view.byteLength - reader.offset);
      }
    } else {
      // Width 0: every value is 0.
      dataPage = new Uint8Array(nValues);
    }
  } else if (encoding === "BYTE_STREAM_SPLIT") {
    dataPage = byteStreamSplit(reader, nValues, type, element.type_length);
  } else if (encoding === "DELTA_BINARY_PACKED") {
    dataPage = type === "INT32" ? new Int32Array(nValues) : new BigInt64Array(nValues);
    deltaBinaryUnpack(reader, nValues, dataPage);
  } else if (encoding === "DELTA_LENGTH_BYTE_ARRAY") {
    dataPage = new Array(nValues);
    deltaLengthByteArray(reader, nValues, dataPage);
  } else if (encoding === "DELTA_BYTE_ARRAY") {
    dataPage = new Array(nValues);
    deltaByteArray(reader, nValues, dataPage);
  } else {
    throw new Error(`parquet unsupported encoding: ${encoding}`);
  }

  return {
    definitionLevels,
    repetitionLevels,
    dataPage
  };
}
1860
/**
 * Read the repetition levels of a V1 data page.
 *
 * Returns an empty array when the schema path has at most one entry or the
 * maximum repetition level is 0; in those cases nothing is read.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} daph data page header (num_values)
 * @param {any[]} schemaPath
 * @returns {number[]}
 */
function readRepetitionLevels(reader, daph, schemaPath) {
  if (!(schemaPath.length > 1)) return [];
  const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath);
  if (!maxRepetitionLevel) return [];
  const levels = new Array(daph.num_values);
  readRleBitPackedHybrid(reader, bitWidth(maxRepetitionLevel), levels);
  return levels;
}
1871
/**
 * Read the definition levels of a V1 data page and count its nulls.
 *
 * A value counts as non-null when its level equals the maximum definition
 * level. When the page has no nulls the returned level array is emptied.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} daph data page header (num_values)
 * @param {any[]} schemaPath
 * @returns {{definitionLevels: number[], numNulls: number}}
 */
function readDefinitionLevels(reader, daph, schemaPath) {
  const maxDefinitionLevel = getMaxDefinitionLevel$1(schemaPath);
  if (!maxDefinitionLevel) {
    return { definitionLevels: [], numNulls: 0 };
  }

  const definitionLevels = new Array(daph.num_values);
  readRleBitPackedHybrid(reader, bitWidth(maxDefinitionLevel), definitionLevels);

  let numNulls = daph.num_values;
  for (const level of definitionLevels) {
    if (level === maxDefinitionLevel) numNulls--;
  }
  // No nulls: the levels carry no information, so drop them.
  if (numNulls === 0) definitionLevels.length = 0;

  return { definitionLevels, numNulls };
}
1887
/**
 * Decompress one page.
 *
 * UNCOMPRESSED input is returned as is. Otherwise a caller-supplied
 * decompressor for the codec takes precedence, and SNAPPY falls back to the
 * built-in implementation. The result must have exactly the length announced
 * in the page header.
 *
 * @param {Uint8Array} compressedBytes
 * @param {number} uncompressed_page_size expected decompressed length
 * @param {string} codec compression codec name
 * @param {Record<string, Function>} [compressors] optional decompressors keyed by codec
 * @returns {Uint8Array}
 * @throws {Error} for an unsupported codec or a length mismatch
 */
function decompressPage(compressedBytes, uncompressed_page_size, codec, compressors) {
  const custom = compressors?.[codec];
  let page;
  if (codec === "UNCOMPRESSED") {
    page = compressedBytes;
  } else if (custom) {
    page = custom(compressedBytes, uncompressed_page_size);
  } else if (codec === "SNAPPY") {
    page = new Uint8Array(uncompressed_page_size);
    snappyUncompress(compressedBytes, page);
  } else {
    throw new Error(`parquet unsupported compression codec: ${codec}`);
  }

  const actualLength = page?.length;
  if (actualLength !== uncompressed_page_size) {
    throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`);
  }
  return page;
}
1899
/**
 * Decode a V2 data page.
 *
 * In V2 the repetition and definition levels sit at the front of the page
 * bytes and are read directly from `compressedBytes`; only the remainder is
 * decompressed (unless the header says is_compressed is false).
 *
 * @param {Uint8Array} compressedBytes raw page bytes
 * @param {object} ph page header containing data_page_header_v2
 * @param {object} columnDecoder provides type, element, schemaPath, codec, compressors
 * @returns {{definitionLevels: number[]|undefined, repetitionLevels: number[], dataPage: ArrayLike<any>}}
 * @throws {Error} when the V2 header is missing or the encoding is unsupported
 */
function readDataPageV2(compressedBytes, ph, columnDecoder) {
  const levelReader = {
    view: new DataView(compressedBytes.buffer, compressedBytes.byteOffset, compressedBytes.byteLength),
    offset: 0
  };
  const { type, element, schemaPath, codec, compressors } = columnDecoder;
  const daph2 = ph.data_page_header_v2;
  if (!daph2) throw new Error("parquet data page header v2 is undefined");

  const repetitionLevels = readRepetitionLevelsV2(levelReader, daph2, schemaPath);
  // Definition levels start right after the repetition level bytes.
  levelReader.offset = daph2.repetition_levels_byte_length;
  const definitionLevels = readDefinitionLevelsV2(levelReader, daph2, schemaPath);

  const levelBytes = daph2.definition_levels_byte_length + daph2.repetition_levels_byte_length;
  const uncompressedPageSize = ph.uncompressed_page_size - daph2.definition_levels_byte_length - daph2.repetition_levels_byte_length;
  void levelBytes;

  let page = compressedBytes.subarray(levelReader.offset);
  if (daph2.is_compressed !== false) {
    page = decompressPage(page, uncompressedPageSize, codec, compressors);
  }
  const pageView = new DataView(page.buffer, page.byteOffset, page.byteLength);
  const pageReader = { view: pageView, offset: 0 };

  const nValues = daph2.num_values - daph2.num_nulls;
  let dataPage;
  switch (daph2.encoding) {
    case "PLAIN":
      dataPage = readPlain(pageReader, type, nValues, element.type_length);
      break;
    case "RLE":
      dataPage = new Array(nValues);
      readRleBitPackedHybrid(pageReader, 1, dataPage);
      dataPage = dataPage.map((x) => !!x);
      break;
    case "PLAIN_DICTIONARY":
    case "RLE_DICTIONARY": {
      // First byte is the bit width of the dictionary indexes.
      const indexBitWidth = pageView.getUint8(pageReader.offset++);
      dataPage = new Array(nValues);
      readRleBitPackedHybrid(pageReader, indexBitWidth, dataPage, uncompressedPageSize - 1);
      break;
    }
    case "DELTA_BINARY_PACKED":
      dataPage = type === "INT32" ? new Int32Array(nValues) : new BigInt64Array(nValues);
      deltaBinaryUnpack(pageReader, nValues, dataPage);
      break;
    case "DELTA_LENGTH_BYTE_ARRAY":
      dataPage = new Array(nValues);
      deltaLengthByteArray(pageReader, nValues, dataPage);
      break;
    case "DELTA_BYTE_ARRAY":
      dataPage = new Array(nValues);
      deltaByteArray(pageReader, nValues, dataPage);
      break;
    case "BYTE_STREAM_SPLIT":
      dataPage = byteStreamSplit(pageReader, nValues, type, element.type_length);
      break;
    default:
      throw new Error(`parquet unsupported encoding: ${daph2.encoding}`);
  }

  return {
    definitionLevels,
    repetitionLevels,
    dataPage
  };
}
1946
/**
 * Read the repetition levels of a V2 data page, using the byte length stored
 * in the page header. Returns an empty array when the maximum repetition
 * level is 0.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} daph2 V2 data page header
 * @param {any[]} schemaPath
 * @returns {number[]}
 */
function readRepetitionLevelsV2(reader, daph2, schemaPath) {
  const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath);
  if (!maxRepetitionLevel) return [];

  const levels = new Array(daph2.num_values);
  const width = bitWidth(maxRepetitionLevel);
  readRleBitPackedHybrid(reader, width, levels, daph2.repetition_levels_byte_length);
  return levels;
}
1953
/**
 * Read the definition levels of a V2 data page, using the byte length stored
 * in the page header. Returns undefined when the maximum definition level
 * is 0.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} daph2 V2 data page header
 * @param {any[]} schemaPath
 * @returns {number[]|undefined}
 */
function readDefinitionLevelsV2(reader, daph2, schemaPath) {
  const maxDefinitionLevel = getMaxDefinitionLevel$1(schemaPath);
  if (!maxDefinitionLevel) return undefined;

  const levels = new Array(daph2.num_values);
  const width = bitWidth(maxDefinitionLevel);
  readRleBitPackedHybrid(reader, width, levels, daph2.definition_levels_byte_length);
  return levels;
}
1961
/**
 * Number of bits needed to represent `value` as an unsigned 32-bit integer
 * (0 for 0).
 *
 * @param {number} value
 * @returns {number}
 */
function bitWidth(value) {
  const leadingZeros = Math.clz32(value);
  return 32 - leadingZeros;
}
1964
/**
 * Read the pages of one column chunk and collect the decoded data as chunks.
 *
 * Flat columns stop once `selectEnd` rows have been counted; other columns
 * are read until the buffer is (all but one byte) exhausted. Dictionary pages
 * update the dictionary used for later pages. A page result that is the same
 * array as the previous chunk means the page was appended to it.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} rowGroupSelect provides groupStart, selectStart, selectEnd
 * @param {object} columnDecoder
 * @param {Function} [onPage] called with each finished chunk and its row range
 * @returns {{data: any[][], skipped: number}} chunks plus rows skipped before the first chunk
 */
function readColumn(reader, { groupStart, selectStart, selectEnd }, columnDecoder, onPage) {
  const { pathInSchema, schemaPath } = columnDecoder;
  const isFlat = isFlatColumn(schemaPath);
  const chunks = [];
  let dictionary = undefined;
  let lastChunk = undefined;
  let rowCount = 0;
  let skipped = 0;

  // Reports the most recent chunk; rowCount already includes its rows.
  const emitLastChunk = onPage && (() => {
    if (lastChunk) {
      onPage({
        pathInSchema,
        columnData: lastChunk,
        rowStart: groupStart + rowCount - lastChunk.length,
        rowEnd: groupStart + rowCount
      });
    }
  });

  for (;;) {
    const wantsMore = isFlat ? rowCount < selectEnd : reader.offset < reader.view.byteLength - 1;
    if (!wantsMore) break;
    if (reader.offset >= reader.view.byteLength - 1) break;

    const header = parquetHeader(reader);
    if (header.type === "DICTIONARY_PAGE") {
      const { data } = readPage(reader, header, columnDecoder, dictionary, undefined, 0);
      if (data) dictionary = convert(data, columnDecoder);
      continue;
    }

    const previousLength = lastChunk?.length || 0;
    const result = readPage(reader, header, columnDecoder, dictionary, lastChunk, selectStart - rowCount);
    if (result.skipped) {
      // Only rows skipped before any data was kept shift the chunk origin.
      if (!chunks.length) skipped += result.skipped;
      rowCount += result.skipped;
    } else if (result.data && lastChunk === result.data) {
      rowCount += result.data.length - previousLength;
    } else if (result.data && result.data.length) {
      emitLastChunk?.();
      chunks.push(result.data);
      rowCount += result.data.length;
      lastChunk = result.data;
    }
  }

  emitLastChunk?.();
  return {
    data: chunks,
    skipped
  };
}
2007
/**
 * Read one page whose header has already been parsed, advancing the reader
 * past the page bytes.
 *
 * Data pages that lie entirely before `pageStart` are not decoded and are
 * reported through `skipped`, counted in rows. A V2 page therefore reports
 * `num_rows`; for nested columns `num_values` can exceed the row count and
 * would make the caller's row counter overshoot. V1 pages are only skipped
 * for flat columns.
 *
 * @param {{view: DataView, offset: number}} reader
 * @param {object} header parsed page header
 * @param {object} columnDecoder provides type, element, schemaPath, codec, compressors
 * @param {any} dictionary decoded dictionary, if any
 * @param {any} previousChunk chunk a data page may be appended to (when it is an array)
 * @param {number} pageStart rows still to skip before the selection starts
 * @returns {{skipped: number, data?: any}}
 * @throws {Error} for a missing sub-header or an unsupported page type
 */
function readPage(reader, header, columnDecoder, dictionary, previousChunk, pageStart) {
  const { type, element, schemaPath, codec, compressors } = columnDecoder;
  const compressedBytes = new Uint8Array(
    reader.view.buffer,
    reader.view.byteOffset + reader.offset,
    header.compressed_page_size
  );
  reader.offset += header.compressed_page_size;

  if (header.type === "DATA_PAGE") {
    const daph = header.data_page_header;
    if (!daph) throw new Error("parquet data page header is undefined");
    if (pageStart > daph.num_values && isFlatColumn(schemaPath)) return { skipped: daph.num_values };
    const page = decompressPage(compressedBytes, Number(header.uncompressed_page_size), codec, compressors);
    const { definitionLevels, repetitionLevels, dataPage } = readDataPage(page, daph, columnDecoder);
    const values = convertWithDictionary(dataPage, dictionary, daph.encoding, columnDecoder);
    const target = Array.isArray(previousChunk) ? previousChunk : [];
    return {
      skipped: 0,
      data: assembleLists(target, definitionLevels, repetitionLevels, values, schemaPath)
    };
  }

  if (header.type === "DATA_PAGE_V2") {
    const daph2 = header.data_page_header_v2;
    if (!daph2) throw new Error("parquet data page header v2 is undefined");
    // Skipped amounts are row counts, so report rows rather than values.
    if (pageStart > daph2.num_rows) return { skipped: daph2.num_rows };
    const { definitionLevels, repetitionLevels, dataPage } = readDataPageV2(compressedBytes, header, columnDecoder);
    const values = convertWithDictionary(dataPage, dictionary, daph2.encoding, columnDecoder);
    const target = Array.isArray(previousChunk) ? previousChunk : [];
    return {
      skipped: 0,
      data: assembleLists(target, definitionLevels, repetitionLevels, values, schemaPath)
    };
  }

  if (header.type === "DICTIONARY_PAGE") {
    const diph = header.dictionary_page_header;
    if (!diph) throw new Error("parquet dictionary page header is undefined");
    const page = decompressPage(compressedBytes, Number(header.uncompressed_page_size), codec, compressors);
    const pageReader = {
      view: new DataView(page.buffer, page.byteOffset, page.byteLength),
      offset: 0
    };
    return {
      skipped: 0,
      data: readPlain(pageReader, type, diph.num_values, element.type_length)
    };
  }

  throw new Error(`parquet unsupported page type: ${header.type}`);
}
2044
/**
 * Read a thrift-encoded page header and map its numbered fields to named
 * properties. Enum fields are translated through the PageTypes and Encodings
 * tables. Sub-headers that are absent stay falsy. For V2 pages a missing
 * is_compressed flag is reported as true.
 *
 * @param {{view: DataView, offset: number}} reader advanced past the header
 * @returns {object} the page header
 */
function parquetHeader(reader) {
  const thrift = deserializeTCompactProtocol(reader);
  const v1 = thrift.field_5;
  const dict = thrift.field_7;
  const v2 = thrift.field_8;

  return {
    type: PageTypes$1[thrift.field_1],
    uncompressed_page_size: thrift.field_2,
    compressed_page_size: thrift.field_3,
    crc: thrift.field_4,
    data_page_header: v1 && {
      num_values: v1.field_1,
      encoding: Encodings$1[v1.field_2],
      definition_level_encoding: Encodings$1[v1.field_3],
      repetition_level_encoding: Encodings$1[v1.field_4],
      statistics: v1.field_5 && {
        max: v1.field_5.field_1,
        min: v1.field_5.field_2,
        null_count: v1.field_5.field_3,
        distinct_count: v1.field_5.field_4,
        max_value: v1.field_5.field_5,
        min_value: v1.field_5.field_6
      }
    },
    index_page_header: thrift.field_6,
    dictionary_page_header: dict && {
      num_values: dict.field_1,
      encoding: Encodings$1[dict.field_2],
      is_sorted: dict.field_3
    },
    data_page_header_v2: v2 && {
      num_values: v2.field_1,
      num_nulls: v2.field_2,
      num_rows: v2.field_3,
      encoding: Encodings$1[v2.field_4],
      definition_levels_byte_length: v2.field_5,
      repetition_levels_byte_length: v2.field_6,
      is_compressed: v2.field_7 === undefined ? true : v2.field_7,
      statistics: v2.field_8
    }
  };
}
2083
/**
 * Start reading every planned column chunk of a row group.
 *
 * Each chunk yields an entry whose `data` promise resolves to the chunks
 * returned by readColumn plus the number of leading rows that were skipped.
 * Chunks with an offset index first fetch that index and, when the chunk has
 * no dictionary page, narrow the byte range to the pages overlapping the
 * selected rows.
 *
 * The column decoder's `parsers` are the defaults overlaid with
 * `options.parsers`. The merged object is assigned after the option spread so
 * that a partial `options.parsers` cannot replace it and drop the defaults.
 *
 * @param {object} options read options (file, parsers, onPage, ...)
 * @param {{metadata: object}} fileInfo supplies the parquet metadata
 * @param {object} groupPlan planned chunks and row selection for the group
 * @returns {{groupStart: number, groupRows: number, asyncColumns: object[]}}
 */
function readRowGroup(options, { metadata }, groupPlan) {
  const asyncColumns = [];
  for (const chunk of groupPlan.chunks) {
    const { data_page_offset, dictionary_page_offset, path_in_schema: pathInSchema } = chunk.columnMetadata;
    const schemaPath = getSchemaPath$1(metadata.schema, pathInSchema);
    const columnDecoder = {
      pathInSchema,
      element: schemaPath[schemaPath.length - 1].element,
      schemaPath,
      ...options,
      ...chunk.columnMetadata,
      parsers: {
        ...DEFAULT_PARSERS,
        ...options.parsers
      }
    };
    let { startByte, endByte } = chunk.range;

    if (!("offsetIndex" in chunk)) {
      // No offset index: fetch and decode the whole chunk range.
      asyncColumns.push({
        pathInSchema,
        data: Promise.resolve(options.file.slice(startByte, endByte)).then((buffer) => {
          return readColumn({
            view: new DataView(buffer),
            offset: 0
          }, groupPlan, columnDecoder, options.onPage);
        })
      });
      continue;
    }

    asyncColumns.push({
      pathInSchema,
      data: Promise.resolve(options.file.slice(chunk.offsetIndex.startByte, chunk.offsetIndex.endByte)).then(async (arrayBuffer) => {
        const { selectStart, selectEnd } = groupPlan;
        const pages = readOffsetIndex({
          view: new DataView(arrayBuffer),
          offset: 0
        }).page_locations;

        // Rows before the first page that is read; -1 until that page is found.
        let skipped = -1;
        const hasDict = dictionary_page_offset || data_page_offset < pages[0].offset;
        for (let i = 0; i < pages.length; i++) {
          const page = pages[i];
          const pageStart = Number(page.first_row_index);
          const pageEnd = i + 1 < pages.length ? Number(pages[i + 1].first_row_index) : groupPlan.groupRows;
          // Leading pages can only be dropped when no dictionary page precedes them.
          if (skipped < 0 && !hasDict && pageEnd > selectStart) {
            startByte = Number(page.offset);
            skipped = pageStart;
          }
          if (pageStart < selectEnd) endByte = Number(page.offset) + page.compressed_page_size;
        }
        if (skipped < 0) skipped = 0;

        const buffer = await options.file.slice(startByte, endByte);
        const shiftedPlan = skipped ? {
          ...groupPlan,
          groupStart: groupPlan.groupStart + skipped,
          selectStart: groupPlan.selectStart - skipped,
          selectEnd: groupPlan.selectEnd - skipped
        } : groupPlan;
        const { data, skipped: columnSkipped } = readColumn({
          view: new DataView(buffer),
          offset: 0
        }, shiftedPlan, columnDecoder, options.onPage);
        return {
          data,
          skipped: skipped + columnSkipped
        };
      })
    });
  }
  return {
    groupStart: groupPlan.groupStart,
    groupRows: groupPlan.groupRows,
    asyncColumns
  };
}
2156
/**
 * Wait for all columns of a row group and transpose the selected rows.
 *
 * With rowFormat "object" every row is an object keyed by top-level column
 * name. Otherwise every row is an array ordered like `columns`, or like the
 * group's own columns when `columns` is not given.
 *
 * @param {{asyncColumns: object[]}} asyncGroup
 * @param {number} selectStart first selected row within the group
 * @param {number} selectEnd end of the selection (exclusive)
 * @param {string[]} [columns] requested column order for array rows
 * @param {string} [rowFormat] "object" for object rows
 * @returns {Promise<any[]>}
 * @throws {Error} when a requested column is not part of the group
 */
async function asyncGroupToRows({ asyncColumns }, selectStart, selectEnd, columns, rowFormat) {
  const asyncPages = await Promise.all(
    asyncColumns.map((column) =>
      column.data.then(({ skipped, data }) => ({
        skipped,
        data: flatten(data)
      }))
    )
  );
  const selectCount = selectEnd - selectStart;

  // Value of column `columnIndex` at the given row of the selection; each
  // column's data starts `skipped` rows into the group.
  const cellAt = (columnIndex, selectRow) => {
    const { data, skipped } = asyncPages[columnIndex];
    return data[selectStart + selectRow - skipped];
  };

  const rows = Array(selectCount);

  if (rowFormat === "object") {
    for (let selectRow = 0; selectRow < selectCount; selectRow++) {
      const record = {};
      for (let c = 0; c < asyncColumns.length; c++) {
        record[asyncColumns[c].pathInSchema[0]] = cellAt(c, selectRow);
      }
      rows[selectRow] = record;
    }
    return rows;
  }

  const includedColumnNames = asyncColumns
    .map((child) => child.pathInSchema[0])
    .filter((name) => !columns || columns.includes(name));
  const columnOrder = columns ?? includedColumnNames;
  const columnIndexes = columnOrder.map((name) =>
    asyncColumns.findIndex((column) => column.pathInSchema[0] === name)
  );

  for (let selectRow = 0; selectRow < selectCount; selectRow++) {
    const tuple = Array(asyncColumns.length);
    for (let c = 0; c < columnOrder.length; c++) {
      const columnIndex = columnIndexes[c];
      if (columnIndex < 0) throw new Error(`parquet column not found: ${columnOrder[c]}`);
      tuple[c] = cellAt(columnIndex, selectRow);
    }
    rows[selectRow] = tuple;
  }
  return rows;
}
2190
/**
 * Combine the leaf columns of a row group into one entry per top-level
 * schema child.
 *
 * A child without sub-columns is passed through unchanged when it was read.
 * A nested child waits for all of its leaf columns, flattens each one, trims
 * them to the shortest length, and assembles them into a single column that
 * is reported as one chunk with no skipped rows.
 *
 * @param {object} asyncRowGroup row group with an asyncColumns list
 * @param {object} schemaTree root of the schema tree
 * @param {object} [parsers] overrides applied on top of the default parsers
 * @returns {object} a copy of the row group with assembled asyncColumns
 */
function assembleAsync(asyncRowGroup, schemaTree, parsers) {
  const { asyncColumns } = asyncRowGroup;
  parsers = {
    ...DEFAULT_PARSERS,
    ...parsers
  };

  // Resolve the leaf columns of one nested child and assemble them.
  const assembleChild = async (child, childColumns) => {
    const resolved = await Promise.all(childColumns.map((c) => c.data));
    const subcolumnData = new Map();
    let minLength = Infinity;
    for (let i = 0; i < childColumns.length; i++) {
      const flat = flatten(resolved[i].data);
      subcolumnData.set(childColumns[i].pathInSchema.join("."), flat);
      minLength = Math.min(minLength, flat.length);
    }
    // Trim longer sub-columns so all of them share the shortest length.
    for (const [key, value] of subcolumnData) {
      if (value.length > minLength) subcolumnData.set(key, value.slice(0, minLength));
    }
    assembleNested(subcolumnData, child, parsers);
    const column = subcolumnData.get(child.element.name);
    if (!column) throw new Error("parquet column data not assembled");
    return {
      data: [column],
      skipped: 0
    };
  };

  const assembled = [];
  for (const child of schemaTree.children) {
    if (!child.children.length) {
      const asyncColumn = asyncColumns.find((column) => column.pathInSchema[0] === child.element.name);
      if (asyncColumn) assembled.push(asyncColumn);
      continue;
    }
    const childColumns = asyncColumns.filter((column) => column.pathInSchema[0] === child.element.name);
    if (!childColumns.length) continue;
    assembled.push({
      pathInSchema: child.path,
      data: assembleChild(child, childColumns)
    });
  }

  return {
    ...asyncRowGroup,
    asyncColumns: assembled
  };
}
2230
/**
 * Read a Parquet file and deliver the data through callbacks.
 *
 * - When `options.metadata` is nullish it is loaded with `parquetMetadataAsync`
 *   and stored back on `options`.
 * - `onChunk` (optional) is invoked once per column chunk with
 *   `{ columnName, columnData, rowStart, rowEnd }`.
 * - `onComplete` (optional) is invoked once with all selected rows.
 * - With neither callback the function only waits for every column read.
 *
 * When both `columns` and `filter` are given, columns needed only by the
 * filter are added to the read and deleted again from each matching row.
 *
 * @param {object} options read options (`file`, `metadata`, `rowStart`, `rowEnd`, `columns`,
 *   `onChunk`, `onComplete`, `rowFormat`, `filter`, `filterStrict`, `parsers`, ...)
 * @returns {Promise<void>}
 * @throws {Error} if a filter is used without `rowFormat: "object"`, if a filter column
 *   is not a top-level schema field, or with the reason of the first failed column read
 */
async function parquetRead(options) {
  options.metadata ??= await parquetMetadataAsync(options.file, options);
  const {
    rowStart = 0,
    rowEnd,
    columns,
    onChunk,
    onComplete,
    rowFormat,
    filter,
    filterStrict = true
  } = options;

  if (filter && rowFormat !== "object") {
    throw new Error("parquet filter requires rowFormat: \"object\"");
  }

  const filterColumns = columnsNeededForFilter(filter);
  if (filterColumns.length) {
    const topLevelNames = parquetSchema(options.metadata).children.map((child) => child.element.name);
    const unknown = filterColumns.filter((name) => !topLevelNames.includes(name));
    if (unknown.length) {
      throw new Error(`parquet filter columns not found: ${unknown.join(", ")}`);
    }
  }

  // Columns the filter needs but the caller did not ask for.
  const filterOnlyColumns = columns && filter
    ? filterColumns.filter((name) => !columns.includes(name))
    : [];
  const requiresProjection = filterOnlyColumns.length > 0;
  const readColumns = requiresProjection ? [...columns, ...filterOnlyColumns] : columns;

  const readOptions = await withBloomFilters(
    requiresProjection ? { ...options, columns: readColumns } : options
  );
  const asyncGroups = parquetReadAsync(readOptions);

  if (!onComplete && !onChunk) {
    await awaitAllColumns(asyncGroups);
    return;
  }

  const tree = parquetSchema(options.metadata);
  const assembled = asyncGroups.map((group) => assembleAsync(group, tree, options.parsers));

  if (onChunk) {
    for (const group of assembled) {
      for (const column of group.asyncColumns) {
        column.data.then(({ data, skipped }) => {
          let chunkStart = group.groupStart + skipped;
          for (const columnData of data) {
            const chunkEnd = chunkStart + columnData.length;
            onChunk({
              columnName: column.pathInSchema[0],
              columnData,
              rowStart: chunkStart,
              rowEnd: chunkEnd
            });
            chunkStart = chunkEnd;
          }
        }, () => {
          // Rejections are reported by awaitAllColumns below.
        });
      }
    }
  }

  await awaitAllColumns(assembled);
  if (!onComplete) return;

  const rows = [];
  const asObjects = rowFormat === "object";
  for (const group of assembled) {
    const selectStart = Math.max(rowStart - group.groupStart, 0);
    const selectEnd = Math.min((rowEnd ?? Infinity) - group.groupStart, group.groupRows);
    const groupRows = asObjects
      ? await asyncGroupToRows(group, selectStart, selectEnd, readColumns, "object")
      : await asyncGroupToRows(group, selectStart, selectEnd, columns, "array");

    if (!filter) {
      concat(rows, groupRows);
      continue;
    }
    for (const row of groupRows) {
      if (!matchFilter(row, filter, filterStrict)) continue;
      // Strip the columns that were read only to evaluate the filter.
      for (const name of filterOnlyColumns) delete row[name];
      rows.push(row);
    }
  }
  onComplete(rows);
}
2292
/**
 * Wait until every column `data` promise of every group has settled.
 *
 * All promises are allowed to settle before any error is raised; afterwards
 * the reason of the first rejected one (in group/column order) is thrown.
 *
 * @param {Array<{asyncColumns: Array<{data: Promise<any>}>}>} asyncGroups
 * @returns {Promise<void>}
 */
async function awaitAllColumns(asyncGroups) {
  const pending = [];
  for (const group of asyncGroups) {
    for (const column of group.asyncColumns) pending.push(column.data);
  }
  const outcomes = await Promise.allSettled(pending);
  for (const outcome of outcomes) {
    if (outcome.status === "rejected") throw outcome.reason;
  }
}
2297
/**
 * Start reading every row group selected by the read plan.
 *
 * Builds a plan with `parquetPlan`, replaces `options.file` with the buffer
 * returned by `prefetchAsyncBuffer`, and starts `readRowGroup` for each
 * planned group.
 *
 * @param {object} options read options; `metadata` is required and `file` is overwritten
 * @returns {Array} one `readRowGroup` result per planned group, in plan order
 * @throws {Error} if `options.metadata` is missing
 */
function parquetReadAsync(options) {
  const { metadata } = options;
  if (!metadata) throw new Error("parquet requires metadata");

  const plan = parquetPlan(options);
  options.file = prefetchAsyncBuffer(options.file, plan);

  const rowGroups = [];
  for (const groupPlan of plan.groups) {
    rowGroups.push(readRowGroup(options, plan, groupPlan));
  }
  return rowGroups;
}
2303
/**
 * Attach prefetched bloom filters to the read options when they can be used.
 *
 * The options object is returned untouched unless `useBloomFilters`, `filter`
 * and `metadata` are all set. Otherwise a copy is returned with two additions:
 * `bloomFiltersByGroup` (result of `prefetchBloomFilters`) and `schemaElements`
 * (top-level schema elements keyed by name).
 *
 * @param {object} options read options
 * @returns {Promise<object>} the same object, or an extended copy
 */
async function withBloomFilters(options) {
  if (!options.useBloomFilters) return options;
  if (!options.filter || !options.metadata) return options;

  const { file, metadata, filter, filterStrict } = options;

  const schemaElements = {};
  for (const { element } of parquetSchema(metadata).children) {
    schemaElements[element.name] = element;
  }

  const bloomFiltersByGroup = await prefetchBloomFilters({
    file,
    metadata,
    filter,
    filterStrict
  });

  return {
    ...options,
    bloomFiltersByGroup,
    schemaElements
  };
}
2321
/**
 * Promise-returning wrapper around `parquetRead` that yields rows as objects.
 *
 * Forces `rowFormat: "object"` and uses the promise's resolver as the
 * `onComplete` callback; any rejection of `parquetRead` rejects the promise.
 *
 * @param {object} options options forwarded to `parquetRead` (`rowFormat` and `onComplete` are overridden)
 * @returns {Promise<any>} resolves with whatever `parquetRead` passes to `onComplete`
 */
function parquetReadObjects(options) {
  return new Promise((resolve, reject) => {
    const objectOptions = {
      ...options,
      rowFormat: "object",
      onComplete: resolve
    };
    parquetRead(objectOptions).catch(reject);
  });
}
2330
// Name tables for Parquet enumerations.
// NOTE(review): presumably each array is ordered so that the index equals the
// numeric code used by the file format — confirm against the decoding code.

/** Physical (storage) type names. */
const ParquetTypes = [
  "BOOLEAN", "INT32", "INT64", "INT96",
  "FLOAT", "DOUBLE", "BYTE_ARRAY", "FIXED_LEN_BYTE_ARRAY"
];

/** Page/value encoding names. */
const Encodings = [
  "PLAIN", "GROUP_VAR_INT", "PLAIN_DICTIONARY", "RLE", "BIT_PACKED",
  "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY",
  "RLE_DICTIONARY", "BYTE_STREAM_SPLIT"
];

/** Field repetition names. */
const FieldRepetitionTypes = ["REQUIRED", "OPTIONAL", "REPEATED"];

/** Converted (legacy logical) type names. */
const ConvertedTypes = [
  "UTF8", "MAP", "MAP_KEY_VALUE", "LIST", "ENUM", "DECIMAL", "DATE",
  "TIME_MILLIS", "TIME_MICROS", "TIMESTAMP_MILLIS", "TIMESTAMP_MICROS",
  "UINT_8", "UINT_16", "UINT_32", "UINT_64",
  "INT_8", "INT_16", "INT_32", "INT_64",
  "JSON", "BSON", "INTERVAL"
];

/** Compression codec names. */
const CompressionCodecs = [
  "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO",
  "BROTLI", "LZ4", "ZSTD", "LZ4_RAW"
];

/** Page type names. */
const PageTypes = ["DATA_PAGE", "INDEX_PAGE", "DICTIONARY_PAGE", "DATA_PAGE_V2"];

/** Column-index boundary order names. */
const BoundaryOrders = ["UNORDERED", "ASCENDING", "DESCENDING"];

/** Edge interpolation algorithm names. */
const EdgeInterpolationAlgorithms = ["SPHERICAL", "VINCENTY", "THOMAS", "ANDOYER", "KARNEY"];

// Constructed and immediately discarded; no binding is kept.
new TextDecoder();
2410
/**
 * Decode a big-endian two's-complement integer from a byte sequence.
 *
 * The result is converted with `Number()`, so values beyond the safe integer
 * range lose precision. No decimal scale is applied here.
 *
 * @param {Uint8Array|number[]} bytes most significant byte first
 * @returns {number} the signed integer value, or 0 for empty input
 */
function parseDecimal(bytes) {
  const byteCount = bytes.length;
  if (!byteCount) return 0;

  let unsigned = 0n;
  for (const byte of bytes) {
    unsigned = unsigned * 256n + BigInt(byte);
  }

  // A set top bit means the number is negative: subtract 2^bits.
  const bitWidth = BigInt(byteCount * 8);
  const signBoundary = 1n << (bitWidth - 1n);
  const signed = unsigned >= signBoundary ? unsigned - (1n << bitWidth) : unsigned;
  return Number(signed);
}
2418
/**
 * Build a tree node from a flattened, depth-first schema element list.
 *
 * @param {Array<{name: string, num_children?: number}>} schema flattened schema elements
 * @param {number} rootIndex index of the element to build the node for
 * @param {string[]} path names leading to this element
 * @returns {{count: number, element: object, children: Array, path: string[]}}
 *   `count` is the number of schema elements consumed by this node and its descendants
 */
function schemaTree(schema, rootIndex, path) {
  const element = schema[rootIndex];
  const children = [];

  // Children follow their parent directly; each subtree reports how many
  // elements it consumed so the next sibling can be located.
  let nextIndex = rootIndex + 1;
  for (let built = 0; built < (element.num_children || 0); built++) {
    const childName = schema[nextIndex].name;
    const subtree = schemaTree(schema, nextIndex, [...path, childName]);
    nextIndex += subtree.count;
    children.push(subtree);
  }

  return {
    count: nextIndex - rootIndex,
    element,
    children,
    path
  };
}
2435
/**
 * Resolve a column path to the chain of schema tree nodes leading to it.
 *
 * @param {Array<object>} schema flattened schema elements (root at index 0)
 * @param {string[]} name path segments, outermost first
 * @returns {Array<object>} tree nodes from the root (inclusive) down to the named element
 * @throws {Error} if a segment has no matching child
 */
function getSchemaPath(schema, name) {
  const nodes = [schemaTree(schema, 0, [])];
  for (const part of name) {
    const parent = nodes[nodes.length - 1];
    const match = parent.children.find(({ element }) => element.name === part);
    if (!match) throw new Error(`parquet schema element not found: ${name}`);
    nodes.push(match);
  }
  return nodes;
}
2446
/**
 * Count the non-REQUIRED elements along a schema path, ignoring the root.
 *
 * @param {Array<{element: {repetition_type?: string}}>} schemaPath nodes from root to leaf
 * @returns {number} number of nodes after the first whose repetition is not "REQUIRED"
 */
function getMaxDefinitionLevel(schemaPath) {
  return schemaPath
    .slice(1)
    .reduce((level, { element }) => (element.repetition_type === "REQUIRED" ? level : level + 1), 0);
}
2451
/**
 * Check whether a schema node has the structural shape of a list:
 * converted type "LIST", exactly one child, and that child is REPEATED with
 * at most one child of its own.
 *
 * A LIST-annotated node without any children is reported as not list-like
 * instead of raising a TypeError.
 *
 * @param {{element: object, children: Array}|undefined|null} schema schema tree node
 * @returns {boolean}
 */
function isListLike(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "LIST") return false;
  // Exactly one child is required; zero children used to dereference undefined.
  if (schema.children.length !== 1) return false;
  const [repeatedGroup] = schema.children;
  if (repeatedGroup.children.length > 1) return false;
  return repeatedGroup.element.repetition_type === "REPEATED";
}
2460
/**
 * Check whether a schema node has the structural shape of a map:
 * converted type "MAP", exactly one REPEATED child, and that child has exactly
 * two children of which neither "key" nor "value" is REPEATED.
 *
 * A MAP-annotated node without any children is reported as not map-like
 * instead of raising a TypeError.
 *
 * @param {{element: object, children: Array}|undefined|null} schema schema tree node
 * @returns {boolean}
 */
function isMapLike(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "MAP") return false;
  // Exactly one child is required; zero children used to dereference undefined.
  if (schema.children.length !== 1) return false;

  const [entries] = schema.children;
  if (entries.children.length !== 2) return false;
  if (entries.element.repetition_type !== "REPEATED") return false;

  // A child that is missing by name yields undefined and therefore passes.
  const repetitionOf = (name) =>
    entries.children.find((child) => child.element.name === name)?.element.repetition_type;
  if (repetitionOf("key") === "REPEATED") return false;
  if (repetitionOf("value") === "REPEATED") return false;
  return true;
}
2471
/** 512 KiB (2^19 bytes); default `minSize` threshold of `cachedAsyncBuffer`. */
const defaultInitialFetchSize = 2 ** 19;
// Constructed and immediately discarded; no binding is kept.
new TextDecoder();
2473
/** 2^64 - 1: keeps BigInt arithmetic inside an unsigned 64-bit word. */
const MASK = 18446744073709551615n;
// The five 64-bit prime constants of the XXH64 algorithm.
const PRIME1 = 11400714785074694791n;
const PRIME2 = 14029467366897019727n;
const PRIME3 = 1609587929392839161n;
const PRIME4 = 9650029242287828579n;
const PRIME5 = 2870177450012600261n;

/**
 * Rotate an unsigned 64-bit value left.
 *
 * @param {bigint} x value in [0, 2^64)
 * @param {bigint} r rotation distance in bits
 * @returns {bigint}
 */
function rotl64(x, r) {
  const shiftedOut = x >> (64n - r);
  const shiftedUp = x << r;
  return (shiftedUp | shiftedOut) & MASK;
}

/**
 * Mix one 64-bit input word into an accumulator.
 *
 * @param {bigint} acc accumulator
 * @param {bigint} val input word
 * @returns {bigint}
 */
function round(acc, val) {
  const mixed = (acc + val * PRIME2) & MASK;
  return (rotl64(mixed, 31n) * PRIME1) & MASK;
}

/**
 * Fold a stripe accumulator into the running hash.
 *
 * @param {bigint} acc running hash
 * @param {bigint} val stripe accumulator
 * @returns {bigint}
 */
function mergeRound(acc, val) {
  const folded = acc ^ round(0n, val);
  return (folded * PRIME1 + PRIME4) & MASK;
}

/**
 * Compute the 64-bit xxHash (XXH64) of a byte view.
 *
 * @param {Uint8Array} input bytes to hash (only its own window of the buffer is read)
 * @param {bigint} [seed] seed, expected to lie in [0, 2^64)
 * @returns {bigint} unsigned 64-bit hash
 */
function xxhash64(input, seed = 0n) {
  const view = new DataView(input.buffer, input.byteOffset, input.byteLength);
  const len = input.byteLength;
  let offset = 0;
  let h64;

  if (len >= 32) {
    // Four independent lanes consume the input in 32-byte stripes.
    const lanes = [
      (seed + PRIME1 + PRIME2) & MASK,
      (seed + PRIME2) & MASK,
      seed,
      (seed - PRIME1) & MASK
    ];
    for (; offset + 32 <= len; offset += 32) {
      for (let lane = 0; lane < 4; lane++) {
        lanes[lane] = round(lanes[lane], view.getBigUint64(offset + lane * 8, true));
      }
    }
    const [v1, v2, v3, v4] = lanes;
    h64 = (rotl64(v1, 1n) + rotl64(v2, 7n) + rotl64(v3, 12n) + rotl64(v4, 18n)) & MASK;
    for (const lane of lanes) h64 = mergeRound(h64, lane);
  } else {
    h64 = (seed + PRIME5) & MASK;
  }

  h64 = (h64 + BigInt(len)) & MASK;

  // Remaining input: 8-byte words, then at most one 4-byte word, then single bytes.
  for (; offset + 8 <= len; offset += 8) {
    h64 ^= round(0n, view.getBigUint64(offset, true));
    h64 = (rotl64(h64, 27n) * PRIME1 + PRIME4) & MASK;
  }
  if (offset + 4 <= len) {
    h64 ^= (BigInt(view.getUint32(offset, true)) * PRIME1) & MASK;
    h64 = (rotl64(h64, 23n) * PRIME2 + PRIME3) & MASK;
    offset += 4;
  }
  for (; offset < len; offset++) {
    h64 ^= (BigInt(view.getUint8(offset)) * PRIME5) & MASK;
    h64 = (rotl64(h64, 11n) * PRIME1) & MASK;
  }

  // Final avalanche.
  h64 ^= h64 >> 33n;
  h64 = (h64 * PRIME2) & MASK;
  h64 ^= h64 >> 29n;
  h64 = (h64 * PRIME3) & MASK;
  h64 ^= h64 >> 32n;
  return h64;
}
2540
/** Shared UTF-8 encoder; `hashParquetValue` uses it to hash string values. */
const textEncoder = new TextEncoder();
// Eight 32-bit constants, built and discarded without a binding. The values
// equal the salt constants of the Parquet split-block bloom filter.
new Uint32Array([
  0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
  0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31
]);
2551
/**
 * Hash a JavaScript value the way it would be stored for a given column, or
 * return undefined when no hash can be produced.
 *
 * The value is serialized little-endian according to the column's physical
 * type and passed to `xxhash64`. Undefined is returned for null/undefined
 * values, for values whose JavaScript type does not fit the physical type, for
 * the converted/logical types excluded below, and for unlisted physical types.
 *
 * @param {any} value candidate value
 * @param {{type: string, converted_type?: string, logical_type?: {type: string}}} element column schema element
 * @returns {bigint|undefined} 64-bit hash, or undefined when the value is not hashable
 */
function hashParquetValue(value, element) {
  if (value === null || value === void 0) return void 0;

  const { type, converted_type, logical_type } = element;
  const logicalName = logical_type?.type;
  const convertedIs = (...names) => names.includes(converted_type);
  const logicalIs = (...names) => names.includes(logicalName);

  // Serialize into a fixed-width scratch buffer, then hash its bytes.
  const hashFixedWidth = (byteLength, write) => {
    const buf = /* @__PURE__ */ new ArrayBuffer(byteLength);
    write(new DataView(buf));
    return xxhash64(new Uint8Array(buf));
  };

  switch (type) {
    case "BOOLEAN":
      if (typeof value !== "boolean") return void 0;
      return xxhash64(new Uint8Array([value ? 1 : 0]));

    case "FLOAT":
      if (typeof value !== "number") return void 0;
      return hashFixedWidth(4, (view) => view.setFloat32(0, value, true));

    case "DOUBLE":
      if (typeof value !== "number") return void 0;
      return hashFixedWidth(8, (view) => view.setFloat64(0, value, true));

    case "INT32":
      if (convertedIs("DATE", "DECIMAL", "TIME_MILLIS")) return void 0;
      if (logicalIs("DATE", "TIME", "DECIMAL")) return void 0;
      if (typeof value !== "number" || !Number.isInteger(value)) return void 0;
      return hashFixedWidth(4, (view) => view.setInt32(0, value | 0, true));

    case "INT64": {
      if (convertedIs("TIMESTAMP_MILLIS", "TIMESTAMP_MICROS", "TIME_MICROS", "DECIMAL")) return void 0;
      if (logicalIs("TIMESTAMP", "TIME", "DECIMAL")) return void 0;
      let bigValue;
      if (typeof value === "bigint") bigValue = value;
      else if (typeof value === "number" && Number.isSafeInteger(value)) bigValue = BigInt(value);
      else return void 0;
      return hashFixedWidth(8, (view) => view.setBigUint64(0, BigInt.asUintN(64, bigValue), true));
    }

    case "BYTE_ARRAY":
      if (convertedIs("JSON", "BSON", "DECIMAL")) return void 0;
      if (logicalIs("JSON", "BSON", "VARIANT", "GEOMETRY", "GEOGRAPHY")) return void 0;
      if (typeof value === "string") return xxhash64(textEncoder.encode(value));
      if (value instanceof Uint8Array) return xxhash64(value);
      return void 0;

    case "FIXED_LEN_BYTE_ARRAY":
      if (convertedIs("DECIMAL", "INTERVAL")) return void 0;
      if (logicalIs("DECIMAL", "UUID", "FLOAT16", "GEOMETRY", "GEOGRAPHY")) return void 0;
      if (value instanceof Uint8Array) return xxhash64(value);
      return void 0;

    default:
      return void 0;
  }
}
2606
/**
 * Convert a value into a structure that `JSON.stringify` can serialize.
 *
 * - undefined becomes null, bigint becomes number, -0 becomes 0
 * - arrays are converted element by element
 * - Uint8Array becomes a plain number array, Date becomes an ISO string
 * - other objects are copied key by key, dropping undefined properties
 * - everything else is returned as is
 *
 * @param {any} obj value to convert
 * @returns {any} JSON-friendly equivalent
 */
function toJson(obj) {
  switch (typeof obj) {
    case "undefined":
      return null;
    case "bigint":
      return Number(obj);
    case "number":
      return Object.is(obj, -0) ? 0 : obj;
    default:
      break;
  }

  if (Array.isArray(obj)) return obj.map((item) => toJson(item));
  if (obj instanceof Uint8Array) return Array.from(obj);
  if (obj instanceof Date) return obj.toISOString();
  if (!(obj instanceof Object)) return obj;

  const converted = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value !== void 0) converted[key] = toJson(value);
  }
  return converted;
}
2623
/**
 * Determine the size of a remote resource with a one-byte ranged GET.
 *
 * A 206 response yields the total from `Content-Range`; a 200 response (range
 * ignored by the server) yields `Content-Length`, and the request is aborted
 * so the body is not downloaded.
 *
 * Header values are parsed as base-10, and a `Content-Length` that is not a
 * non-negative integer is rejected instead of being returned as NaN.
 *
 * @param {string} url resource URL
 * @param {RequestInit} [requestInit] extra fetch options; its `signal` is replaced by an internal one
 * @param {typeof globalThis.fetch} [fetchFn] fetch implementation
 * @returns {Promise<number>} total size in bytes
 * @throws {Error} on a non-ok response, a missing/invalid header, or when the size cannot be determined
 */
async function byteLengthFromUrlUsingGet(url, requestInit = {}, fetchFn = globalThis.fetch) {
  const controller = new AbortController();
  const headers = new Headers(requestInit.headers);
  headers.set("Range", "bytes=0-0");
  const res = await fetchFn(url, {
    ...requestInit,
    headers,
    signal: controller.signal
  });
  if (!res.ok) throw new Error(`fetch with range failed ${res.status}`);

  if (res.status === 206) {
    const contentRange = res.headers.get("Content-Range");
    if (!contentRange) throw new Error("missing content-range header");
    const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
    if (!match) throw new Error(`invalid content-range header: ${contentRange}`);
    return Number.parseInt(match[1], 10);
  }

  if (res.status === 200) {
    const contentLength = res.headers.get("Content-Length");
    // The server ignored the range; stop the transfer of the full body.
    controller.abort();
    if (contentLength) {
      const length = Number.parseInt(contentLength, 10);
      if (!Number.isSafeInteger(length) || length < 0) {
        throw new Error(`invalid content-length header: ${contentLength}`);
      }
      return length;
    }
  }

  throw new Error("server does not support range requests and missing content-length");
}
2647
/**
 * Determine the size of a remote resource, preferring a HEAD request.
 *
 * Falls back to `byteLengthFromUrlUsingGet` when HEAD answers 403 or carries
 * no `Content-Length`. The header is parsed as base-10, and a value that is
 * not a non-negative integer is rejected instead of being returned as NaN.
 *
 * @param {string} url resource URL
 * @param {RequestInit} [requestInit] extra fetch options (`method` is forced to "HEAD")
 * @param {typeof globalThis.fetch} [customFetch] fetch implementation
 * @returns {Promise<number>} total size in bytes
 * @throws {Error} on a failed HEAD request or an invalid `Content-Length`
 */
async function byteLengthFromUrl(url, requestInit, customFetch) {
  const fetch = customFetch ?? globalThis.fetch;
  const res = await fetch(url, {
    ...requestInit,
    method: "HEAD"
  });
  if (res.status === 403) return byteLengthFromUrlUsingGet(url, requestInit, fetch);
  if (!res.ok) throw new Error(`fetch head failed ${res.status}`);

  const length = res.headers.get("Content-Length");
  if (!length) return byteLengthFromUrlUsingGet(url, requestInit, fetch);

  const byteLength = Number.parseInt(length, 10);
  if (!Number.isSafeInteger(byteLength) || byteLength < 0) {
    throw new Error(`invalid content-length header: ${length}`);
  }
  return byteLength;
}
2659
/**
 * Create an async buffer that reads a remote file with HTTP range requests.
 *
 * `slice(start, end)` issues a ranged GET. If the server answers 200 (range
 * not honoured), the full body is kept and all later slices are served from
 * it. A negative `start` without `end` is sent as the HTTP suffix form
 * `bytes=-N`; the previous `bytes=-N-` is not a valid range specifier.
 *
 * @param {object} options
 * @param {string} options.url file URL
 * @param {number} [options.byteLength] file size; looked up with `byteLengthFromUrl` when nullish
 * @param {RequestInit} [options.requestInit] extra fetch options applied to every request
 * @param {typeof globalThis.fetch} [options.fetch] fetch implementation
 * @returns {Promise<{byteLength: number, slice: (start: number, end?: number) => Promise<ArrayBuffer>}>}
 * @throws {Error} if `url` is missing; `slice` rejects on failed or unexpected responses
 */
async function asyncBufferFromUrl({ url, byteLength, requestInit, fetch: customFetch }) {
  if (!url) throw new Error("missing url");
  const fetch = customFetch ?? globalThis.fetch;
  byteLength ??= await byteLengthFromUrl(url, requestInit, fetch);

  // Promise of the whole file, set once a server has ignored a range request.
  let buffer;
  const init = requestInit || {};

  return {
    byteLength,
    async slice(start, end) {
      if (buffer) return buffer.then((whole) => whole.slice(start, end));

      const headers = new Headers(init.headers);
      const isSuffix = start < 0 && end === void 0;
      const endStr = end === void 0 ? "" : end - 1;
      // The HTTP range end is inclusive, slice() end is exclusive.
      headers.set("Range", isSuffix ? `bytes=${start}` : `bytes=${start}-${endStr}`);

      const res = await fetch(url, {
        ...init,
        headers
      });
      if (!res.ok || !res.body) throw new Error(`fetch failed ${res.status}`);

      if (res.status === 200) {
        buffer = res.arrayBuffer();
        return buffer.then((whole) => whole.slice(start, end));
      }
      if (res.status === 206) return res.arrayBuffer();
      throw new Error(`fetch received unexpected status code ${res.status}`);
    }
  };
}
2685
/**
 * Wrap an async buffer so that repeated reads avoid repeated fetches.
 *
 * Files smaller than `minSize` are requested once in full, right away, and
 * every slice is cut from that single result. Larger files keep one cached
 * promise per normalized `(start, end)` range.
 *
 * @param {{byteLength: number, slice: (start: number, end?: number) => any}} file underlying buffer
 * @param {{minSize?: number}} [options] size threshold below which the whole file is loaded
 * @returns {{byteLength: number, slice: (start: number, end?: number) => any}}
 */
function cachedAsyncBuffer({ byteLength, slice }, { minSize = defaultInitialFetchSize } = {}) {
  if (byteLength < minSize) {
    const wholeFile = slice(0, byteLength);
    return {
      byteLength,
      slice: async (start, end) => (await wholeFile).slice(start, end)
    };
  }

  const pending = /* @__PURE__ */ new Map();
  const cachedSlice = (start, end) => {
    const key = cacheKey(start, end, byteLength);
    let promise = pending.get(key);
    if (!promise) {
      promise = slice(start, end);
      pending.set(key, promise);
    }
    return promise;
  };
  return {
    byteLength,
    slice: cachedSlice
  };
}
2708
/**
 * Build a normalized cache key for a slice request.
 *
 * When the total size is known, open-ended and suffix ranges are rewritten to
 * absolute "start,end" form so that equivalent requests share a key.
 *
 * @param {number} start first byte; negative means counted from the end
 * @param {number} [end] exclusive end
 * @param {number} [size] total size, if known
 * @returns {string} key of the form "start,end" or "start,"
 * @throws {Error} if a negative start is combined with an end, or if start > end
 */
function cacheKey(start, end, size) {
  const hasEnd = end !== void 0;
  const hasSize = size !== void 0;

  if (start < 0) {
    if (hasEnd) throw new Error(`invalid suffix range [${start}, ${end}]`);
    return hasSize ? `${size + start},${size}` : `${start},`;
  }
  if (hasEnd) {
    if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`);
    return `${start},${end}`;
  }
  return hasSize ? `${start},${size}` : `${start},`;
}
2719
+ new TextDecoder();
2720
+ export { BoundaryOrders, BoundaryOrders$1, CompressionCodecs, CompressionCodecs$1, ConvertedTypes, ConvertedTypes$1, EdgeInterpolationAlgorithms, EdgeInterpolationAlgorithms$1, Encodings, Encodings$1, FieldRepetitionTypes, FieldRepetitionTypes$1, PageTypes, PageTypes$1, ParquetTypes, ParquetTypes$1, asyncBufferFromUrl, cachedAsyncBuffer, getMaxDefinitionLevel, getMaxDefinitionLevel$1, getSchemaPath, getSchemaPath$1, hashParquetValue, hashParquetValue$1, isListLike, isListLike$1, isMapLike, isMapLike$1, parquetReadObjects, parseDecimal, snappyUncompress, toJson, toJson$1 };