@drakkar.software/starfish-events 3.0.0-alpha.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3071 @@
1
+ // src/plugin.ts
2
+ import { getCrypto, bytesToHex, PARQUET_MIME_TYPE } from "@drakkar.software/starfish-protocol";
3
+ import { resolveDocumentKey } from "@drakkar.software/starfish-server";
4
+
5
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/bytewriter.js
/**
 * Growable little-endian byte sink backed by an ArrayBuffer.
 *
 * `index` is the write position inside `buffer`. `offset` is advanced by the
 * same amount as `index` by every append method defined here.
 *
 * @param {number} [initalSize] initial capacity in bytes
 */
function ByteWriter(initalSize = 1024) {
  this.buffer = new ArrayBuffer(initalSize);
  this.view = new DataView(this.buffer);
  this.offset = 0;
  this.index = 0;
  return this;
}

/**
 * Make sure `size` more bytes fit after the current write position.
 * When they do not, the buffer is reallocated to at least double its
 * capacity and the existing contents are copied over.
 *
 * @param {number} size number of additional bytes about to be written
 */
ByteWriter.prototype.ensure = function(size) {
  const required = this.index + size;
  if (required > this.buffer.byteLength) {
    const grown = new ArrayBuffer(Math.max(this.buffer.byteLength * 2, required));
    new Uint8Array(grown).set(new Uint8Array(this.buffer));
    this.buffer = grown;
    this.view = new DataView(grown);
  }
};

/** No-op for the in-memory writer. */
ByteWriter.prototype.finish = function() {
};

/**
 * @returns {ArrayBuffer} a copy of the bytes written so far
 */
ByteWriter.prototype.getBuffer = function() {
  return this.buffer.slice(0, this.index);
};

/**
 * @returns {Uint8Array} a view (not a copy) over the bytes written so far
 */
ByteWriter.prototype.getBytes = function() {
  return new Uint8Array(this.buffer, 0, this.index);
};

/** @param {number} value byte to append */
ByteWriter.prototype.appendUint8 = function(value) {
  // ensure() already adds this.index, so only the byte count is passed.
  this.ensure(1);
  this.view.setUint8(this.index, value);
  this.offset++;
  this.index++;
};

/** @param {number} value unsigned 32-bit integer, written little-endian */
ByteWriter.prototype.appendUint32 = function(value) {
  this.ensure(4);
  this.view.setUint32(this.index, value, true);
  this.offset += 4;
  this.index += 4;
};

/** @param {number} value signed 32-bit integer, written little-endian */
ByteWriter.prototype.appendInt32 = function(value) {
  this.ensure(4);
  this.view.setInt32(this.index, value, true);
  this.offset += 4;
  this.index += 4;
};

/** @param {bigint | number} value converted with BigInt() and written as a little-endian int64 */
ByteWriter.prototype.appendInt64 = function(value) {
  this.ensure(8);
  this.view.setBigInt64(this.index, BigInt(value), true);
  this.offset += 8;
  this.index += 8;
};

/** @param {number} value written as a little-endian float32 */
ByteWriter.prototype.appendFloat32 = function(value) {
  this.ensure(4);
  this.view.setFloat32(this.index, value, true);
  this.offset += 4;
  this.index += 4;
};

/** @param {number} value written as a little-endian float64 */
ByteWriter.prototype.appendFloat64 = function(value) {
  this.ensure(8);
  this.view.setFloat64(this.index, value, true);
  this.offset += 8;
  this.index += 8;
};

/** @param {ArrayBufferLike} value buffer whose bytes are appended */
ByteWriter.prototype.appendBuffer = function(value) {
  this.appendBytes(new Uint8Array(value));
};

/** @param {Uint8Array} value bytes to append verbatim */
ByteWriter.prototype.appendBytes = function(value) {
  this.ensure(value.length);
  new Uint8Array(this.buffer, this.index, value.length).set(value);
  this.offset += value.length;
  this.index += value.length;
};

/**
 * Append an unsigned LEB128 varint.
 * The bitwise operators used here work on 32 bits, so only the low 32 bits
 * of `value` are encoded.
 *
 * @param {number} value
 */
ByteWriter.prototype.appendVarInt = function(value) {
  while ((value & ~127) !== 0) {
    this.appendUint8(value & 127 | 128);
    value >>>= 7;
  }
  this.appendUint8(value);
};

/**
 * Append an unsigned LEB128 varint from a BigInt.
 *
 * @param {bigint} value non-negative integer
 * @throws {RangeError} when `value` is negative; `>>=` on a negative BigInt
 *   keeps the sign, so the encoding loop would never terminate
 */
ByteWriter.prototype.appendVarBigInt = function(value) {
  if (value < 0n) {
    throw new RangeError(`varint value must be non-negative, got ${value}`);
  }
  while ((value & ~0x7fn) !== 0n) {
    this.appendUint8(Number(value & 0x7fn | 0x80n));
    value >>= 7n;
  }
  this.appendUint8(Number(value));
};

/**
 * Append a zigzag-encoded varint. Numbers use 32-bit zigzag, BigInts use
 * 64-bit zigzag.
 *
 * @param {number | bigint} value
 */
ByteWriter.prototype.appendZigZag = function(value) {
  if (typeof value === "number") {
    this.appendVarInt(value << 1 ^ value >> 31);
  } else {
    this.appendVarBigInt(value << 1n ^ value >> 63n);
  }
};
104
+
105
+ // ../../../node_modules/.pnpm/hyparquet@1.26.1/node_modules/hyparquet/src/schema.js
/**
 * Build a tree node for the schema element at `rootIndex`, recursively
 * consuming the following elements of the flat `schema` array as children.
 *
 * @param {Array<{name: string, num_children?: number}>} schema flat element list
 * @param {number} rootIndex index of the element to turn into a node
 * @param {string[]} path names leading to this element
 * @returns {{count: number, element: object, children: object[], path: string[]}}
 *   `count` is the number of flat elements covered by this node and its descendants
 */
function schemaTree(schema, rootIndex, path) {
  const element = schema[rootIndex];
  const expected = element.num_children;
  const children = [];
  // The first descendant, if any, sits immediately after the node itself.
  let nextIndex = rootIndex + 1;
  while (expected && children.length < expected) {
    const subtree = schemaTree(schema, nextIndex, [...path, schema[nextIndex].name]);
    children.push(subtree);
    nextIndex += subtree.count;
  }
  return { count: nextIndex - rootIndex, element, children, path };
}
/**
 * Resolve a column path to the chain of schema tree nodes from the root
 * down to the named element.
 *
 * @param {object[]} schema flat schema element list
 * @param {string[]} name path parts, outermost first
 * @returns {object[]} tree nodes starting with the root node
 * @throws {Error} when a path part has no matching child
 */
function getSchemaPath(schema, name) {
  const trail = [schemaTree(schema, 0, [])];
  for (const part of name) {
    const parent = trail[trail.length - 1];
    const match = parent.children.find(({ element }) => element.name === part);
    if (!match) throw new Error(`parquet schema element not found: ${name}`);
    trail.push(match);
  }
  return trail;
}
/**
 * Count the elements along a schema path, excluding the root, whose
 * repetition type is anything other than "REQUIRED".
 *
 * @param {Array<{element: {repetition_type?: string}}>} schemaPath
 * @returns {number}
 */
function getMaxDefinitionLevel(schemaPath) {
  return schemaPath
    .slice(1)
    .filter(({ element }) => element.repetition_type !== "REQUIRED")
    .length;
}
/**
 * Check whether a schema tree node has the shape this code treats as a list:
 * annotated `LIST`, exactly one child, and that child is `REPEATED` with at
 * most one child of its own.
 *
 * @param {{element: object, children: object[]} | undefined} schema
 * @returns {boolean}
 */
function isListLike(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "LIST") return false;
  // Exactly one child is required; a childless LIST node used to crash on
  // the property access below instead of returning false.
  if (schema.children.length !== 1) return false;
  const [repeated] = schema.children;
  if (repeated.children.length > 1) return false;
  return repeated.element.repetition_type === "REPEATED";
}
/**
 * Check whether a schema tree node has the shape this code treats as a map:
 * annotated `MAP`, exactly one `REPEATED` child, and that child has exactly
 * two children, where neither a child named "key" nor one named "value" is
 * `REPEATED`.
 *
 * @param {{element: object, children: object[]} | undefined} schema
 * @returns {boolean}
 */
function isMapLike(schema) {
  if (!schema) return false;
  if (schema.element.converted_type !== "MAP") return false;
  // Exactly one child is required; a childless MAP node used to crash on
  // the property access below instead of returning false.
  if (schema.children.length !== 1) return false;
  const [entries] = schema.children;
  if (entries.children.length !== 2) return false;
  if (entries.element.repetition_type !== "REPEATED") return false;
  const keyChild = entries.children.find((child) => child.element.name === "key");
  if (keyChild?.element.repetition_type === "REPEATED") return false;
  const valueChild = entries.children.find((child) => child.element.name === "value");
  if (valueChild?.element.repetition_type === "REPEATED") return false;
  return true;
}
162
+
163
+ // ../../../node_modules/.pnpm/hyparquet@1.26.1/node_modules/hyparquet/src/xxhash.js
// All arithmetic below is done on BigInts and reduced to 64 bits with MASK.
var MASK = (1n << 64n) - 1n;
var PRIME1 = 0x9e3779b185ebca87n;
var PRIME2 = 0xc2b2ae3d27d4eb4fn;
var PRIME3 = 0x165667b19e3779f9n;
var PRIME4 = 0x85ebca77c2b2ae63n;
var PRIME5 = 0x27d4eb2f165667c5n;

/**
 * Rotate a 64-bit value left by `r` bits.
 *
 * @param {bigint} x value already reduced to 64 bits
 * @param {bigint} r rotation amount
 * @returns {bigint}
 */
function rotl64(x, r) {
  const high = x << r;
  const low = x >> (64n - r);
  return (high | low) & MASK;
}

/**
 * Mix one 64-bit input word into an accumulator lane.
 *
 * @param {bigint} acc
 * @param {bigint} val
 * @returns {bigint}
 */
function round(acc, val) {
  const mixed = (acc + val * PRIME2) & MASK;
  return (rotl64(mixed, 31n) * PRIME1) & MASK;
}

/**
 * Fold a finished lane into the running hash.
 *
 * @param {bigint} acc
 * @param {bigint} val
 * @returns {bigint}
 */
function mergeRound(acc, val) {
  const folded = acc ^ round(0n, val);
  return (folded * PRIME1 + PRIME4) & MASK;
}

/**
 * 64-bit xxHash of a byte array.
 *
 * @param {Uint8Array} input bytes to hash (its byteOffset/byteLength are honoured)
 * @param {bigint} [seed]
 * @returns {bigint} hash reduced to 64 bits
 */
function xxhash64(input, seed = 0n) {
  const bytes = new DataView(input.buffer, input.byteOffset, input.byteLength);
  const total = input.byteLength;
  let pos = 0;
  let hash;

  if (total < 32) {
    hash = (seed + PRIME5) & MASK;
  } else {
    // Four lanes, each consuming 8 bytes of every 32-byte stripe.
    const lanes = [
      (seed + PRIME1 + PRIME2) & MASK,
      (seed + PRIME2) & MASK,
      seed,
      (seed - PRIME1) & MASK,
    ];
    for (; pos + 32 <= total; pos += 32) {
      for (let lane = 0; lane < 4; lane++) {
        lanes[lane] = round(lanes[lane], bytes.getBigUint64(pos + 8 * lane, true));
      }
    }
    hash = (rotl64(lanes[0], 1n) + rotl64(lanes[1], 7n) + rotl64(lanes[2], 12n) + rotl64(lanes[3], 18n)) & MASK;
    for (const lane of lanes) {
      hash = mergeRound(hash, lane);
    }
  }

  hash = (hash + BigInt(total)) & MASK;

  // Remaining whole 8-byte words.
  for (; pos + 8 <= total; pos += 8) {
    hash ^= round(0n, bytes.getBigUint64(pos, true));
    hash = (rotl64(hash, 27n) * PRIME1 + PRIME4) & MASK;
  }
  // At most one remaining 4-byte word.
  if (pos + 4 <= total) {
    hash ^= (BigInt(bytes.getUint32(pos, true)) * PRIME1) & MASK;
    hash = (rotl64(hash, 23n) * PRIME2 + PRIME3) & MASK;
    pos += 4;
  }
  // Trailing single bytes.
  for (; pos < total; pos++) {
    hash ^= (BigInt(bytes.getUint8(pos)) * PRIME5) & MASK;
    hash = (rotl64(hash, 11n) * PRIME1) & MASK;
  }

  // Final avalanche.
  hash ^= hash >> 33n;
  hash = (hash * PRIME2) & MASK;
  hash ^= hash >> 29n;
  hash = (hash * PRIME3) & MASK;
  hash ^= hash >> 32n;
  return hash;
}
233
+
234
+ // ../../../node_modules/.pnpm/hyparquet@1.26.1/node_modules/hyparquet/src/bloom.js
var textEncoder = new TextEncoder();
var SALT = new Uint32Array([
  1203114875,
  1150766481,
  2284105051,
  2729912477,
  1884591559,
  770785867,
  2667333959,
  1550580529,
]);

/**
 * Hash a value for bloom-filter use according to its column schema element.
 *
 * Returns `undefined` when the value is null/undefined, when its JavaScript
 * type does not match the physical type, or when the column carries one of
 * the converted/logical types listed per physical type below.
 *
 * @param {any} value
 * @param {{type?: string, converted_type?: string, logical_type?: {type?: string}}} element
 * @returns {bigint | undefined} xxhash64 of the value's little-endian bytes
 */
function hashParquetValue(value, element) {
  if (value === null || value === undefined) return undefined;
  const { type, converted_type, logical_type } = element;
  const logical = logical_type?.type;

  // True when the column's annotation is one this function refuses to hash.
  const excluded = (convertedNames, logicalNames) =>
    convertedNames.includes(converted_type) || logicalNames.includes(logical);

  // Serialize a fixed-width scalar into a scratch buffer and hash its bytes.
  const hashScalar = (width, store) => {
    const scratch = new DataView(new ArrayBuffer(width));
    store(scratch);
    return xxhash64(new Uint8Array(scratch.buffer));
  };

  switch (type) {
    case "BOOLEAN":
      if (typeof value !== "boolean") return undefined;
      return xxhash64(new Uint8Array([value ? 1 : 0]));

    case "FLOAT":
      if (typeof value !== "number") return undefined;
      return hashScalar(4, (dv) => dv.setFloat32(0, value, true));

    case "DOUBLE":
      if (typeof value !== "number") return undefined;
      return hashScalar(8, (dv) => dv.setFloat64(0, value, true));

    case "INT32":
      if (excluded(["DATE", "DECIMAL", "TIME_MILLIS"], ["DATE", "TIME", "DECIMAL"])) return undefined;
      if (typeof value !== "number" || !Number.isInteger(value)) return undefined;
      return hashScalar(4, (dv) => dv.setInt32(0, value | 0, true));

    case "INT64": {
      if (excluded(
        ["TIMESTAMP_MILLIS", "TIMESTAMP_MICROS", "TIME_MICROS", "DECIMAL"],
        ["TIMESTAMP", "TIME", "DECIMAL"],
      )) return undefined;
      let wide;
      if (typeof value === "bigint") wide = value;
      else if (typeof value === "number" && Number.isSafeInteger(value)) wide = BigInt(value);
      else return undefined;
      return hashScalar(8, (dv) => dv.setBigUint64(0, BigInt.asUintN(64, wide), true));
    }

    case "BYTE_ARRAY":
      if (excluded(
        ["JSON", "BSON", "DECIMAL"],
        ["JSON", "BSON", "VARIANT", "GEOMETRY", "GEOGRAPHY"],
      )) return undefined;
      if (typeof value === "string") return xxhash64(textEncoder.encode(value));
      if (value instanceof Uint8Array) return xxhash64(value);
      return undefined;

    case "FIXED_LEN_BYTE_ARRAY":
      if (excluded(
        ["DECIMAL", "INTERVAL"],
        ["DECIMAL", "UUID", "FLOAT16", "GEOMETRY", "GEOGRAPHY"],
      )) return undefined;
      if (value instanceof Uint8Array) return xxhash64(value);
      return undefined;

    default:
      return undefined;
  }
}
302
+
303
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/thrift.js
// Thrift compact-protocol type ids used by this serializer.
var STOP = 0;
var TRUE = 1;
var FALSE = 2;
var BYTE = 3;
var I32 = 5;
var I64 = 6;
var DOUBLE = 7;
var BINARY = 8;
var LIST = 9;
var STRUCT = 12;

/**
 * Serialize a plain object as a Thrift compact-protocol struct.
 *
 * @param {object} writer byte sink providing appendUint8, appendVarInt,
 *   appendZigZag, appendFloat64 and appendBytes
 * @param {Record<string, any>} data object whose keys are `field_<id>` in
 *   strictly increasing id order
 */
function serializeTCompactProtocol(writer, data) {
  writeElement(writer, STRUCT, data);
}

/**
 * Write one value of the given compact type.
 *
 * @param {object} writer
 * @param {number} type compact type id
 * @param {any} value
 * @throws {Error} when the value does not match the type, a struct key is
 *   not `field_<number>`, or struct field ids are not strictly increasing
 */
function writeElement(writer, type, value) {
  // Boolean struct fields carry their value in the field header type nibble.
  if (type === TRUE || type === FALSE) return;

  if (type === BYTE && typeof value === "number") {
    writer.appendUint8(value);
    return;
  }
  if (type === I32 && typeof value === "number") {
    writer.appendZigZag(value);
    return;
  }
  if (type === I64 && typeof value === "bigint") {
    writer.appendZigZag(value);
    return;
  }
  if (type === DOUBLE && typeof value === "number") {
    writer.appendFloat64(value);
    return;
  }
  if (type === BINARY && typeof value === "string") {
    const bytes = new TextEncoder().encode(value);
    writer.appendVarInt(bytes.length);
    writer.appendBytes(bytes);
    return;
  }
  if (type === BINARY && value instanceof Uint8Array) {
    writer.appendVarInt(value.byteLength);
    writer.appendBytes(value);
    return;
  }
  if (type === LIST && Array.isArray(value)) {
    const elemType = getCompactTypeForList(value);
    // Sizes up to 14 share the header byte; larger sizes follow as a varint.
    if (value.length > 14) {
      writer.appendUint8(15 << 4 | elemType);
      writer.appendVarInt(value.length);
    } else {
      writer.appendUint8(value.length << 4 | elemType);
    }
    if (elemType === FALSE) {
      // Boolean list elements are written as one byte each.
      for (const v of value) {
        writer.appendUint8(v ? 1 : 0);
      }
    } else {
      for (const v of value) {
        writeElement(writer, elemType, v);
      }
    }
    return;
  }
  // `typeof null` is "object" too; reject it here so the caller gets the
  // descriptive error below rather than a TypeError from Object.entries.
  if (type === STRUCT && typeof value === "object" && value !== null) {
    let lastFid = 0;
    for (const [k, v] of Object.entries(value)) {
      if (v === void 0) continue;
      const fid = parseInt(k.replace(/^field_/, ""), 10);
      if (Number.isNaN(fid)) {
        throw new Error(`thrift invalid field name: ${k}. Expected "field_###"`);
      }
      const t = getCompactTypeForValue(v);
      const delta = fid - lastFid;
      if (delta <= 0) {
        throw new Error(`thrift non-monotonic field id: fid=${fid}, lastFid=${lastFid}`);
      }
      if (delta > 15) {
        // Long form: type byte followed by the zigzag field id.
        writer.appendUint8(t);
        writer.appendZigZag(fid);
      } else {
        // Short form: id delta in the high nibble, type in the low nibble.
        writer.appendUint8(delta << 4 | t);
      }
      writeElement(writer, t, v);
      lastFid = fid;
    }
    writer.appendUint8(STOP);
    return;
  }
  throw new Error(`thrift invalid type ${type} for value ${value}`);
}

/**
 * Infer the compact type id for a JavaScript value.
 *
 * @param {any} value
 * @returns {number}
 * @throws {Error} for values with no mapping (null, undefined, functions, ...)
 */
function getCompactTypeForValue(value) {
  if (value === true) return TRUE;
  if (value === false) return FALSE;
  if (Number.isInteger(value)) return I32;
  if (typeof value === "number") return DOUBLE;
  if (typeof value === "bigint") return I64;
  if (typeof value === "string") return BINARY;
  if (value instanceof Uint8Array) return BINARY;
  if (Array.isArray(value)) return LIST;
  if (value && typeof value === "object") return STRUCT;
  throw new Error(`Cannot determine thrift compact type for: ${value}`);
}

/**
 * Infer the single element type of a list.
 *
 * Booleans are normalized to FALSE, and a mix of integers and non-integer
 * numbers is promoted to DOUBLE. An empty list falls back to BYTE.
 *
 * @param {any[]} value
 * @returns {number}
 * @throws {Error} when elements have incompatible types
 */
function getCompactTypeForList(value) {
  let elemType = 0;
  for (const v of value) {
    let t = getCompactTypeForValue(v);
    if (t === TRUE) t = FALSE;
    if (!elemType) elemType = t;
    if (elemType === DOUBLE && t === I32) t = DOUBLE;
    if (elemType === I32 && t === DOUBLE) elemType = DOUBLE;
    if (t !== elemType) {
      throw new Error(`thrift invalid type for list element: ${v} (expected type ${elemType})`);
    }
  }
  // elemType is still 0 only for an empty list. `??` never fired here because
  // 0 is not nullish, so the BYTE fallback was unreachable.
  return elemType || BYTE;
}
405
+
406
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/bloom.js
// Per-word multipliers used to derive the eight bit positions of a block mask.
var SALT2 = new Uint32Array([
  1203114875,
  1150766481,
  2284105051,
  2729912477,
  1884591559,
  770785867,
  2667333959,
  1550580529
]);
var BYTES_PER_BLOCK = 32;
var MIN_BYTES = 32;
var MAX_BYTES = 128 * 1024 * 1024;

/**
 * Map a 64-bit hash to a block number using its upper 32 bits.
 *
 * @param {bigint} hash
 * @param {number} numBlocks
 * @returns {number} index in [0, numBlocks)
 */
function blockIndex(hash, numBlocks) {
  return Number((hash >> 32n) * BigInt(numBlocks) >> 32n);
}

/**
 * Build the eight-word mask for a hash: one bit set per 32-bit word, chosen
 * from the lower 32 bits of the hash and the matching SALT2 entry.
 *
 * @param {bigint} hash
 * @returns {Uint32Array} eight words
 */
function blockMask(hash) {
  const m = new Uint32Array(8);
  const low = Number(hash & 0xffffffffn) | 0;
  for (let i = 0; i < 8; i++) {
    // Top 5 bits of the 32-bit product select the bit position (0..31).
    m[i] = 1 << (Math.imul(low, SALT2[i]) >>> 27);
  }
  return m;
}

/**
 * Set the bits for `hash` in the filter.
 *
 * @param {Uint32Array} blocks filter words, eight per block
 * @param {bigint} hash
 */
function sbbfInsert(blocks, hash) {
  const offset = blockIndex(hash, blocks.length >> 3) << 3;
  const m = blockMask(hash);
  for (let i = 0; i < 8; i++) {
    blocks[offset + i] |= m[i];
  }
}

/**
 * Smallest power of two that is >= n (1 for n <= 1).
 *
 * @param {number} n
 * @returns {number}
 */
function nextPowerOfTwo(n) {
  let p = 1;
  // Multiply rather than shift: `p <<= 1` wraps to a negative number and then
  // to 0 once p passes 2^30, so the loop never ended for n > 2^30.
  while (p < n) p *= 2;
  return p;
}

/**
 * Choose a filter size in bytes for `ndv` distinct values at false-positive
 * probability `fpp`. The result is a multiple of BYTES_PER_BLOCK, at least
 * MIN_BYTES, capped at MAX_BYTES, and rounded up to a power of two while it
 * is below 1024.
 *
 * @param {number} ndv number of distinct values, >= 0
 * @param {number} fpp false-positive probability, strictly between 0 and 1
 * @returns {number}
 * @throws {Error} when `fpp` or `ndv` is out of range
 */
function optimalNumBytes(ndv, fpp) {
  if (!(fpp > 0 && fpp < 1)) throw new Error(`bloom filter fpp must be in (0, 1), got ${fpp}`);
  if (!(ndv >= 0)) throw new Error(`bloom filter ndv must be >= 0, got ${ndv}`);
  const m = -8 * ndv / Math.log(1 - fpp ** (1 / 8));
  let numBits = Math.ceil(m);
  if (!isFinite(numBits) || numBits > MAX_BYTES << 3) numBits = MAX_BYTES << 3;
  const blockBits = BYTES_PER_BLOCK << 3;
  numBits = Math.ceil(numBits / blockBits) * blockBits;
  let numBytes = numBits >> 3;
  if (numBytes < MIN_BYTES) numBytes = MIN_BYTES;
  if (numBytes < 1024) numBytes = nextPowerOfTwo(numBytes);
  return numBytes;
}
/**
 * Collects value hashes for one column and turns them into filter words.
 */
var BloomBuilder = class {
  /**
   * @param {SchemaElement} element schema element used to hash inserted values
   * @param {{ fpp?: number, maxBytes?: number }} [options] target
   *   false-positive probability and upper bound on the filter size
   */
  constructor(element, options = {}) {
    const { fpp = 0.01, maxBytes = 1024 * 1024 } = options;
    this.element = element;
    this.fpp = fpp;
    this.maxBytes = maxBytes;
    // Distinct hashes seen so far.
    this.hashes = /* @__PURE__ */ new Set();
    // Number of non-null values that could not be hashed.
    this.skipped = 0;
  }

  /**
   * Record one value. Null and undefined are ignored; values that cannot be
   * hashed are counted in `skipped`.
   *
   * @param {any} value
   */
  insert(value) {
    if (value === null || value === undefined) return;
    const hashed = hashParquetValue(value, this.element);
    if (hashed !== undefined) {
      this.hashes.add(hashed);
    } else {
      this.skipped++;
    }
  }

  /**
   * Build the filter words, or return `undefined` when any value was
   * skipped, nothing was inserted, or the computed size exceeds `maxBytes`.
   *
   * @returns {Uint32Array | undefined}
   */
  finalize() {
    const usable = this.skipped === 0 && this.hashes.size > 0;
    if (!usable) return undefined;
    const byteCount = optimalNumBytes(this.hashes.size, this.fpp);
    if (byteCount > this.maxBytes) return undefined;
    const words = new Uint32Array(byteCount >> 2);
    this.hashes.forEach((hashed) => sbbfInsert(words, hashed));
    return words;
  }
};
/**
 * Write a bloom filter: a Thrift-encoded header followed by the filter words
 * as little-endian uint32 values.
 *
 * @param {object} writer byte sink
 * @param {Uint32Array} blocks filter words; length must be a multiple of 8
 * @throws {Error} when the word count is not a multiple of 8
 */
function writeBloomFilter(writer, blocks) {
  const wordCount = blocks.length;
  if (wordCount % 8 !== 0) {
    throw new Error(`bloom filter block count must be a multiple of 8 uint32 words, got ${wordCount}`);
  }
  const header = {
    field_1: blocks.byteLength, // numBytes
    field_2: { field_1: {} }, // algorithm: SplitBlockAlgorithm
    field_3: { field_1: {} }, // hash: XxHash
    field_4: { field_1: {} }, // compression: Uncompressed
  };
  serializeTCompactProtocol(writer, header);
  for (let w = 0; w < wordCount; w++) {
    writer.appendUint32(blocks[w]);
  }
}
/**
 * Write every pending bloom filter and record where it landed on the
 * owning chunk's metadata (`bloom_filter_offset` / `bloom_filter_length`).
 * Entries without a filter or without chunk metadata are skipped.
 *
 * @param {object} writer byte sink exposing a running `offset`
 * @param {Array<{chunk: {meta_data?: object}, bloomFilter?: Uint32Array}>} pageIndexes
 */
function writeBlooms(writer, pageIndexes) {
  for (const entry of pageIndexes) {
    const meta = entry.chunk.meta_data;
    if (!entry.bloomFilter || !meta) continue;
    const startedAt = writer.offset;
    writeBloomFilter(writer, entry.bloomFilter);
    meta.bloom_filter_offset = BigInt(startedAt);
    meta.bloom_filter_length = writer.offset - startedAt;
  }
}
515
+
516
+ // ../../../node_modules/.pnpm/hyparquet@1.26.1/node_modules/hyparquet/src/constants.js
// Name tables for Parquet enumerations.
// NOTE(review): each name's array index presumably matches the numeric enum
// value used in the Parquet Thrift definitions — confirm before reordering.

/** Physical types. INT96 is deprecated. */
var ParquetTypes = [
  "BOOLEAN", "INT32", "INT64",
  "INT96", // deprecated
  "FLOAT", "DOUBLE", "BYTE_ARRAY", "FIXED_LEN_BYTE_ARRAY",
];

/** Value encodings. GROUP_VAR_INT and BIT_PACKED are deprecated. */
var Encodings = [
  "PLAIN",
  "GROUP_VAR_INT", // deprecated
  "PLAIN_DICTIONARY", "RLE",
  "BIT_PACKED", // deprecated
  "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY",
  "RLE_DICTIONARY", "BYTE_STREAM_SPLIT",
];

/** Field repetition kinds. */
var FieldRepetitionTypes = ["REQUIRED", "OPTIONAL", "REPEATED"];

/** Converted (legacy logical) type annotations. */
var ConvertedTypes = [
  "UTF8", "MAP", "MAP_KEY_VALUE", "LIST", "ENUM", "DECIMAL",
  "DATE", "TIME_MILLIS", "TIME_MICROS", "TIMESTAMP_MILLIS", "TIMESTAMP_MICROS",
  "UINT_8", "UINT_16", "UINT_32", "UINT_64",
  "INT_8", "INT_16", "INT_32", "INT_64",
  "JSON", "BSON", "INTERVAL",
];

/** Compression codecs. */
var CompressionCodecs = [
  "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO",
  "BROTLI", "LZ4", "ZSTD", "LZ4_RAW",
];

/** Page kinds. */
var PageTypes = ["DATA_PAGE", "INDEX_PAGE", "DICTIONARY_PAGE", "DATA_PAGE_V2"];

/** Column-index boundary orderings. */
var BoundaryOrders = ["UNORDERED", "ASCENDING", "DESCENDING"];

/** Edge interpolation algorithm names. */
var EdgeInterpolationAlgorithms = ["SPHERICAL", "VINCENTY", "THOMAS", "ANDOYER", "KARNEY"];
599
+
600
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/delta.js
// Layout parameters written into every delta-binary-packed header.
var BLOCK_SIZE = 128;
var MINIBLOCKS_PER_BLOCK = 4;
var VALUES_PER_MINIBLOCK = BLOCK_SIZE / MINIBLOCKS_PER_BLOCK;

/**
 * Write integers using delta binary packing: a header (block size,
 * miniblocks per block, value count, zigzag first value) followed by blocks
 * of deltas stored relative to each block's minimum delta.
 *
 * @param {object} writer byte sink providing appendVarInt, appendZigZag,
 *   appendBytes and appendUint8
 * @param {ArrayLike<number | bigint>} values
 * @throws {Error} when the first value is neither a number nor a bigint
 */
function deltaBinaryPack(writer, values) {
  const total = values.length;
  if (total > 0 && typeof values[0] !== "number" && typeof values[0] !== "bigint") {
    throw new Error("deltaBinaryPack only supports number or bigint arrays");
  }

  writer.appendVarInt(BLOCK_SIZE);
  writer.appendVarInt(MINIBLOCKS_PER_BLOCK);
  writer.appendVarInt(total);
  if (total === 0) {
    // Empty input still gets a complete header with a zero first value.
    writer.appendVarInt(0);
    return;
  }
  writer.appendZigZag(values[0]);

  // The first value lives in the header, so blocks start at index 1.
  for (let start = 1; start < total; start += BLOCK_SIZE) {
    const blockLen = Math.min(BLOCK_SIZE, total - start);
    const deltas = new BigInt64Array(blockLen);
    let minDelta;
    for (let k = 0; k < blockLen; k++) {
      const step = BigInt(values[start + k]) - BigInt(values[start + k - 1]);
      deltas[k] = step;
      if (k === 0 || step < minDelta) minDelta = step;
    }
    writer.appendZigZag(minDelta);

    // One bit width per miniblock, sized for its largest adjusted delta.
    const widths = new Uint8Array(MINIBLOCKS_PER_BLOCK);
    for (let mb = 0; mb < MINIBLOCKS_PER_BLOCK; mb++) {
      const from = mb * VALUES_PER_MINIBLOCK;
      const to = Math.min(from + VALUES_PER_MINIBLOCK, blockLen);
      let widest = 0n;
      for (let k = from; k < to; k++) {
        const adjusted = deltas[k] - minDelta;
        if (adjusted > widest) widest = adjusted;
      }
      widths[mb] = bitWidth(widest);
    }
    writer.appendBytes(widths);

    // Miniblocks with width 0 carry no payload; the others are always padded
    // with zeros to a full VALUES_PER_MINIBLOCK entries.
    for (let mb = 0; mb < MINIBLOCKS_PER_BLOCK; mb++) {
      const width = widths[mb];
      if (width === 0) continue;
      const from = mb * VALUES_PER_MINIBLOCK;
      const to = Math.min(from + VALUES_PER_MINIBLOCK, blockLen);
      let pending = 0n;
      let pendingBits = 0;
      for (let slot = 0; slot < VALUES_PER_MINIBLOCK; slot++) {
        const at = from + slot;
        const adjusted = at < to ? deltas[at] - minDelta : 0n;
        pending |= adjusted << BigInt(pendingBits);
        pendingBits += width;
        for (; pendingBits >= 8; pendingBits -= 8) {
          writer.appendUint8(Number(pending & 0xffn));
          pending >>= 8n;
        }
      }
    }
  }
}

/**
 * Write byte arrays as their delta-packed lengths followed by the
 * concatenated bytes.
 *
 * @param {object} writer
 * @param {Uint8Array[]} values
 * @throws {Error} when an element is not a Uint8Array
 */
function deltaLengthByteArray(writer, values) {
  const sizes = new Int32Array(values.length);
  for (let n = 0; n < values.length; n++) {
    const item = values[n];
    if (!(item instanceof Uint8Array)) {
      throw new Error("deltaLengthByteArray expects Uint8Array values");
    }
    sizes[n] = item.length;
  }
  deltaBinaryPack(writer, sizes);
  for (const item of values) {
    writer.appendBytes(item);
  }
}

/**
 * Write byte arrays with incremental (shared-prefix) encoding: delta-packed
 * prefix lengths, delta-packed suffix lengths, then the suffix bytes.
 *
 * @param {object} writer
 * @param {Uint8Array[]} values
 * @throws {Error} when an element is not a Uint8Array
 */
function deltaByteArray(writer, values) {
  const total = values.length;
  if (total === 0) {
    deltaBinaryPack(writer, []);
    deltaBinaryPack(writer, []);
    return;
  }

  const prefixLengths = new Int32Array(total);
  const suffixLengths = new Int32Array(total);
  const suffixes = new Array(total);

  const first = values[0];
  if (!(first instanceof Uint8Array)) {
    throw new Error("deltaByteArray expects Uint8Array values");
  }
  // The first value has no predecessor, so it is stored whole.
  prefixLengths[0] = 0;
  suffixLengths[0] = first.length;
  suffixes[0] = first;

  for (let n = 1; n < total; n++) {
    const before = values[n - 1];
    const current = values[n];
    if (!(current instanceof Uint8Array)) {
      throw new Error("deltaByteArray expects Uint8Array values");
    }
    const limit = Math.min(before.length, current.length);
    let shared = 0;
    while (shared < limit && before[shared] === current[shared]) {
      shared++;
    }
    prefixLengths[n] = shared;
    suffixLengths[n] = current.length - shared;
    suffixes[n] = current.subarray(shared);
  }

  deltaBinaryPack(writer, prefixLengths);
  deltaBinaryPack(writer, suffixLengths);
  for (const tail of suffixes) {
    writer.appendBytes(tail);
  }
}

/**
 * Number of bits needed to represent a non-negative BigInt (0 for 0n).
 *
 * @param {bigint} value
 * @returns {number}
 */
function bitWidth(value) {
  if (value === 0n) return 0;
  let bits = 0;
  for (; value > 0n; value >>= 1n) {
    bits++;
  }
  return bits;
}
726
+
727
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/encoding.js
/**
 * Write values using the RLE / bit-packed hybrid encoding. A stretch of at
 * least 8 equal values becomes an RLE run; everything else is collected
 * into bit-packed groups of 8 values.
 *
 * @param {object} writer byte sink exposing `offset`, appendVarInt and appendUint8
 * @param {ArrayLike<number>} values
 * @param {number} bitWidth2 bits per value
 * @returns {number} how far `writer.offset` advanced
 */
function writeRleBitPackedHybrid(writer, values, bitWidth2) {
  const before = writer.offset;
  let groups = 0;
  let groupsStart = 0;

  // Emit the bit-packed groups collected so far, if any.
  const flushGroups = () => {
    if (!groups) return;
    writeBitPackedGroups(writer, values, groupsStart, groups, bitWidth2);
    groups = 0;
  };

  let cursor = 0;
  while (cursor < values.length) {
    const head = values[cursor];
    let run = 1;
    while (cursor + run < values.length && values[cursor + run] === head) {
      run++;
    }
    if (run < 8) {
      // Too short for RLE: claim the next 8 positions for a bit-packed group.
      if (groups === 0) groupsStart = cursor;
      groups++;
      cursor += 8;
      continue;
    }
    flushGroups();
    writeRleRun(writer, head, run, bitWidth2);
    cursor += run;
  }
  flushGroups();
  return writer.offset - before;
}

/**
 * Write one RLE run: a varint header (`count << 1`) followed by the value in
 * ceil(bitWidth2 / 8) little-endian bytes.
 *
 * @param {object} writer
 * @param {number} value
 * @param {number} count
 * @param {number} bitWidth2
 */
function writeRleRun(writer, value, count, bitWidth2) {
  writer.appendVarInt(count << 1);
  const byteCount = (bitWidth2 + 7) >> 3;
  for (let shift = 0; shift < byteCount * 8; shift += 8) {
    writer.appendUint8((value >> shift) & 255);
  }
}

/**
 * Write `numGroups` bit-packed groups of 8 values each, starting at
 * `values[start]`. Positions past the end of `values` are packed as 0.
 * Packing uses 32-bit integer arithmetic.
 *
 * @param {object} writer
 * @param {ArrayLike<number>} values
 * @param {number} start
 * @param {number} numGroups
 * @param {number} bitWidth2
 */
function writeBitPackedGroups(writer, values, start, numGroups, bitWidth2) {
  // Low bit set marks a bit-packed header.
  writer.appendVarInt((numGroups << 1) | 1);
  if (bitWidth2 === 0) return;

  const keepBits = (1 << bitWidth2) - 1;
  const stop = start + numGroups * 8;
  let pending = 0;
  let pendingBits = 0;
  for (let at = start; at < stop; at++) {
    const item = at < values.length ? values[at] & keepBits : 0;
    pending |= item << pendingBits;
    pendingBits += bitWidth2;
    for (; pendingBits >= 8; pendingBits -= 8) {
      writer.appendUint8(pending & 255);
      pending >>>= 8;
    }
  }
  if (pendingBits > 0) {
    writer.appendUint8(pending & 255);
  }
}
788
+
789
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/plain.js
/**
 * Write values with PLAIN encoding for the given physical type.
 *
 * @param {object} writer byte sink
 * @param {ArrayLike<any>} values
 * @param {string} type Parquet physical type name
 * @param {number} [fixedLength] required for FIXED_LEN_BYTE_ARRAY
 * @throws {Error} for an unsupported type, a missing fixed length, or a
 *   value that does not match the type
 */
function writePlain(writer, values, type, fixedLength) {
  switch (type) {
    case "BOOLEAN":
      writePlainBoolean(writer, values);
      return;
    case "INT32":
      writePlainInt32(writer, values);
      return;
    case "INT64":
      writePlainInt64(writer, values);
      return;
    case "FLOAT":
      writePlainFloat(writer, values);
      return;
    case "DOUBLE":
      writePlainDouble(writer, values);
      return;
    case "BYTE_ARRAY":
      writePlainByteArray(writer, values);
      return;
    case "FIXED_LEN_BYTE_ARRAY":
      if (!fixedLength) throw new Error("parquet FIXED_LEN_BYTE_ARRAY expected type_length");
      writePlainByteArrayFixed(writer, values, fixedLength);
      return;
    default:
      throw new Error(`parquet unsupported type: ${type}`);
  }
}

/**
 * Pack booleans eight to a byte, least-significant bit first. A final
 * partial byte is written when the count is not a multiple of 8.
 *
 * @param {object} writer
 * @param {boolean[]} values
 */
function writePlainBoolean(writer, values) {
  let packed = 0;
  for (let n = 0; n < values.length; n++) {
    const flag = values[n];
    if (typeof flag !== "boolean") throw new Error("parquet expected boolean value, got " + flag);
    const bit = n % 8;
    if (flag) packed |= 1 << bit;
    if (bit === 7) {
      writer.appendUint8(packed);
      packed = 0;
    }
  }
  if (values.length % 8) {
    writer.appendUint8(packed);
  }
}

/**
 * Write each value as an int32 after checking it is an integer in range.
 *
 * @param {object} writer
 * @param {Iterable<number>} values
 */
function writePlainInt32(writer, values) {
  const INT32_LOW = -2147483648;
  const INT32_HIGH = 2147483647;
  for (const item of values) {
    if (!Number.isSafeInteger(item)) throw new Error("parquet expected integer value, got " + item);
    if (item < INT32_LOW || item > INT32_HIGH) throw new Error("parquet expected int32 value, got " + item);
    writer.appendInt32(item);
  }
}

/**
 * Write each bigint as an int64.
 *
 * @param {object} writer
 * @param {Iterable<bigint>} values
 */
function writePlainInt64(writer, values) {
  for (const item of values) {
    if (typeof item !== "bigint") throw new Error("parquet expected bigint value, got " + item);
    writer.appendInt64(item);
  }
}

/**
 * Write each number as a float32.
 *
 * @param {object} writer
 * @param {Iterable<number>} values
 */
function writePlainFloat(writer, values) {
  for (const item of values) {
    if (typeof item !== "number") throw new Error("parquet expected number value, got " + item);
    writer.appendFloat32(item);
  }
}

/**
 * Write each number as a float64.
 *
 * @param {object} writer
 * @param {Iterable<number>} values
 */
function writePlainDouble(writer, values) {
  for (const item of values) {
    if (typeof item !== "number") throw new Error("parquet expected number value, got " + item);
    writer.appendFloat64(item);
  }
}

/**
 * Write each value as a uint32 length followed by its bytes. Strings are
 * UTF-8 encoded first.
 *
 * @param {object} writer
 * @param {Iterable<Uint8Array | string>} values
 */
function writePlainByteArray(writer, values) {
  for (const item of values) {
    const payload = typeof item === "string" ? new TextEncoder().encode(item) : item;
    if (!(payload instanceof Uint8Array)) {
      throw new Error("parquet expected Uint8Array value, got " + typeof payload);
    }
    writer.appendUint32(payload.length);
    writer.appendBytes(payload);
  }
}

/**
 * Write fixed-length byte arrays back to back with no length prefix.
 *
 * @param {object} writer
 * @param {Iterable<Uint8Array>} values
 * @param {number} fixedLength required length of every value
 */
function writePlainByteArrayFixed(writer, values, fixedLength) {
  for (const item of values) {
    if (!(item instanceof Uint8Array)) throw new Error("parquet expected Uint8Array value, got " + typeof item);
    if (item.length !== fixedLength) throw new Error(`parquet expected Uint8Array of length ${fixedLength}`);
    writer.appendBytes(item);
  }
}
873
+
874
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/variant.js
875
+ var encoder = new TextEncoder();
876
+ var INT64_MIN = -(2n ** 63n);
877
+ var INT64_MAX = 2n ** 63n - 1n;
878
+ var VARIANT_NULL = new Uint8Array([0]);
879
+ var RESERVED_SHREDDING_FIELDS = /* @__PURE__ */ new Set(["value", "typed_value"]);
880
+ var EMPTY_KEY_INDEX = /* @__PURE__ */ new Map();
881
+ var EMPTY_METADATA = writeVariantMetadata([]);
/**
 * Encode a column of values as variant rows.
 *
 * With a shredding configuration, each row gets metadata built from its own
 * keys plus the fields returned by `encodeShredded`. Without one, a single
 * dictionary and metadata blob are built for the whole column and each row
 * is `{ metadata, value }`. Undefined entries become `null` rows.
 *
 * @param {any[]} values
 * @param {any} [shredding] shredding configuration, if any
 * @param {{required?: boolean, name?: string}} [column]
 * @returns {Array<object | null>}
 * @throws {Error} when the column is required and a value is undefined
 */
function encodeVariantColumn(values, shredding, column) {
  if (column?.required) {
    const missingAt = values.findIndex((entry) => entry === undefined);
    if (missingAt !== -1) {
      throw new Error(`required variant column ${column.name} has undefined value at index ${missingAt}`);
    }
  }

  const shreddingConfig = shredding && normalizeShreddingConfig(shredding);
  if (!shreddingConfig) {
    // Unshredded: one shared key dictionary for every row.
    const dictionary = buildVariantDictionary(values);
    const metadata = writeVariantMetadata(dictionary);
    const keyIndex = /* @__PURE__ */ new Map();
    for (let n = 0; n < dictionary.length; n++) {
      keyIndex.set(dictionary[n], n);
    }
    return values.map((entry) =>
      entry === undefined ? null : { metadata, value: writeVariantValue(entry, keyIndex) });
  }

  // Shredded: metadata is computed per row; the cache is shared across rows.
  const metadataCache = /* @__PURE__ */ new Map();
  return values.map((entry) => {
    if (entry === undefined) return null;
    const keys = /* @__PURE__ */ new Set();
    collectKeys(entry, keys);
    const row = getVariantRowMetadata(keys, metadataCache);
    return { metadata: row.metadata, ...encodeShredded(entry, shreddingConfig, row.keyIndex, true) };
  });
}
912
/**
 * Split one value into a `{ value, typed_value }` pair according to a
 * shredding type.
 *
 * - null/undefined values yield the variant-null bytes and no typed value.
 * - An array shred type shreds each element of an array value with the
 *   element type; non-array values fall back to the binary encoding.
 * - An object shred type shreds the listed fields of a plain-object value.
 *   Fields not covered by the shred type are kept as a residual binary object
 *   when `allowPartialObjects` is true; otherwise the whole value falls back
 *   to the binary encoding.
 * - A leaf shred type stores the raw value as `typed_value` when
 *   `matchesType` accepts it, else falls back to the binary encoding.
 *
 * Field membership in the shred type is tested with an own-property check.
 * The previous `in` test also matched inherited names such as `toString` or
 * `constructor`, which caused value fields with those names to be dropped.
 *
 * @param {*} value value to encode
 * @param {*} shredType normalized shredding type (array, object or leaf name)
 * @param {Map<string, number>} keyIndex dictionary key -> id
 * @param {boolean} allowPartialObjects whether unshredded fields may be kept
 *   in a residual binary object alongside the typed fields
 * @returns {{value: *, typed_value: *}}
 */
function encodeShredded(value, shredType, keyIndex, allowPartialObjects) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const unshredded = () => ({ value: writeVariantValue(value, keyIndex), typed_value: null });

  if (value === null || value === void 0) {
    return { value: VARIANT_NULL, typed_value: null };
  }
  if (Array.isArray(shredType)) {
    if (!Array.isArray(value)) return unshredded();
    const elemShred = shredType[0];
    return { value: null, typed_value: value.map((el) => encodeShredded(el, elemShred, keyIndex, false)) };
  }
  if (typeof shredType === "object") {
    if (typeof value !== "object" || Array.isArray(value) || value instanceof Date || value instanceof Uint8Array) {
      return unshredded();
    }
    // Null-prototype container so that any own key (including "__proto__")
    // is stored as a regular property of the residual object.
    const remaining = Object.create(null);
    let hasRemaining = false;
    for (const k of Object.keys(value)) {
      if (hasOwn(shredType, k) || value[k] === void 0) continue;
      remaining[k] = value[k];
      hasRemaining = true;
    }
    if (hasRemaining && !allowPartialObjects) return unshredded();

    const fieldNames = Object.keys(shredType);
    const isPresent = (fieldName) => hasOwn(value, fieldName) && value[fieldName] !== void 0;
    // A shredded field that is absent here but whose name is in this row's
    // dictionary forces the binary fallback for the whole value.
    const hasMissingFieldConflict = fieldNames.some(
      (fieldName) => !isPresent(fieldName) && keyIndex.has(fieldName)
    );
    if (hasMissingFieldConflict) return unshredded();

    const typedValue = {};
    for (const fieldName of fieldNames) {
      if (!isPresent(fieldName)) continue;
      typedValue[fieldName] = encodeShredded(value[fieldName], shredType[fieldName], keyIndex, false);
    }
    const binaryValue = hasRemaining ? writeVariantValue(remaining, keyIndex) : null;
    return { value: binaryValue, typed_value: typedValue };
  }
  if (matchesType(value, shredType)) {
    return { value: null, typed_value: value };
  }
  return unshredded();
}
959
/**
 * Return `{ metadata, keyIndex }` for a row's key set, reusing a cached entry
 * when the same set of keys was seen before.
 *
 * @param {Set<string>} keys keys collected from one row
 * @param {Map<string, {metadata: *, keyIndex: Map<string, number>}>} metadataCache
 *   cache keyed by the sorted keys joined with "\0"; updated on a miss
 * @returns {{metadata: *, keyIndex: Map<string, number>}}
 */
function getVariantRowMetadata(keys, metadataCache) {
  if (keys.size === 0) {
    return { metadata: EMPTY_METADATA, keyIndex: EMPTY_KEY_INDEX };
  }
  const sortedKeys = Array.from(keys).sort();
  const signature = sortedKeys.join("\0");
  let entry = metadataCache.get(signature);
  if (!entry) {
    const metadata = writeVariantMetadata(sortedKeys);
    const keyIndex = new Map(sortedKeys.map((key, position) => [key, position]));
    entry = { metadata, keyIndex };
    metadataCache.set(signature, entry);
  }
  return entry;
}
976
/**
 * Report whether a JS value can be stored directly in a shredded column of
 * the given leaf type.
 *
 * @param {*} value candidate value (null/undefined never match)
 * @param {string} type leaf type name
 * @returns {boolean} false for unknown type names
 */
function matchesType(value, type) {
  if (value === null || value === void 0) return false;
  const isNumber = typeof value === "number";
  if (type === "BOOLEAN") return typeof value === "boolean";
  if (type === "INT32") {
    return isNumber && Number.isInteger(value) && value >= -2147483648 && value <= 2147483647;
  }
  if (type === "INT64") {
    return typeof value === "bigint" && value >= INT64_MIN && value <= INT64_MAX;
  }
  if (type === "FLOAT" || type === "DOUBLE") return isNumber;
  if (type === "STRING") return typeof value === "string";
  if (type === "TIMESTAMP") return value instanceof Date;
  return false;
}
997
// Nesting depth at which detectShred stops descending into objects/arrays.
var MAX_SHRED_DEPTH = 3;
// Auto-detected configs with more leaves than this are discarded.
var MAX_SHRED_LEAVES = 256;
/**
 * Derive a shredding config from sample values.
 *
 * @param {Iterable<*>} values sample column values
 * @returns {object|undefined} normalized config, or undefined when detection
 *   yields no object/array config, normalization empties it, or it has more
 *   than MAX_SHRED_LEAVES leaves
 */
function autoDetectShredding(values) {
  const candidate = detectShred(values, 0);
  // Leaf type names (strings) and undefined are not usable top-level configs.
  if (typeof candidate !== "object") return void 0;
  const config = normalizeShreddingConfig(candidate);
  if (config === void 0) return void 0;
  const tooWide = countShredLeaves(config) > MAX_SHRED_LEAVES;
  return tooWide ? void 0 : config;
}
1006
/**
 * Count the leaf entries of a shredding config.
 *
 * @param {*} shredType array (counted through its first element), object
 *   (sum over its own keys) or anything else (counts as one leaf)
 * @returns {number}
 */
function countShredLeaves(shredType) {
  if (Array.isArray(shredType)) {
    return shredType.length ? countShredLeaves(shredType[0]) : 0;
  }
  if (shredType && typeof shredType === "object") {
    return Object.keys(shredType).reduce(
      (total, key) => total + countShredLeaves(shredType[key]),
      0
    );
  }
  return 1;
}
1015
/**
 * Infer a shredding type from a list of sample values.
 *
 * null/undefined samples are ignored. If any sample is a plain object, the
 * result is an object mapping each field to the type inferred from that
 * field's values. If every sample is an array, the result is a one-element
 * array holding the type inferred from all elements. Otherwise all samples
 * must share one JS type, which is mapped through `jsTypeToBasicType`.
 *
 * @param {Iterable<*>} values sample values
 * @param {number} depth current nesting depth; objects and arrays are not
 *   descended into once it reaches MAX_SHRED_DEPTH
 * @returns {*} inferred shredding type, or undefined when none applies
 */
function detectShred(values, depth) {
  const present = [...values].filter((sample) => sample !== null && sample !== void 0);
  if (present.length === 0) return void 0;

  if (present.some(isPlainObject)) {
    if (depth >= MAX_SHRED_DEPTH) return void 0;
    // Gather, per field name, every defined value seen across the objects.
    const valuesByField = /* @__PURE__ */ new Map();
    for (const sample of present) {
      if (!isPlainObject(sample)) continue;
      for (const [field, fieldValue] of Object.entries(sample)) {
        if (fieldValue === void 0) continue;
        const bucket = valuesByField.get(field);
        if (bucket) {
          bucket.push(fieldValue);
        } else {
          valuesByField.set(field, [fieldValue]);
        }
      }
    }
    const shredding = {};
    for (const [field, fieldSamples] of valuesByField) {
      const fieldShred = detectShred(fieldSamples, depth + 1);
      if (fieldShred !== void 0) shredding[field] = fieldShred;
    }
    return Object.keys(shredding).length > 0 ? shredding : void 0;
  }

  if (present.every(Array.isArray)) {
    if (depth >= MAX_SHRED_DEPTH) return void 0;
    const elements = [];
    for (const list of present) {
      for (const element of list) elements.push(element);
    }
    const elemShred = detectShred(elements, depth + 1);
    return elemShred === void 0 ? void 0 : [elemShred];
  }

  // Scalars: every sample must have the same JS type (Dates count as "date").
  let commonType;
  for (const sample of present) {
    if (Array.isArray(sample)) return void 0;
    const sampleType = sample instanceof Date ? "date" : typeof sample;
    if (commonType === void 0) {
      commonType = sampleType;
    } else if (commonType !== sampleType) {
      return void 0;
    }
  }
  return commonType ? jsTypeToBasicType(commonType) : void 0;
}
1056
/**
 * True for non-null objects that are not arrays, Dates or Uint8Arrays.
 *
 * @param {*} v value to test
 * @returns {boolean}
 */
function isPlainObject(v) {
  if (v === null || typeof v !== "object") return false;
  if (Array.isArray(v)) return false;
  return !(v instanceof Date || v instanceof Uint8Array);
}
1059
/**
 * Normalize a shredding config recursively.
 *
 * Arrays keep only their normalized first element, objects drop reserved
 * field names and fields that normalize to nothing, and any other value is
 * returned unchanged.
 *
 * @param {*} shredding array, object or leaf config
 * @returns {*} normalized config, or undefined for an empty array/object
 */
function normalizeShreddingConfig(shredding) {
  if (Array.isArray(shredding)) {
    if (!shredding.length) return void 0;
    const element = normalizeShreddingConfig(shredding[0]);
    return element === void 0 ? void 0 : [element];
  }
  if (typeof shredding !== "object") return shredding;

  const result = {};
  for (const [field, fieldConfig] of Object.entries(shredding)) {
    if (RESERVED_SHREDDING_FIELDS.has(field)) continue;
    const normalizedField = normalizeShreddingConfig(fieldConfig);
    if (normalizedField !== void 0) result[field] = normalizedField;
  }
  return Object.keys(result).length > 0 ? result : void 0;
}
1075
/**
 * Map a JS type tag (a `typeof` result, or "date") to a shredding leaf type.
 *
 * @param {string} jsType type tag
 * @returns {string|undefined} leaf type name, or undefined when unmapped
 */
function jsTypeToBasicType(jsType) {
  if (jsType === "boolean") return "BOOLEAN";
  if (jsType === "string") return "STRING";
  if (jsType === "number") return "DOUBLE";
  if (jsType === "bigint") return "INT64";
  if (jsType === "date") return "TIMESTAMP";
  return void 0;
}
1091
/**
 * Collect every object key found in `values` and return them sorted with the
 * default Array.prototype.sort ordering.
 *
 * @param {*} values column values to scan
 * @returns {string[]} sorted, de-duplicated keys
 */
function buildVariantDictionary(values) {
  const seen = /* @__PURE__ */ new Set();
  collectKeys(values, seen);
  return Array.from(seen).sort();
}
1096
/**
 * Recursively add every object key reachable from `value` to `keys`.
 * Arrays are traversed element by element; Dates, Uint8Arrays and
 * non-objects contribute nothing.
 *
 * @param {*} value value to scan
 * @param {Set<string>} keys accumulator, mutated in place
 */
function collectKeys(value, keys) {
  if (value === null || value === void 0) return;
  if (Array.isArray(value)) {
    for (let i = 0; i < value.length; i++) {
      collectKeys(value[i], keys);
    }
    return;
  }
  const isContainer = typeof value === "object" && !(value instanceof Date) && !(value instanceof Uint8Array);
  if (!isContainer) return;
  for (const name of Object.keys(value)) {
    keys.add(name);
    collectKeys(value[name], keys);
  }
}
1112
/**
 * Serialize a key dictionary into a metadata byte array.
 *
 * Layout produced: one header byte, the key count, `n + 1` string offsets
 * (the last being the total string length), then the UTF-8 bytes of every
 * key. Count and offsets are little-endian and `offsetSize` bytes wide,
 * where `offsetSize` is `byteWidth` of the total string byte length.
 *
 * @param {string[]} dictionary keys in the order they should be stored
 * @returns {Uint8Array} encoded metadata
 */
function writeVariantMetadata(dictionary) {
  const encodedKeys = Array.from(dictionary, (key) => encoder.encode(key));
  const keyCount = dictionary.length;
  let stringBytes = 0;
  for (const piece of encodedKeys) stringBytes += piece.length;

  const offsetSize = byteWidth(stringBytes);
  const out = new Uint8Array(1 + offsetSize + (keyCount + 1) * offsetSize + stringBytes);
  let cursor = 0;
  // Emit `n` as an offsetSize-byte little-endian integer.
  const putLE = (n) => {
    for (let b = 0; b < offsetSize; b++) out[cursor++] = n >> b * 8 & 255;
  };

  // Header bits: 1 in the low nibble, bit 4 set, (offsetSize - 1) in bits 6-7.
  out[cursor++] = 1 | 1 << 4 | offsetSize - 1 << 6;
  putLE(keyCount);
  let running = 0;
  for (const piece of encodedKeys) {
    putLE(running);
    running += piece.length;
  }
  putLE(running);
  for (const piece of encodedKeys) {
    out.set(piece, cursor);
    cursor += piece.length;
  }
  return out;
}
1140
/**
 * Encode a single value into a fresh buffer and return its bytes.
 *
 * @param {*} value value to encode
 * @param {Map<string, number>} keyIndex dictionary key -> id
 * @returns {Uint8Array} bytes written by `writeValue`
 */
function writeVariantValue(value, keyIndex) {
  const sink = new ByteWriter(8);
  writeValue(value, sink, keyIndex);
  const encoded = sink.getBytes();
  return encoded;
}
1145
/**
 * Append the variant encoding of one value to `writer`.
 *
 * Scalars are written as a header byte followed by their payload; arrays and
 * other objects are delegated to `writeVariantArray` / `writeVariantObject`.
 * Integer numbers use the narrowest of the 8/16/32-bit forms that fits; all
 * other numbers are written as 64-bit floats.
 *
 * @param {*} val value to encode
 * @param {object} writer ByteWriter-like sink
 * @param {Map<string, number>} keyIndex dictionary key -> id
 * @throws {RangeError} when a bigint is outside the signed 64-bit range
 * @throws {Error} when the value has no encoding (e.g. functions)
 */
function writeValue(val, writer, keyIndex) {
  if (val === null || val === void 0) {
    writer.appendUint8(0);
    return;
  }
  switch (typeof val) {
    case "boolean":
      writer.appendUint8(val ? 4 : 8);
      return;
    case "bigint":
      if (val < INT64_MIN || val > INT64_MAX) {
        throw new RangeError(`variant bigint out of int64 range: ${val}`);
      }
      writer.appendUint8(6 << 2);
      writer.appendInt64(val);
      return;
    case "number":
      if (Number.isInteger(val)) {
        if (val >= -128 && val <= 127) {
          writer.appendUint8(3 << 2);
          writer.appendUint8(val & 255);
          return;
        }
        if (val >= -32768 && val <= 32767) {
          writer.appendUint8(4 << 2);
          appendUnsignedLE(writer, val, 2);
          return;
        }
        if (val >= -2147483648 && val <= 2147483647) {
          writer.appendUint8(5 << 2);
          writer.appendInt32(val);
          return;
        }
      }
      // Non-integers and integers beyond 32 bits are stored as doubles.
      writer.appendUint8(7 << 2);
      writer.appendFloat64(val);
      return;
    case "string": {
      const utf8 = encoder.encode(val);
      if (utf8.length <= 63) {
        // Short form: the length is packed into the header byte.
        writer.appendUint8(utf8.length << 2 | 1);
      } else {
        writer.appendUint8(16 << 2);
        writer.appendUint32(utf8.length);
      }
      writer.appendBytes(utf8);
      return;
    }
    case "object":
      if (val instanceof Date) {
        writer.appendUint8(13 << 2);
        // Milliseconds since epoch scaled by 1000.
        writer.appendInt64(BigInt(val.getTime()) * 1000n);
      } else if (val instanceof Uint8Array) {
        writer.appendUint8(15 << 2);
        writer.appendUint32(val.length);
        writer.appendBytes(val);
      } else if (Array.isArray(val)) {
        writeVariantArray(val, writer, keyIndex);
      } else {
        writeVariantObject(val, writer, keyIndex);
      }
      return;
    default:
      throw new Error(`variant cannot encode value: ${val}`);
  }
}
1221
/**
 * Append the variant encoding of an object to `writer`.
 *
 * Output order: header byte, element count (4 bytes when there are more than
 * 255 fields, else 1 byte), field ids sorted ascending, `count + 1` value
 * offsets, then the concatenated encoded field values. Fields whose value is
 * `undefined` are omitted.
 *
 * @param {object} obj object to encode
 * @param {object} writer ByteWriter-like sink
 * @param {Map<string, number>} keyIndex dictionary key -> id
 * @throws {Error} when a field name is missing from `keyIndex`
 */
function writeVariantObject(obj, writer, keyIndex) {
  const fields = [];
  for (const key of Object.keys(obj)) {
    if (obj[key] === void 0) continue;
    const id = keyIndex.get(key);
    if (id === void 0) throw new Error(`variant key not in dictionary: ${key}`);
    fields.push({ id, key });
  }
  fields.sort((left, right) => left.id - right.id);

  const fieldCount = fields.length;
  // After sorting, the last entry carries the largest id.
  const largestId = fieldCount > 0 ? fields[fieldCount - 1].id : 0;
  const idWidth = byteWidth(largestId);

  // Encode values into a scratch buffer, recording the end offset of each.
  const body = new ByteWriter(8);
  const offsets = [0];
  for (const field of fields) {
    writeValue(obj[field.key], body, keyIndex);
    offsets.push(body.index);
  }
  const offsetWidth = byteWidth(offsets[fieldCount]);
  const largeFlag = fieldCount > 255 ? 1 : 0;

  const typeBits = offsetWidth - 1 | idWidth - 1 << 2 | largeFlag << 4;
  writer.appendUint8(typeBits << 2 | 2);
  if (largeFlag) {
    writer.appendUint32(fieldCount);
  } else {
    writer.appendUint8(fieldCount);
  }
  fields.forEach((field) => appendUnsignedLE(writer, field.id, idWidth));
  offsets.forEach((offset) => appendUnsignedLE(writer, offset, offsetWidth));
  writer.appendBytes(body.getBytes());
}
1247
/**
 * Append the variant encoding of an array to `writer`.
 *
 * Output order: header byte, element count (4 bytes when there are more than
 * 255 elements, else 1 byte), `count + 1` value offsets, then the
 * concatenated encoded elements.
 *
 * @param {Array<*>} arr array to encode
 * @param {object} writer ByteWriter-like sink
 * @param {Map<string, number>} keyIndex dictionary key -> id
 */
function writeVariantArray(arr, writer, keyIndex) {
  const elementCount = arr.length;
  // Encode elements into a scratch buffer, recording the end offset of each.
  const body = new ByteWriter(8);
  const offsets = [0];
  for (let i = 0; i < elementCount; i++) {
    writeValue(arr[i], body, keyIndex);
    offsets.push(body.index);
  }
  const offsetWidth = byteWidth(offsets[elementCount]);
  const largeFlag = elementCount > 255 ? 1 : 0;

  const typeBits = offsetWidth - 1 | largeFlag << 2;
  writer.appendUint8(typeBits << 2 | 3);
  if (largeFlag) {
    writer.appendUint32(elementCount);
  } else {
    writer.appendUint8(elementCount);
  }
  offsets.forEach((offset) => appendUnsignedLE(writer, offset, offsetWidth));
  writer.appendBytes(body.getBytes());
}
1264
/**
 * Smallest byte count (1-4) whose unsigned range covers `maxValue`.
 *
 * @param {number} maxValue largest value that must be representable
 * @returns {number} 1, 2, 3 or 4
 */
function byteWidth(maxValue) {
  const limits = [255, 65535, 16777215];
  const slot = limits.findIndex((limit) => maxValue <= limit);
  return slot === -1 ? 4 : slot + 1;
}
1270
/**
 * Append the low `width` bytes of `value`, least-significant byte first.
 *
 * @param {{appendUint8: function(number): void}} writer destination writer
 * @param {number} value integer to write
 * @param {number} width number of bytes to emit
 */
function appendUnsignedLE(writer, value, width) {
  let byteIndex = 0;
  while (byteIndex < width) {
    writer.appendUint8(value >> byteIndex * 8 & 255);
    byteIndex += 1;
  }
}
1275
+
1276
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/schema.js
1277
/**
 * Build a flat schema element list from column descriptors.
 *
 * The first element is the root, whose `num_children` is the column count.
 * Each column then contributes, in order of precedence: its schema override,
 * a VARIANT group (with a `typed_value` subtree when an explicit shredding
 * config is given), an element derived from its declared basic type, or an
 * element inferred from up to the first 1000 data values.
 *
 * @param {object} options
 * @param {Array<object>} options.columnData column descriptors with
 *   `name`, `data`, and optional `type`, `nullable`, `shredding`
 * @param {Object<string, object>} [options.schemaOverrides] per-column
 *   schema elements keyed by column name
 * @returns {Array<object>} schema elements
 * @throws {Error} when an override is combined with `type`/`nullable`, has a
 *   mismatched name, is FIXED_LEN_BYTE_ARRAY without `type_length`, or
 *   declares children
 */
function schemaFromColumnData({ columnData, schemaOverrides }) {
  const schema = [{
    name: "root",
    num_children: columnData.length
  }];
  for (const column of columnData) {
    const { name, data, type, nullable, shredding } = column;
    const override = schemaOverrides?.[name];

    if (override) {
      if (type || nullable !== void 0) {
        throw new Error(`cannot provide both type and schema override for column ${name}`);
      }
      if (override.name !== name) {
        throw new Error(`schema override for column ${name} must have matching name, got ${override.name}`);
      }
      if (override.type === "FIXED_LEN_BYTE_ARRAY" && !override.type_length) {
        throw new Error("schema override for FIXED_LEN_BYTE_ARRAY must include type_length");
      }
      if (override.num_children) {
        throw new Error("schema override does not support nested types");
      }
      schema.push(override);
      continue;
    }

    if (type === "VARIANT") {
      const repetition_type = nullable === false ? "REQUIRED" : "OPTIONAL";
      // `shredding: true` does not produce a typed_value subtree here.
      const shreddingConfig = shredding && shredding !== true ? normalizeShreddingConfig(shredding) : void 0;
      const typedValue = shreddingConfig ? buildVariantTypedValue(shreddingConfig) : [];
      schema.push(
        { name, repetition_type, num_children: shreddingConfig ? 3 : 2, logical_type: { type: "VARIANT" } },
        { name: "metadata", type: "BYTE_ARRAY", repetition_type: "REQUIRED" },
        { name: "value", type: "BYTE_ARRAY", repetition_type: "OPTIONAL" },
        ...typedValue
      );
      continue;
    }

    if (type) {
      schema.push(basicTypeToSchemaElement(name, type, nullable));
      continue;
    }

    schema.push(autoSchemaElement(name, data.slice(0, 1e3)));
  }
  return schema;
}
1323
/**
 * Build the flat list of schema elements for a `typed_value` subtree.
 *
 * - Array shred type: a LIST-annotated `typed_value` group wrapping
 *   `list` / `element`, where each element has an optional binary `value`
 *   and the subtree of the element type.
 * - Object shred type: a `typed_value` group with one child group per field,
 *   each holding an optional binary `value` and the field's subtree.
 * - Anything else: the single leaf element from `shreddedLeafElement`.
 *
 * @param {*} shredType normalized shredding type
 * @returns {Array<object>} schema elements in depth-first order
 */
function buildVariantTypedValue(shredType) {
  const binaryValue = () => ({ name: "value", type: "BYTE_ARRAY", repetition_type: "OPTIONAL" });

  if (Array.isArray(shredType)) {
    return [
      { name: "typed_value", repetition_type: "OPTIONAL", converted_type: "LIST", num_children: 1 },
      { name: "list", repetition_type: "REPEATED", num_children: 1 },
      { name: "element", repetition_type: "REQUIRED", num_children: 2 },
      binaryValue(),
      ...buildVariantTypedValue(shredType[0])
    ];
  }
  if (typeof shredType !== "object") {
    return [shreddedLeafElement(shredType)];
  }
  const fieldNames = Object.keys(shredType);
  return [
    { name: "typed_value", repetition_type: "OPTIONAL", num_children: fieldNames.length },
    ...fieldNames.flatMap((fieldName) => [
      { name: fieldName, repetition_type: "OPTIONAL", num_children: 2 },
      binaryValue(),
      ...buildVariantTypedValue(shredType[fieldName])
    ])
  ];
}
1349
/**
 * Build the optional `typed_value` leaf schema element for a shredded type.
 *
 * @param {string} type leaf type name
 * @returns {object} a new schema element
 * @throws {Error} for unsupported type names
 */
function shreddedLeafElement(type) {
  const name = "typed_value";
  const repetition_type = "OPTIONAL";
  if (type === "STRING") {
    return { name, type: "BYTE_ARRAY", converted_type: "UTF8", repetition_type };
  }
  if (type === "TIMESTAMP") {
    return { name, type: "INT64", converted_type: "TIMESTAMP_MICROS", repetition_type };
  }
  // These names are used unchanged as the element's physical type.
  const passThrough = ["INT32", "INT64", "DOUBLE", "FLOAT", "BOOLEAN"];
  if (passThrough.includes(type)) {
    return { name, type, repetition_type };
  }
  throw new Error(`unsupported shredded field type: ${type}`);
}
1369
/**
 * Turn a declared basic type into a schema element.
 *
 * Named logical types are expanded to a physical type plus a converted or
 * logical type annotation; every other type name is used as the physical
 * type unchanged.
 *
 * @param {string} name column name
 * @param {string} type declared basic type
 * @param {boolean} [nullable] only an explicit `false` yields REQUIRED
 * @returns {object} schema element
 */
function basicTypeToSchemaElement(name, type, nullable) {
  const repetition_type = nullable === false ? "REQUIRED" : "OPTIONAL";
  switch (type) {
    case "STRING":
      return { name, type: "BYTE_ARRAY", converted_type: "UTF8", repetition_type };
    case "JSON":
      return { name, type: "BYTE_ARRAY", converted_type: "JSON", repetition_type };
    case "TIMESTAMP":
      return { name, type: "INT64", converted_type: "TIMESTAMP_MILLIS", repetition_type };
    case "UUID":
      return { name, type: "FIXED_LEN_BYTE_ARRAY", type_length: 16, logical_type: { type: "UUID" }, repetition_type };
    case "FLOAT16":
      return { name, type: "FIXED_LEN_BYTE_ARRAY", type_length: 2, logical_type: { type: "FLOAT16" }, repetition_type };
    case "GEOMETRY":
    case "GEOGRAPHY":
      return { name, type: "BYTE_ARRAY", logical_type: { type }, repetition_type };
    default:
      return { name, type, repetition_type };
  }
}
1394
/**
 * Infer a schema element from sample values.
 *
 * Typed arrays map directly to their REQUIRED primitive type. Otherwise the
 * values are scanned: null/undefined make the column OPTIONAL, and every
 * other value must agree on one physical/converted type. INT32 and DOUBLE
 * may mix, in which case the column becomes DOUBLE.
 *
 * Integer numbers are inferred as INT32 only when they fit in a signed
 * 32-bit integer; larger integers (for example epoch milliseconds) are
 * inferred as DOUBLE so the declared type can hold them.
 *
 * @param {string} name column name
 * @param {Iterable<*>} values sample values
 * @returns {{name: string, type: string, repetition_type: string, converted_type: (string|undefined)}}
 * @throws {Error} when a value has no inferable type or types are mixed
 */
function autoSchemaElement(name, values) {
  let type;
  let repetition_type = "REQUIRED";
  let converted_type;
  if (values instanceof Int32Array) return { name, type: "INT32", repetition_type };
  if (values instanceof BigInt64Array) return { name, type: "INT64", repetition_type };
  if (values instanceof Float32Array) return { name, type: "FLOAT", repetition_type };
  if (values instanceof Float64Array) return { name, type: "DOUBLE", repetition_type };
  for (const value of values) {
    if (value === null || value === void 0) {
      repetition_type = "OPTIONAL";
      continue;
    }
    let valueType;
    let valueConvertedType;
    if (typeof value === "boolean") valueType = "BOOLEAN";
    else if (typeof value === "bigint") valueType = "INT64";
    else if (Number.isInteger(value)) {
      // Integers outside the signed 32-bit range cannot be declared INT32.
      valueType = value >= -2147483648 && value <= 2147483647 ? "INT32" : "DOUBLE";
    } else if (typeof value === "number") valueType = "DOUBLE";
    else if (value instanceof Uint8Array) valueType = "BYTE_ARRAY";
    else if (typeof value === "string") {
      valueType = "BYTE_ARRAY";
      valueConvertedType = "UTF8";
    } else if (value instanceof Date) {
      valueType = "INT64";
      valueConvertedType = "TIMESTAMP_MILLIS";
    } else if (typeof value === "object") {
      valueType = "BYTE_ARRAY";
      valueConvertedType = "JSON";
    } else throw new Error(`cannot determine parquet type for: ${value}`);

    if (type === void 0) {
      type = valueType;
      converted_type = valueConvertedType;
    } else if (type === "INT32" && valueType === "DOUBLE") {
      // Widen an integer column once a double is seen.
      type = "DOUBLE";
    } else if (type === "DOUBLE" && valueType === "INT32") {
      // Integers are compatible with an already-DOUBLE column.
    } else if (type !== valueType || converted_type !== valueConvertedType) {
      throw new Error(`parquet cannot write mixed types: ${converted_type ?? type} and ${valueConvertedType ?? valueType}`);
    }
  }
  if (!type) {
    // No non-null sample: fall back to an optional byte array column.
    type = "BYTE_ARRAY";
    repetition_type = "OPTIONAL";
  }
  return { name, type, repetition_type, converted_type };
}
1441
/**
 * Count the REPEATED elements along a schema path.
 *
 * @param {Iterable<{repetition_type?: string}>} schemaPath elements from
 *   root to leaf
 * @returns {number} number of elements whose repetition_type is "REPEATED"
 */
function getMaxRepetitionLevel(schemaPath) {
  let repeatedCount = 0;
  for (const node of schemaPath) {
    repeatedCount += node.repetition_type === "REPEATED" ? 1 : 0;
  }
  return repeatedCount;
}
1450
+
1451
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/splitstream.js
1452
/**
 * Write values as byte streams: all first bytes of every value, then all
 * second bytes, and so on.
 *
 * Numeric types are viewed through the matching typed array, so the byte
 * order within a value is that of the typed array's in-memory layout.
 * FIXED_LEN_BYTE_ARRAY values are validated before copying: each must be a
 * Uint8Array of exactly `typeLength` bytes. Without that check a short or
 * non-byte value would be silently zero-padded into the output.
 *
 * @param {{appendUint8: function(number): void}} writer destination writer
 * @param {ArrayLike<*>} values non-null values of the page
 * @param {string} type physical type of the column
 * @param {number} [typeLength] byte width for FIXED_LEN_BYTE_ARRAY
 * @throws {Error} for unsupported types, a missing `typeLength`, or
 *   fixed-length values of the wrong type or length
 */
function writeByteStreamSplit(writer, values, type, typeLength) {
  const count = values.length;
  // View the raw bytes backing a typed array without copying.
  const rawBytes = (typed) => new Uint8Array(typed.buffer, typed.byteOffset, typed.byteLength);
  let bytes;
  let width;
  if (type === "FLOAT") {
    bytes = rawBytes(values instanceof Float32Array ? values : new Float32Array(numberArray(values)));
    width = 4;
  } else if (type === "DOUBLE") {
    bytes = rawBytes(values instanceof Float64Array ? values : new Float64Array(numberArray(values)));
    width = 8;
  } else if (type === "INT32") {
    bytes = rawBytes(values instanceof Int32Array ? values : new Int32Array(numberArray(values)));
    width = 4;
  } else if (type === "INT64") {
    bytes = rawBytes(bigIntArray(values));
    width = 8;
  } else if (type === "FIXED_LEN_BYTE_ARRAY") {
    if (!typeLength) throw new Error("parquet byte_stream_split missing type_length");
    width = typeLength;
    bytes = new Uint8Array(count * width);
    for (let i = 0; i < count; i++) {
      const value = values[i];
      if (!(value instanceof Uint8Array)) {
        throw new Error("parquet byte_stream_split expected Uint8Array value, got " + typeof value);
      }
      if (value.length !== width) {
        throw new Error(`parquet byte_stream_split expected Uint8Array of length ${width}`);
      }
      bytes.set(value, i * width);
    }
  } else {
    throw new Error(`parquet byte_stream_split unsupported type: ${type}`);
  }
  // Emit stream b = byte b of every value, for b in [0, width).
  for (let b = 0; b < width; b++) {
    for (let i = 0; i < count; i++) {
      writer.appendUint8(bytes[i * width + b]);
    }
  }
}
1488
/**
 * Assert that `values` is a plain array containing only numbers.
 *
 * @param {*} values candidate array
 * @returns {number[]} the same array, unchanged
 * @throws {Error} when it is not an array or holds a non-number
 */
function numberArray(values) {
  const allNumbers = Array.isArray(values) && values.every((v) => typeof v === "number");
  if (!allNumbers) {
    throw new Error("Expected number array for BYTE_STREAM_SPLIT encoding");
  }
  return values;
}
1494
/**
 * Return `values` as a BigInt64Array.
 *
 * @param {*} values a BigInt64Array (returned as is) or a plain array of
 *   bigints (copied into a new BigInt64Array)
 * @returns {BigInt64Array}
 * @throws {Error} for any other input
 */
function bigIntArray(values) {
  if (values instanceof BigInt64Array) return values;
  const allBigInts = Array.isArray(values) && values.every((v) => typeof v === "bigint");
  if (!allBigInts) {
    throw new Error("Expected bigint array for BYTE_STREAM_SPLIT encoding");
  }
  return new BigInt64Array(values);
}
1501
+
1502
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/datapage.js
1503
/**
 * Write one DATA_PAGE_V2 page: page header, level bytes, then value bytes.
 *
 * Levels are written to their own buffer and appended as is; the non-null
 * values are encoded into a second buffer, which is passed through
 * `compressors[codec]` when such a function exists. The level byte count is
 * included in both page sizes recorded in the header.
 *
 * @param {object} options
 * @param {object} options.writer destination writer
 * @param {object} options.column column descriptor (`columnName`, `element`,
 *   `codec`, `compressors`, plus whatever `writeLevels` reads)
 * @param {string} options.encoding encoding name for the values
 * @param {object} options.pageData page values and levels
 * @throws {Error} when the element has no type, is REPEATED, the encoding is
 *   unsupported, or the encoding does not apply to the element type
 */
function writeDataPageV2({ writer, column, encoding, pageData }) {
  const { columnName, element, codec, compressors } = column;
  const { type, type_length, repetition_type } = element;
  if (!type) throw new Error(`column ${columnName} cannot determine type`);
  if (repetition_type === "REPEATED") throw new Error(`column ${columnName} repeated types not supported`);

  const levelWriter = new ByteWriter();
  const levels = writeLevels(levelWriter, column, pageData);
  const { num_nulls, num_values, num_rows } = levels;
  const { definition_levels_byte_length, repetition_levels_byte_length } = levels;

  // Only filter when there is at least one null to drop.
  const nonnull = num_nulls
    ? pageData.values.filter((v) => v !== null && v !== void 0)
    : pageData.values;

  const page = new ByteWriter();
  switch (encoding) {
    case "PLAIN":
      writePlain(page, nonnull, type, type_length);
      break;
    case "RLE": {
      if (type !== "BOOLEAN") throw new Error("RLE encoding only supported for BOOLEAN type");
      // The RLE run is preceded by its byte length as a 4-byte integer.
      const rleData = new ByteWriter();
      writeRleBitPackedHybrid(rleData, nonnull, 1);
      page.appendUint32(rleData.offset);
      page.appendBytes(rleData.getBytes());
      break;
    }
    case "PLAIN_DICTIONARY":
    case "RLE_DICTIONARY": {
      // Values are dictionary indexes; the bit width covers the largest one.
      const maxValue = nonnull.reduce((largest, v) => (v > largest ? v : largest), 0);
      const bitWidth2 = Math.ceil(Math.log2(maxValue + 1));
      page.appendUint8(bitWidth2);
      writeRleBitPackedHybrid(page, nonnull, bitWidth2);
      break;
    }
    case "DELTA_BINARY_PACKED":
      if (type !== "INT32" && type !== "INT64") {
        throw new Error("DELTA_BINARY_PACKED encoding only supported for INT32 and INT64 types");
      }
      deltaBinaryPack(page, nonnull);
      break;
    case "DELTA_LENGTH_BYTE_ARRAY":
      if (type !== "BYTE_ARRAY") {
        throw new Error("DELTA_LENGTH_BYTE_ARRAY encoding only supported for BYTE_ARRAY type");
      }
      deltaLengthByteArray(page, nonnull);
      break;
    case "DELTA_BYTE_ARRAY":
      if (type !== "BYTE_ARRAY") {
        throw new Error("DELTA_BYTE_ARRAY encoding only supported for BYTE_ARRAY type");
      }
      deltaByteArray(page, nonnull);
      break;
    case "BYTE_STREAM_SPLIT":
      writeByteStreamSplit(page, nonnull, type, type_length);
      break;
    default:
      throw new Error(`parquet unsupported encoding: ${encoding}`);
  }

  const pageBytes = page.getBytes();
  // Fall back to the raw bytes when no compressor is registered for codec.
  const compressedBytes = compressors[codec]?.(pageBytes) ?? pageBytes;
  const levelBytes = levelWriter.offset;
  writePageHeader(writer, {
    type: "DATA_PAGE_V2",
    uncompressed_page_size: levelBytes + page.offset,
    compressed_page_size: levelBytes + compressedBytes.length,
    data_page_header_v2: {
      num_values,
      num_nulls,
      num_rows,
      encoding,
      definition_levels_byte_length,
      repetition_levels_byte_length,
      is_compressed: !!codec
    }
  });
  writer.appendBytes(levelWriter.getBytes());
  writer.appendBytes(compressedBytes);
}
1572
/**
 * Convert a page header into its numbered-field form and serialize it with
 * `serializeTCompactProtocol`.
 *
 * Page type and encodings are stored as their indexes in `PageTypes` and
 * `Encodings`. Sub-headers that are absent on `header` are passed through as
 * the falsy value they had.
 *
 * @param {object} writer destination writer
 * @param {object} header page header with `type`, page sizes, optional `crc`
 *   and at most the sub-headers `data_page_header`,
 *   `dictionary_page_header`, `data_page_header_v2`
 */
function writePageHeader(writer, header) {
  const {
    data_page_header: dataV1,
    dictionary_page_header: dictionary,
    data_page_header_v2: dataV2
  } = header;
  const encodingId = (encodingName) => Encodings.indexOf(encodingName);

  const compact = {
    field_1: PageTypes.indexOf(header.type),
    field_2: header.uncompressed_page_size,
    field_3: header.compressed_page_size,
    field_4: header.crc,
    field_5: dataV1 && {
      field_1: dataV1.num_values,
      field_2: encodingId(dataV1.encoding),
      field_3: encodingId(dataV1.definition_level_encoding),
      field_4: encodingId(dataV1.repetition_level_encoding)
    },
    field_7: dictionary && {
      field_1: dictionary.num_values,
      field_2: encodingId(dictionary.encoding)
    },
    field_8: dataV2 && {
      field_1: dataV2.num_values,
      field_2: dataV2.num_nulls,
      field_3: dataV2.num_rows,
      field_4: encodingId(dataV2.encoding),
      field_5: dataV2.definition_levels_byte_length,
      field_6: dataV2.repetition_levels_byte_length,
      // Left undefined when compressed; only an explicit `false` is set.
      field_7: dataV2.is_compressed ? void 0 : false
    }
  };
  serializeTCompactProtocol(writer, compact);
}
1602
/**
 * Write a page's repetition levels and then its definition levels, and
 * compute the page counters.
 *
 * Each kind of level is written with `writeRleBitPackedHybrid` only when its
 * maximum level is non-zero; the bit width is `ceil(log2(maxLevel + 1))`.
 *
 * @param {object} writer destination for the level bytes
 * @param {{schemaPath: Array<object>}} column column descriptor
 * @param {object} dataPage `values`, `definitionLevels`, `repetitionLevels`
 *   and `maxDefinitionLevel` for the page
 * @returns {{definition_levels_byte_length: number, repetition_levels_byte_length: number, num_values: number, num_nulls: number, num_rows: number}}
 */
function writeLevels(writer, column, dataPage) {
  const { values, definitionLevels, repetitionLevels, maxDefinitionLevel } = dataPage;
  const bitsFor = (maxLevel) => Math.ceil(Math.log2(maxLevel + 1));

  const num_values = definitionLevels.length || values.length;

  // A row starts wherever the repetition level is 0; without repetition
  // levels every value is its own row.
  let num_rows = values.length;
  if (repetitionLevels.length) {
    num_rows = 0;
    for (const level of repetitionLevels) {
      if (level === 0) num_rows += 1;
    }
  }

  // Entries below the maximum definition level are counted as nulls.
  let num_nulls = 0;
  for (const level of definitionLevels) {
    if (level < maxDefinitionLevel) num_nulls += 1;
  }

  const maxRepetitionLevel = getMaxRepetitionLevel(column.schemaPath);
  const repetition_levels_byte_length = maxRepetitionLevel
    ? writeRleBitPackedHybrid(writer, repetitionLevels, bitsFor(maxRepetitionLevel))
    : 0;
  const definition_levels_byte_length = maxDefinitionLevel
    ? writeRleBitPackedHybrid(writer, definitionLevels, bitsFor(maxDefinitionLevel))
    : 0;

  return { definition_levels_byte_length, repetition_levels_byte_length, num_values, num_nulls, num_rows };
}
1633
+
1634
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/dictionary.js
1635
/**
 * Estimate the size of one value for dictionary size accounting.
 *
 * @param {*} value the value (null/undefined count as 0)
 * @param {string} type physical type of the column
 * @param {number} [type_length] width for FIXED_LEN_BYTE_ARRAY
 * @returns {number} estimated size; for BYTE_ARRAY strings this is the
 *   string's `length`, and 0 for anything not covered
 */
function estimateValueSize(value, type, type_length) {
  if (value === null || value === void 0) return 0;
  switch (type) {
    case "BOOLEAN":
      return 0.125;
    case "INT32":
    case "FLOAT":
      return 4;
    case "INT64":
    case "DOUBLE":
      return 8;
    case "INT96":
      return 12;
    case "FIXED_LEN_BYTE_ARRAY":
      return type_length ?? 0;
    case "BYTE_ARRAY":
      if (value instanceof Uint8Array) return value.byteLength;
      if (typeof value === "string") return value.length;
      return 0;
    default:
      return 0;
  }
}
1648
/**
 * Hash a byte sequence to an unsigned 32-bit integer: starting from
 * 2166136261, each byte is XOR-ed in and the state is multiplied by 16777619
 * with 32-bit wraparound.
 *
 * @param {ArrayLike<number>} bytes bytes to hash
 * @returns {number} hash in the range [0, 2^32)
 */
function hashBytes(bytes) {
  let state = 2166136261;
  let position = 0;
  while (position < bytes.length) {
    state = Math.imul(state ^ bytes[position], 16777619);
    position += 1;
  }
  return state >>> 0;
}
1656
/**
 * Compare two byte sequences element by element.
 *
 * @param {ArrayLike<number>} a first sequence
 * @param {ArrayLike<number>} b second sequence
 * @returns {boolean} true when lengths and all elements are strictly equal
 */
function bytesEqual(a, b) {
  const length = a.length;
  if (length !== b.length) return false;
  let position = 0;
  while (position < length && a[position] === b[position]) {
    position += 1;
  }
  return position === length;
}
1663
/**
 * Decide whether to dictionary-encode a column and, if so, build the
 * dictionary and the per-row dictionary indexes.
 *
 * Returns `{}` (no dictionary) when an encoding other than "RLE_DICTIONARY"
 * was requested, the type is BOOLEAN, the first 1000 values are empty or
 * more than half distinct, or the accumulated dictionary size exceeds
 * `pageSize`. Uint8Array values are de-duplicated by content (hash buckets
 * plus a byte comparison); all other values by Map key identity.
 *
 * @param {Array<*>} values column values; null/undefined are skipped and
 *   leave their slot in `indexes` unset
 * @param {string} type physical type of the column
 * @param {number} [type_length] width for FIXED_LEN_BYTE_ARRAY
 * @param {string} [encoding] explicitly requested encoding, if any
 * @param {number} [pageSize] dictionary size limit (falsy disables it)
 * @returns {{dictionary?: Array<*>, indexes?: Array<number>}}
 */
function useDictionary(values, type, type_length, encoding, pageSize) {
  if ((encoding && encoding !== "RLE_DICTIONARY") || type === "BOOLEAN") return {};

  // Cheap cardinality probe on a prefix of the column.
  const sample = values.slice(0, 1e3);
  const distinct = /* @__PURE__ */ new Set();
  for (const probe of sample) {
    distinct.add(probe instanceof Uint8Array ? hashBytes(probe) : probe);
  }
  if (distinct.size === 0 || distinct.size / sample.length > 0.5) return {};

  const dictionary = [];
  const indexes = new Array(values.length);
  const valueIndex = /* @__PURE__ */ new Map();
  const hashBuckets = /* @__PURE__ */ new Map();
  let dictSize = 0;

  for (let row = 0; row < values.length; row++) {
    const value = values[row];
    if (value === null || value === void 0) continue;

    const isBytes = value instanceof Uint8Array;
    let hash;
    let bucket;
    let index;
    if (isBytes) {
      hash = hashBytes(value);
      bucket = hashBuckets.get(hash);
      // Same hash does not imply same bytes: confirm with a comparison.
      index = bucket?.find((candidate) => bytesEqual(dictionary[candidate], value));
    } else {
      index = valueIndex.get(value);
    }

    if (index === void 0) {
      dictSize += isBytes ? value.byteLength : estimateValueSize(value, type, type_length);
      if (pageSize && dictSize > pageSize) return {};
      index = dictionary.length;
      dictionary.push(value);
      if (!isBytes) {
        valueIndex.set(value, index);
      } else if (bucket) {
        bucket.push(index);
      } else {
        hashBuckets.set(hash, [index]);
      }
    }
    indexes[row] = index;
  }
  return { dictionary, indexes };
}
1714
/**
 * Write a dictionary page: the dictionary values PLAIN-encoded, optionally
 * compressed with the column's codec, preceded by a page header.
 *
 * @param {object} writer destination writer
 * @param {object} column column descriptor (element, codec, compressors, columnName)
 * @param {any[]} dictionary dictionary values, already converted for writing
 * @throws {Error} when the column element has no physical type
 */
function writeDictionaryPage(writer, column, dictionary) {
  const { element, codec, compressors } = column;
  const { type, type_length } = element;
  if (!type) throw new Error(`column ${column.columnName} cannot determine type`);

  const plainWriter = new ByteWriter();
  writePlain(plainWriter, dictionary, type, type_length);
  const rawBytes = plainWriter.getBytes();

  // No compressor registered for the codec means the page is stored as-is.
  const pageBytes = compressors[codec]?.(rawBytes) ?? rawBytes;

  const header = {
    type: "DICTIONARY_PAGE",
    uncompressed_page_size: rawBytes.byteLength,
    compressed_page_size: pageBytes.byteLength,
    dictionary_page_header: {
      num_values: dictionary.length,
      encoding: "PLAIN"
    }
  };
  writePageHeader(writer, header);
  writer.appendBytes(pageBytes);
}
1733
+
1734
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/geospatial.js
1735
/**
 * Compute geospatial statistics (bounding box and geometry type codes) for a
 * column of GeoJSON geometries.
 *
 * @param {any[]} values GeoJSON geometry objects; null/undefined are skipped
 * @returns {{bbox: object|undefined, geospatial_types: number[]}|undefined}
 *   undefined when there is nothing to report
 * @throws {Error} when a non-null value is not an object
 */
function geospatialStatistics(values) {
  const seenTypeCodes = /* @__PURE__ */ new Set();
  let bounds;
  for (const geometry of values) {
    if (geometry === null || geometry === void 0) continue;
    if (typeof geometry !== "object") {
      throw new Error("geospatial column expects GeoJSON geometries");
    }
    bounds = extendBoundsFromGeometry(bounds, geometry);
    seenTypeCodes.add(geometryTypeCodeWithDimension(geometry));
  }

  // A bbox is only reported when both the x and y ranges are known.
  const { xmin, ymin, xmax, ymax } = bounds ?? {};
  const hasXY = [xmin, ymin, xmax, ymax].every((edge) => edge !== void 0);
  const bbox = hasXY ? { ...bounds } : void 0;

  if (seenTypeCodes.size === 0 && !bbox) return void 0;
  return {
    bbox,
    // Geospatial type codes of all instances, or an empty list if not known
    geospatial_types: seenTypeCodes.size ? [...seenTypeCodes].sort((a, b) => a - b) : []
  };
}
1759
/**
 * Grow a bounding box so that it covers a GeoJSON geometry.
 * GeometryCollections are handled by visiting each member geometry.
 *
 * @param {object|undefined} bbox bounds accumulated so far
 * @param {object} geometry GeoJSON geometry
 * @returns {object|undefined} updated bounds
 */
function extendBoundsFromGeometry(bbox, geometry) {
  if (geometry.type !== "GeometryCollection") {
    return extendBoundsFromCoordinates(bbox, geometry.coordinates);
  }
  let bounds = bbox;
  const members = geometry.geometries || [];
  for (const member of members) {
    bounds = extendBoundsFromGeometry(bounds, member);
  }
  return bounds;
}
1768
/**
 * Grow a bounding box over a (possibly nested) GeoJSON coordinate array.
 * An array whose first element is a number is treated as a single position
 * [x, y, z?, m?]; anything else is treated as a list of nested arrays.
 *
 * @param {object|undefined} bbox bounds accumulated so far
 * @param {any[]} coordinates position or nested coordinate arrays
 * @returns {object|undefined} updated bounds
 */
function extendBoundsFromCoordinates(bbox, coordinates) {
  let bounds = bbox;
  if (typeof coordinates[0] !== "number") {
    for (const nested of coordinates) {
      bounds = extendBoundsFromCoordinates(bounds, nested);
    }
    return bounds;
  }
  const axes = [
    ["xmin", "xmax"],
    ["ymin", "ymax"],
    ["zmin", "zmax"],
    ["mmin", "mmax"]
  ];
  axes.forEach(([minKey, maxKey], axis) => {
    // x and y are always considered; z and m only when the position has them
    if (axis < 2 || coordinates.length > axis) {
      bounds = updateAxis(bounds, minKey, maxKey, coordinates[axis]);
    }
  });
  return bounds;
}
1781
/**
 * Widen one axis of a bounding box to include a value.
 * Non-finite values (including undefined) leave the box untouched; the box
 * is created on first use and otherwise mutated in place.
 *
 * @param {object|undefined} bbox bounds accumulated so far
 * @param {string} minKey property holding the axis minimum
 * @param {string} maxKey property holding the axis maximum
 * @param {number|undefined} value coordinate to include
 * @returns {object|undefined} the (possibly newly created) bounds
 */
function updateAxis(bbox, minKey, maxKey, value) {
  if (!Number.isFinite(value)) return bbox;
  const bounds = bbox || {};
  const currentMin = bounds[minKey];
  const currentMax = bounds[maxKey];
  if (currentMin === void 0 || value < currentMin) bounds[minKey] = value;
  if (currentMax === void 0 || value > currentMax) bounds[maxKey] = value;
  return bounds;
}
1790
/**
 * Geometry type code of a GeoJSON geometry, offset by its dimensionality:
 * the base code for 2 dimensions, +1000 for 3 and +3000 for 4.
 *
 * @param {object} geometry GeoJSON geometry
 * @returns {number} dimension-qualified type code
 * @throws {Error} for unknown geometry types or unsupported dimension counts
 */
function geometryTypeCodeWithDimension(geometry) {
  const base = geometryTypeCodes[geometry.type];
  if (base === void 0) throw new Error(`unknown geometry type: ${geometry.type}`);
  const dim = inferGeometryDimensions(geometry);
  switch (dim) {
    case 2:
      return base;
    case 3:
      return base + 1e3;
    case 4:
      return base + 3e3;
    default:
      throw new Error(`unsupported geometry dimensions: ${dim}`);
  }
}
1799
// Base geometry type codes keyed by GeoJSON type name. The codes are
// consecutive, so they are derived from the position in this ordered list.
var geometryTypeCodes = Object.fromEntries(
  [
    "Point",
    "LineString",
    "Polygon",
    "MultiPoint",
    "MultiLineString",
    "MultiPolygon",
    "GeometryCollection"
  ].map((typeName, position) => [typeName, position + 1])
);
1808
/**
 * Number of coordinate dimensions used by a GeoJSON geometry.
 * For a GeometryCollection this is the largest dimension among its members,
 * defaulting to 2 when the collection is empty.
 *
 * @param {object} geometry GeoJSON geometry
 * @returns {number} dimension count
 */
function inferGeometryDimensions(geometry) {
  if (geometry.type !== "GeometryCollection") {
    return inferCoordinateDimensions(geometry.coordinates);
  }
  let widest = 0;
  const members = geometry.geometries || [];
  for (const member of members) {
    widest = Math.max(widest, inferGeometryDimensions(member));
  }
  return widest || 2;
}
1818
/**
 * Number of dimensions in a (possibly nested) coordinate array: the length of
 * a position, or the largest dimension found among nested arrays. Empty
 * input defaults to 2.
 *
 * @param {any[]} value position or nested coordinate arrays
 * @returns {number} dimension count
 */
function inferCoordinateDimensions(value) {
  if (!value.length) return 2;
  const looksLikePosition = typeof value[0] === "number";
  if (looksLikePosition) return value.length;
  let widest = 0;
  for (const nested of value) {
    widest = Math.max(widest, inferCoordinateDimensions(nested));
  }
  return widest || 2;
}
1827
+
1828
+ // ../../../node_modules/.pnpm/hyparquet@1.26.1/node_modules/hyparquet/src/utils.js
1829
/**
 * Convert a value into something JSON.stringify can serialize:
 * undefined becomes null, bigint becomes number, -0 becomes 0, Uint8Array
 * becomes a plain number array, Date becomes an ISO string, and arrays and
 * objects are converted recursively (undefined object properties are dropped).
 *
 * @param {any} obj value to convert
 * @returns {any} JSON-compatible value
 */
function toJson(obj) {
  if (obj === void 0) return null;
  if (typeof obj === "bigint") return Number(obj);
  if (Object.is(obj, -0)) return 0;
  if (Array.isArray(obj)) return obj.map(toJson);
  if (obj instanceof Uint8Array) return [...obj];
  if (obj instanceof Date) return obj.toISOString();
  if (!(obj instanceof Object)) return obj;

  const converted = {};
  for (const key of Object.keys(obj)) {
    const property = obj[key];
    if (property !== void 0) converted[key] = toJson(property);
  }
  return converted;
}
1846
+
1847
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/wkb.js
1848
/**
 * Serialize a GeoJSON geometry to well-known binary (WKB).
 *
 * @param {object} geometry GeoJSON geometry
 * @returns {Uint8Array} WKB bytes
 */
function geojsonToWkb(geometry) {
  const wkb = new ByteWriter();
  writeGeometry(wkb, geometry);
  return wkb.getBytes();
}
1853
/**
 * Append one GeoJSON geometry to the writer in well-known binary form:
 * a byte-order byte (1 = little endian), the type code (offset by 1000 for
 * 3 dimensions or 3000 for 4), then the type-specific payload. Multi*
 * geometries and GeometryCollections are written as counted sequences of
 * nested geometries.
 *
 * @param {object} writer destination writer (appendUint8/appendUint32/appendFloat64)
 * @param {object} geometry GeoJSON geometry
 * @throws {Error} when geometry is null or not an object, has an unknown type,
 *   or uses a dimension count other than 2, 3 or 4
 */
function writeGeometry(writer, geometry) {
  // typeof null is "object", so null must be rejected explicitly; otherwise
  // a null GeometryCollection member fails later with an opaque TypeError.
  if (geometry === null || typeof geometry !== "object") {
    throw new Error("geometry values must be GeoJSON geometries");
  }
  const typeCode = geometryTypeCode(geometry.type);
  const dim = inferGeometryDimensions2(geometry);
  let flag = 0;
  if (dim === 3) flag = 1;
  else if (dim === 4) flag = 3;
  // Anything other than 2..4 (e.g. a one-number position) cannot be
  // represented and would otherwise be emitted as malformed WKB.
  else if (dim !== 2) throw new Error(`unsupported geometry dimensions: ${dim}`);
  writer.appendUint8(1);
  writer.appendUint32(typeCode + flag * 1e3);

  // Multi* geometries are sequences of their singular counterpart.
  const partTypes = {
    MultiPoint: "Point",
    MultiLineString: "LineString",
    MultiPolygon: "Polygon"
  };
  const { type } = geometry;
  if (type === "Point") {
    writePosition(writer, geometry.coordinates, dim);
  } else if (type === "LineString") {
    writeLine(writer, geometry.coordinates, dim);
  } else if (type === "Polygon") {
    writer.appendUint32(geometry.coordinates.length);
    for (const ring of geometry.coordinates) {
      writeLine(writer, ring, dim);
    }
  } else if (type === "MultiPoint" || type === "MultiLineString" || type === "MultiPolygon") {
    writer.appendUint32(geometry.coordinates.length);
    for (const coordinates of geometry.coordinates) {
      writeGeometry(writer, { type: partTypes[type], coordinates });
    }
  } else if (type === "GeometryCollection") {
    writer.appendUint32(geometry.geometries.length);
    for (const child of geometry.geometries) {
      writeGeometry(writer, child);
    }
  } else {
    throw new Error("unsupported geometry type");
  }
}
1898
/**
 * Append the first `dim` ordinates of a position as float64 values.
 *
 * @param {object} writer destination writer (appendFloat64)
 * @param {number[]} position coordinate tuple
 * @param {number} dim number of ordinates to write
 * @throws {Error} when the position has fewer than `dim` ordinates
 */
function writePosition(writer, position, dim) {
  if (position.length < dim) {
    throw new Error("geometry position dimensions mismatch");
  }
  let axis = 0;
  while (axis < dim) {
    writer.appendFloat64(position[axis]);
    axis += 1;
  }
}
1906
/**
 * Append a counted sequence of positions: the position count as uint32,
 * followed by each position.
 *
 * @param {object} writer destination writer
 * @param {number[][]} coordinates positions of the line or ring
 * @param {number} dim number of ordinates per position
 */
function writeLine(writer, coordinates, dim) {
  const pointCount = coordinates.length;
  writer.appendUint32(pointCount);
  for (const point of coordinates) {
    writePosition(writer, point, dim);
  }
}
1912
/**
 * Map a GeoJSON geometry type name to its numeric geometry type code.
 *
 * @param {string} type GeoJSON type name
 * @returns {number} type code (1-7)
 * @throws {Error} for unknown type names
 */
function geometryTypeCode(type) {
  switch (type) {
    case "Point":
      return 1;
    case "LineString":
      return 2;
    case "Polygon":
      return 3;
    case "MultiPoint":
      return 4;
    case "MultiLineString":
      return 5;
    case "MultiPolygon":
      return 6;
    case "GeometryCollection":
      return 7;
    default:
      throw new Error(`unknown geometry type: ${type}`);
  }
}
1922
/**
 * Number of coordinate dimensions used by a GeoJSON geometry.
 * For a GeometryCollection this is the largest dimension among its members,
 * defaulting to 2 when the collection is empty.
 *
 * @param {object} geometry GeoJSON geometry
 * @returns {number} dimension count
 */
function inferGeometryDimensions2(geometry) {
  if (geometry.type !== "GeometryCollection") {
    return inferCoordinateDimensions2(geometry.coordinates);
  }
  let widest = 0;
  for (const member of geometry.geometries) {
    widest = Math.max(widest, inferGeometryDimensions2(member));
  }
  return widest || 2;
}
1932
/**
 * Number of dimensions in a (possibly nested) coordinate array: the length of
 * a position, or the largest dimension found among nested arrays. Non-array
 * and empty input default to 2.
 *
 * @param {any} value position or nested coordinate arrays
 * @returns {number} dimension count
 */
function inferCoordinateDimensions2(value) {
  if (!Array.isArray(value) || !value.length) return 2;
  const looksLikePosition = typeof value[0] === "number";
  if (looksLikePosition) return value.length;
  let widest = 0;
  for (const nested of value) {
    widest = Math.max(widest, inferCoordinateDimensions2(nested));
  }
  return widest || 2;
}
1942
+
1943
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/unconvert.js
1944
// Milliseconds in one day: 24 h * 60 min * 60 s * 1000 ms.
var dayMillis = 24 * 60 * 60 * 1e3;
1945
/**
 * Convert JavaScript column values into the representation expected by the
 * plain encoder for the column's physical type, driven by the element's
 * converted_type / logical_type. Null and undefined entries are passed
 * through so callers can keep positions aligned.
 *
 * Conversions performed:
 * - DECIMAL: numbers are scaled by 10^scale, rounded, and encoded per physical type
 * - DATE: Date becomes whole days since the epoch
 * - TIMESTAMP_MILLIS / TIMESTAMP_MICROS / logical TIMESTAMP: Date or numeric becomes bigint
 * - JSON: values are JSON-stringified and UTF-8 encoded
 * - UTF8: strings are UTF-8 encoded
 * - UINT_32: values above 2^31-1 are wrapped into the signed int32 range
 * - FLOAT16, UUID, GEOMETRY/GEOGRAPHY: encoded to bytes
 *
 * @param {object} element schema element for the column
 * @param {ArrayLike<any>} values column values
 * @returns {ArrayLike<any>} converted values (the input itself when no conversion applies)
 * @throws {Error} when values do not match what the declared type requires
 */
function unconvert(element, values) {
  const { type, converted_type: ctype, logical_type: ltype } = element;
  if (ctype === "DECIMAL") {
    const factor = 10 ** (element.scale || 0);
    return values.map((v) => {
      if (v === null || v === void 0) return v;
      if (typeof v !== "number") throw new Error("DECIMAL must be a number");
      return unconvertDecimal(element, BigInt(Math.round(v * factor)));
    });
  }
  if (ctype === "DATE") {
    return Array.from(values).map((v) => {
      if (v instanceof Date) return Math.floor(v.getTime() / dayMillis);
      return v;
    });
  }
  if (ctype === "TIMESTAMP_MILLIS") {
    return Array.from(values).map((v) => {
      if (v === null || v === void 0) return v;
      if (v instanceof Date) return BigInt(v.getTime());
      return BigInt(v);
    });
  }
  if (ctype === "TIMESTAMP_MICROS") {
    return Array.from(values).map((v) => {
      if (v === null || v === void 0) return v;
      // Scale in BigInt: multiplying in floating point first can exceed
      // 2^53 for far-future dates and silently lose precision.
      if (v instanceof Date) return BigInt(v.getTime()) * 1000n;
      return BigInt(v);
    });
  }
  if (ctype === "JSON") {
    if (!Array.isArray(values)) throw new Error("JSON must be an array");
    const encoder2 = new TextEncoder();
    // Only undefined is passed through; null is serialized as JSON "null".
    return values.map((v) => v === void 0 ? void 0 : encoder2.encode(JSON.stringify(toJson(v))));
  }
  if (ctype === "UTF8") {
    if (!Array.isArray(values)) throw new Error("strings must be an array");
    const encoder2 = new TextEncoder();
    return values.map((v) => typeof v === "string" ? encoder2.encode(v) : v);
  }
  const isUint32Logical = ltype?.type === "INTEGER" && ltype.bitWidth === 32 && !ltype.isSigned;
  if (ctype === "UINT_32" || isUint32Logical) {
    if (values instanceof Uint32Array) return values;
    // Same bits, reinterpreted as unsigned (shares the underlying buffer).
    if (values instanceof Int32Array) return new Uint32Array(values.buffer, values.byteOffset, values.length);
    return Array.from(values).map((v) => {
      if (v === null || v === void 0) return v;
      if (!Number.isSafeInteger(v)) throw new Error("expected integer value, got " + v);
      if (v < 0 || v > 4294967295) throw new Error("expected uint32 value, got " + v);
      // Wrap the upper half of the uint32 range into negative int32 values.
      if (v > 2147483647) return v - 4294967296;
      return v;
    });
  }
  if (ltype?.type === "FLOAT16") {
    if (type !== "FIXED_LEN_BYTE_ARRAY") throw new Error("FLOAT16 must be FIXED_LEN_BYTE_ARRAY type");
    if (element.type_length !== 2) throw new Error("FLOAT16 expected type_length to be 2 bytes");
    return Array.from(values).map(unconvertFloat16);
  }
  if (ltype?.type === "UUID") {
    if (!Array.isArray(values)) throw new Error("UUID must be an array");
    if (type !== "FIXED_LEN_BYTE_ARRAY") throw new Error("UUID must be FIXED_LEN_BYTE_ARRAY type");
    if (element.type_length !== 16) throw new Error("UUID expected type_length to be 16 bytes");
    return values.map(unconvertUuid);
  }
  if (ltype?.type === "TIMESTAMP") {
    return Array.from(values).map((v) => {
      if (v === null || v === void 0) return v;
      if (v instanceof Date) {
        const millis = BigInt(v.getTime());
        if (ltype.unit === "NANOS") return millis * 1000000n;
        if (ltype.unit === "MICROS") return millis * 1000n;
        return millis;
      }
      return BigInt(v);
    });
  }
  if (ltype?.type === "GEOMETRY" || ltype?.type === "GEOGRAPHY") {
    if (!Array.isArray(values)) throw new Error("geometry must be an array");
    return values.map((v) => {
      if (v === null || v === void 0) return v;
      return geojsonToWkb(v);
    });
  }
  return values;
}
2028
/**
 * Convert a UUID to its 16-byte binary form.
 *
 * @param {string|Uint8Array|null|undefined} value UUID string (hyphens
 *   optional, case-insensitive) or bytes, which are returned unchanged
 * @returns {Uint8Array|undefined} 16 bytes, or undefined for null/undefined
 * @throws {Error} when the value is not a valid UUID string or Uint8Array
 */
function unconvertUuid(value) {
  if (value === void 0 || value === null) return void 0;
  if (value instanceof Uint8Array) return value;
  if (typeof value !== "string") {
    throw new Error("UUID must be a string or Uint8Array");
  }
  const uuidPattern = /^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$/i;
  if (!uuidPattern.test(value)) {
    throw new Error("UUID must be a valid UUID string");
  }
  const hex = value.replace(/-/g, "").toLowerCase();
  // Each byte is one pair of hex digits, in order.
  return Uint8Array.from({ length: 16 }, (_, byteIndex) => {
    const at = byteIndex * 2;
    return parseInt(hex.slice(at, at + 2), 16);
  });
}
2045
// Maximum number of bytes kept for a byte-array min/max statistic.
var STATS_TRUNCATE_LENGTH = 16;

/**
 * Shorten a byte-array statistic to at most STATS_TRUNCATE_LENGTH bytes while
 * keeping it a valid bound: a minimum is simply cut (the prefix sorts no
 * later than the original), a maximum is cut and then incremented so it sorts
 * no earlier than the original.
 *
 * @param {Uint8Array} bytes statistic bytes
 * @param {boolean} isMax true when the value is an upper bound
 * @returns {Uint8Array|undefined} the bound, or undefined when no upper
 *   bound fits (every kept byte is already 0xFF)
 */
function truncateStatistic(bytes, isMax) {
  if (bytes.length <= STATS_TRUNCATE_LENGTH) return bytes;
  const head = bytes.slice(0, STATS_TRUNCATE_LENGTH);
  if (!isMax) return head;

  // Find the last byte that can be incremented without overflowing.
  let last = head.length - 1;
  for (; last >= 0; last--) {
    if (head[last] !== 255) break;
  }
  if (last < 0) return void 0;

  // Drop the trailing 0xFF bytes and bump the final remaining byte.
  const bumped = head.slice(0, last + 1);
  bumped[last] += 1;
  return bumped;
}
2057
/**
 * Report whether a min/max statistic will be stored exactly.
 * Only byte-array statistics longer than STATS_TRUNCATE_LENGTH are flagged
 * (as `false`); in every other case nothing is reported.
 *
 * @param {any} value statistic value
 * @param {object} element schema element for the column
 * @returns {false|undefined} false when the statistic exceeds the truncation
 *   length, otherwise undefined
 */
function minMaxIsExact(value, element) {
  if (value === void 0 || value === null) return void 0;
  const { type } = element;
  const isByteType = type === "BYTE_ARRAY" || type === "FIXED_LEN_BYTE_ARRAY";
  if (!isByteType || element.logical_type?.type === "UUID") return void 0;

  const encoded = value instanceof Uint8Array ? value : new TextEncoder().encode(value.toString());
  if (encoded.length > STATS_TRUNCATE_LENGTH) return false;
  return void 0;
}
2065
/**
 * Serialize a min or max statistic value to the bytes stored in parquet
 * statistics: little-endian fixed-width bytes for numeric physical types,
 * and (possibly truncated) raw bytes for byte arrays.
 *
 * @param {any} value statistic value; null/undefined produce undefined
 * @param {object} element schema element for the column
 * @param {boolean} isMax true when serializing an upper bound (affects how
 *   byte-array statistics are truncated)
 * @returns {Uint8Array|undefined} encoded statistic
 * @throws {Error} when the value cannot be encoded for the column type
 */
function unconvertMinMax(value, element, isMax) {
  if (value === void 0 || value === null) return void 0;
  const { type, converted_type } = element;

  // Little-endian fixed-width encoders for the numeric physical types.
  const int32Bytes = (n) => {
    const buffer = new ArrayBuffer(4);
    new DataView(buffer).setInt32(0, n, true);
    return new Uint8Array(buffer);
  };
  const int64Bytes = (n) => {
    const buffer = new ArrayBuffer(8);
    new DataView(buffer).setBigInt64(0, n, true);
    return new Uint8Array(buffer);
  };
  const float32Bytes = (n) => {
    const buffer = new ArrayBuffer(4);
    new DataView(buffer).setFloat32(0, n, true);
    return new Uint8Array(buffer);
  };
  const float64Bytes = (n) => {
    const buffer = new ArrayBuffer(8);
    new DataView(buffer).setFloat64(0, n, true);
    return new Uint8Array(buffer);
  };

  if (type === "BOOLEAN") return new Uint8Array([value ? 1 : 0]);
  if (element.logical_type?.type === "UUID" && (typeof value === "string" || value instanceof Uint8Array)) {
    return unconvertUuid(value);
  }
  if (converted_type === "DECIMAL") {
    if (typeof value !== "number") throw new Error("DECIMAL must be a number");
    const factor = 10 ** (element.scale || 0);
    const out = unconvertDecimal(element, BigInt(Math.round(value * factor)));
    if (out instanceof Uint8Array) return out;
    // A number here is the unscaled INT32 decimal, so it must be written as
    // an integer; writing it as float32 would produce unrelated bytes.
    if (typeof out === "number") return int32Bytes(out);
    if (typeof out === "bigint") return int64Bytes(out);
  }
  if (type === "BYTE_ARRAY" || type === "FIXED_LEN_BYTE_ARRAY") {
    const bytes = value instanceof Uint8Array ? value : new TextEncoder().encode(value.toString());
    return truncateStatistic(bytes, isMax);
  }
  if (type === "FLOAT" && typeof value === "number") return float32Bytes(value);
  if (type === "DOUBLE" && typeof value === "number") return float64Bytes(value);
  if (type === "INT32" && typeof value === "number") return int32Bytes(value);
  if (type === "INT64" && typeof value === "bigint") return int64Bytes(value);
  if (type === "INT32" && converted_type === "DATE" && value instanceof Date) {
    return int32Bytes(Math.floor(value.getTime() / dayMillis));
  }
  if (type === "INT64" && converted_type === "TIMESTAMP_MILLIS" && value instanceof Date) {
    return int64Bytes(BigInt(value.getTime()));
  }
  if (type === "INT64" && converted_type === "TIMESTAMP_MICROS" && value instanceof Date) {
    // Scale in BigInt so far-future dates do not lose precision past 2^53.
    return int64Bytes(BigInt(value.getTime()) * 1000n);
  }
  if (type === "INT64" && element.logical_type?.type === "TIMESTAMP" && value instanceof Date) {
    const millis = BigInt(value.getTime());
    const { unit } = element.logical_type;
    let bigintValue = millis;
    if (unit === "NANOS") bigintValue = millis * 1000000n;
    else if (unit === "MICROS") bigintValue = millis * 1000n;
    return int64Bytes(bigintValue);
  }
  throw new Error(`unsupported type for statistics: ${type} with value ${value}`);
}
2139
/**
 * Build the positional (field_N) statistics record from column statistics,
 * serializing the min/max values for the column's type.
 *
 * @param {object} stats statistics (max, min, null_count, distinct_count,
 *   max_value, min_value, is_max_value_exact, is_min_value_exact)
 * @param {object} element schema element for the column
 * @returns {object} record with field_1 .. field_8
 */
function unconvertStatistics(stats, element) {
  const encodeMax = (bound) => unconvertMinMax(bound, element, true);
  const encodeMin = (bound) => unconvertMinMax(bound, element, false);

  const field_1 = encodeMax(stats.max);
  const field_2 = encodeMin(stats.min);
  const field_5 = encodeMax(stats.max_value);
  const field_6 = encodeMin(stats.min_value);
  // Explicit exactness flags win; otherwise derive them from the bound that
  // will be written, preferring the *_value variants.
  const field_7 = stats.is_max_value_exact ?? minMaxIsExact(stats.max_value ?? stats.max, element);
  const field_8 = stats.is_min_value_exact ?? minMaxIsExact(stats.min_value ?? stats.min, element);

  return {
    field_1,
    field_2,
    field_3: stats.null_count,
    field_4: stats.distinct_count,
    field_5,
    field_6,
    field_7,
    field_8
  };
}
2151
/**
 * Encode an unscaled decimal for the given physical type: a number for
 * INT32, the bigint itself for INT64, otherwise big-endian two's-complement
 * bytes (exactly type_length bytes when set, else the shortest form).
 *
 * @param {{type: string, type_length?: number}} element schema element
 * @param {bigint} value unscaled decimal value
 * @returns {number|bigint|Uint8Array} encoded value
 * @throws {Error} when FIXED_LEN_BYTE_ARRAY has no type_length
 */
function unconvertDecimal({ type, type_length }, value) {
  if (type === "INT32") return Number(value);
  if (type === "INT64") return value;
  if (type === "FIXED_LEN_BYTE_ARRAY" && !type_length) {
    throw new Error("fixed length byte array type_length is required");
  }
  if (!type_length && !value) return new Uint8Array();

  // Collect bytes least-significant first, then reverse to big endian.
  const littleEndian = [];
  let rest = value;
  for (;;) {
    const low = Number(rest & 0xffn);
    littleEndian.push(low);
    rest >>= 8n;
    if (type_length) {
      if (littleEndian.length >= type_length) break;
      continue;
    }
    // Variable length: stop once the remaining bits are pure sign extension
    // of the byte just emitted.
    const negative = (low & 128) !== 0;
    if (negative ? rest === -1n : rest === 0n) break;
  }
  return new Uint8Array(littleEndian.reverse());
}
2174
/**
 * Encode a number as an IEEE 754 half-precision float, returned as two
 * little-endian bytes. The value is first rounded to float32 and then
 * narrowed; halfway cases round away from zero.
 *
 * @param {number|null|undefined} value number to encode
 * @returns {Uint8Array|undefined} 2 bytes, or undefined for null/undefined
 * @throws {Error} when value is not a number
 */
function unconvertFloat16(value) {
  if (value === void 0 || value === null) return;
  if (typeof value !== "number") throw new Error("parquet float16 expected number value");
  if (Number.isNaN(value)) return new Uint8Array([0, 126]);
  const sign = value < 0 || Object.is(value, -0) ? 1 : 0;
  const abs = Math.abs(value);
  if (!Number.isFinite(abs)) return new Uint8Array([0, sign << 7 | 124]);
  if (abs === 0) return new Uint8Array([0, sign << 7]);

  // Take the float32 bit pattern apart.
  const buf = new ArrayBuffer(4);
  new Float32Array(buf)[0] = abs;
  const bits32 = new Uint32Array(buf)[0];
  const exp32 = (bits32 >>> 23 & 255) - 127;
  let mant32 = bits32 & 8388607;

  if (exp32 < -14) {
    // Below the smallest normal half: encode as a subnormal, i.e. an integer
    // count of 2^-24 steps. The 24-bit significand must be shifted right by
    // -(exp32 + 1) bits to express it in those steps.
    const shift = -1 - exp32;
    // Less than half a step rounds to zero. This also keeps the shift count
    // below 32, where JavaScript shifts would wrap around.
    if (shift > 24) return new Uint8Array([0, sign << 7]);
    const steps = (mant32 | 8388608) + (1 << shift - 1) >> shift;
    // If rounding reaches 1024 steps the bit pattern is exactly the smallest
    // normal half, so no special case is needed.
    const bits162 = sign << 15 | steps;
    return new Uint8Array([bits162 & 255, bits162 >> 8]);
  }
  if (exp32 > 15) return new Uint8Array([0, sign << 7 | 124]);

  let exp16 = exp32 + 15;
  // Round the 23-bit mantissa to 10 bits by adding half of the dropped range.
  mant32 = mant32 + 4096;
  if (mant32 & 8388608) {
    // Rounding carried out of the mantissa: bump the exponent instead.
    mant32 = 0;
    if (++exp16 === 31)
      return new Uint8Array([0, sign << 7 | 124]);
  }
  const bits16 = sign << 15 | exp16 << 10 | mant32 >> 13;
  return new Uint8Array([bits16 & 255, bits16 >> 8]);
}
2206
+
2207
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/column.js
2208
/**
 * Write one column chunk: an optional dictionary page followed by one or more
 * V2 data pages, collecting chunk metadata and, when enabled on the column,
 * statistics, a bloom filter, a column index and an offset index.
 *
 * @param {object} options
 * @param {object} options.writer destination writer
 * @param {object} options.column column descriptor
 * @param {object} options.pageData values with definition and repetition levels
 * @returns {{chunk: object, columnIndex: object|undefined,
 *   offsetIndex: object|undefined, bloomFilter: any}}
 * @throws {Error} when the column element has no physical type
 */
function writeColumn({ writer, column, pageData }) {
  const { columnName, element, schemaPath, stats, pageSize, encoding: userEncoding } = column;
  const { type, type_length } = element;
  if (!type) throw new Error(`column ${columnName} cannot determine type`);
  const { values, definitionLevels, repetitionLevels, maxDefinitionLevel } = pageData;
  const chunkStart = writer.offset;

  const logicalKind = element?.logical_type?.type;
  const isGeospatial = logicalKind === "GEOMETRY" || logicalKind === "GEOGRAPHY";
  const statistics = stats ? getStatistics(values) : void 0;
  const geospatial_statistics = stats && isGeospatial ? geospatialStatistics(values) : void 0;

  let bloomFilter;
  if (column.bloomFilter) {
    const bloomOptions = typeof column.bloomFilter === "object" ? column.bloomFilter : void 0;
    const bloom = new BloomBuilder(element, bloomOptions);
    for (const value of values) bloom.insert(value);
    bloomFilter = bloom.finalize();
  }

  // Either dictionary-encode (pages carry INT32 indexes into a dictionary
  // page written first) or write the converted values directly.
  const { dictionary, indexes } = useDictionary(values, type, type_length, userEncoding, pageSize);
  let dictionary_page_offset;
  let encoding;
  let writeValues;
  let writeType;
  if (dictionary && indexes) {
    dictionary_page_offset = BigInt(writer.offset);
    writeDictionaryPage(writer, column, unconvert(element, dictionary));
    writeValues = indexes;
    writeType = "INT32";
    encoding = "RLE_DICTIONARY";
  } else {
    writeValues = unconvert(element, values);
    writeType = type;
    encoding = userEncoding ?? (type === "BOOLEAN" && values.length > 16 ? "RLE" : "PLAIN");
  }
  const encodings = [encoding];

  const pageBoundaries = getPageBoundaries(writeValues, writeType, type_length, pageSize);
  // Page indexes are only produced when the chunk has more than one page.
  const multiPage = pageBoundaries.length > 1;
  const columnIndex = column.columnIndex && multiPage ? {
    null_pages: [],
    min_values: [],
    max_values: [],
    boundary_order: "UNORDERED",
    null_counts: []
  } : void 0;
  const offsetIndex = column.offsetIndex && multiPage ? {
    page_locations: []
  } : void 0;

  const data_page_offset = BigInt(writer.offset);
  let first_row_index = 0n;
  let prevStart = 0;
  let prevMinValue;
  let prevMaxValue;
  let ascending = true;
  let descending = true;
  // Any decrease between consecutive pages rules out ascending order and any
  // increase rules out descending order.
  const noteOrder = (previous, current) => {
    if (previous === void 0 || current === void 0) return;
    if (previous > current) ascending = false;
    if (previous < current) descending = false;
  };

  for (const { start, end } of pageBoundaries) {
    const pageOffset = writer.offset;
    writeDataPageV2({
      writer,
      column,
      encoding,
      pageData: {
        values: writeValues.slice(start, end),
        definitionLevels: definitionLevels.slice(start, end),
        repetitionLevels: repetitionLevels.slice(start, end),
        maxDefinitionLevel
      }
    });

    if (columnIndex) {
      // Page statistics come from the original (unconverted) values.
      const { min_value, max_value, null_count = 0n } = getStatistics(values.slice(start, end));
      columnIndex.null_pages.push(null_count === BigInt(end - start));
      columnIndex.min_values.push(unconvertMinMax(min_value, element, false) ?? new Uint8Array());
      columnIndex.max_values.push(unconvertMinMax(max_value, element, true) ?? new Uint8Array());
      columnIndex.null_counts?.push(null_count);
      noteOrder(prevMinValue, min_value);
      noteOrder(prevMaxValue, max_value);
      prevMinValue = min_value;
      prevMaxValue = max_value;
    }

    if (offsetIndex) {
      if (repetitionLevels.length) {
        // A repetition level of 0 is counted as the start of a new row.
        for (let i = prevStart + 1; i <= start; i++) {
          if (repetitionLevels[i] === 0) first_row_index++;
        }
      } else {
        first_row_index = BigInt(start);
      }
      offsetIndex.page_locations.push({
        offset: BigInt(pageOffset),
        compressed_page_size: writer.offset - pageOffset,
        first_row_index
      });
    }
    prevStart = start;
  }

  if (columnIndex) {
    if (ascending) columnIndex.boundary_order = "ASCENDING";
    else if (descending) columnIndex.boundary_order = "DESCENDING";
  }

  let encoding_stats;
  if (stats) {
    encoding_stats = [];
    if (dictionary_page_offset !== void 0) {
      encoding_stats.push({ page_type: "DICTIONARY_PAGE", encoding: "PLAIN", count: 1 });
    }
    encoding_stats.push({ page_type: "DATA_PAGE_V2", encoding, count: pageBoundaries.length });
  }

  const chunkBytes = BigInt(writer.offset - chunkStart);
  return {
    chunk: {
      meta_data: {
        type,
        encodings,
        path_in_schema: schemaPath.slice(1).map((s) => s.name),
        codec: column.codec ?? "UNCOMPRESSED",
        num_values: BigInt(values.length),
        total_compressed_size: chunkBytes,
        total_uncompressed_size: chunkBytes,
        // TODO: uncompressed pages + headers
        data_page_offset,
        dictionary_page_offset,
        statistics,
        encoding_stats,
        geospatial_statistics
      },
      file_offset: BigInt(chunkStart)
    },
    columnIndex,
    offsetIndex,
    bloomFilter
  };
}
2339
/**
 * Split a column's values into page ranges so that each page holds roughly
 * `pageSize` bytes by estimated value size. Without a page size everything
 * goes into a single page.
 *
 * @param {ArrayLike<any>} values values to be written
 * @param {string} type physical type used for size estimation
 * @param {number|undefined} type_length byte width for fixed-length types
 * @param {number|undefined} pageSize target page size in bytes
 * @returns {{start: number, end: number}[]} half-open [start, end) ranges
 */
function getPageBoundaries(values, type, type_length, pageSize) {
  const total = values.length;
  if (!pageSize) {
    return [{ start: 0, end: total }];
  }
  const pages = [];
  let pageStart = 0;
  let pageBytes = 0;
  for (let i = 0; i < total; i++) {
    const size = estimateValueSize(values[i], type, type_length);
    pageBytes += size;
    // Close the page before the value that reaches the limit, but never
    // emit an empty page.
    if (i > pageStart && pageBytes >= pageSize) {
      pages.push({ start: pageStart, end: i });
      pageStart = i;
      pageBytes = size;
    }
  }
  if (pageStart < total) {
    pages.push({ start: pageStart, end: total });
  }
  return pages;
}
2360
/**
 * Compute min, max and null count over a list of values.
 * Null/undefined are counted as nulls; objects and NaN are ignored for
 * min/max. A zero minimum is reported as -0 and a zero maximum as +0.
 *
 * @param {Iterable<any>} values values to scan
 * @returns {{min_value: any, max_value: any, null_count: bigint}}
 */
function getStatistics(values) {
  let min_value;
  let max_value;
  let null_count = 0n;
  for (const candidate of values) {
    if (candidate === null || candidate === void 0) {
      null_count += 1n;
      continue;
    }
    const comparable = typeof candidate !== "object" && !Number.isNaN(candidate);
    if (!comparable) continue;
    if (min_value === void 0 || candidate < min_value) min_value = candidate;
    if (max_value === void 0 || candidate > max_value) max_value = candidate;
  }
  // -0 === 0 in JavaScript, so pin the sign of a zero bound explicitly.
  if (min_value === 0) min_value = -0;
  if (max_value === 0) max_value = 0;
  return { min_value, max_value, null_count };
}
2378
+
2379
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/dremel.js
2380
+ function encodeNestedValues(treePath, rows) {
2381
+ const schemaPath = treePath.map((n) => n.element);
2382
+ if (treePath.length < 2) throw new Error("parquet schema path must include column");
2383
+ const definitionLevels = [];
2384
+ const repetitionLevels = [];
2385
+ const maxDefinitionLevel = getMaxDefinitionLevel(treePath);
2386
+ if (treePath.length === 2 && maxDefinitionLevel === 0) {
2387
+ return { values: rows, definitionLevels, repetitionLevels, maxDefinitionLevel };
2388
+ }
2389
+ if (treePath.length === 2 && maxDefinitionLevel === 1) {
2390
+ const definitionLevels2 = new Array(rows.length);
2391
+ for (let i = 0; i < rows.length; i++) {
2392
+ definitionLevels2[i] = rows[i] === null || rows[i] === void 0 ? 0 : 1;
2393
+ }
2394
+ return { values: rows, definitionLevels: definitionLevels2, repetitionLevels, maxDefinitionLevel };
2395
+ }
2396
+ const repLevelPrior = new Array(treePath.length);
2397
+ let repeatedCount = 0;
2398
+ for (let i = 0; i < treePath.length; i++) {
2399
+ repLevelPrior[i] = repeatedCount;
2400
+ if (schemaPath[i].repetition_type === "REPEATED") repeatedCount++;
2401
+ }
2402
+ const values = [];
2403
+ for (const row of rows) {
2404
+ visit(1, row, 0, 0, false);
2405
+ }
2406
+ return { values, definitionLevels, repetitionLevels, maxDefinitionLevel };
2407
+ function visit(depth, value, defLevel, repLevel, allowNull) {
2408
+ const element = schemaPath[depth];
2409
+ const repetition = element.repetition_type || "REQUIRED";
2410
+ if (depth === treePath.length - 1) {
2411
+ if (value === null || value === void 0) {
2412
+ if (repetition === "REQUIRED" && !allowNull) {
2413
+ throw new Error("parquet required value is undefined");
2414
+ }
2415
+ definitionLevels.push(defLevel);
2416
+ } else {
2417
+ definitionLevels.push(repetition === "REQUIRED" ? defLevel : defLevel + 1);
2418
+ }
2419
+ repetitionLevels.push(repLevel);
2420
+ values.push(value);
2421
+ return;
2422
+ }
2423
+ if (repetition === "REPEATED") {
2424
+ if (value === null || value === void 0) {
2425
+ if (!allowNull) throw new Error("parquet required value is undefined");
2426
+ visit(depth + 1, void 0, defLevel, repLevel, true);
2427
+ return;
2428
+ }
2429
+ if (!Array.isArray(value)) {
2430
+ throw new Error(`parquet repeated field ${element.name} must be an array`);
2431
+ }
2432
+ if (!value.length) {
2433
+ visit(depth + 1, void 0, defLevel, repLevel, true);
2434
+ return;
2435
+ }
2436
+ const isMapEntry = isMapLike(treePath[depth - 1]);
2437
+ const childElement = schemaPath[depth + 1];
2438
+ for (let i = 0; i < value.length; i++) {
2439
+ let childValue = value[i];
2440
+ if (isMapEntry && childValue && typeof childValue === "object" && childElement) {
2441
+ childValue = childValue[childElement.name];
2442
+ }
2443
+ const childRep = i === 0 ? repLevel : repLevelPrior[depth] + 1;
2444
+ visit(depth + 1, childValue, defLevel + 1, childRep, false);
2445
+ }
2446
+ return;
2447
+ }
2448
+ if (repetition === "OPTIONAL") {
2449
+ if (value === null || value === void 0) {
2450
+ visit(depth + 1, void 0, defLevel, repLevel, true);
2451
+ } else {
2452
+ const childValue = getChildValue(depth, value);
2453
+ const childIsNull = childValue === null || childValue === void 0;
2454
+ const isLogicalContainer = isListLike(treePath[depth]) || isMapLike(treePath[depth]);
2455
+ const isStruct = element.num_children && !element.type && !isLogicalContainer;
2456
+ const nextDef = isStruct || !childIsNull ? defLevel + 1 : defLevel;
2457
+ visit(depth + 1, childValue, nextDef, repLevel, childIsNull);
2458
+ }
2459
+ return;
2460
+ }
2461
+ if (value === null || value === void 0) {
2462
+ if (!allowNull) throw new Error("parquet required value is undefined");
2463
+ visit(depth + 1, void 0, defLevel, repLevel, true);
2464
+ } else {
2465
+ visit(depth + 1, getChildValue(depth, value), defLevel, repLevel, false);
2466
+ }
2467
+ }
2468
+ function getChildValue(depth, currentValue) {
2469
+ if (currentValue === null || currentValue === void 0) return void 0;
2470
+ const child = schemaPath[depth + 1];
2471
+ if (!child) return void 0;
2472
+ if (isListLike(treePath[depth])) return currentValue;
2473
+ if (isMapLike(treePath[depth])) {
2474
+ return normalizeMap(currentValue, schemaPath[depth]);
2475
+ }
2476
+ if (typeof currentValue === "object" && !Array.isArray(currentValue)) {
2477
+ return currentValue[child.name];
2478
+ }
2479
+ throw new Error(`parquet expected struct, got ${currentValue}`);
2480
+ }
2481
+ }
2482
/**
 * Normalizes a map-like value into an array of `{ key, value }` entries.
 *
 * Accepted inputs:
 * - `Map`: one entry per map entry, in iteration order.
 * - array: each item must be an object with both `key` and `value`
 *   properties (kept as-is) or a two-element `[key, value]` array.
 * - any other non-null object: one entry per own enumerable string key.
 *
 * @param {*} value - Map-like value to normalize.
 * @param {{name: string}} element - Schema element; only `name` is read, for the error message.
 * @returns {Array<{key: *, value: *}>} The normalized entries.
 * @throws {Error} If an array item is not a valid entry, or if `value` is
 *   not a Map, array, or object (including `null`).
 */
function normalizeMap(value, element) {
  if (value instanceof Map) {
    return Array.from(value.entries(), ([k, v]) => ({ key: k, value: v }));
  }
  if (Array.isArray(value)) {
    return value.map((entry) => {
      if (entry && typeof entry === "object" && "key" in entry && "value" in entry) {
        return entry;
      }
      if (Array.isArray(entry) && entry.length === 2) {
        return { key: entry[0], value: entry[1] };
      }
      throw new Error("parquet map entry must provide key and value");
    });
  }
  // `typeof null` is "object": exclude it explicitly so null gets the
  // descriptive error below rather than a TypeError from Object.entries.
  if (value !== null && typeof value === "object") {
    return Object.entries(value).map(([k, v]) => ({ key: k, value: v }));
  }
  throw new Error(`parquet map field ${element.name} must be Map, array, or object`);
}
2502
+
2503
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/indexes.js
2504
/**
 * Writes the page indexes for every pending column chunk.
 *
 * All column indexes are written first, then all offset indexes, each in
 * the order the entries appear in `pageIndexes`.
 *
 * @param {object} writer - Destination writer, passed through to the per-index writers.
 * @param {Iterable<{chunk: object, columnIndex?: object, offsetIndex?: object}>} pageIndexes
 *   Entries pairing a column chunk with its (optional) indexes.
 */
function writeIndexes(writer, pageIndexes) {
  // First pass: column indexes only.
  for (const entry of pageIndexes) {
    writeColumnIndex(writer, entry.chunk, entry.columnIndex);
  }
  // Second pass: offset indexes only.
  for (const entry of pageIndexes) {
    writeOffsetIndex(writer, entry.chunk, entry.offsetIndex);
  }
}
2512
/**
 * Serializes a column index and records its location on the column chunk.
 *
 * Nothing is written when `columnIndex` is missing or holds at most one
 * entry in `min_values`.
 *
 * @param {object} writer - Writer exposing a numeric `offset`.
 * @param {object} columnChunk - Receives `column_index_offset` (BigInt) and `column_index_length`.
 * @param {object} [columnIndex] - Index with `null_pages`, `min_values`, `max_values`,
 *   `boundary_order` and `null_counts`.
 */
function writeColumnIndex(writer, columnChunk, columnIndex) {
  if (!columnIndex) return;
  if (columnIndex.min_values.length <= 1) return;

  const start = writer.offset;
  const { null_pages, min_values, max_values, boundary_order, null_counts } = columnIndex;
  serializeTCompactProtocol(writer, {
    field_1: null_pages,
    field_2: min_values,
    field_3: max_values,
    field_4: BoundaryOrders.indexOf(boundary_order),
    field_5: null_counts
  });
  columnChunk.column_index_offset = BigInt(start);
  // Length is however far the writer advanced while serializing.
  columnChunk.column_index_length = writer.offset - start;
}
2525
/**
 * Serializes an offset index and records its location on the column chunk.
 *
 * Nothing is written when `offsetIndex` is missing or lists at most one
 * page location.
 *
 * @param {object} writer - Writer exposing a numeric `offset`.
 * @param {object} columnChunk - Receives `offset_index_offset` (BigInt) and `offset_index_length`.
 * @param {object} [offsetIndex] - Index whose `page_locations` items carry
 *   `offset`, `compressed_page_size` and `first_row_index`.
 */
function writeOffsetIndex(writer, columnChunk, offsetIndex) {
  if (!offsetIndex) return;
  const { page_locations: locations } = offsetIndex;
  if (locations.length <= 1) return;

  const start = writer.offset;
  const encodedLocations = locations.map((location) => ({
    field_1: location.offset,
    field_2: location.compressed_page_size,
    field_3: location.first_row_index
  }));
  serializeTCompactProtocol(writer, { field_1: encodedLocations });
  columnChunk.offset_index_offset = BigInt(start);
  // Length is however far the writer advanced while serializing.
  columnChunk.offset_index_length = writer.offset - start;
}
2538
+
2539
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/metadata.js
2540
/**
 * Converts file metadata into its numbered-field representation,
 * serializes it with `serializeTCompactProtocol`, and then appends the
 * serialized byte length as a uint32.
 *
 * Enum-like string values (types, encodings, codecs, ...) are replaced by
 * their index in the corresponding lookup array. Optional sub-structures
 * are passed through unchanged when falsy.
 *
 * @param {object} writer - Writer exposing `offset` and `appendUint32`.
 * @param {object} metadata - Metadata with `version`, `schema`, `num_rows`,
 *   `row_groups`, optional `key_value_metadata` and `created_by`.
 */
function writeMetadata(writer, metadata) {
  // Optional key/value list -> numbered fields.
  const keyValues = (list) => list && list.map((kv) => ({
    field_1: kv.key,
    field_2: kv.value
  }));

  const schemaField = (element) => ({
    field_1: element.type && ParquetTypes.indexOf(element.type),
    field_2: element.type_length,
    field_3: element.repetition_type && FieldRepetitionTypes.indexOf(element.repetition_type),
    field_4: element.name,
    field_5: element.num_children,
    field_6: element.converted_type && ConvertedTypes.indexOf(element.converted_type),
    field_7: element.scale,
    field_8: element.precision,
    field_9: element.field_id,
    field_10: logicalType(element.logical_type)
  });

  const sizeStatisticsField = (stats) => stats && {
    field_1: stats.unencoded_byte_array_data_bytes,
    field_2: stats.repetition_level_histogram,
    field_3: stats.definition_level_histogram
  };

  const boundingBoxField = (bbox) => bbox && {
    field_1: bbox.xmin,
    field_2: bbox.xmax,
    field_3: bbox.ymin,
    field_4: bbox.ymax,
    field_5: bbox.zmin,
    field_6: bbox.zmax,
    field_7: bbox.mmin,
    field_8: bbox.mmax
  };

  const geospatialField = (geo) => geo && {
    field_1: boundingBoxField(geo.bbox),
    field_2: geo.geospatial_types
  };

  const columnMetaField = (meta) => meta && {
    field_1: ParquetTypes.indexOf(meta.type),
    field_2: meta.encodings.map((e) => Encodings.indexOf(e)),
    field_3: meta.path_in_schema,
    field_4: CompressionCodecs.indexOf(meta.codec),
    field_5: meta.num_values,
    field_6: meta.total_uncompressed_size,
    field_7: meta.total_compressed_size,
    field_8: keyValues(meta.key_value_metadata),
    field_9: meta.data_page_offset,
    field_10: meta.index_page_offset,
    field_11: meta.dictionary_page_offset,
    field_12: meta.statistics && unconvertStatistics(
      meta.statistics,
      schemaElement(metadata.schema, meta.path_in_schema)
    ),
    field_13: meta.encoding_stats && meta.encoding_stats.map((es) => ({
      field_1: PageTypes.indexOf(es.page_type),
      field_2: Encodings.indexOf(es.encoding),
      field_3: es.count
    })),
    field_14: meta.bloom_filter_offset,
    field_15: meta.bloom_filter_length,
    field_16: sizeStatisticsField(meta.size_statistics),
    field_17: geospatialField(meta.geospatial_statistics)
  };

  const columnChunkField = (c) => ({
    field_1: c.file_path,
    field_2: c.file_offset,
    field_3: columnMetaField(c.meta_data),
    field_4: c.offset_index_offset,
    field_5: c.offset_index_length,
    field_6: c.column_index_offset,
    field_7: c.column_index_length,
    // field_8 (crypto_metadata) is intentionally not emitted.
    field_9: c.encrypted_column_metadata
  });

  const rowGroupField = (rg) => ({
    field_1: rg.columns.map((c) => columnChunkField(c)),
    field_2: rg.total_byte_size,
    field_3: rg.num_rows,
    field_4: rg.sorting_columns && rg.sorting_columns.map((sc) => ({
      field_1: sc.column_idx,
      field_2: sc.descending,
      field_3: sc.nulls_first
    })),
    field_5: rg.file_offset,
    field_6: rg.total_compressed_size
    // field_7 (ordinal) is intentionally not emitted.
  });

  const compact = {
    field_1: metadata.version,
    field_2: metadata.schema.map((element) => schemaField(element)),
    field_3: metadata.num_rows,
    field_4: metadata.row_groups.map((rg) => rowGroupField(rg)),
    field_5: keyValues(metadata.key_value_metadata),
    field_6: metadata.created_by
  };

  const startOffset = writer.offset;
  serializeTCompactProtocol(writer, compact);
  // Trailer: number of bytes the serialized metadata occupied.
  writer.appendUint32(writer.offset - startOffset);
}
2634
/**
 * Looks up the schema element at the end of `path`.
 *
 * @param {Array} schema - Schema passed through to `getSchemaPath`.
 * @param {string[]} path - Path passed through to `getSchemaPath`.
 * @returns {*} The `element` of the last node returned by `getSchemaPath`.
 */
function schemaElement(schema, path) {
  const nodes = getSchemaPath(schema, path);
  const lastNode = nodes[nodes.length - 1];
  return lastNode.element;
}
2638
/**
 * Converts a logical type descriptor into its numbered-field union form.
 *
 * Exactly one `field_N` key is set on the result, selected by `type.type`;
 * parameterized types carry their parameters in a nested object.
 *
 * @param {object} [type] - Logical type with a `type` discriminator.
 * @returns {object|undefined} The encoded union, or `undefined` when
 *   `type` is falsy or its `type` is not one of the handled names.
 */
function logicalType(type) {
  if (!type) return;
  switch (type.type) {
    case "STRING":
      return { field_1: {} };
    case "MAP":
      return { field_2: {} };
    case "LIST":
      return { field_3: {} };
    case "ENUM":
      return { field_4: {} };
    case "DECIMAL":
      return { field_5: { field_1: type.scale, field_2: type.precision } };
    case "DATE":
      return { field_6: {} };
    case "TIME":
      return { field_7: { field_1: type.isAdjustedToUTC, field_2: timeUnit(type.unit) } };
    case "TIMESTAMP":
      return { field_8: { field_1: type.isAdjustedToUTC, field_2: timeUnit(type.unit) } };
    case "INTEGER":
      return { field_10: { field_1: type.bitWidth, field_2: type.isSigned } };
    case "NULL":
      return { field_11: {} };
    case "JSON":
      return { field_12: {} };
    case "BSON":
      return { field_13: {} };
    case "UUID":
      return { field_14: {} };
    case "FLOAT16":
      return { field_15: {} };
    case "VARIANT":
      return { field_16: {} };
    case "GEOMETRY":
      return { field_17: { field_1: type.crs } };
    case "GEOGRAPHY":
      return {
        field_18: {
          field_1: type.crs,
          field_2: type.algorithm && EdgeInterpolationAlgorithms.indexOf(type.algorithm)
        }
      };
    default:
      return;
  }
}
2675
/**
 * Encodes a time unit as a single-field union.
 *
 * @param {string} [unit] - "NANOS", "MICROS", or anything else.
 * @returns {object} `{ field_3: {} }` for "NANOS", `{ field_2: {} }` for
 *   "MICROS", and `{ field_1: {} }` for every other value.
 */
function timeUnit(unit) {
  switch (unit) {
    case "NANOS":
      return { field_3: {} };
    case "MICROS":
      return { field_2: {} };
    default:
      return { field_1: {} };
  }
}
2680
+
2681
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/snappy.js
2682
// Input is compressed in fragments of at most 1 << BLOCK_LOG bytes.
var BLOCK_LOG = 16;
var BLOCK_SIZE2 = 1 << BLOCK_LOG;
// Upper bound on the hash table size exponent used per fragment.
var MAX_HASH_TABLE_BITS = 14;
// Hash tables cached by size exponent and reused across calls.
var globalHashTables = new Array(MAX_HASH_TABLE_BITS + 1);

/**
 * Compresses `input` into a new byte array.
 *
 * The output starts with the input length as a varint, followed by the
 * result of compressing each fragment of up to `BLOCK_SIZE2` bytes in order.
 *
 * @param {Uint8Array} input - Bytes to compress.
 * @returns {Uint8Array} The bytes accumulated by the internal writer.
 */
function snappyCompress(input) {
  const out = new ByteWriter();
  const total = input.length;
  out.appendVarInt(total);
  for (let start = 0; start < total; ) {
    const size = Math.min(total - start, BLOCK_SIZE2);
    compressFragment(out, input, start, size);
    start += size;
  }
  return out.getBytes();
}
2697
/**
 * Hashes a 32-bit key by multiplying with a fixed constant and keeping the
 * bits left after an unsigned right shift.
 *
 * @param {number} key - Value to hash.
 * @param {number} hashFuncShift - Unsigned right-shift amount applied to the product.
 * @returns {number} Non-negative hash value.
 */
function hashFunc(key, hashFuncShift) {
  const MULTIPLIER = 0x1e35a7bd; // 506832829
  const product = key * MULTIPLIER;
  return product >>> hashFuncShift;
}
2700
/**
 * Reads four consecutive bytes starting at `pos` and combines them with
 * the first byte as the least significant (little-endian order).
 *
 * The top byte is shifted with `<< 24`, so the result is negative when
 * that byte is 128 or above.
 *
 * @param {Uint8Array} array - Source bytes.
 * @param {number} pos - Index of the first byte.
 * @returns {number} The combined value.
 */
function load32(array, pos) {
  const low = array[pos];
  const midLow = array[pos + 1] << 8;
  const midHigh = array[pos + 2] << 16;
  const high = array[pos + 3] << 24;
  return low + midLow + midHigh + high;
}
2703
/**
 * Tests whether the four elements starting at `pos1` equal the four
 * elements starting at `pos2`.
 *
 * @param {Uint8Array} array - Source bytes.
 * @param {number} pos1 - Start of the first run.
 * @param {number} pos2 - Start of the second run.
 * @returns {boolean} True when all four positions match.
 */
function equals32(array, pos1, pos2) {
  for (let i = 0; i < 4; i++) {
    if (array[pos1 + i] !== array[pos2 + i]) return false;
  }
  return true;
}
2706
/**
 * Writes a literal run: a length header followed by `len` raw input bytes.
 *
 * Header layout, with n = len - 1:
 * - len <= 60:  one byte, `n << 2`
 * - len < 256:  `60 << 2`, then n as one byte
 * - otherwise:  `61 << 2`, then the low and next-higher byte of n
 *
 * @param {object} writer - Writer exposing `appendUint8` and `appendBytes`.
 * @param {Uint8Array} input - Source bytes.
 * @param {number} ip - Index of the first literal byte.
 * @param {number} len - Number of literal bytes.
 */
function emitLiteral(writer, input, ip, len) {
  const n = len - 1;
  let header;
  if (len <= 60) {
    header = [n << 2];
  } else if (len < 256) {
    header = [60 << 2, n];
  } else {
    header = [61 << 2, n & 255, n >>> 8];
  }
  for (const byte of header) {
    writer.appendUint8(byte);
  }
  writer.appendBytes(input.subarray(ip, ip + len));
}
2719
/**
 * Writes a single copy operation.
 *
 * - When `len < 12` and `offset < 2048`, two bytes are written: a tag
 *   packing `len - 4` and the offset's bits above the low byte, then the
 *   offset's low byte.
 * - Otherwise three bytes are written: a tag packing `len - 1`, then the
 *   offset's low byte and `offset >>> 8`.
 *
 * @param {object} writer - Writer exposing `appendUint8`.
 * @param {number} offset - Distance back to the source of the copy.
 * @param {number} len - Number of bytes to copy.
 */
function emitCopyLessThan64(writer, offset, len) {
  const isShortForm = len < 12 && offset < 2048;
  const encoded = isShortForm
    ? [1 + (len - 4 << 2) + (offset >>> 8 << 5), offset & 255]
    : [2 + (len - 1 << 2), offset & 255, offset >>> 8];
  for (const byte of encoded) {
    writer.appendUint8(byte);
  }
}
2729
/**
 * Writes a copy of `len` bytes from `offset` back, split into pieces that
 * `emitCopyLessThan64` accepts.
 *
 * While at least 68 bytes remain, 64-byte pieces are emitted. If more than
 * 64 bytes then remain, a 60-byte piece is emitted first, so the final
 * piece is never shorter than 4 bytes for inputs of 4 or more.
 *
 * @param {object} writer - Writer passed through to `emitCopyLessThan64`.
 * @param {number} offset - Distance back to the source of the copy.
 * @param {number} len - Total number of bytes to copy.
 */
function emitCopy(writer, offset, len) {
  let remaining = len;
  for (; remaining >= 68; remaining -= 64) {
    emitCopyLessThan64(writer, offset, 64);
  }
  if (remaining > 64) {
    emitCopyLessThan64(writer, offset, 60);
    remaining -= 60;
  }
  emitCopyLessThan64(writer, offset, remaining);
}
2740
/**
 * Compresses one fragment of `input` into `writer` as a sequence of
 * literal runs and back-reference copies.
 *
 * A hash table (cached in `globalHashTables` by size exponent and zeroed
 * on every call) maps hashes of 4-byte groups to their position relative
 * to the fragment start. Fragments shorter than 15 bytes are emitted as a
 * single literal.
 *
 * @param {object} writer - Writer passed through to `emitLiteral` / `emitCopy`.
 * @param {Uint8Array} input - Source bytes.
 * @param {number} ip - Index of the first byte of the fragment.
 * @param {number} inputSize - Number of bytes in the fragment.
 */
function compressFragment(writer, input, ip, inputSize) {
  // Largest exponent with (1 << bits) <= inputSize, capped at MAX_HASH_TABLE_BITS.
  let tableBits = 1;
  while (1 << tableBits <= inputSize && tableBits <= MAX_HASH_TABLE_BITS) {
    tableBits++;
  }
  tableBits--;
  const shift = 32 - tableBits;
  globalHashTables[tableBits] ??= new Uint16Array(1 << tableBits);
  const table = globalHashTables[tableBits];
  table.fill(0);

  const fragmentStart = ip;
  const fragmentEnd = ip + inputSize;
  let literalStart = ip;
  const INPUT_MARGIN = 15;

  if (inputSize >= INPUT_MARGIN) {
    const limit = fragmentEnd - INPUT_MARGIN;
    ip++;
    let nextHash = hashFunc(load32(input, ip), shift);

    scan: for (;;) {
      // Search forward for a position whose 4 bytes match an earlier
      // position; the stride grows by one after every 32 probes.
      let skip = 32;
      let nextIp = ip;
      let candidate;
      do {
        ip = nextIp;
        const hash = nextHash;
        const stride = skip >>> 5;
        skip++;
        nextIp = ip + stride;
        if (ip > limit) break scan;
        nextHash = hashFunc(load32(input, nextIp), shift);
        candidate = fragmentStart + table[hash];
        table[hash] = ip - fragmentStart;
      } while (!equals32(input, ip, candidate));

      // Everything between the previous emit point and the match is literal.
      emitLiteral(writer, input, literalStart, ip - literalStart);

      // Emit the match, then keep emitting while the next position
      // immediately matches again.
      do {
        const matchStart = ip;
        let matched = 4;
        while (ip + matched < fragmentEnd && input[ip + matched] === input[candidate + matched]) {
          matched++;
        }
        ip += matched;
        emitCopy(writer, matchStart - candidate, matched);
        literalStart = ip;
        if (ip >= limit) break scan;
        const prevHash = hashFunc(load32(input, ip - 1), shift);
        table[prevHash] = ip - 1 - fragmentStart;
        const curHash = hashFunc(load32(input, ip), shift);
        candidate = fragmentStart + table[curHash];
        table[curHash] = ip - fragmentStart;
      } while (equals32(input, ip, candidate));

      ip++;
      nextHash = hashFunc(load32(input, ip), shift);
    }
  }

  // Flush whatever remains after the last copy as a literal.
  if (literalStart < fragmentEnd) {
    emitLiteral(writer, input, literalStart, fragmentEnd - literalStart);
  }
}
2817
+
2818
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/parquet-writer.js
2819
/**
 * Creates a writer that accumulates row groups and pending page indexes,
 * and immediately appends the 4-byte magic number to `writer`.
 *
 * @param {object} options
 * @param {object} options.writer - Destination exposing `appendUint32`.
 * @param {Array} options.schema - Schema used when writing columns.
 * @param {string} [options.codec="SNAPPY"] - Default codec for columns.
 * @param {object} [options.compressors] - Extra compressors, merged over the built-in SNAPPY entry.
 * @param {boolean} [options.statistics=true] - Stored as `this.statistics`.
 * @param {Array} [options.kvMetadata] - Stored as `this.kvMetadata`.
 */
function ParquetWriter({ writer, schema, codec = "SNAPPY", compressors, statistics = true, kvMetadata }) {
  // 0x31524150 spells "PAR1" when written least-significant byte first.
  const MAGIC = 0x31524150; // 827474256
  Object.assign(this, {
    writer,
    schema,
    codec,
    compressors: { SNAPPY: snappyCompress, ...compressors },
    statistics,
    kvMetadata,
    row_groups: [],
    num_rows: 0n,
    pendingIndexes: []
  });
  this.writer.appendUint32(MAGIC);
}
2831
/**
 * Writes `columnData` as one or more row groups.
 *
 * The row count is taken from the first column's `data.length`. For each
 * row group, every column is sliced, expanded to its leaf schema paths,
 * encoded and written; the resulting chunks are recorded on
 * `this.row_groups` and their index info on `this.pendingIndexes`.
 *
 * After each group `writer.flush?.()` is called. Once a flush returns a
 * truthy value, later groups are chained behind it as promises.
 *
 * @param {object} options
 * @param {Array<object>} options.columnData - Columns with `name`, `data` and optional
 *   `encoding`, `codec`, `columnIndex`, `offsetIndex`, `shredding`, `bloomFilter`.
 * @param {number|number[]} [options.rowGroupSize=[1000, 100000]] - Row group size(s).
 * @param {number} [options.pageSize=1048576] - Passed through to the column writer.
 * @returns {Promise|undefined} A promise when any flush was asynchronous, else `undefined`.
 * @throws {Error} If a column requests a ColumnIndex without an OffsetIndex,
 *   or if column lengths differ from the first column's.
 */
ParquetWriter.prototype.write = function({ columnData, rowGroupSize = [1e3, 1e5], pageSize = 1048576 }) {
  const totalRows = columnData[0]?.data?.length || 0;

  // Writes a single row group covering rows [firstRow, firstRow + rowCount).
  const writeRowGroup = (firstRow, rowCount) => {
    const startOffset = this.writer.offset;
    const chunks = [];
    for (let j = 0; j < columnData.length; j++) {
      const {
        name,
        data,
        encoding,
        codec = this.codec,
        columnIndex = false,
        offsetIndex = true,
        shredding,
        bloomFilter
      } = columnData[j];
      if (columnIndex && !offsetIndex) {
        throw new Error("parquet ColumnIndex cannot be present without OffsetIndex");
      }
      if (data.length !== totalRows) {
        throw new Error("parquet columns must have the same length");
      }
      const groupData = data.slice(firstRow, firstRow + rowCount);
      const columnPath = getSchemaPath(this.schema, [name]);
      const leafPaths = getLeafSchemaPaths(columnPath);
      const columnElement = columnPath.at(-1)?.element;
      // `true` means "auto"; only an explicit config object is forwarded.
      const shreddingConfig = shredding && shredding !== true ? shredding : void 0;
      const isVariant = columnElement?.logical_type?.type === "VARIANT";
      const isRequired = columnElement?.repetition_type === "REQUIRED";
      const rows = isVariant
        ? encodeVariantColumn(Array.from(groupData), shreddingConfig, { name, required: isRequired })
        : groupData;
      for (const leafPath of leafPaths) {
        const schemaPath = leafPath.map((node) => node.element);
        const column = {
          // Dotted path of element names, excluding the first (root) element.
          columnName: schemaPath.slice(1).map((s) => s.name).join("."),
          element: schemaPath[schemaPath.length - 1],
          schemaPath,
          codec,
          compressors: this.compressors,
          stats: this.statistics,
          pageSize,
          columnIndex,
          offsetIndex,
          encoding,
          bloomFilter
        };
        const pageData = encodeNestedValues(leafPath, rows);
        const result = writeColumn({ writer: this.writer, column, pageData });
        chunks.push(result.chunk);
        this.pendingIndexes.push(result);
      }
    }
    this.num_rows += BigInt(rowCount);
    this.row_groups.push({
      columns: chunks,
      total_byte_size: BigInt(this.writer.offset - startOffset),
      num_rows: BigInt(rowCount)
    });
    return this.writer.flush?.();
  };

  let chain;
  for (const group of groupIterator({ columnDataRows: totalRows, rowGroupSize })) {
    const { groupStartIndex, groupSize: rowCount } = group;
    if (chain) {
      chain = chain.then(() => writeRowGroup(groupStartIndex, rowCount));
      continue;
    }
    const flushed = writeRowGroup(groupStartIndex, rowCount);
    if (flushed) chain = Promise.resolve(flushed);
  }
  return chain;
};
2896
/**
 * Completes the file: writes pending page indexes and bloom filters, then
 * the file metadata, then the 4-byte magic number, and finally calls
 * `writer.finish()`.
 *
 * @returns {*} Whatever `writer.finish()` returns.
 */
ParquetWriter.prototype.finish = function() {
  const { writer, pendingIndexes } = this;
  writeIndexes(writer, pendingIndexes);
  writeBlooms(writer, pendingIndexes);
  writeMetadata(writer, {
    version: 2,
    created_by: "hyparquet",
    schema: this.schema,
    num_rows: this.num_rows,
    row_groups: this.row_groups,
    key_value_metadata: this.kvMetadata
  });
  // Same magic value the constructor wrote at the start of the file.
  writer.appendUint32(827474256);
  return writer.finish();
};
2913
/**
 * Returns the row group size to use for the group at index `i`.
 *
 * @param {number|number[]} rowGroupSize - A single size, or a list of sizes
 *   whose last entry is reused for every later group.
 * @param {number} i - Zero-based row group index.
 * @returns {number} The size for group `i`.
 */
function groupSize(rowGroupSize, i) {
  if (!Array.isArray(rowGroupSize)) {
    return rowGroupSize;
  }
  const index = Math.min(i, rowGroupSize.length - 1);
  return rowGroupSize[index];
}
2916
/**
 * Splits `columnDataRows` rows into consecutive row groups.
 *
 * The size of each group comes from `groupSize(rowGroupSize, groupIndex)`;
 * the final group is truncated to the rows that remain.
 *
 * @param {object} options
 * @param {number} options.columnDataRows - Total number of rows.
 * @param {number|number[]} options.rowGroupSize - Size, or per-group sizes.
 * @returns {Array<{groupStartIndex: number, groupSize: number}>} The groups, in order.
 * @throws {Error} If `rowGroupSize` is an empty array, or if a size that is
 *   needed to place rows is not a positive number.
 */
function groupIterator({ columnDataRows, rowGroupSize }) {
  if (Array.isArray(rowGroupSize) && !rowGroupSize.length) {
    throw new Error("rowGroupSize array cannot be empty");
  }
  const groups = [];
  let groupIndex = 0;
  let groupStartIndex = 0;
  while (groupStartIndex < columnDataRows) {
    const size = groupSize(rowGroupSize, groupIndex);
    // A zero size would never advance (endless loop with unbounded memory
    // growth); negative or NaN sizes would yield nonsensical groups.
    if (!(size > 0)) {
      throw new Error(`parquet rowGroupSize must be a positive number, got ${size}`);
    }
    groups.push({ groupStartIndex, groupSize: Math.min(size, columnDataRows - groupStartIndex) });
    groupStartIndex += size;
    groupIndex++;
  }
  return groups;
}
2931
/**
 * Lists every path from `schemaPath` down to a leaf node.
 *
 * A leaf is a node whose `children` is empty. Paths are returned in
 * depth-first order, visiting children in their listed order. When the
 * last node of `schemaPath` is itself a leaf, the same array is returned
 * as the only path.
 *
 * @param {Array<{children: Array}>} schemaPath - Path whose last node is the subtree root.
 * @returns {Array<Array<object>>} One path per leaf.
 */
function getLeafSchemaPaths(schemaPath) {
  const leaves = [];
  const stack = [schemaPath];
  while (stack.length) {
    const path = stack.pop();
    const { children } = path[path.length - 1];
    if (!children.length) {
      leaves.push(path);
      continue;
    }
    const childPaths = [];
    for (const child of children) {
      childPaths.push([...path, child]);
    }
    // Push in reverse so the first child is popped, and so visited, first.
    for (let i = childPaths.length - 1; i >= 0; i--) {
      stack.push(childPaths[i]);
    }
  }
  return leaves;
}
2946
+
2947
+ // ../../../node_modules/.pnpm/hyparquet-writer@0.16.1/node_modules/hyparquet-writer/src/write.js
2948
/**
 * Writes column data to `writer` using a `ParquetWriter`.
 *
 * For columns typed "VARIANT" that set `shredding`, the option is resolved
 * first: `true` is replaced by the result of `autoDetectShredding`, any
 * other value by `normalizeShreddingConfig`; a falsy result becomes
 * `undefined`. When no `schema` is given it is derived from the columns.
 *
 * @param {object} options
 * @param {object} options.writer - Destination writer.
 * @param {Array<object>} options.columnData - Columns to write.
 * @param {Array} [options.schema] - Explicit schema; mutually exclusive with column `type`s.
 * @param {string} [options.codec="SNAPPY"]
 * @param {object} [options.compressors]
 * @param {boolean} [options.statistics=true]
 * @param {number|number[]} [options.rowGroupSize=[1000, 100000]]
 * @param {Array} [options.kvMetadata]
 * @param {number} [options.pageSize=1048576]
 * @returns {*} The result of `ParquetWriter#finish`, or a promise for it when writing was asynchronous.
 * @throws {Error} If both `schema` and a column `type` are provided.
 */
function parquetWrite({
  writer,
  columnData,
  schema,
  codec = "SNAPPY",
  compressors,
  statistics = true,
  rowGroupSize = [1e3, 1e5],
  kvMetadata,
  pageSize = 1048576
}) {
  const preparedColumns = columnData.map((col) => {
    // Only VARIANT columns that set `shredding` need resolving.
    if (col.shredding === void 0 || col.type !== "VARIANT") {
      return col;
    }
    const resolved = col.shredding === true
      ? autoDetectShredding(Array.from(col.data))
      : normalizeShreddingConfig(col.shredding);
    return { ...col, shredding: resolved ? resolved : void 0 };
  });

  let effectiveSchema = schema;
  if (!effectiveSchema) {
    effectiveSchema = schemaFromColumnData({ columnData: preparedColumns });
  } else if (preparedColumns.some(({ type }) => type)) {
    throw new Error("cannot provide both schema and columnData type");
  }

  const pq = new ParquetWriter({
    writer,
    schema: effectiveSchema,
    codec,
    compressors,
    statistics,
    kvMetadata
  });
  const pendingWrite = pq.write({ columnData: preparedColumns, rowGroupSize, pageSize });
  if (pendingWrite) {
    return pendingWrite.then(() => pq.finish());
  }
  return pq.finish();
}
2991
/**
 * Writes parquet data into a fresh in-memory `ByteWriter` and returns the
 * writer's buffer.
 *
 * Any `writer` supplied in `options` is replaced by the internal one.
 *
 * @param {object} options - Options forwarded to `parquetWrite`.
 * @returns {ArrayBuffer} The result of the writer's `getBuffer()`.
 */
function parquetWriteBuffer(options) {
  const memoryWriter = new ByteWriter();
  const writeOptions = { ...options, writer: memoryWriter };
  parquetWrite(writeOptions);
  return memoryWriter.getBuffer();
}
2996
+
2997
+ // src/encode.ts
2998
// Column names written for every event row, in output order.
var COLUMNS = [
  "event_type",
  "event",
  "distinct_id",
  "anonymous_id",
  "ts",
  "message_id",
  "properties",
  "context",
  "dt",
  "received_at"
];

/**
 * Encodes event rows as an uncompressed parquet file in which every
 * column in `COLUMNS` is a non-nullable STRING.
 *
 * Cell conversion:
 * - `null` / `undefined` (including missing fields) become `""`.
 * - objects and arrays are JSON-serialized, so structured fields are
 *   preserved instead of collapsing to "[object Object]".
 * - every other value is converted with `String()`.
 *
 * @param {Array<object>} rows - Event rows keyed by column name.
 * @returns {Uint8Array} The encoded parquet bytes.
 */
function encodeParquet(rows) {
  const toCell = (value) => {
    if (value === null || value === void 0) return "";
    if (typeof value === "object") {
      // JSON.stringify can yield undefined (e.g. a toJSON returning
      // undefined); fall back to "" to keep the column non-null.
      return JSON.stringify(value) ?? "";
    }
    return String(value);
  };
  const columnData = COLUMNS.map((name) => ({
    name,
    type: "STRING",
    nullable: false,
    data: rows.map((r) => toCell(r[name]))
  }));
  const buffer = parquetWriteBuffer({ columnData, codec: "UNCOMPRESSED" });
  return new Uint8Array(buffer);
}
3020
+
3021
+ // src/plugin.ts
3022
/**
 * Creates the "starfish-events" server plugin.
 *
 * Its `interceptPush` hook handles pushes to the configured collection:
 * it parses the JSON body, stamps every event with a server-side
 * `received_at`, encodes the events as parquet, stores the bytes under the
 * resolved document key (with a ".parquet" suffix ensured) and responds
 * with the SHA-256 hash of the stored bytes. Pushes to other collections
 * proceed untouched.
 *
 * @param {object} opts
 * @param {object} opts.store - Object store; must implement `putBytes`.
 * @param {string} opts.collection - Collection name this plugin intercepts.
 * @param {string} opts.storagePath - Path template passed to `resolveDocumentKey`.
 * @returns {{name: string, interceptPush: Function}} The plugin object.
 * @throws {Error} If `store.putBytes` is missing.
 */
function createEventsServerPlugin(opts) {
  const { store, collection, storagePath } = opts;
  if (!store.putBytes) {
    throw new Error(
      "[starfish-events] the provided ObjectStore does not implement putBytes. Use S3ObjectStore or another store that supports binary writes."
    );
  }

  // Extracts `data.events` from the raw body; anything but an array yields [].
  // Throws when the body is not valid JSON.
  const readEvents = (rawBody) => {
    const candidate = JSON.parse(rawBody)?.data?.events;
    return Array.isArray(candidate) ? candidate : [];
  };

  const interceptPush = async (ctx) => {
    if (ctx.collection !== collection) {
      return { action: "proceed" };
    }

    let events;
    try {
      events = readEvents(ctx.rawBody);
    } catch {
      return {
        action: "reject",
        status: 400,
        error: "Invalid JSON body \u2014 expected { data: { events: [...] }, baseHash }"
      };
    }

    // The server timestamp overrides any client-supplied `received_at`.
    const receivedAt = (/* @__PURE__ */ new Date()).toISOString();
    const rows = events.map((e) => ({ ...e, received_at: receivedAt }));

    let parquetBytes;
    try {
      parquetBytes = encodeParquet(rows);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.error(`[starfish-events] Parquet encoding failed: ${msg}`);
      return { action: "reject", status: 500, error: "Parquet encoding failed" };
    }

    const resolvedKey = resolveDocumentKey(storagePath, ctx.params);
    const key = resolvedKey.endsWith(".parquet") ? resolvedKey : resolvedKey + ".parquet";
    await store.putBytes(key, parquetBytes, { contentType: PARQUET_MIME_TYPE });

    const digest = await getCrypto().subtle.digest("SHA-256", parquetBytes.buffer);
    const hash = bytesToHex(new Uint8Array(digest));
    console.log(
      `[starfish-events] wrote ${events.length} event(s) \u2192 ${key} (${parquetBytes.byteLength} bytes)`
    );
    return { action: "respond", status: 200, body: { hash } };
  };

  return { name: "starfish-events", interceptPush };
}
3068
+ export {
3069
+ createEventsServerPlugin
3070
+ };
3071
+ //# sourceMappingURL=index.js.map