@loaders.gl/parquet 4.2.0-alpha.5 → 4.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +200 -48
- package/dist/index.cjs.map +3 -3
- package/dist/lib/constants.js +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +4 -0
- package/dist/parquet-loader.d.ts +151 -7
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +9 -1
- package/dist/parquet-wasm-loader.d.ts +22 -3
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +2 -0
- package/dist/parquet-wasm-writer.d.ts +1 -3
- package/dist/parquet-wasm-writer.d.ts.map +1 -1
- package/dist/parquet-writer.d.ts +15 -3
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquetjs/compression.d.ts +1 -1
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +3 -1
- package/dist/parquetjs/encoder/parquet-encoder.js +14 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +7 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +13 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +8 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js +2 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +3 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +7 -0
- package/dist/parquetjs/parquet-thrift/IntType.js +2 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js +2 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js +13 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +3 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js +8 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js +3 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js +4 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +10 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +3 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js +6 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js +2 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +2 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js +2 -0
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +8 -5
- package/dist/parquetjs/schema/declare.js +4 -0
- package/dist/parquetjs/schema/schema.js +3 -0
- package/dist/parquetjs/schema/types.js +2 -0
- package/dist/parquetjs/utils/read-utils.js +1 -4
- package/dist/polyfills/buffer/buffer.js +9 -12
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
- package/package.json +15 -15
- package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
- package/src/lib/parsers/parse-parquet.ts +5 -0
- package/src/parquet-loader.ts +52 -51
- package/src/parquet-wasm-loader.ts +7 -4
- package/src/parquet-wasm-writer.ts +2 -2
- package/src/parquet-writer.ts +2 -2
- package/src/parquetjs/compression.ts +6 -2
- package/src/parquetjs/parser/parquet-reader.ts +2 -1
- package/src/parquetjs/schema/types.ts +3 -1
- package/src/polyfills/buffer/buffer.ts +0 -3
package/dist/index.cjs
CHANGED
|
@@ -5,6 +5,7 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
|
|
8
9
|
var __export = (target, all) => {
|
|
9
10
|
for (var name in all)
|
|
10
11
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -26,6 +27,10 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
26
27
|
mod
|
|
27
28
|
));
|
|
28
29
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
30
|
+
var __publicField = (obj, key, value) => {
|
|
31
|
+
__defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
|
|
32
|
+
return value;
|
|
33
|
+
};
|
|
29
34
|
|
|
30
35
|
// dist/index.js
|
|
31
36
|
var dist_exports = {};
|
|
@@ -57,21 +62,30 @@ var import_base64_js = __toESM(require("base64-js"), 1);
|
|
|
57
62
|
var import_ieee754 = __toESM(require("ieee754"), 1);
|
|
58
63
|
var kMaxLength = 2147483647;
|
|
59
64
|
var INSPECT_MAX_BYTES = 50;
|
|
60
|
-
var
|
|
65
|
+
var _Buffer = class extends Uint8Array {
|
|
66
|
+
// not used by this implementation
|
|
61
67
|
// length: number; inherited
|
|
62
68
|
get parent() {
|
|
63
|
-
if (!
|
|
69
|
+
if (!_Buffer.isBuffer(this))
|
|
64
70
|
return void 0;
|
|
65
71
|
return this.buffer;
|
|
66
72
|
}
|
|
67
73
|
get offset() {
|
|
68
|
-
if (!
|
|
74
|
+
if (!_Buffer.isBuffer(this))
|
|
69
75
|
return void 0;
|
|
70
76
|
return this.byteOffset;
|
|
71
77
|
}
|
|
78
|
+
/** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
|
|
79
|
+
* to detect a Buffer instance. It's not possible to use `instanceof Buffer`
|
|
80
|
+
* reliably in a browserify context because there could be multiple different
|
|
81
|
+
* copies of the 'buffer' package in use. This method works even for Buffer
|
|
82
|
+
* instances that were created from another copy of the `buffer` package.
|
|
83
|
+
* @see: https://github.com/feross/buffer/issues/154
|
|
84
|
+
*/
|
|
85
|
+
_isBuffer = true;
|
|
72
86
|
constructor(arg, encodingOrOffset, length) {
|
|
73
87
|
if (typeof arg !== "number") {
|
|
74
|
-
return
|
|
88
|
+
return _Buffer.from(arg, encodingOrOffset, length);
|
|
75
89
|
}
|
|
76
90
|
const size = arg;
|
|
77
91
|
if (size > kMaxLength) {
|
|
@@ -81,7 +95,6 @@ var Buffer2 = class extends Uint8Array {
|
|
|
81
95
|
throw new TypeError('The "string" argument must be of type string. Received type number');
|
|
82
96
|
}
|
|
83
97
|
super(size < 0 ? 0 : checked(size) | 0);
|
|
84
|
-
this._isBuffer = true;
|
|
85
98
|
return;
|
|
86
99
|
}
|
|
87
100
|
static from(value, encodingOrOffset, length) {
|
|
@@ -105,13 +118,13 @@ var Buffer2 = class extends Uint8Array {
|
|
|
105
118
|
}
|
|
106
119
|
const valueOf = value.valueOf && value.valueOf();
|
|
107
120
|
if (valueOf != null && valueOf !== value) {
|
|
108
|
-
return
|
|
121
|
+
return _Buffer.from(valueOf, encodingOrOffset, length);
|
|
109
122
|
}
|
|
110
123
|
const b = fromObject(value);
|
|
111
124
|
if (b)
|
|
112
125
|
return b;
|
|
113
126
|
if (typeof Symbol !== "undefined" && Symbol.toPrimitive != null && typeof value[Symbol.toPrimitive] === "function") {
|
|
114
|
-
return
|
|
127
|
+
return _Buffer.from(value[Symbol.toPrimitive]("string"), encodingOrOffset, length);
|
|
115
128
|
}
|
|
116
129
|
throw new TypeError(`${"The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type "}${typeof value}`);
|
|
117
130
|
}
|
|
@@ -121,17 +134,17 @@ var Buffer2 = class extends Uint8Array {
|
|
|
121
134
|
* @param obj object to test.
|
|
122
135
|
*/
|
|
123
136
|
static isBuffer(b) {
|
|
124
|
-
return b != null && b._isBuffer === true && b !==
|
|
137
|
+
return b != null && b._isBuffer === true && b !== _Buffer.prototype;
|
|
125
138
|
}
|
|
126
139
|
/**
|
|
127
140
|
* The same as buf1.compare(buf2).
|
|
128
141
|
*/
|
|
129
142
|
static compare(a, b) {
|
|
130
|
-
if (!
|
|
131
|
-
a =
|
|
132
|
-
if (!
|
|
133
|
-
b =
|
|
134
|
-
if (!
|
|
143
|
+
if (!_Buffer.isBuffer(a) && isInstance(a, Uint8Array))
|
|
144
|
+
a = _Buffer.from(a, a.offset, a.byteLength);
|
|
145
|
+
if (!_Buffer.isBuffer(b) && isInstance(b, Uint8Array))
|
|
146
|
+
b = _Buffer.from(b, b.offset, b.byteLength);
|
|
147
|
+
if (!_Buffer.isBuffer(a) || !_Buffer.isBuffer(b)) {
|
|
135
148
|
throw new TypeError('The "buf1", "buf2" arguments must be one of type Buffer or Uint8Array');
|
|
136
149
|
}
|
|
137
150
|
if (a === b)
|
|
@@ -191,7 +204,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
191
204
|
throw new TypeError('"list" argument must be an Array of Buffers');
|
|
192
205
|
}
|
|
193
206
|
if (list.length === 0) {
|
|
194
|
-
return
|
|
207
|
+
return _Buffer.alloc(0);
|
|
195
208
|
}
|
|
196
209
|
let i;
|
|
197
210
|
if (length === void 0) {
|
|
@@ -200,20 +213,20 @@ var Buffer2 = class extends Uint8Array {
|
|
|
200
213
|
length += list[i].length;
|
|
201
214
|
}
|
|
202
215
|
}
|
|
203
|
-
const buffer =
|
|
216
|
+
const buffer = _Buffer.allocUnsafe(length);
|
|
204
217
|
let pos = 0;
|
|
205
218
|
for (i = 0; i < list.length; ++i) {
|
|
206
219
|
let buf = list[i];
|
|
207
220
|
if (isInstance(buf, Uint8Array)) {
|
|
208
221
|
if (pos + buf.length > buffer.length) {
|
|
209
|
-
if (!
|
|
210
|
-
buf =
|
|
222
|
+
if (!_Buffer.isBuffer(buf)) {
|
|
223
|
+
buf = _Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength);
|
|
211
224
|
}
|
|
212
225
|
buf.copy(buffer, pos);
|
|
213
226
|
} else {
|
|
214
227
|
Uint8Array.prototype.set.call(buffer, buf, pos);
|
|
215
228
|
}
|
|
216
|
-
} else if (!
|
|
229
|
+
} else if (!_Buffer.isBuffer(buf)) {
|
|
217
230
|
throw new TypeError('"list" argument must be an Array of Buffers');
|
|
218
231
|
} else {
|
|
219
232
|
buf.copy(buffer, pos);
|
|
@@ -241,7 +254,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
241
254
|
*/
|
|
242
255
|
static allocUnsafe(size) {
|
|
243
256
|
assertSize(size);
|
|
244
|
-
return new
|
|
257
|
+
return new _Buffer(size);
|
|
245
258
|
}
|
|
246
259
|
/**
|
|
247
260
|
* Allocates a new non-pooled buffer of {size} octets, leaving memory not initialized, so the contents
|
|
@@ -707,7 +720,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
707
720
|
}
|
|
708
721
|
// copy(targetBuffer, targetStart=0, sourceStart=0, sourceEnd=buffer.length)
|
|
709
722
|
copy(target, targetStart, start, end) {
|
|
710
|
-
if (!
|
|
723
|
+
if (!_Buffer.isBuffer(target))
|
|
711
724
|
throw new TypeError("argument should be a Buffer");
|
|
712
725
|
if (!start)
|
|
713
726
|
start = 0;
|
|
@@ -760,7 +773,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
760
773
|
if (encoding !== void 0 && typeof encoding !== "string") {
|
|
761
774
|
throw new TypeError("encoding must be a string");
|
|
762
775
|
}
|
|
763
|
-
if (typeof encoding === "string" && !
|
|
776
|
+
if (typeof encoding === "string" && !_Buffer.isEncoding(encoding)) {
|
|
764
777
|
throw new TypeError(`Unknown encoding: ${encoding}`);
|
|
765
778
|
}
|
|
766
779
|
if (val.length === 1) {
|
|
@@ -790,7 +803,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
790
803
|
this[i] = val;
|
|
791
804
|
}
|
|
792
805
|
} else {
|
|
793
|
-
const bytes =
|
|
806
|
+
const bytes = _Buffer.isBuffer(val) ? val : _Buffer.from(val, encoding);
|
|
794
807
|
const len = bytes.length;
|
|
795
808
|
if (len === 0) {
|
|
796
809
|
throw new TypeError(`The value "${val}" is invalid for argument "value"`);
|
|
@@ -861,17 +874,17 @@ var Buffer2 = class extends Uint8Array {
|
|
|
861
874
|
// }
|
|
862
875
|
// }
|
|
863
876
|
equals(b) {
|
|
864
|
-
if (!
|
|
877
|
+
if (!_Buffer.isBuffer(b))
|
|
865
878
|
throw new TypeError("Argument must be a Buffer");
|
|
866
879
|
if (this === b)
|
|
867
880
|
return true;
|
|
868
|
-
return
|
|
881
|
+
return _Buffer.compare(this, b) === 0;
|
|
869
882
|
}
|
|
870
883
|
compare(target, start, end, thisStart, thisEnd) {
|
|
871
|
-
if (!
|
|
872
|
-
target =
|
|
884
|
+
if (!_Buffer.isBuffer(target) && isInstance(target, Uint8Array)) {
|
|
885
|
+
target = _Buffer.from(target, target.offset, target.byteLength);
|
|
873
886
|
}
|
|
874
|
-
if (!
|
|
887
|
+
if (!_Buffer.isBuffer(target)) {
|
|
875
888
|
throw new TypeError(`${'The "target" argument must be one of type Buffer or Uint8Array. Received type '}${typeof target}`);
|
|
876
889
|
}
|
|
877
890
|
if (start === void 0) {
|
|
@@ -949,7 +962,7 @@ var Buffer2 = class extends Uint8Array {
|
|
|
949
962
|
if (end < start)
|
|
950
963
|
end = start;
|
|
951
964
|
const newBuf = this.subarray(start, end);
|
|
952
|
-
Object.setPrototypeOf(newBuf,
|
|
965
|
+
Object.setPrototypeOf(newBuf, _Buffer.prototype);
|
|
953
966
|
return newBuf;
|
|
954
967
|
}
|
|
955
968
|
// Typo support?
|
|
@@ -1024,7 +1037,8 @@ var Buffer2 = class extends Uint8Array {
|
|
|
1024
1037
|
}
|
|
1025
1038
|
}
|
|
1026
1039
|
};
|
|
1027
|
-
Buffer2
|
|
1040
|
+
var Buffer2 = _Buffer;
|
|
1041
|
+
__publicField(Buffer2, "poolSize", 8192);
|
|
1028
1042
|
function checkInt(buf, value, offset, ext, max, min) {
|
|
1029
1043
|
if (!Buffer2.isBuffer(buf))
|
|
1030
1044
|
throw new TypeError('"buffer" argument must be a Buffer instance');
|
|
@@ -1729,7 +1743,7 @@ globalThis.process.env = globalThis.process.env || {};
|
|
|
1729
1743
|
var Buffer3 = installBufferPolyfill();
|
|
1730
1744
|
|
|
1731
1745
|
// dist/parquet-loader.js
|
|
1732
|
-
var
|
|
1746
|
+
var import_loader_utils2 = require("@loaders.gl/loader-utils");
|
|
1733
1747
|
|
|
1734
1748
|
// dist/parquetjs/codecs/plain.js
|
|
1735
1749
|
var import_int53 = __toESM(require("int53"), 1);
|
|
@@ -2088,6 +2102,7 @@ var PARQUET_CODECS = {
|
|
|
2088
2102
|
|
|
2089
2103
|
// dist/parquetjs/compression.js
|
|
2090
2104
|
var import_compression = require("@loaders.gl/compression");
|
|
2105
|
+
var import_loader_utils = require("@loaders.gl/loader-utils");
|
|
2091
2106
|
var import_lz4js = __toESM(require("lz4js"), 1);
|
|
2092
2107
|
function toBuffer(arrayBuffer) {
|
|
2093
2108
|
return Buffer.from(arrayBuffer);
|
|
@@ -2124,8 +2139,9 @@ var PARQUET_COMPRESSION_METHODS = {
|
|
|
2124
2139
|
ZSTD: new import_compression.ZstdCompression({ modules })
|
|
2125
2140
|
};
|
|
2126
2141
|
async function preloadCompressions(options) {
|
|
2142
|
+
(0, import_loader_utils.registerJSModules)(options == null ? void 0 : options.modules);
|
|
2127
2143
|
const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
|
|
2128
|
-
return await Promise.all(compressions.map((compression) => compression.preload()));
|
|
2144
|
+
return await Promise.all(compressions.map((compression) => compression.preload(options == null ? void 0 : options.modules)));
|
|
2129
2145
|
}
|
|
2130
2146
|
async function deflate(method, value) {
|
|
2131
2147
|
const compression = PARQUET_COMPRESSION_METHODS[method];
|
|
@@ -2773,6 +2789,9 @@ function materializeColumnAsColumnarArray(schema, columnData, rowCount, key, col
|
|
|
2773
2789
|
|
|
2774
2790
|
// dist/parquetjs/schema/schema.js
|
|
2775
2791
|
var ParquetSchema = class {
|
|
2792
|
+
schema;
|
|
2793
|
+
fields;
|
|
2794
|
+
fieldList;
|
|
2776
2795
|
/**
|
|
2777
2796
|
* Create a new schema from a JSON schema definition
|
|
2778
2797
|
*/
|
|
@@ -3004,6 +3023,12 @@ var PageType;
|
|
|
3004
3023
|
var import_node_int64 = __toESM(require("node-int64"), 1);
|
|
3005
3024
|
var thrift = __toESM(require("thrift"), 1);
|
|
3006
3025
|
var Statistics = class {
|
|
3026
|
+
max;
|
|
3027
|
+
min;
|
|
3028
|
+
null_count;
|
|
3029
|
+
distinct_count;
|
|
3030
|
+
max_value;
|
|
3031
|
+
min_value;
|
|
3007
3032
|
constructor(args) {
|
|
3008
3033
|
if (args != null && args.max != null) {
|
|
3009
3034
|
this.max = args.max;
|
|
@@ -3365,6 +3390,8 @@ var NullType = class {
|
|
|
3365
3390
|
// dist/parquetjs/parquet-thrift/DecimalType.js
|
|
3366
3391
|
var thrift9 = __toESM(require("thrift"), 1);
|
|
3367
3392
|
var DecimalType = class {
|
|
3393
|
+
scale;
|
|
3394
|
+
precision;
|
|
3368
3395
|
constructor(args) {
|
|
3369
3396
|
if (args != null && args.scale != null) {
|
|
3370
3397
|
this.scale = args.scale;
|
|
@@ -3505,6 +3532,8 @@ var thrift13 = __toESM(require("thrift"), 1);
|
|
|
3505
3532
|
// dist/parquetjs/parquet-thrift/TimeUnit.js
|
|
3506
3533
|
var thrift12 = __toESM(require("thrift"), 1);
|
|
3507
3534
|
var TimeUnit = class {
|
|
3535
|
+
MILLIS;
|
|
3536
|
+
MICROS;
|
|
3508
3537
|
constructor(args) {
|
|
3509
3538
|
let _fieldsSet = 0;
|
|
3510
3539
|
if (args != null) {
|
|
@@ -3597,6 +3626,8 @@ var TimeUnit = class {
|
|
|
3597
3626
|
|
|
3598
3627
|
// dist/parquetjs/parquet-thrift/TimestampType.js
|
|
3599
3628
|
var TimestampType = class {
|
|
3629
|
+
isAdjustedToUTC;
|
|
3630
|
+
unit;
|
|
3600
3631
|
constructor(args) {
|
|
3601
3632
|
if (args != null && args.isAdjustedToUTC != null) {
|
|
3602
3633
|
this.isAdjustedToUTC = args.isAdjustedToUTC;
|
|
@@ -3670,6 +3701,8 @@ var TimestampType = class {
|
|
|
3670
3701
|
// dist/parquetjs/parquet-thrift/TimeType.js
|
|
3671
3702
|
var thrift14 = __toESM(require("thrift"), 1);
|
|
3672
3703
|
var TimeType = class {
|
|
3704
|
+
isAdjustedToUTC;
|
|
3705
|
+
unit;
|
|
3673
3706
|
constructor(args) {
|
|
3674
3707
|
if (args != null && args.isAdjustedToUTC != null) {
|
|
3675
3708
|
this.isAdjustedToUTC = args.isAdjustedToUTC;
|
|
@@ -3743,6 +3776,8 @@ var TimeType = class {
|
|
|
3743
3776
|
// dist/parquetjs/parquet-thrift/IntType.js
|
|
3744
3777
|
var thrift15 = __toESM(require("thrift"), 1);
|
|
3745
3778
|
var IntType = class {
|
|
3779
|
+
bitWidth;
|
|
3780
|
+
isSigned;
|
|
3746
3781
|
constructor(args) {
|
|
3747
3782
|
if (args != null && args.bitWidth != null) {
|
|
3748
3783
|
this.bitWidth = args.bitWidth;
|
|
@@ -3883,6 +3918,19 @@ var thrift19 = __toESM(require("thrift"), 1);
|
|
|
3883
3918
|
// dist/parquetjs/parquet-thrift/LogicalType.js
|
|
3884
3919
|
var thrift18 = __toESM(require("thrift"), 1);
|
|
3885
3920
|
var LogicalType = class {
|
|
3921
|
+
STRING;
|
|
3922
|
+
MAP;
|
|
3923
|
+
LIST;
|
|
3924
|
+
ENUM;
|
|
3925
|
+
DECIMAL;
|
|
3926
|
+
DATE;
|
|
3927
|
+
TIME;
|
|
3928
|
+
TIMESTAMP;
|
|
3929
|
+
INTEGER;
|
|
3930
|
+
UNKNOWN;
|
|
3931
|
+
JSON;
|
|
3932
|
+
BSON;
|
|
3933
|
+
UUID;
|
|
3886
3934
|
constructor(args) {
|
|
3887
3935
|
let _fieldsSet = 0;
|
|
3888
3936
|
if (args != null) {
|
|
@@ -4206,6 +4254,16 @@ var LogicalType = class {
|
|
|
4206
4254
|
|
|
4207
4255
|
// dist/parquetjs/parquet-thrift/SchemaElement.js
|
|
4208
4256
|
var SchemaElement = class {
|
|
4257
|
+
type;
|
|
4258
|
+
type_length;
|
|
4259
|
+
repetition_type;
|
|
4260
|
+
name;
|
|
4261
|
+
num_children;
|
|
4262
|
+
converted_type;
|
|
4263
|
+
scale;
|
|
4264
|
+
precision;
|
|
4265
|
+
field_id;
|
|
4266
|
+
logicalType;
|
|
4209
4267
|
constructor(args) {
|
|
4210
4268
|
if (args != null && args.type != null) {
|
|
4211
4269
|
this.type = args.type;
|
|
@@ -4405,6 +4463,11 @@ var SchemaElement = class {
|
|
|
4405
4463
|
// dist/parquetjs/parquet-thrift/DataPageHeader.js
|
|
4406
4464
|
var thrift20 = __toESM(require("thrift"), 1);
|
|
4407
4465
|
var DataPageHeader = class {
|
|
4466
|
+
num_values;
|
|
4467
|
+
encoding;
|
|
4468
|
+
definition_level_encoding;
|
|
4469
|
+
repetition_level_encoding;
|
|
4470
|
+
statistics;
|
|
4408
4471
|
constructor(args) {
|
|
4409
4472
|
if (args != null && args.num_values != null) {
|
|
4410
4473
|
this.num_values = args.num_values;
|
|
@@ -4562,6 +4625,9 @@ var IndexPageHeader = class {
|
|
|
4562
4625
|
// dist/parquetjs/parquet-thrift/DictionaryPageHeader.js
|
|
4563
4626
|
var thrift22 = __toESM(require("thrift"), 1);
|
|
4564
4627
|
var DictionaryPageHeader = class {
|
|
4628
|
+
num_values;
|
|
4629
|
+
encoding;
|
|
4630
|
+
is_sorted;
|
|
4565
4631
|
constructor(args) {
|
|
4566
4632
|
if (args != null && args.num_values != null) {
|
|
4567
4633
|
this.num_values = args.num_values;
|
|
@@ -4651,8 +4717,15 @@ var DictionaryPageHeader = class {
|
|
|
4651
4717
|
// dist/parquetjs/parquet-thrift/DataPageHeaderV2.js
|
|
4652
4718
|
var thrift23 = __toESM(require("thrift"), 1);
|
|
4653
4719
|
var DataPageHeaderV2 = class {
|
|
4720
|
+
num_values;
|
|
4721
|
+
num_nulls;
|
|
4722
|
+
num_rows;
|
|
4723
|
+
encoding;
|
|
4724
|
+
definition_levels_byte_length;
|
|
4725
|
+
repetition_levels_byte_length;
|
|
4726
|
+
is_compressed = true;
|
|
4727
|
+
statistics;
|
|
4654
4728
|
constructor(args) {
|
|
4655
|
-
this.is_compressed = true;
|
|
4656
4729
|
if (args != null && args.num_values != null) {
|
|
4657
4730
|
this.num_values = args.num_values;
|
|
4658
4731
|
} else {
|
|
@@ -4829,6 +4902,14 @@ var DataPageHeaderV2 = class {
|
|
|
4829
4902
|
// dist/parquetjs/parquet-thrift/PageHeader.js
|
|
4830
4903
|
var thrift24 = __toESM(require("thrift"), 1);
|
|
4831
4904
|
var PageHeader = class {
|
|
4905
|
+
type;
|
|
4906
|
+
uncompressed_page_size;
|
|
4907
|
+
compressed_page_size;
|
|
4908
|
+
crc;
|
|
4909
|
+
data_page_header;
|
|
4910
|
+
index_page_header;
|
|
4911
|
+
dictionary_page_header;
|
|
4912
|
+
data_page_header_v2;
|
|
4832
4913
|
constructor(args) {
|
|
4833
4914
|
if (args != null && args.type != null) {
|
|
4834
4915
|
this.type = args.type;
|
|
@@ -5000,6 +5081,8 @@ var PageHeader = class {
|
|
|
5000
5081
|
// dist/parquetjs/parquet-thrift/KeyValue.js
|
|
5001
5082
|
var thrift25 = __toESM(require("thrift"), 1);
|
|
5002
5083
|
var KeyValue = class {
|
|
5084
|
+
key;
|
|
5085
|
+
value;
|
|
5003
5086
|
constructor(args) {
|
|
5004
5087
|
if (args != null && args.key != null) {
|
|
5005
5088
|
this.key = args.key;
|
|
@@ -5071,6 +5154,9 @@ var KeyValue = class {
|
|
|
5071
5154
|
// dist/parquetjs/parquet-thrift/SortingColumn.js
|
|
5072
5155
|
var thrift26 = __toESM(require("thrift"), 1);
|
|
5073
5156
|
var SortingColumn = class {
|
|
5157
|
+
column_idx;
|
|
5158
|
+
descending;
|
|
5159
|
+
nulls_first;
|
|
5074
5160
|
constructor(args) {
|
|
5075
5161
|
if (args != null && args.column_idx != null) {
|
|
5076
5162
|
this.column_idx = args.column_idx;
|
|
@@ -5162,6 +5248,9 @@ var SortingColumn = class {
|
|
|
5162
5248
|
// dist/parquetjs/parquet-thrift/PageEncodingStats.js
|
|
5163
5249
|
var thrift27 = __toESM(require("thrift"), 1);
|
|
5164
5250
|
var PageEncodingStats = class {
|
|
5251
|
+
page_type;
|
|
5252
|
+
encoding;
|
|
5253
|
+
count;
|
|
5165
5254
|
constructor(args) {
|
|
5166
5255
|
if (args != null && args.page_type != null) {
|
|
5167
5256
|
this.page_type = args.page_type;
|
|
@@ -5254,6 +5343,19 @@ var PageEncodingStats = class {
|
|
|
5254
5343
|
var import_node_int642 = __toESM(require("node-int64"), 1);
|
|
5255
5344
|
var thrift28 = __toESM(require("thrift"), 1);
|
|
5256
5345
|
var ColumnMetaData = class {
|
|
5346
|
+
type;
|
|
5347
|
+
encodings;
|
|
5348
|
+
path_in_schema;
|
|
5349
|
+
codec;
|
|
5350
|
+
num_values;
|
|
5351
|
+
total_uncompressed_size;
|
|
5352
|
+
total_compressed_size;
|
|
5353
|
+
key_value_metadata;
|
|
5354
|
+
data_page_offset;
|
|
5355
|
+
index_page_offset;
|
|
5356
|
+
dictionary_page_offset;
|
|
5357
|
+
statistics;
|
|
5358
|
+
encoding_stats;
|
|
5257
5359
|
constructor(args) {
|
|
5258
5360
|
if (args != null && args.type != null) {
|
|
5259
5361
|
this.type = args.type;
|
|
@@ -5584,6 +5686,13 @@ var ColumnMetaData = class {
|
|
|
5584
5686
|
var import_node_int643 = __toESM(require("node-int64"), 1);
|
|
5585
5687
|
var thrift29 = __toESM(require("thrift"), 1);
|
|
5586
5688
|
var ColumnChunk = class {
|
|
5689
|
+
file_path;
|
|
5690
|
+
file_offset;
|
|
5691
|
+
meta_data;
|
|
5692
|
+
offset_index_offset;
|
|
5693
|
+
offset_index_length;
|
|
5694
|
+
column_index_offset;
|
|
5695
|
+
column_index_length;
|
|
5587
5696
|
constructor(args) {
|
|
5588
5697
|
if (args != null && args.file_path != null) {
|
|
5589
5698
|
this.file_path = args.file_path;
|
|
@@ -5748,6 +5857,10 @@ var ColumnChunk = class {
|
|
|
5748
5857
|
var import_node_int644 = __toESM(require("node-int64"), 1);
|
|
5749
5858
|
var thrift30 = __toESM(require("thrift"), 1);
|
|
5750
5859
|
var RowGroup = class {
|
|
5860
|
+
columns;
|
|
5861
|
+
total_byte_size;
|
|
5862
|
+
num_rows;
|
|
5863
|
+
sorting_columns;
|
|
5751
5864
|
constructor(args) {
|
|
5752
5865
|
if (args != null && args.columns != null) {
|
|
5753
5866
|
this.columns = args.columns;
|
|
@@ -5921,6 +6034,7 @@ var thrift33 = __toESM(require("thrift"), 1);
|
|
|
5921
6034
|
// dist/parquetjs/parquet-thrift/ColumnOrder.js
|
|
5922
6035
|
var thrift32 = __toESM(require("thrift"), 1);
|
|
5923
6036
|
var ColumnOrder = class {
|
|
6037
|
+
TYPE_ORDER;
|
|
5924
6038
|
constructor(args) {
|
|
5925
6039
|
let _fieldsSet = 0;
|
|
5926
6040
|
if (args != null) {
|
|
@@ -5992,6 +6106,13 @@ var ColumnOrder = class {
|
|
|
5992
6106
|
|
|
5993
6107
|
// dist/parquetjs/parquet-thrift/FileMetaData.js
|
|
5994
6108
|
var FileMetaData = class {
|
|
6109
|
+
version;
|
|
6110
|
+
schema;
|
|
6111
|
+
num_rows;
|
|
6112
|
+
row_groups;
|
|
6113
|
+
key_value_metadata;
|
|
6114
|
+
created_by;
|
|
6115
|
+
column_orders;
|
|
5995
6116
|
constructor(args = null) {
|
|
5996
6117
|
if (args != null && args.version != null) {
|
|
5997
6118
|
this.version = args.version;
|
|
@@ -6195,7 +6316,7 @@ var FileMetaData = class {
|
|
|
6195
6316
|
};
|
|
6196
6317
|
|
|
6197
6318
|
// dist/lib/constants.js
|
|
6198
|
-
var VERSION = true ? "4.2.0-alpha.
|
|
6319
|
+
var VERSION = true ? "4.2.0-alpha.6" : "latest";
|
|
6199
6320
|
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.0-beta.1/esm/arrow1_bg.wasm";
|
|
6200
6321
|
var PARQUET_MAGIC = "PAR1";
|
|
6201
6322
|
var PARQUET_MAGIC_ENCRYPTED = "PARE";
|
|
@@ -6204,10 +6325,7 @@ var PARQUET_RDLVL_ENCODING = "RLE";
|
|
|
6204
6325
|
|
|
6205
6326
|
// dist/parquetjs/utils/read-utils.js
|
|
6206
6327
|
var UFramedTransport = class extends import_thrift.TFramedTransport {
|
|
6207
|
-
|
|
6208
|
-
super(...arguments);
|
|
6209
|
-
this.readPos = 0;
|
|
6210
|
-
}
|
|
6328
|
+
readPos = 0;
|
|
6211
6329
|
};
|
|
6212
6330
|
function serializeThrift(obj) {
|
|
6213
6331
|
const output = [];
|
|
@@ -6547,11 +6665,13 @@ function preserveBinary(d) {
|
|
|
6547
6665
|
}
|
|
6548
6666
|
|
|
6549
6667
|
// dist/parquetjs/parser/parquet-reader.js
|
|
6550
|
-
var
|
|
6668
|
+
var _ParquetReader = class {
|
|
6669
|
+
props;
|
|
6670
|
+
file;
|
|
6671
|
+
metadata = null;
|
|
6551
6672
|
constructor(file, props) {
|
|
6552
|
-
this.metadata = null;
|
|
6553
6673
|
this.file = file;
|
|
6554
|
-
this.props = { ...
|
|
6674
|
+
this.props = { ..._ParquetReader.defaultProps, ...props };
|
|
6555
6675
|
}
|
|
6556
6676
|
close() {
|
|
6557
6677
|
this.file.close();
|
|
@@ -6723,10 +6843,12 @@ var ParquetReader = class {
|
|
|
6723
6843
|
return decodedPage.dictionary;
|
|
6724
6844
|
}
|
|
6725
6845
|
};
|
|
6726
|
-
ParquetReader
|
|
6727
|
-
|
|
6846
|
+
var ParquetReader = _ParquetReader;
|
|
6847
|
+
__publicField(ParquetReader, "defaultProps", {
|
|
6848
|
+
// max ArrayBuffer size in js is 2Gb
|
|
6849
|
+
defaultDictionarySize: 2147483648,
|
|
6728
6850
|
preserveBinary: false
|
|
6729
|
-
};
|
|
6851
|
+
});
|
|
6730
6852
|
|
|
6731
6853
|
// dist/lib/arrow/convert-schema-from-parquet.js
|
|
6732
6854
|
var PARQUET_TYPE_MAPPING = {
|
|
@@ -6825,6 +6947,7 @@ async function getSchemaFromParquetReader(reader) {
|
|
|
6825
6947
|
async function parseParquetFile(file, options) {
|
|
6826
6948
|
var _a, _b;
|
|
6827
6949
|
installBufferPolyfill();
|
|
6950
|
+
await preloadCompressions(options);
|
|
6828
6951
|
const reader = new ParquetReader(file, {
|
|
6829
6952
|
preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
|
|
6830
6953
|
});
|
|
@@ -6846,6 +6969,8 @@ async function parseParquetFile(file, options) {
|
|
|
6846
6969
|
}
|
|
6847
6970
|
async function* parseParquetFileInBatches(file, options) {
|
|
6848
6971
|
var _a, _b;
|
|
6972
|
+
installBufferPolyfill();
|
|
6973
|
+
await preloadCompressions(options);
|
|
6849
6974
|
const reader = new ParquetReader(file, {
|
|
6850
6975
|
preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
|
|
6851
6976
|
});
|
|
@@ -6937,6 +7062,7 @@ function convertBatch(objectRowBatch, shape) {
|
|
|
6937
7062
|
// dist/lib/parsers/parse-parquet-to-columns.js
|
|
6938
7063
|
async function parseParquetFileInColumns(file, options) {
|
|
6939
7064
|
installBufferPolyfill();
|
|
7065
|
+
await preloadCompressions(options);
|
|
6940
7066
|
for await (const batch of parseParquetFileInColumnarBatches(file, options)) {
|
|
6941
7067
|
return {
|
|
6942
7068
|
shape: "columnar-table",
|
|
@@ -6947,6 +7073,8 @@ async function parseParquetFileInColumns(file, options) {
|
|
|
6947
7073
|
throw new Error("empty table");
|
|
6948
7074
|
}
|
|
6949
7075
|
async function* parseParquetFileInColumnarBatches(file, options) {
|
|
7076
|
+
installBufferPolyfill();
|
|
7077
|
+
await preloadCompressions(options);
|
|
6950
7078
|
const reader = new ParquetReader(file);
|
|
6951
7079
|
const schema = await getSchemaFromParquetReader(reader);
|
|
6952
7080
|
const parquetSchema = await reader.getSchema();
|
|
@@ -6967,8 +7095,10 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
|
|
|
6967
7095
|
}
|
|
6968
7096
|
|
|
6969
7097
|
// dist/parquet-loader.js
|
|
6970
|
-
var VERSION2 = true ? "4.2.0-alpha.
|
|
7098
|
+
var VERSION2 = true ? "4.2.0-alpha.6" : "latest";
|
|
6971
7099
|
var ParquetWorkerLoader = {
|
|
7100
|
+
dataType: null,
|
|
7101
|
+
batchType: null,
|
|
6972
7102
|
name: "Apache Parquet",
|
|
6973
7103
|
id: "parquet",
|
|
6974
7104
|
module: "parquet",
|
|
@@ -6991,12 +7121,16 @@ var ParquetWorkerLoader = {
|
|
|
6991
7121
|
};
|
|
6992
7122
|
var ParquetLoader = {
|
|
6993
7123
|
...ParquetWorkerLoader,
|
|
6994
|
-
|
|
7124
|
+
dataType: null,
|
|
7125
|
+
batchType: null,
|
|
7126
|
+
parse: (arrayBuffer, options) => parseParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options),
|
|
6995
7127
|
parseFile: parseParquetFile,
|
|
6996
7128
|
parseFileInBatches: parseParquetFileInBatches
|
|
6997
7129
|
};
|
|
6998
7130
|
ParquetLoader.Buffer = Buffer;
|
|
6999
7131
|
var GeoParquetWorkerLoader = {
|
|
7132
|
+
dataType: null,
|
|
7133
|
+
batchType: null,
|
|
7000
7134
|
name: "Apache Parquet",
|
|
7001
7135
|
id: "parquet",
|
|
7002
7136
|
module: "parquet",
|
|
@@ -7020,12 +7154,14 @@ var GeoParquetWorkerLoader = {
|
|
|
7020
7154
|
var GeoParquetLoader = {
|
|
7021
7155
|
...GeoParquetWorkerLoader,
|
|
7022
7156
|
parse(arrayBuffer, options) {
|
|
7023
|
-
return parseGeoParquetFile(new
|
|
7157
|
+
return parseGeoParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options);
|
|
7024
7158
|
},
|
|
7025
7159
|
parseFile: parseGeoParquetFile,
|
|
7026
7160
|
parseFileInBatches: parseGeoParquetFileInBatches
|
|
7027
7161
|
};
|
|
7028
7162
|
var ParquetColumnarWorkerLoader = {
|
|
7163
|
+
dataType: null,
|
|
7164
|
+
batchType: null,
|
|
7029
7165
|
name: "Apache Parquet",
|
|
7030
7166
|
id: "parquet",
|
|
7031
7167
|
module: "parquet",
|
|
@@ -7041,14 +7177,14 @@ var ParquetColumnarWorkerLoader = {
|
|
|
7041
7177
|
var ParquetColumnarLoader = {
|
|
7042
7178
|
...ParquetColumnarWorkerLoader,
|
|
7043
7179
|
parse(arrayBuffer, options) {
|
|
7044
|
-
return parseParquetFileInColumns(new
|
|
7180
|
+
return parseParquetFileInColumns(new import_loader_utils2.BlobFile(arrayBuffer), options);
|
|
7045
7181
|
},
|
|
7046
7182
|
parseFile: parseParquetFileInColumns,
|
|
7047
7183
|
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
7048
7184
|
};
|
|
7049
7185
|
|
|
7050
7186
|
// dist/parquet-writer.js
|
|
7051
|
-
var VERSION3 = true ? "4.2.0-alpha.
|
|
7187
|
+
var VERSION3 = true ? "4.2.0-alpha.6" : "latest";
|
|
7052
7188
|
var ParquetWriter = {
|
|
7053
7189
|
name: "Apache Parquet",
|
|
7054
7190
|
id: "parquet",
|
|
@@ -7105,6 +7241,8 @@ async function parseParquetWasm(arrayBuffer, options) {
|
|
|
7105
7241
|
|
|
7106
7242
|
// dist/parquet-wasm-loader.js
|
|
7107
7243
|
var ParquetWasmWorkerLoader = {
|
|
7244
|
+
dataType: null,
|
|
7245
|
+
batchType: null,
|
|
7108
7246
|
name: "Apache Parquet",
|
|
7109
7247
|
id: "parquet-wasm",
|
|
7110
7248
|
module: "parquet",
|
|
@@ -7224,6 +7362,12 @@ var ParquetEncoder = class {
|
|
|
7224
7362
|
const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
7225
7363
|
return new ParquetEncoder(schema, envelopeWriter, opts);
|
|
7226
7364
|
}
|
|
7365
|
+
schema;
|
|
7366
|
+
envelopeWriter;
|
|
7367
|
+
rowBuffer;
|
|
7368
|
+
rowGroupSize;
|
|
7369
|
+
closed;
|
|
7370
|
+
userMetadata;
|
|
7227
7371
|
/**
|
|
7228
7372
|
* Create a new buffered parquet writer for a given envelope writer
|
|
7229
7373
|
*/
|
|
@@ -7309,6 +7453,14 @@ var ParquetEnvelopeWriter = class {
|
|
|
7309
7453
|
const closeFn = osclose.bind(void 0, outputStream);
|
|
7310
7454
|
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
|
|
7311
7455
|
}
|
|
7456
|
+
schema;
|
|
7457
|
+
write;
|
|
7458
|
+
close;
|
|
7459
|
+
offset;
|
|
7460
|
+
rowCount;
|
|
7461
|
+
rowGroups;
|
|
7462
|
+
pageSize;
|
|
7463
|
+
useDataPageV2;
|
|
7312
7464
|
constructor(schema, writeFn, closeFn, fileOffset, opts) {
|
|
7313
7465
|
this.schema = schema;
|
|
7314
7466
|
this.write = writeFn;
|