@loaders.gl/parquet 4.2.0-alpha.6 → 4.2.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +35 -11
- package/dist/index.cjs.map +3 -3
- package/dist/lib/constants.js +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +4 -0
- package/dist/parquet-loader.d.ts +151 -7
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +9 -1
- package/dist/parquet-wasm-loader.d.ts +22 -3
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +2 -0
- package/dist/parquet-wasm-writer.d.ts +1 -3
- package/dist/parquet-wasm-writer.d.ts.map +1 -1
- package/dist/parquet-writer.d.ts +15 -3
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +6 -2
- package/dist/parquetjs/compression.d.ts +1 -1
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +3 -1
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.js +4 -1
- package/dist/parquetjs/schema/types.js +1 -0
- package/package.json +9 -9
- package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
- package/src/lib/parsers/parse-parquet.ts +5 -0
- package/src/parquet-loader.ts +52 -51
- package/src/parquet-wasm-loader.ts +7 -4
- package/src/parquet-wasm-writer.ts +2 -2
- package/src/parquet-writer.ts +2 -2
- package/src/parquetjs/codecs/rle.ts +6 -2
- package/src/parquetjs/compression.ts +6 -2
- package/src/parquetjs/parser/decoders.ts +7 -1
- package/src/parquetjs/schema/types.ts +2 -1
package/dist/index.cjs
CHANGED
|
@@ -1743,7 +1743,7 @@ globalThis.process.env = globalThis.process.env || {};
|
|
|
1743
1743
|
var Buffer3 = installBufferPolyfill();
|
|
1744
1744
|
|
|
1745
1745
|
// dist/parquet-loader.js
|
|
1746
|
-
var
|
|
1746
|
+
var import_loader_utils2 = require("@loaders.gl/loader-utils");
|
|
1747
1747
|
|
|
1748
1748
|
// dist/parquetjs/codecs/plain.js
|
|
1749
1749
|
var import_int53 = __toESM(require("int53"), 1);
|
|
@@ -2005,12 +2005,16 @@ function decodeValues2(type, cursor, count, opts) {
|
|
|
2005
2005
|
while (values.length < count) {
|
|
2006
2006
|
const header = import_varint.default.decode(cursor.buffer, cursor.offset);
|
|
2007
2007
|
cursor.offset += import_varint.default.encodingLength(header);
|
|
2008
|
+
let decodedValues;
|
|
2008
2009
|
if (header & 1) {
|
|
2009
2010
|
const count2 = (header >> 1) * 8;
|
|
2010
|
-
|
|
2011
|
+
decodedValues = decodeRunBitpacked(cursor, count2, opts);
|
|
2011
2012
|
} else {
|
|
2012
2013
|
const count2 = header >> 1;
|
|
2013
|
-
|
|
2014
|
+
decodedValues = decodeRunRepeated(cursor, count2, opts);
|
|
2015
|
+
}
|
|
2016
|
+
for (const value of decodedValues) {
|
|
2017
|
+
values.push(value);
|
|
2014
2018
|
}
|
|
2015
2019
|
}
|
|
2016
2020
|
values = values.slice(0, count);
|
|
@@ -2102,6 +2106,7 @@ var PARQUET_CODECS = {
|
|
|
2102
2106
|
|
|
2103
2107
|
// dist/parquetjs/compression.js
|
|
2104
2108
|
var import_compression = require("@loaders.gl/compression");
|
|
2109
|
+
var import_loader_utils = require("@loaders.gl/loader-utils");
|
|
2105
2110
|
var import_lz4js = __toESM(require("lz4js"), 1);
|
|
2106
2111
|
function toBuffer(arrayBuffer) {
|
|
2107
2112
|
return Buffer.from(arrayBuffer);
|
|
@@ -2138,8 +2143,9 @@ var PARQUET_COMPRESSION_METHODS = {
|
|
|
2138
2143
|
ZSTD: new import_compression.ZstdCompression({ modules })
|
|
2139
2144
|
};
|
|
2140
2145
|
async function preloadCompressions(options) {
|
|
2146
|
+
(0, import_loader_utils.registerJSModules)(options == null ? void 0 : options.modules);
|
|
2141
2147
|
const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
|
|
2142
|
-
return await Promise.all(compressions.map((compression) => compression.preload()));
|
|
2148
|
+
return await Promise.all(compressions.map((compression) => compression.preload(options == null ? void 0 : options.modules)));
|
|
2143
2149
|
}
|
|
2144
2150
|
async function deflate(method, value) {
|
|
2145
2151
|
const compression = PARQUET_COMPRESSION_METHODS[method];
|
|
@@ -6314,7 +6320,7 @@ var FileMetaData = class {
|
|
|
6314
6320
|
};
|
|
6315
6321
|
|
|
6316
6322
|
// dist/lib/constants.js
|
|
6317
|
-
var VERSION = true ? "4.2.0-alpha.6" : "latest";
|
|
6323
|
+
var VERSION = true ? "4.2.0-beta.1" : "latest";
|
|
6318
6324
|
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.0-beta.1/esm/arrow1_bg.wasm";
|
|
6319
6325
|
var PARQUET_MAGIC = "PAR1";
|
|
6320
6326
|
var PARQUET_MAGIC_ENCRYPTED = "PARE";
|
|
@@ -6393,6 +6399,7 @@ function fieldIndexOf(arr, elem) {
|
|
|
6393
6399
|
|
|
6394
6400
|
// dist/parquetjs/parser/decoders.js
|
|
6395
6401
|
async function decodeDataPages(buffer, context) {
|
|
6402
|
+
var _a;
|
|
6396
6403
|
const cursor = {
|
|
6397
6404
|
buffer,
|
|
6398
6405
|
offset: 0,
|
|
@@ -6415,7 +6422,8 @@ async function decodeDataPages(buffer, context) {
|
|
|
6415
6422
|
dictionary = page.dictionary;
|
|
6416
6423
|
continue;
|
|
6417
6424
|
}
|
|
6418
|
-
|
|
6425
|
+
const valueEncoding = getThriftEnum(Encoding, (_a = page.pageHeader.data_page_header) == null ? void 0 : _a.encoding);
|
|
6426
|
+
if (dictionary.length && valueEncoding !== "PLAIN") {
|
|
6419
6427
|
page.values = page.values.map((value) => dictionary[value]);
|
|
6420
6428
|
}
|
|
6421
6429
|
for (let index = 0; index < page.rlevels.length; index++) {
|
|
@@ -6945,6 +6953,7 @@ async function getSchemaFromParquetReader(reader) {
|
|
|
6945
6953
|
async function parseParquetFile(file, options) {
|
|
6946
6954
|
var _a, _b;
|
|
6947
6955
|
installBufferPolyfill();
|
|
6956
|
+
await preloadCompressions(options);
|
|
6948
6957
|
const reader = new ParquetReader(file, {
|
|
6949
6958
|
preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
|
|
6950
6959
|
});
|
|
@@ -6966,6 +6975,8 @@ async function parseParquetFile(file, options) {
|
|
|
6966
6975
|
}
|
|
6967
6976
|
async function* parseParquetFileInBatches(file, options) {
|
|
6968
6977
|
var _a, _b;
|
|
6978
|
+
installBufferPolyfill();
|
|
6979
|
+
await preloadCompressions(options);
|
|
6969
6980
|
const reader = new ParquetReader(file, {
|
|
6970
6981
|
preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
|
|
6971
6982
|
});
|
|
@@ -7057,6 +7068,7 @@ function convertBatch(objectRowBatch, shape) {
|
|
|
7057
7068
|
// dist/lib/parsers/parse-parquet-to-columns.js
|
|
7058
7069
|
async function parseParquetFileInColumns(file, options) {
|
|
7059
7070
|
installBufferPolyfill();
|
|
7071
|
+
await preloadCompressions(options);
|
|
7060
7072
|
for await (const batch of parseParquetFileInColumnarBatches(file, options)) {
|
|
7061
7073
|
return {
|
|
7062
7074
|
shape: "columnar-table",
|
|
@@ -7067,6 +7079,8 @@ async function parseParquetFileInColumns(file, options) {
|
|
|
7067
7079
|
throw new Error("empty table");
|
|
7068
7080
|
}
|
|
7069
7081
|
async function* parseParquetFileInColumnarBatches(file, options) {
|
|
7082
|
+
installBufferPolyfill();
|
|
7083
|
+
await preloadCompressions(options);
|
|
7070
7084
|
const reader = new ParquetReader(file);
|
|
7071
7085
|
const schema = await getSchemaFromParquetReader(reader);
|
|
7072
7086
|
const parquetSchema = await reader.getSchema();
|
|
@@ -7087,8 +7101,10 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
|
|
|
7087
7101
|
}
|
|
7088
7102
|
|
|
7089
7103
|
// dist/parquet-loader.js
|
|
7090
|
-
var VERSION2 = true ? "4.2.0-alpha.6" : "latest";
|
|
7104
|
+
var VERSION2 = true ? "4.2.0-beta.1" : "latest";
|
|
7091
7105
|
var ParquetWorkerLoader = {
|
|
7106
|
+
dataType: null,
|
|
7107
|
+
batchType: null,
|
|
7092
7108
|
name: "Apache Parquet",
|
|
7093
7109
|
id: "parquet",
|
|
7094
7110
|
module: "parquet",
|
|
@@ -7111,12 +7127,16 @@ var ParquetWorkerLoader = {
|
|
|
7111
7127
|
};
|
|
7112
7128
|
var ParquetLoader = {
|
|
7113
7129
|
...ParquetWorkerLoader,
|
|
7114
|
-
|
|
7130
|
+
dataType: null,
|
|
7131
|
+
batchType: null,
|
|
7132
|
+
parse: (arrayBuffer, options) => parseParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options),
|
|
7115
7133
|
parseFile: parseParquetFile,
|
|
7116
7134
|
parseFileInBatches: parseParquetFileInBatches
|
|
7117
7135
|
};
|
|
7118
7136
|
ParquetLoader.Buffer = Buffer;
|
|
7119
7137
|
var GeoParquetWorkerLoader = {
|
|
7138
|
+
dataType: null,
|
|
7139
|
+
batchType: null,
|
|
7120
7140
|
name: "Apache Parquet",
|
|
7121
7141
|
id: "parquet",
|
|
7122
7142
|
module: "parquet",
|
|
@@ -7140,12 +7160,14 @@ var GeoParquetWorkerLoader = {
|
|
|
7140
7160
|
var GeoParquetLoader = {
|
|
7141
7161
|
...GeoParquetWorkerLoader,
|
|
7142
7162
|
parse(arrayBuffer, options) {
|
|
7143
|
-
return parseGeoParquetFile(new
|
|
7163
|
+
return parseGeoParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options);
|
|
7144
7164
|
},
|
|
7145
7165
|
parseFile: parseGeoParquetFile,
|
|
7146
7166
|
parseFileInBatches: parseGeoParquetFileInBatches
|
|
7147
7167
|
};
|
|
7148
7168
|
var ParquetColumnarWorkerLoader = {
|
|
7169
|
+
dataType: null,
|
|
7170
|
+
batchType: null,
|
|
7149
7171
|
name: "Apache Parquet",
|
|
7150
7172
|
id: "parquet",
|
|
7151
7173
|
module: "parquet",
|
|
@@ -7161,14 +7183,14 @@ var ParquetColumnarWorkerLoader = {
|
|
|
7161
7183
|
var ParquetColumnarLoader = {
|
|
7162
7184
|
...ParquetColumnarWorkerLoader,
|
|
7163
7185
|
parse(arrayBuffer, options) {
|
|
7164
|
-
return parseParquetFileInColumns(new
|
|
7186
|
+
return parseParquetFileInColumns(new import_loader_utils2.BlobFile(arrayBuffer), options);
|
|
7165
7187
|
},
|
|
7166
7188
|
parseFile: parseParquetFileInColumns,
|
|
7167
7189
|
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
7168
7190
|
};
|
|
7169
7191
|
|
|
7170
7192
|
// dist/parquet-writer.js
|
|
7171
|
-
var VERSION3 = true ? "4.2.0-alpha.6" : "latest";
|
|
7193
|
+
var VERSION3 = true ? "4.2.0-beta.1" : "latest";
|
|
7172
7194
|
var ParquetWriter = {
|
|
7173
7195
|
name: "Apache Parquet",
|
|
7174
7196
|
id: "parquet",
|
|
@@ -7225,6 +7247,8 @@ async function parseParquetWasm(arrayBuffer, options) {
|
|
|
7225
7247
|
|
|
7226
7248
|
// dist/parquet-wasm-loader.js
|
|
7227
7249
|
var ParquetWasmWorkerLoader = {
|
|
7250
|
+
dataType: null,
|
|
7251
|
+
batchType: null,
|
|
7228
7252
|
name: "Apache Parquet",
|
|
7229
7253
|
id: "parquet-wasm",
|
|
7230
7254
|
module: "parquet",
|