@loaders.gl/parquet 4.2.1 → 4.3.0-alpha.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/index.cjs +112 -24
- package/dist/index.cjs.map +4 -4
- package/dist/lib/constants.d.ts +1 -1
- package/dist/lib/constants.d.ts.map +1 -1
- package/dist/lib/constants.js +2 -2
- package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
- package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +10 -0
- package/dist/lib/utils/load-wasm.d.ts +3 -0
- package/dist/lib/utils/load-wasm.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
- package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.js +67 -0
- package/dist/parquet-loader.js +1 -1
- package/dist/parquet-wasm-loader.d.ts +17 -5
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +19 -4
- package/dist/parquet-wasm-writer.js +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquet_wasm_bg.wasm +0 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +1 -0
- package/package.json +13 -12
- package/src/lib/constants.ts +2 -1
- package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
- package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
- package/src/lib/parsers/parse-parquet.ts +10 -0
- package/src/lib/utils/make-stream-iterator.ts +87 -0
- package/src/parquet-wasm-loader.ts +36 -9
- package/src/parquet-wasm-writer.ts +1 -1
- package/src/parquetjs/codecs/rle.ts +3 -1
- package/dist/arrow1_bg.wasm +0 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm.d.ts +0 -3
- package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
- package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
- package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
- package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
- /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
- /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
- /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
package/dist/index.cjs
CHANGED
|
@@ -1745,6 +1745,9 @@ var Buffer3 = installBufferPolyfill();
|
|
|
1745
1745
|
// dist/parquet-loader.js
|
|
1746
1746
|
var import_loader_utils2 = require("@loaders.gl/loader-utils");
|
|
1747
1747
|
|
|
1748
|
+
// dist/lib/parsers/parse-parquet.js
|
|
1749
|
+
var import_log = __toESM(require("@probe.gl/log"), 1);
|
|
1750
|
+
|
|
1748
1751
|
// dist/parquetjs/codecs/plain.js
|
|
1749
1752
|
var import_int53 = __toESM(require("int53"), 1);
|
|
1750
1753
|
function encodeValues(type, values, opts) {
|
|
@@ -6320,8 +6323,8 @@ var FileMetaData = class {
|
|
|
6320
6323
|
};
|
|
6321
6324
|
|
|
6322
6325
|
// dist/lib/constants.js
|
|
6323
|
-
var VERSION = true ? "4.
|
|
6324
|
-
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.
|
|
6326
|
+
var VERSION = true ? "4.3.0-alpha.1" : "latest";
|
|
6327
|
+
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.1/esm/parquet_wasm_bg.wasm";
|
|
6325
6328
|
var PARQUET_MAGIC = "PAR1";
|
|
6326
6329
|
var PARQUET_MAGIC_ENCRYPTED = "PARE";
|
|
6327
6330
|
var PARQUET_RDLVL_TYPE = "INT32";
|
|
@@ -6961,9 +6964,18 @@ async function parseParquetFile(file, options) {
|
|
|
6961
6964
|
const rows = [];
|
|
6962
6965
|
const rowBatches = reader.rowBatchIterator(options == null ? void 0 : options.parquet);
|
|
6963
6966
|
for await (const rowBatch of rowBatches) {
|
|
6967
|
+
let limitHasReached = false;
|
|
6964
6968
|
for (const row of rowBatch) {
|
|
6969
|
+
if ((options == null ? void 0 : options.limit) && rows.length >= (options == null ? void 0 : options.limit)) {
|
|
6970
|
+
limitHasReached = true;
|
|
6971
|
+
break;
|
|
6972
|
+
}
|
|
6965
6973
|
rows.push(row);
|
|
6966
6974
|
}
|
|
6975
|
+
if (limitHasReached) {
|
|
6976
|
+
import_log.default.warn(`Rows number limit has been reached. Only first ${options == null ? void 0 : options.limit} are loaded`)();
|
|
6977
|
+
break;
|
|
6978
|
+
}
|
|
6967
6979
|
}
|
|
6968
6980
|
const objectRowTable = {
|
|
6969
6981
|
shape: "object-row-table",
|
|
@@ -7101,7 +7113,7 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
|
|
|
7101
7113
|
}
|
|
7102
7114
|
|
|
7103
7115
|
// dist/parquet-loader.js
|
|
7104
|
-
var VERSION2 = true ? "4.
|
|
7116
|
+
var VERSION2 = true ? "4.3.0-alpha.1" : "latest";
|
|
7105
7117
|
var ParquetWorkerLoader = {
|
|
7106
7118
|
dataType: null,
|
|
7107
7119
|
batchType: null,
|
|
@@ -7190,7 +7202,7 @@ var ParquetColumnarLoader = {
|
|
|
7190
7202
|
};
|
|
7191
7203
|
|
|
7192
7204
|
// dist/parquet-writer.js
|
|
7193
|
-
var VERSION3 = true ? "4.
|
|
7205
|
+
var VERSION3 = true ? "4.3.0-alpha.1" : "latest";
|
|
7194
7206
|
var ParquetWriter = {
|
|
7195
7207
|
name: "Apache Parquet",
|
|
7196
7208
|
id: "parquet",
|
|
@@ -7207,10 +7219,14 @@ function encodeSync(data, options) {
|
|
|
7207
7219
|
return new ArrayBuffer(0);
|
|
7208
7220
|
}
|
|
7209
7221
|
|
|
7210
|
-
// dist/
|
|
7222
|
+
// dist/parquet-wasm-loader.js
|
|
7223
|
+
var import_loader_utils4 = require("@loaders.gl/loader-utils");
|
|
7224
|
+
|
|
7225
|
+
// dist/lib/parsers/parse-parquet-wasm.js
|
|
7211
7226
|
var import_arrow = require("@loaders.gl/arrow");
|
|
7227
|
+
var arrow = __toESM(require("apache-arrow"), 1);
|
|
7212
7228
|
|
|
7213
|
-
// dist/lib/
|
|
7229
|
+
// dist/lib/utils/load-wasm.js
|
|
7214
7230
|
var import_parquet_wasm = __toESM(require("parquet-wasm"), 1);
|
|
7215
7231
|
var parquetWasm = __toESM(require("parquet-wasm"), 1);
|
|
7216
7232
|
var initializePromise;
|
|
@@ -7225,23 +7241,75 @@ async function loadWasm(wasmUrl = PARQUET_WASM_URL) {
|
|
|
7225
7241
|
return parquetWasm;
|
|
7226
7242
|
}
|
|
7227
7243
|
|
|
7228
|
-
// dist/lib/
|
|
7229
|
-
var
|
|
7230
|
-
|
|
7231
|
-
|
|
7232
|
-
|
|
7233
|
-
|
|
7234
|
-
const
|
|
7235
|
-
|
|
7244
|
+
// dist/lib/utils/make-stream-iterator.js
|
|
7245
|
+
var import_loader_utils3 = require("@loaders.gl/loader-utils");
|
|
7246
|
+
function makeStreamIterator(stream, options) {
|
|
7247
|
+
return import_loader_utils3.isBrowser ? makeBrowserStreamIterator(stream, options) : makeNodeStreamIterator(stream, options);
|
|
7248
|
+
}
|
|
7249
|
+
async function* makeBrowserStreamIterator(stream, options) {
|
|
7250
|
+
const reader = stream.getReader();
|
|
7251
|
+
let nextBatchPromise;
|
|
7236
7252
|
try {
|
|
7237
|
-
|
|
7238
|
-
|
|
7239
|
-
|
|
7253
|
+
while (true) {
|
|
7254
|
+
const currentBatchPromise = nextBatchPromise || reader.read();
|
|
7255
|
+
if (options == null ? void 0 : options._streamReadAhead) {
|
|
7256
|
+
nextBatchPromise = reader.read();
|
|
7257
|
+
}
|
|
7258
|
+
const { done, value } = await currentBatchPromise;
|
|
7259
|
+
if (done) {
|
|
7260
|
+
return;
|
|
7261
|
+
}
|
|
7262
|
+
if (value) {
|
|
7263
|
+
yield value;
|
|
7264
|
+
}
|
|
7265
|
+
}
|
|
7266
|
+
} catch (error) {
|
|
7267
|
+
reader.releaseLock();
|
|
7268
|
+
}
|
|
7269
|
+
}
|
|
7270
|
+
async function* makeNodeStreamIterator(stream, options) {
|
|
7271
|
+
yield* stream;
|
|
7272
|
+
}
|
|
7273
|
+
|
|
7274
|
+
// dist/lib/parsers/parse-parquet-wasm.js
|
|
7275
|
+
async function parseParquetFileWasm(file, options) {
|
|
7276
|
+
const wasmUrl = options == null ? void 0 : options.wasmUrl;
|
|
7277
|
+
const wasm = await loadWasm(wasmUrl);
|
|
7278
|
+
let parquetFile;
|
|
7279
|
+
if (file.handle instanceof Blob) {
|
|
7280
|
+
parquetFile = await wasm.ParquetFile.fromFile(file.handle);
|
|
7281
|
+
} else {
|
|
7282
|
+
parquetFile = await wasm.ParquetFile.fromUrl(file.url);
|
|
7283
|
+
}
|
|
7284
|
+
const wasmTable = await parquetFile.read(options);
|
|
7285
|
+
const ipcStream = wasmTable.intoIPCStream();
|
|
7286
|
+
const arrowTable = arrow.tableFromIPC(ipcStream);
|
|
7287
|
+
return {
|
|
7288
|
+
shape: "arrow-table",
|
|
7289
|
+
schema: (0, import_arrow.serializeArrowSchema)(arrowTable.schema),
|
|
7290
|
+
data: arrowTable
|
|
7291
|
+
};
|
|
7292
|
+
}
|
|
7293
|
+
async function* parseParquetFileInBatchesWasm(file, options) {
|
|
7294
|
+
const wasmUrl = options == null ? void 0 : options.wasmUrl;
|
|
7295
|
+
const wasm = await loadWasm(wasmUrl);
|
|
7296
|
+
let parquetFile;
|
|
7297
|
+
if (file.handle instanceof Blob) {
|
|
7298
|
+
parquetFile = await wasm.ParquetFile.fromFile(file.handle);
|
|
7299
|
+
} else {
|
|
7300
|
+
parquetFile = await wasm.ParquetFile.fromUrl(file.url);
|
|
7301
|
+
}
|
|
7302
|
+
const stream = await parquetFile.stream(options);
|
|
7303
|
+
let schema;
|
|
7304
|
+
for await (const table of makeStreamIterator(stream)) {
|
|
7305
|
+
schema ||= (0, import_arrow.serializeArrowSchema)(table.schema);
|
|
7306
|
+
yield {
|
|
7307
|
+
batchType: "data",
|
|
7240
7308
|
shape: "arrow-table",
|
|
7241
|
-
schema
|
|
7242
|
-
data:
|
|
7309
|
+
schema,
|
|
7310
|
+
data: table.batches[0],
|
|
7311
|
+
length: table.numRows
|
|
7243
7312
|
};
|
|
7244
|
-
} finally {
|
|
7245
7313
|
}
|
|
7246
7314
|
}
|
|
7247
7315
|
|
|
@@ -7261,7 +7329,19 @@ var ParquetWasmWorkerLoader = {
|
|
|
7261
7329
|
tests: ["PAR1", "PARE"],
|
|
7262
7330
|
options: {
|
|
7263
7331
|
parquet: {
|
|
7264
|
-
|
|
7332
|
+
shape: "arrow-table",
|
|
7333
|
+
limit: void 0,
|
|
7334
|
+
// Provide a limit to the number of rows to be read.
|
|
7335
|
+
offset: 0,
|
|
7336
|
+
// Provide an offset to skip over the given number of rows.
|
|
7337
|
+
batchSize: void 0,
|
|
7338
|
+
// The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
7339
|
+
columns: void 0,
|
|
7340
|
+
// The column names from the file to read.
|
|
7341
|
+
rowGroups: void 0,
|
|
7342
|
+
// Only read data from the provided row group indexes.
|
|
7343
|
+
concurrency: void 0,
|
|
7344
|
+
// The number of concurrent requests to make
|
|
7265
7345
|
wasmUrl: PARQUET_WASM_URL
|
|
7266
7346
|
}
|
|
7267
7347
|
}
|
|
@@ -7269,12 +7349,20 @@ var ParquetWasmWorkerLoader = {
|
|
|
7269
7349
|
var ParquetWasmLoader = {
|
|
7270
7350
|
...ParquetWasmWorkerLoader,
|
|
7271
7351
|
parse(arrayBuffer, options) {
|
|
7272
|
-
|
|
7273
|
-
return
|
|
7352
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7353
|
+
return parseParquetFileWasm(new import_loader_utils4.BlobFile(arrayBuffer), wasmOptions);
|
|
7354
|
+
},
|
|
7355
|
+
parseFile(file, options) {
|
|
7356
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7357
|
+
return parseParquetFileWasm(file, wasmOptions);
|
|
7358
|
+
},
|
|
7359
|
+
parseFileInBatches(file, options) {
|
|
7360
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7361
|
+
return parseParquetFileInBatchesWasm(file, wasmOptions);
|
|
7274
7362
|
}
|
|
7275
7363
|
};
|
|
7276
7364
|
|
|
7277
|
-
// dist/lib/
|
|
7365
|
+
// dist/lib/encoders/encode-parquet-wasm.js
|
|
7278
7366
|
var arrow2 = __toESM(require("apache-arrow"), 1);
|
|
7279
7367
|
async function encode(table, options) {
|
|
7280
7368
|
var _a;
|