@loaders.gl/parquet 4.3.0-alpha.1 → 4.3.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +100 -24
- package/dist/index.cjs.map +4 -4
- package/dist/lib/constants.d.ts +1 -1
- package/dist/lib/constants.d.ts.map +1 -1
- package/dist/lib/constants.js +2 -2
- package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
- package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
- package/dist/lib/utils/load-wasm.d.ts +3 -0
- package/dist/lib/utils/load-wasm.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
- package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.js +67 -0
- package/dist/parquet-loader.js +1 -1
- package/dist/parquet-wasm-loader.d.ts +17 -5
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +19 -4
- package/dist/parquet-wasm-writer.js +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquet_wasm_bg.wasm +0 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +1 -0
- package/package.json +12 -12
- package/src/lib/constants.ts +2 -1
- package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
- package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
- package/src/lib/utils/make-stream-iterator.ts +87 -0
- package/src/parquet-wasm-loader.ts +36 -9
- package/src/parquet-wasm-writer.ts +1 -1
- package/src/parquetjs/codecs/rle.ts +3 -1
- package/dist/arrow1_bg.wasm +0 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm.d.ts +0 -3
- package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
- package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
- package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
- package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
- /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
- /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
- /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
package/dist/index.cjs
CHANGED
|
@@ -6323,8 +6323,8 @@ var FileMetaData = class {
|
|
|
6323
6323
|
};
|
|
6324
6324
|
|
|
6325
6325
|
// dist/lib/constants.js
|
|
6326
|
-
var VERSION = true ? "4.
|
|
6327
|
-
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.
|
|
6326
|
+
var VERSION = true ? "4.3.0-alpha.2" : "latest";
|
|
6327
|
+
var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.1/esm/parquet_wasm_bg.wasm";
|
|
6328
6328
|
var PARQUET_MAGIC = "PAR1";
|
|
6329
6329
|
var PARQUET_MAGIC_ENCRYPTED = "PARE";
|
|
6330
6330
|
var PARQUET_RDLVL_TYPE = "INT32";
|
|
@@ -7113,7 +7113,7 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
|
|
|
7113
7113
|
}
|
|
7114
7114
|
|
|
7115
7115
|
// dist/parquet-loader.js
|
|
7116
|
-
var VERSION2 = true ? "4.
|
|
7116
|
+
var VERSION2 = true ? "4.3.0-alpha.2" : "latest";
|
|
7117
7117
|
var ParquetWorkerLoader = {
|
|
7118
7118
|
dataType: null,
|
|
7119
7119
|
batchType: null,
|
|
@@ -7202,7 +7202,7 @@ var ParquetColumnarLoader = {
|
|
|
7202
7202
|
};
|
|
7203
7203
|
|
|
7204
7204
|
// dist/parquet-writer.js
|
|
7205
|
-
var VERSION3 = true ? "4.
|
|
7205
|
+
var VERSION3 = true ? "4.3.0-alpha.2" : "latest";
|
|
7206
7206
|
var ParquetWriter = {
|
|
7207
7207
|
name: "Apache Parquet",
|
|
7208
7208
|
id: "parquet",
|
|
@@ -7219,10 +7219,14 @@ function encodeSync(data, options) {
|
|
|
7219
7219
|
return new ArrayBuffer(0);
|
|
7220
7220
|
}
|
|
7221
7221
|
|
|
7222
|
-
// dist/
|
|
7222
|
+
// dist/parquet-wasm-loader.js
|
|
7223
|
+
var import_loader_utils4 = require("@loaders.gl/loader-utils");
|
|
7224
|
+
|
|
7225
|
+
// dist/lib/parsers/parse-parquet-wasm.js
|
|
7223
7226
|
var import_arrow = require("@loaders.gl/arrow");
|
|
7227
|
+
var arrow = __toESM(require("apache-arrow"), 1);
|
|
7224
7228
|
|
|
7225
|
-
// dist/lib/
|
|
7229
|
+
// dist/lib/utils/load-wasm.js
|
|
7226
7230
|
var import_parquet_wasm = __toESM(require("parquet-wasm"), 1);
|
|
7227
7231
|
var parquetWasm = __toESM(require("parquet-wasm"), 1);
|
|
7228
7232
|
var initializePromise;
|
|
@@ -7237,23 +7241,75 @@ async function loadWasm(wasmUrl = PARQUET_WASM_URL) {
|
|
|
7237
7241
|
return parquetWasm;
|
|
7238
7242
|
}
|
|
7239
7243
|
|
|
7240
|
-
// dist/lib/
|
|
7241
|
-
var
|
|
7242
|
-
|
|
7243
|
-
|
|
7244
|
-
|
|
7245
|
-
|
|
7246
|
-
const
|
|
7247
|
-
|
|
7244
|
+
// dist/lib/utils/make-stream-iterator.js
|
|
7245
|
+
var import_loader_utils3 = require("@loaders.gl/loader-utils");
|
|
7246
|
+
function makeStreamIterator(stream, options) {
|
|
7247
|
+
return import_loader_utils3.isBrowser ? makeBrowserStreamIterator(stream, options) : makeNodeStreamIterator(stream, options);
|
|
7248
|
+
}
|
|
7249
|
+
async function* makeBrowserStreamIterator(stream, options) {
|
|
7250
|
+
const reader = stream.getReader();
|
|
7251
|
+
let nextBatchPromise;
|
|
7248
7252
|
try {
|
|
7249
|
-
|
|
7250
|
-
|
|
7251
|
-
|
|
7253
|
+
while (true) {
|
|
7254
|
+
const currentBatchPromise = nextBatchPromise || reader.read();
|
|
7255
|
+
if (options == null ? void 0 : options._streamReadAhead) {
|
|
7256
|
+
nextBatchPromise = reader.read();
|
|
7257
|
+
}
|
|
7258
|
+
const { done, value } = await currentBatchPromise;
|
|
7259
|
+
if (done) {
|
|
7260
|
+
return;
|
|
7261
|
+
}
|
|
7262
|
+
if (value) {
|
|
7263
|
+
yield value;
|
|
7264
|
+
}
|
|
7265
|
+
}
|
|
7266
|
+
} catch (error) {
|
|
7267
|
+
reader.releaseLock();
|
|
7268
|
+
}
|
|
7269
|
+
}
|
|
7270
|
+
async function* makeNodeStreamIterator(stream, options) {
|
|
7271
|
+
yield* stream;
|
|
7272
|
+
}
|
|
7273
|
+
|
|
7274
|
+
// dist/lib/parsers/parse-parquet-wasm.js
|
|
7275
|
+
async function parseParquetFileWasm(file, options) {
|
|
7276
|
+
const wasmUrl = options == null ? void 0 : options.wasmUrl;
|
|
7277
|
+
const wasm = await loadWasm(wasmUrl);
|
|
7278
|
+
let parquetFile;
|
|
7279
|
+
if (file.handle instanceof Blob) {
|
|
7280
|
+
parquetFile = await wasm.ParquetFile.fromFile(file.handle);
|
|
7281
|
+
} else {
|
|
7282
|
+
parquetFile = await wasm.ParquetFile.fromUrl(file.url);
|
|
7283
|
+
}
|
|
7284
|
+
const wasmTable = await parquetFile.read(options);
|
|
7285
|
+
const ipcStream = wasmTable.intoIPCStream();
|
|
7286
|
+
const arrowTable = arrow.tableFromIPC(ipcStream);
|
|
7287
|
+
return {
|
|
7288
|
+
shape: "arrow-table",
|
|
7289
|
+
schema: (0, import_arrow.serializeArrowSchema)(arrowTable.schema),
|
|
7290
|
+
data: arrowTable
|
|
7291
|
+
};
|
|
7292
|
+
}
|
|
7293
|
+
async function* parseParquetFileInBatchesWasm(file, options) {
|
|
7294
|
+
const wasmUrl = options == null ? void 0 : options.wasmUrl;
|
|
7295
|
+
const wasm = await loadWasm(wasmUrl);
|
|
7296
|
+
let parquetFile;
|
|
7297
|
+
if (file.handle instanceof Blob) {
|
|
7298
|
+
parquetFile = await wasm.ParquetFile.fromFile(file.handle);
|
|
7299
|
+
} else {
|
|
7300
|
+
parquetFile = await wasm.ParquetFile.fromUrl(file.url);
|
|
7301
|
+
}
|
|
7302
|
+
const stream = await parquetFile.stream(options);
|
|
7303
|
+
let schema;
|
|
7304
|
+
for await (const table of makeStreamIterator(stream)) {
|
|
7305
|
+
schema ||= (0, import_arrow.serializeArrowSchema)(table.schema);
|
|
7306
|
+
yield {
|
|
7307
|
+
batchType: "data",
|
|
7252
7308
|
shape: "arrow-table",
|
|
7253
|
-
schema
|
|
7254
|
-
data:
|
|
7309
|
+
schema,
|
|
7310
|
+
data: table.batches[0],
|
|
7311
|
+
length: table.numRows
|
|
7255
7312
|
};
|
|
7256
|
-
} finally {
|
|
7257
7313
|
}
|
|
7258
7314
|
}
|
|
7259
7315
|
|
|
@@ -7273,7 +7329,19 @@ var ParquetWasmWorkerLoader = {
|
|
|
7273
7329
|
tests: ["PAR1", "PARE"],
|
|
7274
7330
|
options: {
|
|
7275
7331
|
parquet: {
|
|
7276
|
-
|
|
7332
|
+
shape: "arrow-table",
|
|
7333
|
+
limit: void 0,
|
|
7334
|
+
// Provide a limit to the number of rows to be read.
|
|
7335
|
+
offset: 0,
|
|
7336
|
+
// Provide an offset to skip over the given number of rows.
|
|
7337
|
+
batchSize: void 0,
|
|
7338
|
+
// The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
7339
|
+
columns: void 0,
|
|
7340
|
+
// The column names from the file to read.
|
|
7341
|
+
rowGroups: void 0,
|
|
7342
|
+
// Only read data from the provided row group indexes.
|
|
7343
|
+
concurrency: void 0,
|
|
7344
|
+
// The number of concurrent requests to make
|
|
7277
7345
|
wasmUrl: PARQUET_WASM_URL
|
|
7278
7346
|
}
|
|
7279
7347
|
}
|
|
@@ -7281,12 +7349,20 @@ var ParquetWasmWorkerLoader = {
|
|
|
7281
7349
|
var ParquetWasmLoader = {
|
|
7282
7350
|
...ParquetWasmWorkerLoader,
|
|
7283
7351
|
parse(arrayBuffer, options) {
|
|
7284
|
-
|
|
7285
|
-
return
|
|
7352
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7353
|
+
return parseParquetFileWasm(new import_loader_utils4.BlobFile(arrayBuffer), wasmOptions);
|
|
7354
|
+
},
|
|
7355
|
+
parseFile(file, options) {
|
|
7356
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7357
|
+
return parseParquetFileWasm(file, wasmOptions);
|
|
7358
|
+
},
|
|
7359
|
+
parseFileInBatches(file, options) {
|
|
7360
|
+
const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
|
|
7361
|
+
return parseParquetFileInBatchesWasm(file, wasmOptions);
|
|
7286
7362
|
}
|
|
7287
7363
|
};
|
|
7288
7364
|
|
|
7289
|
-
// dist/lib/
|
|
7365
|
+
// dist/lib/encoders/encode-parquet-wasm.js
|
|
7290
7366
|
var arrow2 = __toESM(require("apache-arrow"), 1);
|
|
7291
7367
|
async function encode(table, options) {
|
|
7292
7368
|
var _a;
|