@loaders.gl/parquet 4.3.0-alpha.1 → 4.3.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/index.cjs +100 -24
  2. package/dist/index.cjs.map +4 -4
  3. package/dist/lib/constants.d.ts +1 -1
  4. package/dist/lib/constants.d.ts.map +1 -1
  5. package/dist/lib/constants.js +2 -2
  6. package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
  7. package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
  8. package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
  9. package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
  10. package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
  11. package/dist/lib/utils/load-wasm.d.ts +3 -0
  12. package/dist/lib/utils/load-wasm.d.ts.map +1 -0
  13. package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
  14. package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
  15. package/dist/lib/utils/make-stream-iterator.js +67 -0
  16. package/dist/parquet-loader.js +1 -1
  17. package/dist/parquet-wasm-loader.d.ts +17 -5
  18. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  19. package/dist/parquet-wasm-loader.js +19 -4
  20. package/dist/parquet-wasm-writer.js +1 -1
  21. package/dist/parquet-writer.js +1 -1
  22. package/dist/parquet_wasm_bg.wasm +0 -0
  23. package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
  24. package/dist/parquetjs/codecs/rle.js +1 -0
  25. package/package.json +12 -12
  26. package/src/lib/constants.ts +2 -1
  27. package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
  28. package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
  29. package/src/lib/utils/make-stream-iterator.ts +87 -0
  30. package/src/parquet-wasm-loader.ts +36 -9
  31. package/src/parquet-wasm-writer.ts +1 -1
  32. package/src/parquetjs/codecs/rle.ts +3 -1
  33. package/dist/arrow1_bg.wasm +0 -0
  34. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
  35. package/dist/lib/wasm/load-wasm.d.ts +0 -3
  36. package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
  37. package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
  38. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
  39. package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
  40. package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
  41. package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
  42. package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
  43. /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
  44. /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
  45. /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
package/dist/index.cjs CHANGED
@@ -6323,8 +6323,8 @@ var FileMetaData = class {
6323
6323
  };
6324
6324
 
6325
6325
  // dist/lib/constants.js
6326
- var VERSION = true ? "4.2.0" : "latest";
6327
- var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.0-beta.1/esm/arrow1_bg.wasm";
6326
+ var VERSION = true ? "4.3.0-alpha.2" : "latest";
6327
+ var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.1/esm/parquet_wasm_bg.wasm";
6328
6328
  var PARQUET_MAGIC = "PAR1";
6329
6329
  var PARQUET_MAGIC_ENCRYPTED = "PARE";
6330
6330
  var PARQUET_RDLVL_TYPE = "INT32";
@@ -7113,7 +7113,7 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
7113
7113
  }
7114
7114
 
7115
7115
  // dist/parquet-loader.js
7116
- var VERSION2 = true ? "4.2.0" : "latest";
7116
+ var VERSION2 = true ? "4.3.0-alpha.2" : "latest";
7117
7117
  var ParquetWorkerLoader = {
7118
7118
  dataType: null,
7119
7119
  batchType: null,
@@ -7202,7 +7202,7 @@ var ParquetColumnarLoader = {
7202
7202
  };
7203
7203
 
7204
7204
  // dist/parquet-writer.js
7205
- var VERSION3 = true ? "4.2.0" : "latest";
7205
+ var VERSION3 = true ? "4.3.0-alpha.2" : "latest";
7206
7206
  var ParquetWriter = {
7207
7207
  name: "Apache Parquet",
7208
7208
  id: "parquet",
@@ -7219,10 +7219,14 @@ function encodeSync(data, options) {
7219
7219
  return new ArrayBuffer(0);
7220
7220
  }
7221
7221
 
7222
- // dist/lib/wasm/parse-parquet-wasm.js
7222
+ // dist/parquet-wasm-loader.js
7223
+ var import_loader_utils4 = require("@loaders.gl/loader-utils");
7224
+
7225
+ // dist/lib/parsers/parse-parquet-wasm.js
7223
7226
  var import_arrow = require("@loaders.gl/arrow");
7227
+ var arrow = __toESM(require("apache-arrow"), 1);
7224
7228
 
7225
- // dist/lib/wasm/load-wasm.js
7229
+ // dist/lib/utils/load-wasm.js
7226
7230
  var import_parquet_wasm = __toESM(require("parquet-wasm"), 1);
7227
7231
  var parquetWasm = __toESM(require("parquet-wasm"), 1);
7228
7232
  var initializePromise;
@@ -7237,23 +7241,75 @@ async function loadWasm(wasmUrl = PARQUET_WASM_URL) {
7237
7241
  return parquetWasm;
7238
7242
  }
7239
7243
 
7240
- // dist/lib/wasm/parse-parquet-wasm.js
7241
- var arrow = __toESM(require("apache-arrow"), 1);
7242
- async function parseParquetWasm(arrayBuffer, options) {
7243
- var _a;
7244
- const arr = new Uint8Array(arrayBuffer);
7245
- const wasmUrl = (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.wasmUrl;
7246
- const wasm = await loadWasm(wasmUrl);
7247
- const wasmTable = wasm.readParquet(arr);
7244
+ // dist/lib/utils/make-stream-iterator.js
7245
+ var import_loader_utils3 = require("@loaders.gl/loader-utils");
7246
+ function makeStreamIterator(stream, options) {
7247
+ return import_loader_utils3.isBrowser ? makeBrowserStreamIterator(stream, options) : makeNodeStreamIterator(stream, options);
7248
+ }
7249
+ async function* makeBrowserStreamIterator(stream, options) {
7250
+ const reader = stream.getReader();
7251
+ let nextBatchPromise;
7248
7252
  try {
7249
- const ipcStream = wasmTable.intoIPCStream();
7250
- const arrowTable = arrow.tableFromIPC(ipcStream);
7251
- return {
7253
+ while (true) {
7254
+ const currentBatchPromise = nextBatchPromise || reader.read();
7255
+ if (options == null ? void 0 : options._streamReadAhead) {
7256
+ nextBatchPromise = reader.read();
7257
+ }
7258
+ const { done, value } = await currentBatchPromise;
7259
+ if (done) {
7260
+ return;
7261
+ }
7262
+ if (value) {
7263
+ yield value;
7264
+ }
7265
+ }
7266
+ } catch (error) {
7267
+ reader.releaseLock();
7268
+ }
7269
+ }
7270
+ async function* makeNodeStreamIterator(stream, options) {
7271
+ yield* stream;
7272
+ }
7273
+
7274
+ // dist/lib/parsers/parse-parquet-wasm.js
7275
+ async function parseParquetFileWasm(file, options) {
7276
+ const wasmUrl = options == null ? void 0 : options.wasmUrl;
7277
+ const wasm = await loadWasm(wasmUrl);
7278
+ let parquetFile;
7279
+ if (file.handle instanceof Blob) {
7280
+ parquetFile = await wasm.ParquetFile.fromFile(file.handle);
7281
+ } else {
7282
+ parquetFile = await wasm.ParquetFile.fromUrl(file.url);
7283
+ }
7284
+ const wasmTable = await parquetFile.read(options);
7285
+ const ipcStream = wasmTable.intoIPCStream();
7286
+ const arrowTable = arrow.tableFromIPC(ipcStream);
7287
+ return {
7288
+ shape: "arrow-table",
7289
+ schema: (0, import_arrow.serializeArrowSchema)(arrowTable.schema),
7290
+ data: arrowTable
7291
+ };
7292
+ }
7293
+ async function* parseParquetFileInBatchesWasm(file, options) {
7294
+ const wasmUrl = options == null ? void 0 : options.wasmUrl;
7295
+ const wasm = await loadWasm(wasmUrl);
7296
+ let parquetFile;
7297
+ if (file.handle instanceof Blob) {
7298
+ parquetFile = await wasm.ParquetFile.fromFile(file.handle);
7299
+ } else {
7300
+ parquetFile = await wasm.ParquetFile.fromUrl(file.url);
7301
+ }
7302
+ const stream = await parquetFile.stream(options);
7303
+ let schema;
7304
+ for await (const table of makeStreamIterator(stream)) {
7305
+ schema ||= (0, import_arrow.serializeArrowSchema)(table.schema);
7306
+ yield {
7307
+ batchType: "data",
7252
7308
  shape: "arrow-table",
7253
- schema: (0, import_arrow.serializeArrowSchema)(arrowTable.schema),
7254
- data: arrowTable
7309
+ schema,
7310
+ data: table.batches[0],
7311
+ length: table.numRows
7255
7312
  };
7256
- } finally {
7257
7313
  }
7258
7314
  }
7259
7315
 
@@ -7273,7 +7329,19 @@ var ParquetWasmWorkerLoader = {
7273
7329
  tests: ["PAR1", "PARE"],
7274
7330
  options: {
7275
7331
  parquet: {
7276
- type: "arrow-table",
7332
+ shape: "arrow-table",
7333
+ limit: void 0,
7334
+ // Provide a limit to the number of rows to be read.
7335
+ offset: 0,
7336
+ // Provide an offset to skip over the given number of rows.
7337
+ batchSize: void 0,
7338
+ // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
7339
+ columns: void 0,
7340
+ // The column names from the file to read.
7341
+ rowGroups: void 0,
7342
+ // Only read data from the provided row group indexes.
7343
+ concurrency: void 0,
7344
+ // The number of concurrent requests to make
7277
7345
  wasmUrl: PARQUET_WASM_URL
7278
7346
  }
7279
7347
  }
@@ -7281,12 +7349,20 @@ var ParquetWasmWorkerLoader = {
7281
7349
  var ParquetWasmLoader = {
7282
7350
  ...ParquetWasmWorkerLoader,
7283
7351
  parse(arrayBuffer, options) {
7284
- options = { parquet: { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet }, ...options };
7285
- return parseParquetWasm(arrayBuffer, options);
7352
+ const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
7353
+ return parseParquetFileWasm(new import_loader_utils4.BlobFile(arrayBuffer), wasmOptions);
7354
+ },
7355
+ parseFile(file, options) {
7356
+ const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
7357
+ return parseParquetFileWasm(file, wasmOptions);
7358
+ },
7359
+ parseFileInBatches(file, options) {
7360
+ const wasmOptions = { ...ParquetWasmLoader.options.parquet, ...options == null ? void 0 : options.parquet };
7361
+ return parseParquetFileInBatchesWasm(file, wasmOptions);
7286
7362
  }
7287
7363
  };
7288
7364
 
7289
- // dist/lib/wasm/encode-parquet-wasm.js
7365
+ // dist/lib/encoders/encode-parquet-wasm.js
7290
7366
  var arrow2 = __toESM(require("apache-arrow"), 1);
7291
7367
  async function encode(table, options) {
7292
7368
  var _a;