@loaders.gl/parquet 4.2.0-alpha.6 → 4.2.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/index.cjs +35 -11
  2. package/dist/index.cjs.map +3 -3
  3. package/dist/lib/constants.js +1 -1
  4. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  5. package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
  6. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
  7. package/dist/lib/parsers/parse-parquet.js +4 -0
  8. package/dist/parquet-loader.d.ts +151 -7
  9. package/dist/parquet-loader.d.ts.map +1 -1
  10. package/dist/parquet-loader.js +9 -1
  11. package/dist/parquet-wasm-loader.d.ts +22 -3
  12. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  13. package/dist/parquet-wasm-loader.js +2 -0
  14. package/dist/parquet-wasm-writer.d.ts +1 -3
  15. package/dist/parquet-wasm-writer.d.ts.map +1 -1
  16. package/dist/parquet-writer.d.ts +15 -3
  17. package/dist/parquet-writer.d.ts.map +1 -1
  18. package/dist/parquet-writer.js +1 -1
  19. package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
  20. package/dist/parquetjs/codecs/rle.js +6 -2
  21. package/dist/parquetjs/compression.d.ts +1 -1
  22. package/dist/parquetjs/compression.d.ts.map +1 -1
  23. package/dist/parquetjs/compression.js +3 -1
  24. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  25. package/dist/parquetjs/parser/decoders.js +4 -1
  26. package/dist/parquetjs/schema/types.js +1 -0
  27. package/package.json +9 -9
  28. package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
  29. package/src/lib/parsers/parse-parquet.ts +5 -0
  30. package/src/parquet-loader.ts +52 -51
  31. package/src/parquet-wasm-loader.ts +7 -4
  32. package/src/parquet-wasm-writer.ts +2 -2
  33. package/src/parquet-writer.ts +2 -2
  34. package/src/parquetjs/codecs/rle.ts +6 -2
  35. package/src/parquetjs/compression.ts +6 -2
  36. package/src/parquetjs/parser/decoders.ts +7 -1
  37. package/src/parquetjs/schema/types.ts +2 -1
package/dist/index.cjs CHANGED
@@ -1743,7 +1743,7 @@ globalThis.process.env = globalThis.process.env || {};
1743
1743
  var Buffer3 = installBufferPolyfill();
1744
1744
 
1745
1745
  // dist/parquet-loader.js
1746
- var import_loader_utils = require("@loaders.gl/loader-utils");
1746
+ var import_loader_utils2 = require("@loaders.gl/loader-utils");
1747
1747
 
1748
1748
  // dist/parquetjs/codecs/plain.js
1749
1749
  var import_int53 = __toESM(require("int53"), 1);
@@ -2005,12 +2005,16 @@ function decodeValues2(type, cursor, count, opts) {
2005
2005
  while (values.length < count) {
2006
2006
  const header = import_varint.default.decode(cursor.buffer, cursor.offset);
2007
2007
  cursor.offset += import_varint.default.encodingLength(header);
2008
+ let decodedValues;
2008
2009
  if (header & 1) {
2009
2010
  const count2 = (header >> 1) * 8;
2010
- values.push(...decodeRunBitpacked(cursor, count2, opts));
2011
+ decodedValues = decodeRunBitpacked(cursor, count2, opts);
2011
2012
  } else {
2012
2013
  const count2 = header >> 1;
2013
- values.push(...decodeRunRepeated(cursor, count2, opts));
2014
+ decodedValues = decodeRunRepeated(cursor, count2, opts);
2015
+ }
2016
+ for (const value of decodedValues) {
2017
+ values.push(value);
2014
2018
  }
2015
2019
  }
2016
2020
  values = values.slice(0, count);
@@ -2102,6 +2106,7 @@ var PARQUET_CODECS = {
2102
2106
 
2103
2107
  // dist/parquetjs/compression.js
2104
2108
  var import_compression = require("@loaders.gl/compression");
2109
+ var import_loader_utils = require("@loaders.gl/loader-utils");
2105
2110
  var import_lz4js = __toESM(require("lz4js"), 1);
2106
2111
  function toBuffer(arrayBuffer) {
2107
2112
  return Buffer.from(arrayBuffer);
@@ -2138,8 +2143,9 @@ var PARQUET_COMPRESSION_METHODS = {
2138
2143
  ZSTD: new import_compression.ZstdCompression({ modules })
2139
2144
  };
2140
2145
  async function preloadCompressions(options) {
2146
+ (0, import_loader_utils.registerJSModules)(options == null ? void 0 : options.modules);
2141
2147
  const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
2142
- return await Promise.all(compressions.map((compression) => compression.preload()));
2148
+ return await Promise.all(compressions.map((compression) => compression.preload(options == null ? void 0 : options.modules)));
2143
2149
  }
2144
2150
  async function deflate(method, value) {
2145
2151
  const compression = PARQUET_COMPRESSION_METHODS[method];
@@ -6314,7 +6320,7 @@ var FileMetaData = class {
6314
6320
  };
6315
6321
 
6316
6322
  // dist/lib/constants.js
6317
- var VERSION = true ? "4.2.0-alpha.5" : "latest";
6323
+ var VERSION = true ? "4.2.0-beta.1" : "latest";
6318
6324
  var PARQUET_WASM_URL = "https://unpkg.com/parquet-wasm@0.6.0-beta.1/esm/arrow1_bg.wasm";
6319
6325
  var PARQUET_MAGIC = "PAR1";
6320
6326
  var PARQUET_MAGIC_ENCRYPTED = "PARE";
@@ -6393,6 +6399,7 @@ function fieldIndexOf(arr, elem) {
6393
6399
 
6394
6400
  // dist/parquetjs/parser/decoders.js
6395
6401
  async function decodeDataPages(buffer, context) {
6402
+ var _a;
6396
6403
  const cursor = {
6397
6404
  buffer,
6398
6405
  offset: 0,
@@ -6415,7 +6422,8 @@ async function decodeDataPages(buffer, context) {
6415
6422
  dictionary = page.dictionary;
6416
6423
  continue;
6417
6424
  }
6418
- if (dictionary.length) {
6425
+ const valueEncoding = getThriftEnum(Encoding, (_a = page.pageHeader.data_page_header) == null ? void 0 : _a.encoding);
6426
+ if (dictionary.length && valueEncoding !== "PLAIN") {
6419
6427
  page.values = page.values.map((value) => dictionary[value]);
6420
6428
  }
6421
6429
  for (let index = 0; index < page.rlevels.length; index++) {
@@ -6945,6 +6953,7 @@ async function getSchemaFromParquetReader(reader) {
6945
6953
  async function parseParquetFile(file, options) {
6946
6954
  var _a, _b;
6947
6955
  installBufferPolyfill();
6956
+ await preloadCompressions(options);
6948
6957
  const reader = new ParquetReader(file, {
6949
6958
  preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
6950
6959
  });
@@ -6966,6 +6975,8 @@ async function parseParquetFile(file, options) {
6966
6975
  }
6967
6976
  async function* parseParquetFileInBatches(file, options) {
6968
6977
  var _a, _b;
6978
+ installBufferPolyfill();
6979
+ await preloadCompressions(options);
6969
6980
  const reader = new ParquetReader(file, {
6970
6981
  preserveBinary: (_a = options == null ? void 0 : options.parquet) == null ? void 0 : _a.preserveBinary
6971
6982
  });
@@ -7057,6 +7068,7 @@ function convertBatch(objectRowBatch, shape) {
7057
7068
  // dist/lib/parsers/parse-parquet-to-columns.js
7058
7069
  async function parseParquetFileInColumns(file, options) {
7059
7070
  installBufferPolyfill();
7071
+ await preloadCompressions(options);
7060
7072
  for await (const batch of parseParquetFileInColumnarBatches(file, options)) {
7061
7073
  return {
7062
7074
  shape: "columnar-table",
@@ -7067,6 +7079,8 @@ async function parseParquetFileInColumns(file, options) {
7067
7079
  throw new Error("empty table");
7068
7080
  }
7069
7081
  async function* parseParquetFileInColumnarBatches(file, options) {
7082
+ installBufferPolyfill();
7083
+ await preloadCompressions(options);
7070
7084
  const reader = new ParquetReader(file);
7071
7085
  const schema = await getSchemaFromParquetReader(reader);
7072
7086
  const parquetSchema = await reader.getSchema();
@@ -7087,8 +7101,10 @@ function convertRowGroupToTableBatch(rowGroup, parquetSchema, schema) {
7087
7101
  }
7088
7102
 
7089
7103
  // dist/parquet-loader.js
7090
- var VERSION2 = true ? "4.2.0-alpha.5" : "latest";
7104
+ var VERSION2 = true ? "4.2.0-beta.1" : "latest";
7091
7105
  var ParquetWorkerLoader = {
7106
+ dataType: null,
7107
+ batchType: null,
7092
7108
  name: "Apache Parquet",
7093
7109
  id: "parquet",
7094
7110
  module: "parquet",
@@ -7111,12 +7127,16 @@ var ParquetWorkerLoader = {
7111
7127
  };
7112
7128
  var ParquetLoader = {
7113
7129
  ...ParquetWorkerLoader,
7114
- parse: (arrayBuffer, options) => parseParquetFile(new import_loader_utils.BlobFile(arrayBuffer), options),
7130
+ dataType: null,
7131
+ batchType: null,
7132
+ parse: (arrayBuffer, options) => parseParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options),
7115
7133
  parseFile: parseParquetFile,
7116
7134
  parseFileInBatches: parseParquetFileInBatches
7117
7135
  };
7118
7136
  ParquetLoader.Buffer = Buffer;
7119
7137
  var GeoParquetWorkerLoader = {
7138
+ dataType: null,
7139
+ batchType: null,
7120
7140
  name: "Apache Parquet",
7121
7141
  id: "parquet",
7122
7142
  module: "parquet",
@@ -7140,12 +7160,14 @@ var GeoParquetWorkerLoader = {
7140
7160
  var GeoParquetLoader = {
7141
7161
  ...GeoParquetWorkerLoader,
7142
7162
  parse(arrayBuffer, options) {
7143
- return parseGeoParquetFile(new import_loader_utils.BlobFile(arrayBuffer), options);
7163
+ return parseGeoParquetFile(new import_loader_utils2.BlobFile(arrayBuffer), options);
7144
7164
  },
7145
7165
  parseFile: parseGeoParquetFile,
7146
7166
  parseFileInBatches: parseGeoParquetFileInBatches
7147
7167
  };
7148
7168
  var ParquetColumnarWorkerLoader = {
7169
+ dataType: null,
7170
+ batchType: null,
7149
7171
  name: "Apache Parquet",
7150
7172
  id: "parquet",
7151
7173
  module: "parquet",
@@ -7161,14 +7183,14 @@ var ParquetColumnarWorkerLoader = {
7161
7183
  var ParquetColumnarLoader = {
7162
7184
  ...ParquetColumnarWorkerLoader,
7163
7185
  parse(arrayBuffer, options) {
7164
- return parseParquetFileInColumns(new import_loader_utils.BlobFile(arrayBuffer), options);
7186
+ return parseParquetFileInColumns(new import_loader_utils2.BlobFile(arrayBuffer), options);
7165
7187
  },
7166
7188
  parseFile: parseParquetFileInColumns,
7167
7189
  parseFileInBatches: parseParquetFileInColumnarBatches
7168
7190
  };
7169
7191
 
7170
7192
  // dist/parquet-writer.js
7171
- var VERSION3 = true ? "4.2.0-alpha.5" : "latest";
7193
+ var VERSION3 = true ? "4.2.0-beta.1" : "latest";
7172
7194
  var ParquetWriter = {
7173
7195
  name: "Apache Parquet",
7174
7196
  id: "parquet",
@@ -7225,6 +7247,8 @@ async function parseParquetWasm(arrayBuffer, options) {
7225
7247
 
7226
7248
  // dist/parquet-wasm-loader.js
7227
7249
  var ParquetWasmWorkerLoader = {
7250
+ dataType: null,
7251
+ batchType: null,
7228
7252
  name: "Apache Parquet",
7229
7253
  id: "parquet-wasm",
7230
7254
  module: "parquet",