@loaders.gl/parquet 4.2.0-alpha.5 → 4.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/index.cjs +200 -48
  2. package/dist/index.cjs.map +3 -3
  3. package/dist/lib/constants.js +1 -1
  4. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  5. package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
  6. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
  7. package/dist/lib/parsers/parse-parquet.js +4 -0
  8. package/dist/parquet-loader.d.ts +151 -7
  9. package/dist/parquet-loader.d.ts.map +1 -1
  10. package/dist/parquet-loader.js +9 -1
  11. package/dist/parquet-wasm-loader.d.ts +22 -3
  12. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  13. package/dist/parquet-wasm-loader.js +2 -0
  14. package/dist/parquet-wasm-writer.d.ts +1 -3
  15. package/dist/parquet-wasm-writer.d.ts.map +1 -1
  16. package/dist/parquet-writer.d.ts +15 -3
  17. package/dist/parquet-writer.d.ts.map +1 -1
  18. package/dist/parquet-writer.js +1 -1
  19. package/dist/parquetjs/compression.d.ts +1 -1
  20. package/dist/parquetjs/compression.d.ts.map +1 -1
  21. package/dist/parquetjs/compression.js +3 -1
  22. package/dist/parquetjs/encoder/parquet-encoder.js +14 -0
  23. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +7 -0
  24. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
  25. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +13 -0
  26. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
  27. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
  28. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +8 -1
  29. package/dist/parquetjs/parquet-thrift/DecimalType.js +2 -0
  30. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +3 -0
  31. package/dist/parquetjs/parquet-thrift/FileMetaData.js +7 -0
  32. package/dist/parquetjs/parquet-thrift/IntType.js +2 -0
  33. package/dist/parquetjs/parquet-thrift/KeyValue.js +2 -0
  34. package/dist/parquetjs/parquet-thrift/LogicalType.js +13 -0
  35. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
  36. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +3 -0
  37. package/dist/parquetjs/parquet-thrift/PageHeader.js +8 -0
  38. package/dist/parquetjs/parquet-thrift/PageLocation.js +3 -0
  39. package/dist/parquetjs/parquet-thrift/RowGroup.js +4 -0
  40. package/dist/parquetjs/parquet-thrift/SchemaElement.js +10 -0
  41. package/dist/parquetjs/parquet-thrift/SortingColumn.js +3 -0
  42. package/dist/parquetjs/parquet-thrift/Statistics.js +6 -0
  43. package/dist/parquetjs/parquet-thrift/TimeType.js +2 -0
  44. package/dist/parquetjs/parquet-thrift/TimeUnit.js +2 -0
  45. package/dist/parquetjs/parquet-thrift/TimestampType.js +2 -0
  46. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  47. package/dist/parquetjs/parser/parquet-reader.js +8 -5
  48. package/dist/parquetjs/schema/declare.js +4 -0
  49. package/dist/parquetjs/schema/schema.js +3 -0
  50. package/dist/parquetjs/schema/types.js +2 -0
  51. package/dist/parquetjs/utils/read-utils.js +1 -4
  52. package/dist/polyfills/buffer/buffer.js +9 -12
  53. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
  54. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
  55. package/package.json +15 -15
  56. package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
  57. package/src/lib/parsers/parse-parquet.ts +5 -0
  58. package/src/parquet-loader.ts +52 -51
  59. package/src/parquet-wasm-loader.ts +7 -4
  60. package/src/parquet-wasm-writer.ts +2 -2
  61. package/src/parquet-writer.ts +2 -2
  62. package/src/parquetjs/compression.ts +6 -2
  63. package/src/parquetjs/parser/parquet-reader.ts +2 -1
  64. package/src/parquetjs/schema/types.ts +3 -1
  65. package/src/polyfills/buffer/buffer.ts +0 -3
@@ -6,6 +6,9 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class PageEncodingStats {
9
+ page_type;
10
+ encoding;
11
+ count;
9
12
  constructor(args) {
10
13
  if (args != null && args.page_type != null) {
11
14
  this.page_type = args.page_type;
@@ -10,6 +10,14 @@ import * as DataPageHeaderV2 from "./DataPageHeaderV2.js";
10
10
  import * as DictionaryPageHeader from "./DictionaryPageHeader.js";
11
11
  import * as IndexPageHeader from "./IndexPageHeader.js";
12
12
  export class PageHeader {
13
+ type;
14
+ uncompressed_page_size;
15
+ compressed_page_size;
16
+ crc;
17
+ data_page_header;
18
+ index_page_header;
19
+ dictionary_page_header;
20
+ data_page_header_v2;
13
21
  constructor(args) {
14
22
  if (args != null && args.type != null) {
15
23
  this.type = args.type;
@@ -7,6 +7,9 @@
7
7
  import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  export class PageLocation {
10
+ offset;
11
+ compressed_page_size;
12
+ first_row_index;
10
13
  constructor(args) {
11
14
  if (args != null && args.offset != null) {
12
15
  if (typeof args.offset === 'number') {
@@ -9,6 +9,10 @@ import * as thrift from 'thrift';
9
9
  import * as ColumnChunk from "./ColumnChunk.js";
10
10
  import * as SortingColumn from "./SortingColumn.js";
11
11
  export class RowGroup {
12
+ columns;
13
+ total_byte_size;
14
+ num_rows;
15
+ sorting_columns;
12
16
  constructor(args) {
13
17
  if (args != null && args.columns != null) {
14
18
  this.columns = args.columns;
@@ -7,6 +7,16 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as LogicalType from "./LogicalType.js";
9
9
  export class SchemaElement {
10
+ type;
11
+ type_length;
12
+ repetition_type;
13
+ name;
14
+ num_children;
15
+ converted_type;
16
+ scale;
17
+ precision;
18
+ field_id;
19
+ logicalType;
10
20
  constructor(args) {
11
21
  if (args != null && args.type != null) {
12
22
  this.type = args.type;
@@ -6,6 +6,9 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class SortingColumn {
9
+ column_idx;
10
+ descending;
11
+ nulls_first;
9
12
  constructor(args) {
10
13
  if (args != null && args.column_idx != null) {
11
14
  this.column_idx = args.column_idx;
@@ -7,6 +7,12 @@
7
7
  import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  export class Statistics {
10
+ max;
11
+ min;
12
+ null_count;
13
+ distinct_count;
14
+ max_value;
15
+ min_value;
10
16
  constructor(args) {
11
17
  if (args != null && args.max != null) {
12
18
  this.max = args.max;
@@ -7,6 +7,8 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as TimeUnit from "./TimeUnit.js";
9
9
  export class TimeType {
10
+ isAdjustedToUTC;
11
+ unit;
10
12
  constructor(args) {
11
13
  if (args != null && args.isAdjustedToUTC != null) {
12
14
  this.isAdjustedToUTC = args.isAdjustedToUTC;
@@ -8,6 +8,8 @@ import * as thrift from 'thrift';
8
8
  import * as MicroSeconds from "./MicroSeconds.js";
9
9
  import * as MilliSeconds from "./MilliSeconds.js";
10
10
  export class TimeUnit {
11
+ MILLIS;
12
+ MICROS;
11
13
  constructor(args) {
12
14
  let _fieldsSet = 0;
13
15
  if (args != null) {
@@ -7,6 +7,8 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as TimeUnit from "./TimeUnit.js";
9
9
  export class TimestampType {
10
+ isAdjustedToUTC;
11
+ unit;
10
12
  constructor(args) {
11
13
  if (args != null && args.isAdjustedToUTC != null) {
12
14
  this.isAdjustedToUTC = args.isAdjustedToUTC;
@@ -1 +1 @@
1
- {"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAG/C;IAEF,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAcjC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAyBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,oBAAoB,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwBrB"}
1
+ {"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAI/C;IAEF,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAcjC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAyBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,oBAAoB,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwBrB"}
@@ -12,8 +12,15 @@ import { decodeDataPages, decodePage } from "./decoders.js";
12
12
  * rows from a parquet file use the ParquetReader instead
13
13
  */
14
14
  export class ParquetReader {
15
+ static defaultProps = {
16
+ // max ArrayBuffer size in js is 2Gb
17
+ defaultDictionarySize: 2147483648,
18
+ preserveBinary: false
19
+ };
20
+ props;
21
+ file;
22
+ metadata = null;
15
23
  constructor(file, props) {
16
- this.metadata = null;
17
24
  this.file = file;
18
25
  this.props = { ...ParquetReader.defaultProps, ...props };
19
26
  }
@@ -198,7 +205,3 @@ export class ParquetReader {
198
205
  return decodedPage.dictionary;
199
206
  }
200
207
  }
201
- ParquetReader.defaultProps = {
202
- defaultDictionarySize: 1e6,
203
- preserveBinary: false
204
- };
@@ -1,6 +1,10 @@
1
1
  /** @
2
2
  * Holds data for one row group (column chunks) */
3
3
  export class ParquetRowGroup {
4
+ /** Number of rows in this page */
5
+ rowCount;
6
+ /** Map of Column chunks */
7
+ columnData;
4
8
  constructor(rowCount = 0, columnData = {}) {
5
9
  this.rowCount = rowCount;
6
10
  this.columnData = columnData;
@@ -7,6 +7,9 @@ import { PARQUET_LOGICAL_TYPES } from "./types.js";
7
7
  * A parquet file schema
8
8
  */
9
9
  export class ParquetSchema {
10
+ schema;
11
+ fields;
12
+ fieldList;
10
13
  /**
11
14
  * Create a new schema from a JSON schema definition
12
15
  */
@@ -296,6 +296,7 @@ function fromPrimitive_JSON(value) {
296
296
  return JSON.parse(value);
297
297
  }
298
298
  function toPrimitive_BSON(value) {
299
+ // @ts-ignore
299
300
  const arrayBuffer = BSONWriter.encodeSync?.(value);
300
301
  return Buffer.from(arrayBuffer);
301
302
  }
@@ -304,6 +305,7 @@ function fromPrimitive_BSON(value) {
304
305
  }
305
306
  function toPrimitive_TIME_MILLIS(value) {
306
307
  const v = parseInt(value, 10);
308
+ // eslint-disable-next-line @typescript-eslint/no-loss-of-precision
307
309
  if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
308
310
  throw new Error(`invalid value for TIME_MILLIS: ${value}`);
309
311
  }
@@ -1,10 +1,7 @@
1
1
  import { TBufferedTransport, TCompactProtocol, TFramedTransport } from "../parquet-thrift/index.js";
2
2
  import { FileMetaData, PageHeader } from "../parquet-thrift/index.js";
3
3
  class UFramedTransport extends TFramedTransport {
4
- constructor() {
5
- super(...arguments);
6
- this.readPos = 0;
7
- }
4
+ readPos = 0;
8
5
  }
9
6
  /**
10
7
  * Helper function that serializes a thrift object into a buffer
@@ -41,6 +41,7 @@ export const INSPECT_MAX_BYTES = 50;
41
41
  * The `Uint8Array` prototype remains unmodified.
42
42
  */
43
43
  export class Buffer extends Uint8Array {
44
+ static poolSize = 8192; // not used by this implementation
44
45
  // length: number; inherited
45
46
  get parent() {
46
47
  if (!Buffer.isBuffer(this))
@@ -52,6 +53,14 @@ export class Buffer extends Uint8Array {
52
53
  return undefined;
53
54
  return this.byteOffset;
54
55
  }
56
+ /** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
57
+ * to detect a Buffer instance. It's not possible to use `instanceof Buffer`
58
+ * reliably in a browserify context because there could be multiple different
59
+ * copies of the 'buffer' package in use. This method works even for Buffer
60
+ * instances that were created from another copy of the `buffer` package.
61
+ * @see: https://github.com/feross/buffer/issues/154
62
+ */
63
+ _isBuffer = true;
55
64
  constructor(arg, encodingOrOffset, length) {
56
65
  if (typeof arg !== 'number') {
57
66
  return Buffer.from(arg, encodingOrOffset, length);
@@ -65,14 +74,6 @@ export class Buffer extends Uint8Array {
65
74
  throw new TypeError('The "string" argument must be of type string. Received type number');
66
75
  }
67
76
  super(size < 0 ? 0 : checked(size) | 0);
68
- /** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
69
- * to detect a Buffer instance. It's not possible to use `instanceof Buffer`
70
- * reliably in a browserify context because there could be multiple different
71
- * copies of the 'buffer' package in use. This method works even for Buffer
72
- * instances that were created from another copy of the `buffer` package.
73
- * @see: https://github.com/feross/buffer/issues/154
74
- */
75
- this._isBuffer = true;
76
77
  return;
77
78
  }
78
79
  static from(value, encodingOrOffset, length) {
@@ -1064,7 +1065,6 @@ export class Buffer extends Uint8Array {
1064
1065
  }
1065
1066
  }
1066
1067
  }
1067
- Buffer.poolSize = 8192; // not used by this implementation
1068
1068
  function checkInt(buf, value, offset, ext, max, min) {
1069
1069
  if (!Buffer.isBuffer(buf))
1070
1070
  throw new TypeError('"buffer" argument must be a Buffer instance');
@@ -1603,7 +1603,6 @@ function writeDouble(buf, value, offset, littleEndian, noAssert) {
1603
1603
  return offset + 8;
1604
1604
  }
1605
1605
  // CUSTOM ERRORS
1606
- // =============
1607
1606
  // Simplified versions from Node, changed for Buffer-only usage
1608
1607
  const errors = {};
1609
1608
  function E(sym, getMessage, Base) {
@@ -1674,7 +1673,6 @@ function addNumericalSeparator(val) {
1674
1673
  return `${val.slice(0, i)}${res}`;
1675
1674
  }
1676
1675
  // CHECK FUNCTIONS
1677
- // ===============
1678
1676
  function checkBounds(buf, offset, byteLength) {
1679
1677
  validateNumber(offset, 'offset');
1680
1678
  if (buf[offset] === undefined || buf[offset + byteLength] === undefined) {
@@ -1718,7 +1716,6 @@ function boundsError(value, length, type) {
1718
1716
  throw new errors.ERR_OUT_OF_RANGE(type || 'offset', `>= ${type ? 1 : 0} and <= ${length}`, value);
1719
1717
  }
1720
1718
  // HELPER FUNCTIONS
1721
- // ================
1722
1719
  const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
1723
1720
  function base64clean(str) {
1724
1721
  // Node takes equal signs as end of the Base64 encoding
@@ -1,3 +1,30 @@
1
1
  /// <reference types="node" />
2
- export declare const Buffer: BufferConstructor;
2
+ export declare const Buffer: {
3
+ new (str: string, encoding?: string | undefined): Buffer;
4
+ new (size: number): Buffer;
5
+ new (array: Uint8Array): Buffer;
6
+ new (arrayBuffer: ArrayBuffer | SharedArrayBuffer): Buffer;
7
+ new (array: readonly any[]): Buffer;
8
+ new (buffer: Buffer): Buffer;
9
+ prototype: Buffer;
10
+ from(arrayBuffer: ArrayBuffer | SharedArrayBuffer, byteOffset?: number | undefined, length?: number | undefined): Buffer;
11
+ from(data: readonly any[]): Buffer;
12
+ from(data: Uint8Array): Buffer;
13
+ from(obj: {
14
+ valueOf(): string | object;
15
+ } | {
16
+ [Symbol.toPrimitive](hint: "string"): string;
17
+ }, byteOffset?: number | undefined, length?: number | undefined): Buffer;
18
+ from(str: string, encoding?: string | undefined): Buffer;
19
+ of(...items: number[]): Buffer;
20
+ isBuffer(obj: any): obj is Buffer;
21
+ isEncoding(encoding: string): boolean | undefined;
22
+ byteLength(string: string | ArrayBuffer | SharedArrayBuffer | NodeJS.TypedArray | DataView, encoding?: string | undefined): number;
23
+ concat(list: readonly Uint8Array[], totalLength?: number | undefined): Buffer;
24
+ compare(buf1: Uint8Array, buf2: Uint8Array): number;
25
+ alloc(size: number, fill?: string | number | Buffer | undefined, encoding?: string | undefined): Buffer;
26
+ allocUnsafe(size: number): Buffer;
27
+ allocUnsafeSlow(size: number): Buffer;
28
+ poolSize: number;
29
+ };
3
30
  //# sourceMappingURL=install-buffer-polyfill.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM,mBAA0B,CAAC"}
1
+ {"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;CAA0B,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "4.2.0-alpha.5",
3
+ "version": "4.2.0-beta.1",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -46,27 +46,27 @@
46
46
  "copy-wasm": "cp ../../node_modules/parquet-wasm/esm2/arrow1_bg.wasm dist/arrow1_bg.wasm"
47
47
  },
48
48
  "browser": {
49
+ "./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js",
50
+ "./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
49
51
  "child_process": false,
52
+ "events": false,
53
+ "fs": false,
50
54
  "net": false,
51
- "tls": false,
52
55
  "stream": false,
53
- "fs": false,
54
- "util": false,
55
- "events": false,
56
- "./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
57
- "./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js"
56
+ "tls": false,
57
+ "util": false
58
58
  },
59
59
  "comments": [
60
60
  "base64-js and ieee754 are used by buffer polyfill"
61
61
  ],
62
62
  "dependencies": {
63
- "@loaders.gl/arrow": "4.2.0-alpha.5",
64
- "@loaders.gl/bson": "4.2.0-alpha.5",
65
- "@loaders.gl/compression": "4.2.0-alpha.5",
66
- "@loaders.gl/gis": "4.2.0-alpha.5",
67
- "@loaders.gl/loader-utils": "4.2.0-alpha.5",
68
- "@loaders.gl/schema": "4.2.0-alpha.5",
69
- "@loaders.gl/wkt": "4.2.0-alpha.5",
63
+ "@loaders.gl/arrow": "4.2.0-beta.1",
64
+ "@loaders.gl/bson": "4.2.0-beta.1",
65
+ "@loaders.gl/compression": "4.2.0-beta.1",
66
+ "@loaders.gl/gis": "4.2.0-beta.1",
67
+ "@loaders.gl/loader-utils": "4.2.0-beta.1",
68
+ "@loaders.gl/schema": "4.2.0-beta.1",
69
+ "@loaders.gl/wkt": "4.2.0-beta.1",
70
70
  "async-mutex": "^0.2.2",
71
71
  "base64-js": "^1.3.1",
72
72
  "brotli": "^1.3.2",
@@ -93,5 +93,5 @@
93
93
  "@loaders.gl/core": "^4.0.0",
94
94
  "apache-arrow": ">= 15.0.0"
95
95
  },
96
- "gitHead": "32d95a81971f104e4dfeb88ab57065f05321a76a"
96
+ "gitHead": "c386a9196516fe3ff24847b40e6c77be039cf905"
97
97
  }
@@ -11,6 +11,7 @@ import {ParquetSchema} from '../../parquetjs/schema/schema';
11
11
  import {materializeColumns} from '../../parquetjs/schema/shred';
12
12
  import {getSchemaFromParquetReader} from './get-parquet-schema';
13
13
  import {installBufferPolyfill} from '../../polyfills/buffer/index';
14
+ import {preloadCompressions} from '../../parquetjs/compression';
14
15
 
15
16
  /**
16
17
  * @deprecated
@@ -20,6 +21,8 @@ export async function parseParquetFileInColumns(
20
21
  options?: ParquetLoaderOptions
21
22
  ): Promise<ColumnarTable> {
22
23
  installBufferPolyfill();
24
+ await preloadCompressions(options);
25
+
23
26
  for await (const batch of parseParquetFileInColumnarBatches(file, options)) {
24
27
  return {
25
28
  shape: 'columnar-table',
@@ -37,6 +40,9 @@ export async function* parseParquetFileInColumnarBatches(
37
40
  file: ReadableFile,
38
41
  options?: ParquetLoaderOptions
39
42
  ): AsyncIterable<ColumnarTableBatch> {
43
+ installBufferPolyfill();
44
+ await preloadCompressions(options);
45
+
40
46
  const reader = new ParquetReader(file);
41
47
 
42
48
  // Extract schema and geo metadata
@@ -10,6 +10,7 @@ import type {ParquetRow} from '../../parquetjs/schema/declare';
10
10
  import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
11
11
  import {getSchemaFromParquetReader} from './get-parquet-schema';
12
12
  import {installBufferPolyfill} from '../../polyfills/buffer/index';
13
+ import {preloadCompressions} from '../../parquetjs/compression';
13
14
 
14
15
  /**
15
16
  * * Parse a parquet file using parquetjs
@@ -22,6 +23,7 @@ export async function parseParquetFile(
22
23
  options?: ParquetLoaderOptions
23
24
  ): Promise<ObjectRowTable> {
24
25
  installBufferPolyfill();
26
+ await preloadCompressions(options);
25
27
 
26
28
  const reader = new ParquetReader(file, {
27
29
  preserveBinary: options?.parquet?.preserveBinary
@@ -57,6 +59,9 @@ export async function* parseParquetFileInBatches(
57
59
  file: ReadableFile,
58
60
  options?: ParquetLoaderOptions
59
61
  ): AsyncIterable<ObjectRowTableBatch> {
62
+ installBufferPolyfill();
63
+ await preloadCompressions(options);
64
+
60
65
  const reader = new ParquetReader(file, {
61
66
  preserveBinary: options?.parquet?.preserveBinary
62
67
  });
@@ -46,11 +46,10 @@ export type ParquetLoaderOptions = LoaderOptions & {
46
46
  /**
47
47
  * ParquetJS table loader
48
48
  */
49
- export const ParquetWorkerLoader: Loader<
50
- ObjectRowTable,
51
- ObjectRowTableBatch,
52
- ParquetLoaderOptions
53
- > = {
49
+ export const ParquetWorkerLoader = {
50
+ dataType: null as unknown as ObjectRowTable,
51
+ batchType: null as unknown as ObjectRowTableBatch,
52
+
54
53
  name: 'Apache Parquet',
55
54
  id: 'parquet',
56
55
  module: 'parquet',
@@ -70,69 +69,75 @@ export const ParquetWorkerLoader: Loader<
70
69
  preserveBinary: false
71
70
  }
72
71
  }
73
- };
72
+ } as const satisfies Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>;
74
73
 
75
74
  /** ParquetJS table loader */
76
- export const ParquetLoader: LoaderWithParser<
77
- ObjectRowTable | GeoJSONTable,
78
- ObjectRowTableBatch | GeoJSONTableBatch,
79
- ParquetLoaderOptions
80
- > = {
75
+ export const ParquetLoader = {
81
76
  ...ParquetWorkerLoader,
77
+
78
+ dataType: null as unknown as ObjectRowTable | GeoJSONTable,
79
+ batchType: null as unknown as ObjectRowTableBatch | GeoJSONTableBatch,
80
+
82
81
  parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
83
82
  parseParquetFile(new BlobFile(arrayBuffer), options),
84
83
 
85
84
  parseFile: parseParquetFile,
86
85
  parseFileInBatches: parseParquetFileInBatches
87
- };
86
+ } as const satisfies LoaderWithParser<
87
+ ObjectRowTable | GeoJSONTable,
88
+ ObjectRowTableBatch | GeoJSONTableBatch,
89
+ ParquetLoaderOptions
90
+ >;
88
91
 
89
92
  // Defeat tree shaking
90
93
  // @ts-ignore
91
94
  ParquetLoader.Buffer = Buffer;
92
95
 
93
- export const GeoParquetWorkerLoader: Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions> =
94
- {
95
- name: 'Apache Parquet',
96
- id: 'parquet',
97
- module: 'parquet',
98
- version: VERSION,
99
- worker: true,
100
- category: 'table',
101
- extensions: ['parquet'],
102
- mimeTypes: ['application/octet-stream'],
103
- binary: true,
104
- tests: ['PAR1', 'PARE'],
105
- options: {
106
- parquet: {
107
- shape: 'geojson-table',
108
- columnList: [],
109
- geoparquet: true,
110
- url: undefined,
111
- preserveBinary: false
112
- }
96
+ export const GeoParquetWorkerLoader = {
97
+ dataType: null as unknown as GeoJSONTable,
98
+ batchType: null as unknown as GeoJSONTableBatch,
99
+
100
+ name: 'Apache Parquet',
101
+ id: 'parquet',
102
+ module: 'parquet',
103
+ version: VERSION,
104
+ worker: true,
105
+ category: 'table',
106
+ extensions: ['parquet'],
107
+ mimeTypes: ['application/octet-stream'],
108
+ binary: true,
109
+ tests: ['PAR1', 'PARE'],
110
+ options: {
111
+ parquet: {
112
+ shape: 'geojson-table',
113
+ columnList: [],
114
+ geoparquet: true,
115
+ url: undefined,
116
+ preserveBinary: false
113
117
  }
114
- };
118
+ }
119
+ } as const satisfies Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions>;
115
120
 
116
121
  /** ParquetJS table loader */
117
- export const GeoParquetLoader: LoaderWithParser<
118
- ObjectRowTable | GeoJSONTable,
119
- ObjectRowTableBatch | GeoJSONTableBatch,
120
- ParquetLoaderOptions
121
- > = {
122
+ export const GeoParquetLoader = {
122
123
  ...GeoParquetWorkerLoader,
124
+
123
125
  parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
124
126
  return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
125
127
  },
126
128
  parseFile: parseGeoParquetFile,
127
129
  parseFileInBatches: parseGeoParquetFileInBatches
128
- };
130
+ } as const satisfies LoaderWithParser<
131
+ ObjectRowTable | GeoJSONTable,
132
+ ObjectRowTableBatch | GeoJSONTableBatch,
133
+ ParquetLoaderOptions
134
+ >;
129
135
 
130
136
  /** @deprecated Test to see if we can improve perf of parquetjs loader */
131
- export const ParquetColumnarWorkerLoader: Loader<
132
- ColumnarTable,
133
- ColumnarTableBatch,
134
- ParquetLoaderOptions
135
- > = {
137
+ export const ParquetColumnarWorkerLoader = {
138
+ dataType: null as any as ColumnarTable,
139
+ batchType: null as any as ColumnarTableBatch,
140
+
136
141
  name: 'Apache Parquet',
137
142
  id: 'parquet',
138
143
  module: 'parquet',
@@ -144,18 +149,14 @@ export const ParquetColumnarWorkerLoader: Loader<
144
149
  binary: true,
145
150
  tests: ['PAR1', 'PARE'],
146
151
  options: ParquetLoader.options
147
- };
152
+ } as const satisfies Loader<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
148
153
 
149
154
  /** @deprecated Test to see if we can improve perf of parquetjs loader */
150
- export const ParquetColumnarLoader: LoaderWithParser<
151
- ColumnarTable,
152
- ColumnarTableBatch,
153
- ParquetLoaderOptions
154
- > = {
155
+ export const ParquetColumnarLoader = {
155
156
  ...ParquetColumnarWorkerLoader,
156
157
  parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
157
158
  return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
158
159
  },
159
160
  parseFile: parseParquetFileInColumns,
160
161
  parseFileInBatches: parseParquetFileInColumnarBatches
161
- };
162
+ } as const satisfies LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
@@ -17,7 +17,10 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
17
17
  };
18
18
 
19
19
  /** Parquet WASM table loader */
20
- export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
20
+ export const ParquetWasmWorkerLoader = {
21
+ dataType: null as unknown as ArrowTable,
22
+ batchType: null as never,
23
+
21
24
  name: 'Apache Parquet',
22
25
  id: 'parquet-wasm',
23
26
  module: 'parquet',
@@ -34,13 +37,13 @@ export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoade
34
37
  wasmUrl: PARQUET_WASM_URL
35
38
  }
36
39
  }
37
- };
40
+ } as const satisfies Loader<ArrowTable, never, ParquetWasmLoaderOptions>;
38
41
 
39
42
  /** Parquet WASM table loader */
40
- export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
43
+ export const ParquetWasmLoader = {
41
44
  ...ParquetWasmWorkerLoader,
42
45
  parse(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions) {
43
46
  options = {parquet: {...ParquetWasmLoader.options.parquet, ...options?.parquet}, ...options};
44
47
  return parseParquetWasm(arrayBuffer, options);
45
48
  }
46
- };
49
+ } as const satisfies LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions>;
@@ -16,7 +16,7 @@ export type ParquetWriterOptions = WriterOptions & {
16
16
  };
17
17
 
18
18
  /** Parquet WASM writer */
19
- export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWriterOptions> = {
19
+ export const ParquetWasmWriter = {
20
20
  name: 'Apache Parquet',
21
21
  id: 'parquet-wasm',
22
22
  module: 'parquet',
@@ -33,4 +33,4 @@ export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWrit
33
33
  options = {parquet: {...ParquetWasmWriter.options.parquet, ...options?.parquet}, ...options};
34
34
  return encode(arrowTable, options);
35
35
  }
36
- };
36
+ } as const satisfies WriterWithEncoder<ArrowTable, never, ParquetWriterOptions>;
@@ -11,7 +11,7 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
11
11
 
12
12
  export type ParquetWriterOptions = {};
13
13
 
14
- export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOptions> = {
14
+ export const ParquetWriter = {
15
15
  name: 'Apache Parquet',
16
16
  id: 'parquet',
17
17
  module: 'parquet',
@@ -22,7 +22,7 @@ export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOp
22
22
  options: {},
23
23
  encode: async (data, options) => encodeSync(data, options),
24
24
  encodeSync
25
- };
25
+ } as const satisfies WriterWithEncoder<Table, TableBatch, ParquetWriterOptions>;
26
26
 
27
27
  function encodeSync(data, options?: ParquetWriterOptions) {
28
28
  return new ArrayBuffer(0);