@loaders.gl/parquet 4.2.0-alpha.5 → 4.2.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/index.cjs +177 -43
  2. package/dist/index.cjs.map +3 -3
  3. package/dist/lib/constants.js +1 -1
  4. package/dist/parquet-loader.js +1 -1
  5. package/dist/parquet-writer.js +1 -1
  6. package/dist/parquetjs/encoder/parquet-encoder.js +14 -0
  7. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +7 -0
  8. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
  9. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +13 -0
  10. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
  11. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
  12. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +8 -1
  13. package/dist/parquetjs/parquet-thrift/DecimalType.js +2 -0
  14. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +3 -0
  15. package/dist/parquetjs/parquet-thrift/FileMetaData.js +7 -0
  16. package/dist/parquetjs/parquet-thrift/IntType.js +2 -0
  17. package/dist/parquetjs/parquet-thrift/KeyValue.js +2 -0
  18. package/dist/parquetjs/parquet-thrift/LogicalType.js +13 -0
  19. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
  20. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +3 -0
  21. package/dist/parquetjs/parquet-thrift/PageHeader.js +8 -0
  22. package/dist/parquetjs/parquet-thrift/PageLocation.js +3 -0
  23. package/dist/parquetjs/parquet-thrift/RowGroup.js +4 -0
  24. package/dist/parquetjs/parquet-thrift/SchemaElement.js +10 -0
  25. package/dist/parquetjs/parquet-thrift/SortingColumn.js +3 -0
  26. package/dist/parquetjs/parquet-thrift/Statistics.js +6 -0
  27. package/dist/parquetjs/parquet-thrift/TimeType.js +2 -0
  28. package/dist/parquetjs/parquet-thrift/TimeUnit.js +2 -0
  29. package/dist/parquetjs/parquet-thrift/TimestampType.js +2 -0
  30. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  31. package/dist/parquetjs/parser/parquet-reader.js +8 -5
  32. package/dist/parquetjs/schema/declare.js +4 -0
  33. package/dist/parquetjs/schema/schema.js +3 -0
  34. package/dist/parquetjs/schema/types.js +1 -0
  35. package/dist/parquetjs/utils/read-utils.js +1 -4
  36. package/dist/polyfills/buffer/buffer.js +9 -12
  37. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
  38. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
  39. package/package.json +15 -15
  40. package/src/parquetjs/parser/parquet-reader.ts +2 -1
  41. package/src/parquetjs/schema/types.ts +1 -0
  42. package/src/polyfills/buffer/buffer.ts +0 -3
@@ -3,7 +3,7 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
  // __VERSION__ is injected by babel-plugin-version-inline
5
5
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
6
- export const VERSION = typeof "4.2.0-alpha.4" !== 'undefined' ? "4.2.0-alpha.4" : 'latest';
6
+ export const VERSION = typeof "4.2.0-alpha.5" !== 'undefined' ? "4.2.0-alpha.5" : 'latest';
7
7
  export const PARQUET_WASM_URL = 'https://unpkg.com/parquet-wasm@0.6.0-beta.1/esm/arrow1_bg.wasm';
8
8
  /**
9
9
  * Parquet File Magic String
@@ -8,7 +8,7 @@ import { parseParquetFileInColumns, parseParquetFileInColumnarBatches } from "./
8
8
  export { Buffer } from "./polyfills/buffer/install-buffer-polyfill.js";
9
9
  // __VERSION__ is injected by babel-plugin-version-inline
10
10
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
11
- const VERSION = typeof "4.2.0-alpha.4" !== 'undefined' ? "4.2.0-alpha.4" : 'latest';
11
+ const VERSION = typeof "4.2.0-alpha.5" !== 'undefined' ? "4.2.0-alpha.5" : 'latest';
12
12
  /**
13
13
  * ParquetJS table loader
14
14
  */
@@ -3,7 +3,7 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
  // __VERSION__ is injected by babel-plugin-version-inline
5
5
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
6
- const VERSION = typeof "4.2.0-alpha.4" !== 'undefined' ? "4.2.0-alpha.4" : 'latest';
6
+ const VERSION = typeof "4.2.0-alpha.5" !== 'undefined' ? "4.2.0-alpha.5" : 'latest';
7
7
  export const ParquetWriter = {
8
8
  name: 'Apache Parquet',
9
9
  id: 'parquet',
@@ -46,6 +46,12 @@ export class ParquetEncoder {
46
46
  const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
47
47
  return new ParquetEncoder(schema, envelopeWriter, opts);
48
48
  }
49
+ schema;
50
+ envelopeWriter;
51
+ rowBuffer;
52
+ rowGroupSize;
53
+ closed;
54
+ userMetadata;
49
55
  /**
50
56
  * Create a new buffered parquet writer for a given envelope writer
51
57
  */
@@ -145,6 +151,14 @@ export class ParquetEnvelopeWriter {
145
151
  const closeFn = osclose.bind(undefined, outputStream);
146
152
  return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
147
153
  }
154
+ schema;
155
+ write;
156
+ close;
157
+ offset;
158
+ rowCount;
159
+ rowGroups;
160
+ pageSize;
161
+ useDataPageV2;
148
162
  constructor(schema, writeFn, closeFn, fileOffset, opts) {
149
163
  this.schema = schema;
150
164
  this.write = writeFn;
@@ -8,6 +8,13 @@ import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  import * as ColumnMetaData from "./ColumnMetaData.js";
10
10
  export class ColumnChunk {
11
+ file_path;
12
+ file_offset;
13
+ meta_data;
14
+ offset_index_offset;
15
+ offset_index_length;
16
+ column_index_offset;
17
+ column_index_length;
11
18
  constructor(args) {
12
19
  if (args != null && args.file_path != null) {
13
20
  this.file_path = args.file_path;
@@ -7,6 +7,11 @@
7
7
  import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  export class ColumnIndex {
10
+ null_pages;
11
+ min_values;
12
+ max_values;
13
+ boundary_order;
14
+ null_counts;
10
15
  constructor(args) {
11
16
  if (args != null && args.null_pages != null) {
12
17
  this.null_pages = args.null_pages;
@@ -10,6 +10,19 @@ import * as KeyValue from "./KeyValue.js";
10
10
  import * as PageEncodingStats from "./PageEncodingStats.js";
11
11
  import * as Statistics from "./Statistics.js";
12
12
  export class ColumnMetaData {
13
+ type;
14
+ encodings;
15
+ path_in_schema;
16
+ codec;
17
+ num_values;
18
+ total_uncompressed_size;
19
+ total_compressed_size;
20
+ key_value_metadata;
21
+ data_page_offset;
22
+ index_page_offset;
23
+ dictionary_page_offset;
24
+ statistics;
25
+ encoding_stats;
13
26
  constructor(args) {
14
27
  if (args != null && args.type != null) {
15
28
  this.type = args.type;
@@ -7,6 +7,7 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as TypeDefinedOrder from "./TypeDefinedOrder.js";
9
9
  export class ColumnOrder {
10
+ TYPE_ORDER;
10
11
  constructor(args) {
11
12
  let _fieldsSet = 0;
12
13
  if (args != null) {
@@ -7,6 +7,11 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as Statistics from "./Statistics.js";
9
9
  export class DataPageHeader {
10
+ num_values;
11
+ encoding;
12
+ definition_level_encoding;
13
+ repetition_level_encoding;
14
+ statistics;
10
15
  constructor(args) {
11
16
  if (args != null && args.num_values != null) {
12
17
  this.num_values = args.num_values;
@@ -7,8 +7,15 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as Statistics from "./Statistics.js";
9
9
  export class DataPageHeaderV2 {
10
+ num_values;
11
+ num_nulls;
12
+ num_rows;
13
+ encoding;
14
+ definition_levels_byte_length;
15
+ repetition_levels_byte_length;
16
+ is_compressed = true;
17
+ statistics;
10
18
  constructor(args) {
11
- this.is_compressed = true;
12
19
  if (args != null && args.num_values != null) {
13
20
  this.num_values = args.num_values;
14
21
  }
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class DecimalType {
9
+ scale;
10
+ precision;
9
11
  constructor(args) {
10
12
  if (args != null && args.scale != null) {
11
13
  this.scale = args.scale;
@@ -6,6 +6,9 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class DictionaryPageHeader {
9
+ num_values;
10
+ encoding;
11
+ is_sorted;
9
12
  constructor(args) {
10
13
  if (args != null && args.num_values != null) {
11
14
  this.num_values = args.num_values;
@@ -11,6 +11,13 @@ import * as KeyValue from "./KeyValue.js";
11
11
  import * as RowGroup from "./RowGroup.js";
12
12
  import * as SchemaElement from "./SchemaElement.js";
13
13
  export class FileMetaData {
14
+ version;
15
+ schema;
16
+ num_rows;
17
+ row_groups;
18
+ key_value_metadata;
19
+ created_by;
20
+ column_orders;
14
21
  constructor(args = null) {
15
22
  if (args != null && args.version != null) {
16
23
  this.version = args.version;
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class IntType {
9
+ bitWidth;
10
+ isSigned;
9
11
  constructor(args) {
10
12
  if (args != null && args.bitWidth != null) {
11
13
  this.bitWidth = args.bitWidth;
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class KeyValue {
9
+ key;
10
+ value;
9
11
  constructor(args) {
10
12
  if (args != null && args.key != null) {
11
13
  this.key = args.key;
@@ -19,6 +19,19 @@ import * as TimestampType from "./TimestampType.js";
19
19
  import * as TimeType from "./TimeType.js";
20
20
  import * as UUIDType from "./UUIDType.js";
21
21
  export class LogicalType {
22
+ STRING;
23
+ MAP;
24
+ LIST;
25
+ ENUM;
26
+ DECIMAL;
27
+ DATE;
28
+ TIME;
29
+ TIMESTAMP;
30
+ INTEGER;
31
+ UNKNOWN;
32
+ JSON;
33
+ BSON;
34
+ UUID;
22
35
  constructor(args) {
23
36
  let _fieldsSet = 0;
24
37
  if (args != null) {
@@ -7,6 +7,7 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as PageLocation from "./PageLocation.js";
9
9
  export class OffsetIndex {
10
+ page_locations;
10
11
  constructor(args) {
11
12
  if (args != null && args.page_locations != null) {
12
13
  this.page_locations = args.page_locations;
@@ -6,6 +6,9 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class PageEncodingStats {
9
+ page_type;
10
+ encoding;
11
+ count;
9
12
  constructor(args) {
10
13
  if (args != null && args.page_type != null) {
11
14
  this.page_type = args.page_type;
@@ -10,6 +10,14 @@ import * as DataPageHeaderV2 from "./DataPageHeaderV2.js";
10
10
  import * as DictionaryPageHeader from "./DictionaryPageHeader.js";
11
11
  import * as IndexPageHeader from "./IndexPageHeader.js";
12
12
  export class PageHeader {
13
+ type;
14
+ uncompressed_page_size;
15
+ compressed_page_size;
16
+ crc;
17
+ data_page_header;
18
+ index_page_header;
19
+ dictionary_page_header;
20
+ data_page_header_v2;
13
21
  constructor(args) {
14
22
  if (args != null && args.type != null) {
15
23
  this.type = args.type;
@@ -7,6 +7,9 @@
7
7
  import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  export class PageLocation {
10
+ offset;
11
+ compressed_page_size;
12
+ first_row_index;
10
13
  constructor(args) {
11
14
  if (args != null && args.offset != null) {
12
15
  if (typeof args.offset === 'number') {
@@ -9,6 +9,10 @@ import * as thrift from 'thrift';
9
9
  import * as ColumnChunk from "./ColumnChunk.js";
10
10
  import * as SortingColumn from "./SortingColumn.js";
11
11
  export class RowGroup {
12
+ columns;
13
+ total_byte_size;
14
+ num_rows;
15
+ sorting_columns;
12
16
  constructor(args) {
13
17
  if (args != null && args.columns != null) {
14
18
  this.columns = args.columns;
@@ -7,6 +7,16 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as LogicalType from "./LogicalType.js";
9
9
  export class SchemaElement {
10
+ type;
11
+ type_length;
12
+ repetition_type;
13
+ name;
14
+ num_children;
15
+ converted_type;
16
+ scale;
17
+ precision;
18
+ field_id;
19
+ logicalType;
10
20
  constructor(args) {
11
21
  if (args != null && args.type != null) {
12
22
  this.type = args.type;
@@ -6,6 +6,9 @@
6
6
  */
7
7
  import * as thrift from 'thrift';
8
8
  export class SortingColumn {
9
+ column_idx;
10
+ descending;
11
+ nulls_first;
9
12
  constructor(args) {
10
13
  if (args != null && args.column_idx != null) {
11
14
  this.column_idx = args.column_idx;
@@ -7,6 +7,12 @@
7
7
  import Int64 from 'node-int64';
8
8
  import * as thrift from 'thrift';
9
9
  export class Statistics {
10
+ max;
11
+ min;
12
+ null_count;
13
+ distinct_count;
14
+ max_value;
15
+ min_value;
10
16
  constructor(args) {
11
17
  if (args != null && args.max != null) {
12
18
  this.max = args.max;
@@ -7,6 +7,8 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as TimeUnit from "./TimeUnit.js";
9
9
  export class TimeType {
10
+ isAdjustedToUTC;
11
+ unit;
10
12
  constructor(args) {
11
13
  if (args != null && args.isAdjustedToUTC != null) {
12
14
  this.isAdjustedToUTC = args.isAdjustedToUTC;
@@ -8,6 +8,8 @@ import * as thrift from 'thrift';
8
8
  import * as MicroSeconds from "./MicroSeconds.js";
9
9
  import * as MilliSeconds from "./MilliSeconds.js";
10
10
  export class TimeUnit {
11
+ MILLIS;
12
+ MICROS;
11
13
  constructor(args) {
12
14
  let _fieldsSet = 0;
13
15
  if (args != null) {
@@ -7,6 +7,8 @@
7
7
  import * as thrift from 'thrift';
8
8
  import * as TimeUnit from "./TimeUnit.js";
9
9
  export class TimestampType {
10
+ isAdjustedToUTC;
11
+ unit;
10
12
  constructor(args) {
11
13
  if (args != null && args.isAdjustedToUTC != null) {
12
14
  this.isAdjustedToUTC = args.isAdjustedToUTC;
@@ -1 +1 @@
1
- {"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAG/C;IAEF,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAcjC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAyBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,oBAAoB,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwBrB"}
1
+ {"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAI/C;IAEF,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAcjC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAyBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,oBAAoB,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwBrB"}
@@ -12,8 +12,15 @@ import { decodeDataPages, decodePage } from "./decoders.js";
12
12
  * rows from a parquet file use the ParquetReader instead
13
13
  */
14
14
  export class ParquetReader {
15
+ static defaultProps = {
16
+ // max ArrayBuffer size in js is 2Gb
17
+ defaultDictionarySize: 2147483648,
18
+ preserveBinary: false
19
+ };
20
+ props;
21
+ file;
22
+ metadata = null;
15
23
  constructor(file, props) {
16
- this.metadata = null;
17
24
  this.file = file;
18
25
  this.props = { ...ParquetReader.defaultProps, ...props };
19
26
  }
@@ -198,7 +205,3 @@ export class ParquetReader {
198
205
  return decodedPage.dictionary;
199
206
  }
200
207
  }
201
- ParquetReader.defaultProps = {
202
- defaultDictionarySize: 1e6,
203
- preserveBinary: false
204
- };
@@ -1,6 +1,10 @@
1
1
  /** @
2
2
  * Holds data for one row group (column chunks) */
3
3
  export class ParquetRowGroup {
4
+ /** Number of rows in this page */
5
+ rowCount;
6
+ /** Map of Column chunks */
7
+ columnData;
4
8
  constructor(rowCount = 0, columnData = {}) {
5
9
  this.rowCount = rowCount;
6
10
  this.columnData = columnData;
@@ -7,6 +7,9 @@ import { PARQUET_LOGICAL_TYPES } from "./types.js";
7
7
  * A parquet file schema
8
8
  */
9
9
  export class ParquetSchema {
10
+ schema;
11
+ fields;
12
+ fieldList;
10
13
  /**
11
14
  * Create a new schema from a JSON schema definition
12
15
  */
@@ -304,6 +304,7 @@ function fromPrimitive_BSON(value) {
304
304
  }
305
305
  function toPrimitive_TIME_MILLIS(value) {
306
306
  const v = parseInt(value, 10);
307
+ // eslint-disable-next-line @typescript-eslint/no-loss-of-precision
307
308
  if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
308
309
  throw new Error(`invalid value for TIME_MILLIS: ${value}`);
309
310
  }
@@ -1,10 +1,7 @@
1
1
  import { TBufferedTransport, TCompactProtocol, TFramedTransport } from "../parquet-thrift/index.js";
2
2
  import { FileMetaData, PageHeader } from "../parquet-thrift/index.js";
3
3
  class UFramedTransport extends TFramedTransport {
4
- constructor() {
5
- super(...arguments);
6
- this.readPos = 0;
7
- }
4
+ readPos = 0;
8
5
  }
9
6
  /**
10
7
  * Helper function that serializes a thrift object into a buffer
@@ -41,6 +41,7 @@ export const INSPECT_MAX_BYTES = 50;
41
41
  * The `Uint8Array` prototype remains unmodified.
42
42
  */
43
43
  export class Buffer extends Uint8Array {
44
+ static poolSize = 8192; // not used by this implementation
44
45
  // length: number; inherited
45
46
  get parent() {
46
47
  if (!Buffer.isBuffer(this))
@@ -52,6 +53,14 @@ export class Buffer extends Uint8Array {
52
53
  return undefined;
53
54
  return this.byteOffset;
54
55
  }
56
+ /** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
57
+ * to detect a Buffer instance. It's not possible to use `instanceof Buffer`
58
+ * reliably in a browserify context because there could be multiple different
59
+ * copies of the 'buffer' package in use. This method works even for Buffer
60
+ * instances that were created from another copy of the `buffer` package.
61
+ * @see: https://github.com/feross/buffer/issues/154
62
+ */
63
+ _isBuffer = true;
55
64
  constructor(arg, encodingOrOffset, length) {
56
65
  if (typeof arg !== 'number') {
57
66
  return Buffer.from(arg, encodingOrOffset, length);
@@ -65,14 +74,6 @@ export class Buffer extends Uint8Array {
65
74
  throw new TypeError('The "string" argument must be of type string. Received type number');
66
75
  }
67
76
  super(size < 0 ? 0 : checked(size) | 0);
68
- /** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
69
- * to detect a Buffer instance. It's not possible to use `instanceof Buffer`
70
- * reliably in a browserify context because there could be multiple different
71
- * copies of the 'buffer' package in use. This method works even for Buffer
72
- * instances that were created from another copy of the `buffer` package.
73
- * @see: https://github.com/feross/buffer/issues/154
74
- */
75
- this._isBuffer = true;
76
77
  return;
77
78
  }
78
79
  static from(value, encodingOrOffset, length) {
@@ -1064,7 +1065,6 @@ export class Buffer extends Uint8Array {
1064
1065
  }
1065
1066
  }
1066
1067
  }
1067
- Buffer.poolSize = 8192; // not used by this implementation
1068
1068
  function checkInt(buf, value, offset, ext, max, min) {
1069
1069
  if (!Buffer.isBuffer(buf))
1070
1070
  throw new TypeError('"buffer" argument must be a Buffer instance');
@@ -1603,7 +1603,6 @@ function writeDouble(buf, value, offset, littleEndian, noAssert) {
1603
1603
  return offset + 8;
1604
1604
  }
1605
1605
  // CUSTOM ERRORS
1606
- // =============
1607
1606
  // Simplified versions from Node, changed for Buffer-only usage
1608
1607
  const errors = {};
1609
1608
  function E(sym, getMessage, Base) {
@@ -1674,7 +1673,6 @@ function addNumericalSeparator(val) {
1674
1673
  return `${val.slice(0, i)}${res}`;
1675
1674
  }
1676
1675
  // CHECK FUNCTIONS
1677
- // ===============
1678
1676
  function checkBounds(buf, offset, byteLength) {
1679
1677
  validateNumber(offset, 'offset');
1680
1678
  if (buf[offset] === undefined || buf[offset + byteLength] === undefined) {
@@ -1718,7 +1716,6 @@ function boundsError(value, length, type) {
1718
1716
  throw new errors.ERR_OUT_OF_RANGE(type || 'offset', `>= ${type ? 1 : 0} and <= ${length}`, value);
1719
1717
  }
1720
1718
  // HELPER FUNCTIONS
1721
- // ================
1722
1719
  const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
1723
1720
  function base64clean(str) {
1724
1721
  // Node takes equal signs as end of the Base64 encoding
@@ -1,3 +1,30 @@
1
1
  /// <reference types="node" />
2
- export declare const Buffer: BufferConstructor;
2
+ export declare const Buffer: {
3
+ new (str: string, encoding?: string | undefined): Buffer;
4
+ new (size: number): Buffer;
5
+ new (array: Uint8Array): Buffer;
6
+ new (arrayBuffer: ArrayBuffer | SharedArrayBuffer): Buffer;
7
+ new (array: readonly any[]): Buffer;
8
+ new (buffer: Buffer): Buffer;
9
+ prototype: Buffer;
10
+ from(arrayBuffer: ArrayBuffer | SharedArrayBuffer, byteOffset?: number | undefined, length?: number | undefined): Buffer;
11
+ from(data: readonly any[]): Buffer;
12
+ from(data: Uint8Array): Buffer;
13
+ from(obj: {
14
+ valueOf(): string | object;
15
+ } | {
16
+ [Symbol.toPrimitive](hint: "string"): string;
17
+ }, byteOffset?: number | undefined, length?: number | undefined): Buffer;
18
+ from(str: string, encoding?: string | undefined): Buffer;
19
+ of(...items: number[]): Buffer;
20
+ isBuffer(obj: any): obj is Buffer;
21
+ isEncoding(encoding: string): boolean | undefined;
22
+ byteLength(string: string | ArrayBuffer | SharedArrayBuffer | NodeJS.TypedArray | DataView, encoding?: string | undefined): number;
23
+ concat(list: readonly Uint8Array[], totalLength?: number | undefined): Buffer;
24
+ compare(buf1: Uint8Array, buf2: Uint8Array): number;
25
+ alloc(size: number, fill?: string | number | Buffer | undefined, encoding?: string | undefined): Buffer;
26
+ allocUnsafe(size: number): Buffer;
27
+ allocUnsafeSlow(size: number): Buffer;
28
+ poolSize: number;
29
+ };
3
30
  //# sourceMappingURL=install-buffer-polyfill.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM,mBAA0B,CAAC"}
1
+ {"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;CAA0B,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "4.2.0-alpha.5",
3
+ "version": "4.2.0-alpha.6",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -46,27 +46,27 @@
46
46
  "copy-wasm": "cp ../../node_modules/parquet-wasm/esm2/arrow1_bg.wasm dist/arrow1_bg.wasm"
47
47
  },
48
48
  "browser": {
49
+ "./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js",
50
+ "./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
49
51
  "child_process": false,
52
+ "events": false,
53
+ "fs": false,
50
54
  "net": false,
51
- "tls": false,
52
55
  "stream": false,
53
- "fs": false,
54
- "util": false,
55
- "events": false,
56
- "./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
57
- "./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js"
56
+ "tls": false,
57
+ "util": false
58
58
  },
59
59
  "comments": [
60
60
  "base64-js and ieee754 are used by buffer polyfill"
61
61
  ],
62
62
  "dependencies": {
63
- "@loaders.gl/arrow": "4.2.0-alpha.5",
64
- "@loaders.gl/bson": "4.2.0-alpha.5",
65
- "@loaders.gl/compression": "4.2.0-alpha.5",
66
- "@loaders.gl/gis": "4.2.0-alpha.5",
67
- "@loaders.gl/loader-utils": "4.2.0-alpha.5",
68
- "@loaders.gl/schema": "4.2.0-alpha.5",
69
- "@loaders.gl/wkt": "4.2.0-alpha.5",
63
+ "@loaders.gl/arrow": "4.2.0-alpha.6",
64
+ "@loaders.gl/bson": "4.2.0-alpha.6",
65
+ "@loaders.gl/compression": "4.2.0-alpha.6",
66
+ "@loaders.gl/gis": "4.2.0-alpha.6",
67
+ "@loaders.gl/loader-utils": "4.2.0-alpha.6",
68
+ "@loaders.gl/schema": "4.2.0-alpha.6",
69
+ "@loaders.gl/wkt": "4.2.0-alpha.6",
70
70
  "async-mutex": "^0.2.2",
71
71
  "base64-js": "^1.3.1",
72
72
  "brotli": "^1.3.2",
@@ -93,5 +93,5 @@
93
93
  "@loaders.gl/core": "^4.0.0",
94
94
  "apache-arrow": ">= 15.0.0"
95
95
  },
96
- "gitHead": "32d95a81971f104e4dfeb88ab57065f05321a76a"
96
+ "gitHead": "37bd8ca71763529f18727ee4bf29dd176aa914ca"
97
97
  }
@@ -36,7 +36,8 @@ export type ParquetIterationProps = {
36
36
  */
37
37
  export class ParquetReader {
38
38
  static defaultProps: Required<ParquetReaderProps> = {
39
- defaultDictionarySize: 1e6,
39
+ // max ArrayBuffer size in js is 2Gb
40
+ defaultDictionarySize: 2147483648,
40
41
  preserveBinary: false
41
42
  };
42
43
 
@@ -350,6 +350,7 @@ function fromPrimitive_BSON(value: any) {
350
350
 
351
351
  function toPrimitive_TIME_MILLIS(value: any) {
352
352
  const v = parseInt(value, 10);
353
+ // eslint-disable-next-line @typescript-eslint/no-loss-of-precision
353
354
  if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
354
355
  throw new Error(`invalid value for TIME_MILLIS: ${value}`);
355
356
  }
@@ -1901,7 +1901,6 @@ function writeDouble(buf: Buffer, value, offset, littleEndian, noAssert): number
1901
1901
  }
1902
1902
 
1903
1903
  // CUSTOM ERRORS
1904
- // =============
1905
1904
 
1906
1905
  // Simplified versions from Node, changed for Buffer-only usage
1907
1906
  const errors: Record<string, any> = {};
@@ -1993,7 +1992,6 @@ function addNumericalSeparator(val) {
1993
1992
  }
1994
1993
 
1995
1994
  // CHECK FUNCTIONS
1996
- // ===============
1997
1995
 
1998
1996
  function checkBounds(buf, offset, byteLength) {
1999
1997
  validateNumber(offset, 'offset');
@@ -2042,7 +2040,6 @@ function boundsError(value, length, type?) {
2042
2040
  }
2043
2041
 
2044
2042
  // HELPER FUNCTIONS
2045
- // ================
2046
2043
 
2047
2044
  const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
2048
2045