@loaders.gl/parquet 4.2.0-alpha.5 → 4.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +200 -48
- package/dist/index.cjs.map +3 -3
- package/dist/lib/constants.js +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +4 -0
- package/dist/parquet-loader.d.ts +151 -7
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +9 -1
- package/dist/parquet-wasm-loader.d.ts +22 -3
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +2 -0
- package/dist/parquet-wasm-writer.d.ts +1 -3
- package/dist/parquet-wasm-writer.d.ts.map +1 -1
- package/dist/parquet-writer.d.ts +15 -3
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquetjs/compression.d.ts +1 -1
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +3 -1
- package/dist/parquetjs/encoder/parquet-encoder.js +14 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +7 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +13 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +8 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js +2 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +3 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +7 -0
- package/dist/parquetjs/parquet-thrift/IntType.js +2 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js +2 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js +13 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +3 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js +8 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js +3 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js +4 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +10 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +3 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js +6 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js +2 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +2 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js +2 -0
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +8 -5
- package/dist/parquetjs/schema/declare.js +4 -0
- package/dist/parquetjs/schema/schema.js +3 -0
- package/dist/parquetjs/schema/types.js +2 -0
- package/dist/parquetjs/utils/read-utils.js +1 -4
- package/dist/polyfills/buffer/buffer.js +9 -12
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
- package/package.json +15 -15
- package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
- package/src/lib/parsers/parse-parquet.ts +5 -0
- package/src/parquet-loader.ts +52 -51
- package/src/parquet-wasm-loader.ts +7 -4
- package/src/parquet-wasm-writer.ts +2 -2
- package/src/parquet-writer.ts +2 -2
- package/src/parquetjs/compression.ts +6 -2
- package/src/parquetjs/parser/parquet-reader.ts +2 -1
- package/src/parquetjs/schema/types.ts +3 -1
- package/src/polyfills/buffer/buffer.ts +0 -3
|
@@ -10,6 +10,14 @@ import * as DataPageHeaderV2 from "./DataPageHeaderV2.js";
|
|
|
10
10
|
import * as DictionaryPageHeader from "./DictionaryPageHeader.js";
|
|
11
11
|
import * as IndexPageHeader from "./IndexPageHeader.js";
|
|
12
12
|
export class PageHeader {
|
|
13
|
+
type;
|
|
14
|
+
uncompressed_page_size;
|
|
15
|
+
compressed_page_size;
|
|
16
|
+
crc;
|
|
17
|
+
data_page_header;
|
|
18
|
+
index_page_header;
|
|
19
|
+
dictionary_page_header;
|
|
20
|
+
data_page_header_v2;
|
|
13
21
|
constructor(args) {
|
|
14
22
|
if (args != null && args.type != null) {
|
|
15
23
|
this.type = args.type;
|
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
import Int64 from 'node-int64';
|
|
8
8
|
import * as thrift from 'thrift';
|
|
9
9
|
export class PageLocation {
|
|
10
|
+
offset;
|
|
11
|
+
compressed_page_size;
|
|
12
|
+
first_row_index;
|
|
10
13
|
constructor(args) {
|
|
11
14
|
if (args != null && args.offset != null) {
|
|
12
15
|
if (typeof args.offset === 'number') {
|
|
@@ -9,6 +9,10 @@ import * as thrift from 'thrift';
|
|
|
9
9
|
import * as ColumnChunk from "./ColumnChunk.js";
|
|
10
10
|
import * as SortingColumn from "./SortingColumn.js";
|
|
11
11
|
export class RowGroup {
|
|
12
|
+
columns;
|
|
13
|
+
total_byte_size;
|
|
14
|
+
num_rows;
|
|
15
|
+
sorting_columns;
|
|
12
16
|
constructor(args) {
|
|
13
17
|
if (args != null && args.columns != null) {
|
|
14
18
|
this.columns = args.columns;
|
|
@@ -7,6 +7,16 @@
|
|
|
7
7
|
import * as thrift from 'thrift';
|
|
8
8
|
import * as LogicalType from "./LogicalType.js";
|
|
9
9
|
export class SchemaElement {
|
|
10
|
+
type;
|
|
11
|
+
type_length;
|
|
12
|
+
repetition_type;
|
|
13
|
+
name;
|
|
14
|
+
num_children;
|
|
15
|
+
converted_type;
|
|
16
|
+
scale;
|
|
17
|
+
precision;
|
|
18
|
+
field_id;
|
|
19
|
+
logicalType;
|
|
10
20
|
constructor(args) {
|
|
11
21
|
if (args != null && args.type != null) {
|
|
12
22
|
this.type = args.type;
|
|
@@ -7,6 +7,12 @@
|
|
|
7
7
|
import Int64 from 'node-int64';
|
|
8
8
|
import * as thrift from 'thrift';
|
|
9
9
|
export class Statistics {
|
|
10
|
+
max;
|
|
11
|
+
min;
|
|
12
|
+
null_count;
|
|
13
|
+
distinct_count;
|
|
14
|
+
max_value;
|
|
15
|
+
min_value;
|
|
10
16
|
constructor(args) {
|
|
11
17
|
if (args != null && args.max != null) {
|
|
12
18
|
this.max = args.max;
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
import * as thrift from 'thrift';
|
|
8
8
|
import * as TimeUnit from "./TimeUnit.js";
|
|
9
9
|
export class TimestampType {
|
|
10
|
+
isAdjustedToUTC;
|
|
11
|
+
unit;
|
|
10
12
|
constructor(args) {
|
|
11
13
|
if (args != null && args.isAdjustedToUTC != null) {
|
|
12
14
|
this.isAdjustedToUTC = args.isAdjustedToUTC;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,
|
|
1
|
+
{"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,4BAAyB;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,mCAAgC;AACpG,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,oBAAoB,EACrB,6BAA0B;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAI/C;IAEF,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAcjC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAyBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,oBAAoB,EAC7B,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwBrB"}
|
|
@@ -12,8 +12,15 @@ import { decodeDataPages, decodePage } from "./decoders.js";
|
|
|
12
12
|
* rows from a parquet file use the ParquetReader instead
|
|
13
13
|
*/
|
|
14
14
|
export class ParquetReader {
|
|
15
|
+
static defaultProps = {
|
|
16
|
+
// max ArrayBuffer size in js is 2Gb
|
|
17
|
+
defaultDictionarySize: 2147483648,
|
|
18
|
+
preserveBinary: false
|
|
19
|
+
};
|
|
20
|
+
props;
|
|
21
|
+
file;
|
|
22
|
+
metadata = null;
|
|
15
23
|
constructor(file, props) {
|
|
16
|
-
this.metadata = null;
|
|
17
24
|
this.file = file;
|
|
18
25
|
this.props = { ...ParquetReader.defaultProps, ...props };
|
|
19
26
|
}
|
|
@@ -198,7 +205,3 @@ export class ParquetReader {
|
|
|
198
205
|
return decodedPage.dictionary;
|
|
199
206
|
}
|
|
200
207
|
}
|
|
201
|
-
ParquetReader.defaultProps = {
|
|
202
|
-
defaultDictionarySize: 1e6,
|
|
203
|
-
preserveBinary: false
|
|
204
|
-
};
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
/** @
|
|
2
2
|
* Holds data for one row group (column chunks) */
|
|
3
3
|
export class ParquetRowGroup {
|
|
4
|
+
/** Number of rows in this page */
|
|
5
|
+
rowCount;
|
|
6
|
+
/** Map of Column chunks */
|
|
7
|
+
columnData;
|
|
4
8
|
constructor(rowCount = 0, columnData = {}) {
|
|
5
9
|
this.rowCount = rowCount;
|
|
6
10
|
this.columnData = columnData;
|
|
@@ -296,6 +296,7 @@ function fromPrimitive_JSON(value) {
|
|
|
296
296
|
return JSON.parse(value);
|
|
297
297
|
}
|
|
298
298
|
function toPrimitive_BSON(value) {
|
|
299
|
+
// @ts-ignore
|
|
299
300
|
const arrayBuffer = BSONWriter.encodeSync?.(value);
|
|
300
301
|
return Buffer.from(arrayBuffer);
|
|
301
302
|
}
|
|
@@ -304,6 +305,7 @@ function fromPrimitive_BSON(value) {
|
|
|
304
305
|
}
|
|
305
306
|
function toPrimitive_TIME_MILLIS(value) {
|
|
306
307
|
const v = parseInt(value, 10);
|
|
308
|
+
// eslint-disable-next-line @typescript-eslint/no-loss-of-precision
|
|
307
309
|
if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
|
|
308
310
|
throw new Error(`invalid value for TIME_MILLIS: ${value}`);
|
|
309
311
|
}
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
import { TBufferedTransport, TCompactProtocol, TFramedTransport } from "../parquet-thrift/index.js";
|
|
2
2
|
import { FileMetaData, PageHeader } from "../parquet-thrift/index.js";
|
|
3
3
|
class UFramedTransport extends TFramedTransport {
|
|
4
|
-
|
|
5
|
-
super(...arguments);
|
|
6
|
-
this.readPos = 0;
|
|
7
|
-
}
|
|
4
|
+
readPos = 0;
|
|
8
5
|
}
|
|
9
6
|
/**
|
|
10
7
|
* Helper function that serializes a thrift object into a buffer
|
|
@@ -41,6 +41,7 @@ export const INSPECT_MAX_BYTES = 50;
|
|
|
41
41
|
* The `Uint8Array` prototype remains unmodified.
|
|
42
42
|
*/
|
|
43
43
|
export class Buffer extends Uint8Array {
|
|
44
|
+
static poolSize = 8192; // not used by this implementation
|
|
44
45
|
// length: number; inherited
|
|
45
46
|
get parent() {
|
|
46
47
|
if (!Buffer.isBuffer(this))
|
|
@@ -52,6 +53,14 @@ export class Buffer extends Uint8Array {
|
|
|
52
53
|
return undefined;
|
|
53
54
|
return this.byteOffset;
|
|
54
55
|
}
|
|
56
|
+
/** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
|
|
57
|
+
* to detect a Buffer instance. It's not possible to use `instanceof Buffer`
|
|
58
|
+
* reliably in a browserify context because there could be multiple different
|
|
59
|
+
* copies of the 'buffer' package in use. This method works even for Buffer
|
|
60
|
+
* instances that were created from another copy of the `buffer` package.
|
|
61
|
+
* @see: https://github.com/feross/buffer/issues/154
|
|
62
|
+
*/
|
|
63
|
+
_isBuffer = true;
|
|
55
64
|
constructor(arg, encodingOrOffset, length) {
|
|
56
65
|
if (typeof arg !== 'number') {
|
|
57
66
|
return Buffer.from(arg, encodingOrOffset, length);
|
|
@@ -65,14 +74,6 @@ export class Buffer extends Uint8Array {
|
|
|
65
74
|
throw new TypeError('The "string" argument must be of type string. Received type number');
|
|
66
75
|
}
|
|
67
76
|
super(size < 0 ? 0 : checked(size) | 0);
|
|
68
|
-
/** This property is used by `Buffer.isBuffer` (and the `is-buffer` npm package)
|
|
69
|
-
* to detect a Buffer instance. It's not possible to use `instanceof Buffer`
|
|
70
|
-
* reliably in a browserify context because there could be multiple different
|
|
71
|
-
* copies of the 'buffer' package in use. This method works even for Buffer
|
|
72
|
-
* instances that were created from another copy of the `buffer` package.
|
|
73
|
-
* @see: https://github.com/feross/buffer/issues/154
|
|
74
|
-
*/
|
|
75
|
-
this._isBuffer = true;
|
|
76
77
|
return;
|
|
77
78
|
}
|
|
78
79
|
static from(value, encodingOrOffset, length) {
|
|
@@ -1064,7 +1065,6 @@ export class Buffer extends Uint8Array {
|
|
|
1064
1065
|
}
|
|
1065
1066
|
}
|
|
1066
1067
|
}
|
|
1067
|
-
Buffer.poolSize = 8192; // not used by this implementation
|
|
1068
1068
|
function checkInt(buf, value, offset, ext, max, min) {
|
|
1069
1069
|
if (!Buffer.isBuffer(buf))
|
|
1070
1070
|
throw new TypeError('"buffer" argument must be a Buffer instance');
|
|
@@ -1603,7 +1603,6 @@ function writeDouble(buf, value, offset, littleEndian, noAssert) {
|
|
|
1603
1603
|
return offset + 8;
|
|
1604
1604
|
}
|
|
1605
1605
|
// CUSTOM ERRORS
|
|
1606
|
-
// =============
|
|
1607
1606
|
// Simplified versions from Node, changed for Buffer-only usage
|
|
1608
1607
|
const errors = {};
|
|
1609
1608
|
function E(sym, getMessage, Base) {
|
|
@@ -1674,7 +1673,6 @@ function addNumericalSeparator(val) {
|
|
|
1674
1673
|
return `${val.slice(0, i)}${res}`;
|
|
1675
1674
|
}
|
|
1676
1675
|
// CHECK FUNCTIONS
|
|
1677
|
-
// ===============
|
|
1678
1676
|
function checkBounds(buf, offset, byteLength) {
|
|
1679
1677
|
validateNumber(offset, 'offset');
|
|
1680
1678
|
if (buf[offset] === undefined || buf[offset + byteLength] === undefined) {
|
|
@@ -1718,7 +1716,6 @@ function boundsError(value, length, type) {
|
|
|
1718
1716
|
throw new errors.ERR_OUT_OF_RANGE(type || 'offset', `>= ${type ? 1 : 0} and <= ${length}`, value);
|
|
1719
1717
|
}
|
|
1720
1718
|
// HELPER FUNCTIONS
|
|
1721
|
-
// ================
|
|
1722
1719
|
const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
|
|
1723
1720
|
function base64clean(str) {
|
|
1724
1721
|
// Node takes equal signs as end of the Base64 encoding
|
|
@@ -1,3 +1,30 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
|
-
export declare const Buffer:
|
|
2
|
+
export declare const Buffer: {
|
|
3
|
+
new (str: string, encoding?: string | undefined): Buffer;
|
|
4
|
+
new (size: number): Buffer;
|
|
5
|
+
new (array: Uint8Array): Buffer;
|
|
6
|
+
new (arrayBuffer: ArrayBuffer | SharedArrayBuffer): Buffer;
|
|
7
|
+
new (array: readonly any[]): Buffer;
|
|
8
|
+
new (buffer: Buffer): Buffer;
|
|
9
|
+
prototype: Buffer;
|
|
10
|
+
from(arrayBuffer: ArrayBuffer | SharedArrayBuffer, byteOffset?: number | undefined, length?: number | undefined): Buffer;
|
|
11
|
+
from(data: readonly any[]): Buffer;
|
|
12
|
+
from(data: Uint8Array): Buffer;
|
|
13
|
+
from(obj: {
|
|
14
|
+
valueOf(): string | object;
|
|
15
|
+
} | {
|
|
16
|
+
[Symbol.toPrimitive](hint: "string"): string;
|
|
17
|
+
}, byteOffset?: number | undefined, length?: number | undefined): Buffer;
|
|
18
|
+
from(str: string, encoding?: string | undefined): Buffer;
|
|
19
|
+
of(...items: number[]): Buffer;
|
|
20
|
+
isBuffer(obj: any): obj is Buffer;
|
|
21
|
+
isEncoding(encoding: string): boolean | undefined;
|
|
22
|
+
byteLength(string: string | ArrayBuffer | SharedArrayBuffer | NodeJS.TypedArray | DataView, encoding?: string | undefined): number;
|
|
23
|
+
concat(list: readonly Uint8Array[], totalLength?: number | undefined): Buffer;
|
|
24
|
+
compare(buf1: Uint8Array, buf2: Uint8Array): number;
|
|
25
|
+
alloc(size: number, fill?: string | number | Buffer | undefined, encoding?: string | undefined): Buffer;
|
|
26
|
+
allocUnsafe(size: number): Buffer;
|
|
27
|
+
allocUnsafeSlow(size: number): Buffer;
|
|
28
|
+
poolSize: number;
|
|
29
|
+
};
|
|
3
30
|
//# sourceMappingURL=install-buffer-polyfill.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"install-buffer-polyfill.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/install-buffer-polyfill.ts"],"names":[],"mappings":";AAOA,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;CAA0B,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/parquet",
|
|
3
|
-
"version": "4.2.0-alpha.5",
|
|
3
|
+
"version": "4.2.0-beta.1",
|
|
4
4
|
"description": "Framework-independent loader for Apache Parquet files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -46,27 +46,27 @@
|
|
|
46
46
|
"copy-wasm": "cp ../../node_modules/parquet-wasm/esm2/arrow1_bg.wasm dist/arrow1_bg.wasm"
|
|
47
47
|
},
|
|
48
48
|
"browser": {
|
|
49
|
+
"./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js",
|
|
50
|
+
"./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
|
|
49
51
|
"child_process": false,
|
|
52
|
+
"events": false,
|
|
53
|
+
"fs": false,
|
|
50
54
|
"net": false,
|
|
51
|
-
"tls": false,
|
|
52
55
|
"stream": false,
|
|
53
|
-
"
|
|
54
|
-
"util": false
|
|
55
|
-
"events": false,
|
|
56
|
-
"./src/polyfills/buffer/buffer-polyfill.node.ts": "./src/polyfills/buffer/buffer-polyfill.browser.ts",
|
|
57
|
-
"./dist/polyfills/buffer/buffer-polyfill.node.js": "./dist/polyfills/buffer/buffer-polyfill.browser.js"
|
|
56
|
+
"tls": false,
|
|
57
|
+
"util": false
|
|
58
58
|
},
|
|
59
59
|
"comments": [
|
|
60
60
|
"base64-js and ieee754 are used by buffer polyfill"
|
|
61
61
|
],
|
|
62
62
|
"dependencies": {
|
|
63
|
-
"@loaders.gl/arrow": "4.2.0-alpha.5",
|
|
64
|
-
"@loaders.gl/bson": "4.2.0-alpha.5",
|
|
65
|
-
"@loaders.gl/compression": "4.2.0-alpha.5",
|
|
66
|
-
"@loaders.gl/gis": "4.2.0-alpha.5",
|
|
67
|
-
"@loaders.gl/loader-utils": "4.2.0-alpha.5",
|
|
68
|
-
"@loaders.gl/schema": "4.2.0-alpha.5",
|
|
69
|
-
"@loaders.gl/wkt": "4.2.0-alpha.5",
|
|
63
|
+
"@loaders.gl/arrow": "4.2.0-beta.1",
|
|
64
|
+
"@loaders.gl/bson": "4.2.0-beta.1",
|
|
65
|
+
"@loaders.gl/compression": "4.2.0-beta.1",
|
|
66
|
+
"@loaders.gl/gis": "4.2.0-beta.1",
|
|
67
|
+
"@loaders.gl/loader-utils": "4.2.0-beta.1",
|
|
68
|
+
"@loaders.gl/schema": "4.2.0-beta.1",
|
|
69
|
+
"@loaders.gl/wkt": "4.2.0-beta.1",
|
|
70
70
|
"async-mutex": "^0.2.2",
|
|
71
71
|
"base64-js": "^1.3.1",
|
|
72
72
|
"brotli": "^1.3.2",
|
|
@@ -93,5 +93,5 @@
|
|
|
93
93
|
"@loaders.gl/core": "^4.0.0",
|
|
94
94
|
"apache-arrow": ">= 15.0.0"
|
|
95
95
|
},
|
|
96
|
-
"gitHead": "
|
|
96
|
+
"gitHead": "c386a9196516fe3ff24847b40e6c77be039cf905"
|
|
97
97
|
}
|
|
@@ -11,6 +11,7 @@ import {ParquetSchema} from '../../parquetjs/schema/schema';
|
|
|
11
11
|
import {materializeColumns} from '../../parquetjs/schema/shred';
|
|
12
12
|
import {getSchemaFromParquetReader} from './get-parquet-schema';
|
|
13
13
|
import {installBufferPolyfill} from '../../polyfills/buffer/index';
|
|
14
|
+
import {preloadCompressions} from '../../parquetjs/compression';
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* @deprecated
|
|
@@ -20,6 +21,8 @@ export async function parseParquetFileInColumns(
|
|
|
20
21
|
options?: ParquetLoaderOptions
|
|
21
22
|
): Promise<ColumnarTable> {
|
|
22
23
|
installBufferPolyfill();
|
|
24
|
+
await preloadCompressions(options);
|
|
25
|
+
|
|
23
26
|
for await (const batch of parseParquetFileInColumnarBatches(file, options)) {
|
|
24
27
|
return {
|
|
25
28
|
shape: 'columnar-table',
|
|
@@ -37,6 +40,9 @@ export async function* parseParquetFileInColumnarBatches(
|
|
|
37
40
|
file: ReadableFile,
|
|
38
41
|
options?: ParquetLoaderOptions
|
|
39
42
|
): AsyncIterable<ColumnarTableBatch> {
|
|
43
|
+
installBufferPolyfill();
|
|
44
|
+
await preloadCompressions(options);
|
|
45
|
+
|
|
40
46
|
const reader = new ParquetReader(file);
|
|
41
47
|
|
|
42
48
|
// Extract schema and geo metadata
|
|
@@ -10,6 +10,7 @@ import type {ParquetRow} from '../../parquetjs/schema/declare';
|
|
|
10
10
|
import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
|
|
11
11
|
import {getSchemaFromParquetReader} from './get-parquet-schema';
|
|
12
12
|
import {installBufferPolyfill} from '../../polyfills/buffer/index';
|
|
13
|
+
import {preloadCompressions} from '../../parquetjs/compression';
|
|
13
14
|
|
|
14
15
|
/**
|
|
15
16
|
* * Parse a parquet file using parquetjs
|
|
@@ -22,6 +23,7 @@ export async function parseParquetFile(
|
|
|
22
23
|
options?: ParquetLoaderOptions
|
|
23
24
|
): Promise<ObjectRowTable> {
|
|
24
25
|
installBufferPolyfill();
|
|
26
|
+
await preloadCompressions(options);
|
|
25
27
|
|
|
26
28
|
const reader = new ParquetReader(file, {
|
|
27
29
|
preserveBinary: options?.parquet?.preserveBinary
|
|
@@ -57,6 +59,9 @@ export async function* parseParquetFileInBatches(
|
|
|
57
59
|
file: ReadableFile,
|
|
58
60
|
options?: ParquetLoaderOptions
|
|
59
61
|
): AsyncIterable<ObjectRowTableBatch> {
|
|
62
|
+
installBufferPolyfill();
|
|
63
|
+
await preloadCompressions(options);
|
|
64
|
+
|
|
60
65
|
const reader = new ParquetReader(file, {
|
|
61
66
|
preserveBinary: options?.parquet?.preserveBinary
|
|
62
67
|
});
|
package/src/parquet-loader.ts
CHANGED
|
@@ -46,11 +46,10 @@ export type ParquetLoaderOptions = LoaderOptions & {
|
|
|
46
46
|
/**
|
|
47
47
|
* ParquetJS table loader
|
|
48
48
|
*/
|
|
49
|
-
export const ParquetWorkerLoader: Loader<
|
|
50
|
-
ObjectRowTable,
|
|
51
|
-
ObjectRowTableBatch,
|
|
52
|
-
ParquetLoaderOptions
|
|
53
|
-
> = {
|
|
49
|
+
export const ParquetWorkerLoader = {
|
|
50
|
+
dataType: null as unknown as ObjectRowTable,
|
|
51
|
+
batchType: null as unknown as ObjectRowTableBatch,
|
|
52
|
+
|
|
54
53
|
name: 'Apache Parquet',
|
|
55
54
|
id: 'parquet',
|
|
56
55
|
module: 'parquet',
|
|
@@ -70,69 +69,75 @@ export const ParquetWorkerLoader: Loader<
|
|
|
70
69
|
preserveBinary: false
|
|
71
70
|
}
|
|
72
71
|
}
|
|
73
|
-
}
|
|
72
|
+
} as const satisfies Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>;
|
|
74
73
|
|
|
75
74
|
/** ParquetJS table loader */
|
|
76
|
-
export const ParquetLoader: LoaderWithParser<
|
|
77
|
-
ObjectRowTable | GeoJSONTable,
|
|
78
|
-
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
79
|
-
ParquetLoaderOptions
|
|
80
|
-
> = {
|
|
75
|
+
export const ParquetLoader = {
|
|
81
76
|
...ParquetWorkerLoader,
|
|
77
|
+
|
|
78
|
+
dataType: null as unknown as ObjectRowTable | GeoJSONTable,
|
|
79
|
+
batchType: null as unknown as ObjectRowTableBatch | GeoJSONTableBatch,
|
|
80
|
+
|
|
82
81
|
parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
|
|
83
82
|
parseParquetFile(new BlobFile(arrayBuffer), options),
|
|
84
83
|
|
|
85
84
|
parseFile: parseParquetFile,
|
|
86
85
|
parseFileInBatches: parseParquetFileInBatches
|
|
87
|
-
}
|
|
86
|
+
} as const satisfies LoaderWithParser<
|
|
87
|
+
ObjectRowTable | GeoJSONTable,
|
|
88
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
89
|
+
ParquetLoaderOptions
|
|
90
|
+
>;
|
|
88
91
|
|
|
89
92
|
// Defeat tree shaking
|
|
90
93
|
// @ts-ignore
|
|
91
94
|
ParquetLoader.Buffer = Buffer;
|
|
92
95
|
|
|
93
|
-
export const GeoParquetWorkerLoader
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
96
|
+
export const GeoParquetWorkerLoader = {
|
|
97
|
+
dataType: null as unknown as GeoJSONTable,
|
|
98
|
+
batchType: null as unknown as GeoJSONTableBatch,
|
|
99
|
+
|
|
100
|
+
name: 'Apache Parquet',
|
|
101
|
+
id: 'parquet',
|
|
102
|
+
module: 'parquet',
|
|
103
|
+
version: VERSION,
|
|
104
|
+
worker: true,
|
|
105
|
+
category: 'table',
|
|
106
|
+
extensions: ['parquet'],
|
|
107
|
+
mimeTypes: ['application/octet-stream'],
|
|
108
|
+
binary: true,
|
|
109
|
+
tests: ['PAR1', 'PARE'],
|
|
110
|
+
options: {
|
|
111
|
+
parquet: {
|
|
112
|
+
shape: 'geojson-table',
|
|
113
|
+
columnList: [],
|
|
114
|
+
geoparquet: true,
|
|
115
|
+
url: undefined,
|
|
116
|
+
preserveBinary: false
|
|
113
117
|
}
|
|
114
|
-
}
|
|
118
|
+
}
|
|
119
|
+
} as const satisfies Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions>;
|
|
115
120
|
|
|
116
121
|
/** ParquetJS table loader */
|
|
117
|
-
export const GeoParquetLoader: LoaderWithParser<
|
|
118
|
-
ObjectRowTable | GeoJSONTable,
|
|
119
|
-
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
120
|
-
ParquetLoaderOptions
|
|
121
|
-
> = {
|
|
122
|
+
export const GeoParquetLoader = {
|
|
122
123
|
...GeoParquetWorkerLoader,
|
|
124
|
+
|
|
123
125
|
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
124
126
|
return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
|
|
125
127
|
},
|
|
126
128
|
parseFile: parseGeoParquetFile,
|
|
127
129
|
parseFileInBatches: parseGeoParquetFileInBatches
|
|
128
|
-
}
|
|
130
|
+
} as const satisfies LoaderWithParser<
|
|
131
|
+
ObjectRowTable | GeoJSONTable,
|
|
132
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
133
|
+
ParquetLoaderOptions
|
|
134
|
+
>;
|
|
129
135
|
|
|
130
136
|
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
131
|
-
export const ParquetColumnarWorkerLoader: Loader<
|
|
132
|
-
ColumnarTable,
|
|
133
|
-
ColumnarTableBatch,
|
|
134
|
-
ParquetLoaderOptions
|
|
135
|
-
> = {
|
|
137
|
+
export const ParquetColumnarWorkerLoader = {
|
|
138
|
+
dataType: null as any as ColumnarTable,
|
|
139
|
+
batchType: null as any as ColumnarTableBatch,
|
|
140
|
+
|
|
136
141
|
name: 'Apache Parquet',
|
|
137
142
|
id: 'parquet',
|
|
138
143
|
module: 'parquet',
|
|
@@ -144,18 +149,14 @@ export const ParquetColumnarWorkerLoader: Loader<
|
|
|
144
149
|
binary: true,
|
|
145
150
|
tests: ['PAR1', 'PARE'],
|
|
146
151
|
options: ParquetLoader.options
|
|
147
|
-
}
|
|
152
|
+
} as const satisfies Loader<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
|
|
148
153
|
|
|
149
154
|
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
150
|
-
export const ParquetColumnarLoader: LoaderWithParser<
|
|
151
|
-
ColumnarTable,
|
|
152
|
-
ColumnarTableBatch,
|
|
153
|
-
ParquetLoaderOptions
|
|
154
|
-
> = {
|
|
155
|
+
export const ParquetColumnarLoader = {
|
|
155
156
|
...ParquetColumnarWorkerLoader,
|
|
156
157
|
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
157
158
|
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
|
|
158
159
|
},
|
|
159
160
|
parseFile: parseParquetFileInColumns,
|
|
160
161
|
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
161
|
-
}
|
|
162
|
+
} as const satisfies LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
|
|
@@ -17,7 +17,10 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
/** Parquet WASM table loader */
|
|
20
|
-
export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
20
|
+
export const ParquetWasmWorkerLoader = {
|
|
21
|
+
dataType: null as unknown as ArrowTable,
|
|
22
|
+
batchType: null as never,
|
|
23
|
+
|
|
21
24
|
name: 'Apache Parquet',
|
|
22
25
|
id: 'parquet-wasm',
|
|
23
26
|
module: 'parquet',
|
|
@@ -34,13 +37,13 @@ export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoade
|
|
|
34
37
|
wasmUrl: PARQUET_WASM_URL
|
|
35
38
|
}
|
|
36
39
|
}
|
|
37
|
-
}
|
|
40
|
+
} as const satisfies Loader<ArrowTable, never, ParquetWasmLoaderOptions>;
|
|
38
41
|
|
|
39
42
|
/** Parquet WASM table loader */
|
|
40
|
-
export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
43
|
+
export const ParquetWasmLoader = {
|
|
41
44
|
...ParquetWasmWorkerLoader,
|
|
42
45
|
parse(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions) {
|
|
43
46
|
options = {parquet: {...ParquetWasmLoader.options.parquet, ...options?.parquet}, ...options};
|
|
44
47
|
return parseParquetWasm(arrayBuffer, options);
|
|
45
48
|
}
|
|
46
|
-
}
|
|
49
|
+
} as const satisfies LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions>;
|
|
@@ -16,7 +16,7 @@ export type ParquetWriterOptions = WriterOptions & {
|
|
|
16
16
|
};
|
|
17
17
|
|
|
18
18
|
/** Parquet WASM writer */
|
|
19
|
-
export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWriterOptions> = {
|
|
19
|
+
export const ParquetWasmWriter = {
|
|
20
20
|
name: 'Apache Parquet',
|
|
21
21
|
id: 'parquet-wasm',
|
|
22
22
|
module: 'parquet',
|
|
@@ -33,4 +33,4 @@ export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWrit
|
|
|
33
33
|
options = {parquet: {...ParquetWasmWriter.options.parquet, ...options?.parquet}, ...options};
|
|
34
34
|
return encode(arrowTable, options);
|
|
35
35
|
}
|
|
36
|
-
}
|
|
36
|
+
} as const satisfies WriterWithEncoder<ArrowTable, never, ParquetWriterOptions>;
|
package/src/parquet-writer.ts
CHANGED
|
@@ -11,7 +11,7 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
|
11
11
|
|
|
12
12
|
export type ParquetWriterOptions = {};
|
|
13
13
|
|
|
14
|
-
export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOptions> = {
|
|
14
|
+
export const ParquetWriter = {
|
|
15
15
|
name: 'Apache Parquet',
|
|
16
16
|
id: 'parquet',
|
|
17
17
|
module: 'parquet',
|
|
@@ -22,7 +22,7 @@ export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOp
|
|
|
22
22
|
options: {},
|
|
23
23
|
encode: async (data, options) => encodeSync(data, options),
|
|
24
24
|
encodeSync
|
|
25
|
-
}
|
|
25
|
+
} as const satisfies WriterWithEncoder<Table, TableBatch, ParquetWriterOptions>;
|
|
26
26
|
|
|
27
27
|
function encodeSync(data, options?: ParquetWriterOptions) {
|
|
28
28
|
return new ArrayBuffer(0);
|