@loaders.gl/parquet 3.1.8 → 3.2.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +31 -15
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +25 -1
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +56 -0
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/index.js +14 -0
- package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +58 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +43 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +80 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/es5/parquet-loader.js +1 -1
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +30 -0
- package/dist/es5/parquet-wasm-loader.js.map +1 -0
- package/dist/es5/parquet-wasm-writer.js +28 -0
- package/dist/es5/parquet-wasm-writer.js.map +1 -0
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/esm/index.js +7 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/wasm/encode-parquet-wasm.js +16 -0
- package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
- package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +12 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +25 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +22 -0
- package/dist/esm/parquet-wasm-loader.js.map +1 -0
- package/dist/esm/parquet-wasm-writer.js +19 -0
- package/dist/esm/parquet-wasm-writer.js.map +1 -0
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/index.d.ts +18 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -1
- package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
- package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
- package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/index.js +5 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.js +34 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.js +27 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
- package/dist/parquet-wasm-loader.d.ts +23 -0
- package/dist/parquet-wasm-loader.d.ts.map +1 -0
- package/dist/parquet-wasm-loader.js +27 -0
- package/dist/parquet-wasm-writer.d.ts +3 -0
- package/dist/parquet-wasm-writer.d.ts.map +1 -0
- package/dist/parquet-wasm-writer.js +23 -0
- package/dist/parquet-worker.js +32 -16
- package/dist/parquet-worker.js.map +3 -3
- package/package.json +13 -7
- package/src/index.ts +9 -1
- package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
- package/src/lib/wasm/load-wasm/index.ts +1 -0
- package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
- package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
- package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
- package/src/parquet-wasm-loader.ts +36 -0
- package/src/parquet-wasm-writer.ts +24 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/parquet",
|
|
3
|
-
"version": "3.1.8",
|
|
3
|
+
"version": "3.2.0-alpha.3",
|
|
4
4
|
"description": "Framework-independent loader for Apache Parquet files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"publishConfig": {
|
|
@@ -36,12 +36,13 @@
|
|
|
36
36
|
"child_process": false,
|
|
37
37
|
"net": false,
|
|
38
38
|
"tls": false,
|
|
39
|
-
"lzo": false
|
|
39
|
+
"lzo": false,
|
|
40
|
+
"./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
|
|
40
41
|
},
|
|
41
42
|
"dependencies": {
|
|
42
|
-
"@loaders.gl/compression": "3.1.8",
|
|
43
|
-
"@loaders.gl/loader-utils": "3.1.8",
|
|
44
|
-
"@loaders.gl/schema": "3.1.8",
|
|
43
|
+
"@loaders.gl/compression": "3.2.0-alpha.3",
|
|
44
|
+
"@loaders.gl/loader-utils": "3.2.0-alpha.3",
|
|
45
|
+
"@loaders.gl/schema": "3.2.0-alpha.3",
|
|
45
46
|
"async-mutex": "^0.2.2",
|
|
46
47
|
"brotli": "^1.3.2",
|
|
47
48
|
"bson": "^1.0.4",
|
|
@@ -50,17 +51,22 @@
|
|
|
50
51
|
"lzo": "^0.4.11",
|
|
51
52
|
"node-int64": "^0.4.0",
|
|
52
53
|
"object-stream": "0.0.1",
|
|
54
|
+
"parquet-wasm": "^0.3.1",
|
|
53
55
|
"snappyjs": "^0.6.0",
|
|
54
56
|
"thrift": "^0.14.2",
|
|
55
57
|
"varint": "^5.0.0",
|
|
56
58
|
"zstd-codec": "^0.1"
|
|
57
59
|
},
|
|
60
|
+
"peerDependencies": {
|
|
61
|
+
"apache-arrow": "*"
|
|
62
|
+
},
|
|
58
63
|
"devDependencies": {
|
|
59
64
|
"@types/bson": "^4.0.0",
|
|
60
65
|
"@types/node": "^10.14.15",
|
|
61
66
|
"@types/node-int64": "^0.4.29",
|
|
62
67
|
"@types/thrift": "^0.10.8",
|
|
63
|
-
"@types/varint": "^5.0.0"
|
|
68
|
+
"@types/varint": "^5.0.0",
|
|
69
|
+
"apache-arrow": "^4.0.0"
|
|
64
70
|
},
|
|
65
|
-
"gitHead": "
|
|
71
|
+
"gitHead": "f0d4b801efeb7094283106352ee759eccfb21f10"
|
|
66
72
|
}
|
package/src/index.ts
CHANGED
|
@@ -2,10 +2,12 @@ import type {LoaderWithParser} from '@loaders.gl/loader-utils';
|
|
|
2
2
|
|
|
3
3
|
// ParquetLoader
|
|
4
4
|
|
|
5
|
+
import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
|
|
5
6
|
import {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';
|
|
6
7
|
import {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';
|
|
8
|
+
import {parseParquet as parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
|
|
7
9
|
|
|
8
|
-
export {ParquetWorkerLoader};
|
|
10
|
+
export {ParquetWorkerLoader, ParquetWasmWorkerLoader};
|
|
9
11
|
|
|
10
12
|
/** ParquetJS table loader */
|
|
11
13
|
export const ParquetLoader = {
|
|
@@ -14,9 +16,15 @@ export const ParquetLoader = {
|
|
|
14
16
|
parseFileInBatches: parseParquetFileInBatches
|
|
15
17
|
};
|
|
16
18
|
|
|
19
|
+
export const ParquetWasmLoader = {
|
|
20
|
+
...ParquetWasmWorkerLoader,
|
|
21
|
+
parse: parseParquetWasm
|
|
22
|
+
};
|
|
23
|
+
|
|
17
24
|
// ParquetWriter
|
|
18
25
|
|
|
19
26
|
export {ParquetWriter as _ParquetWriter} from './parquet-writer';
|
|
27
|
+
export {ParquetWasmWriter} from './parquet-wasm-writer';
|
|
20
28
|
|
|
21
29
|
// EXPERIMENTAL - expose the internal parquetjs API
|
|
22
30
|
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type {Table} from 'apache-arrow';
|
|
2
|
+
import type {WriterOptions} from '@loaders.gl/loader-utils';
|
|
3
|
+
|
|
4
|
+
import {RecordBatchStreamWriter} from 'apache-arrow';
|
|
5
|
+
import {loadWasm} from './load-wasm';
|
|
6
|
+
|
|
7
|
+
export type ParquetWriterOptions = WriterOptions & {
|
|
8
|
+
parquet?: {
|
|
9
|
+
wasmUrl?: string;
|
|
10
|
+
};
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Encode Arrow Table to Parquet buffer
|
|
15
|
+
*/
|
|
16
|
+
export async function encode(table: Table, options?: ParquetWriterOptions): Promise<ArrayBuffer> {
|
|
17
|
+
const wasmUrl = options?.parquet?.wasmUrl;
|
|
18
|
+
const wasm = await loadWasm(wasmUrl);
|
|
19
|
+
|
|
20
|
+
const arrowIPCBytes = tableToIPC(table);
|
|
21
|
+
// TODO: provide options for how to write table.
|
|
22
|
+
const writerProperties = new wasm.WriterPropertiesBuilder().build();
|
|
23
|
+
const parquetBytes = wasm.writeParquet(arrowIPCBytes, writerProperties);
|
|
24
|
+
return parquetBytes.buffer.slice(
|
|
25
|
+
parquetBytes.byteOffset,
|
|
26
|
+
parquetBytes.byteLength + parquetBytes.byteOffset
|
|
27
|
+
);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Serialize a {@link Table} to the IPC format. This function is a convenience
|
|
32
|
+
* wrapper for {@link RecordBatchStreamWriter} and {@link RecordBatchFileWriter}.
|
|
33
|
+
* Opposite of {@link tableFromIPC}.
|
|
34
|
+
*
|
|
35
|
+
* @param table The Table to serialize.
|
|
36
|
+
* @param type Whether to serialize the Table as a file or a stream.
|
|
37
|
+
*/
|
|
38
|
+
export function tableToIPC(table: Table): Uint8Array {
|
|
39
|
+
return RecordBatchStreamWriter.writeAll(table).toUint8Array(true);
|
|
40
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {loadWasm} from './load-wasm-node';
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import * as wasmEsm from 'parquet-wasm/esm2/arrow1';
|
|
2
|
+
|
|
3
|
+
let cached: typeof wasmEsm | null = null;
|
|
4
|
+
|
|
5
|
+
export async function loadWasm(wasmUrl?: string) {
|
|
6
|
+
if (cached !== null) {
|
|
7
|
+
return cached;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
// For ESM bundles, need to await the default export, which loads the WASM
|
|
11
|
+
await wasmEsm.default(wasmUrl);
|
|
12
|
+
cached = wasmEsm;
|
|
13
|
+
|
|
14
|
+
return wasmEsm;
|
|
15
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// eslint-disable
|
|
2
|
+
import type {RecordBatch} from 'apache-arrow';
|
|
3
|
+
import type {LoaderOptions} from '@loaders.gl/loader-utils';
|
|
4
|
+
import {Table, RecordBatchStreamReader} from 'apache-arrow';
|
|
5
|
+
import {loadWasm} from './load-wasm/load-wasm-node';
|
|
6
|
+
|
|
7
|
+
export type ParquetLoaderOptions = LoaderOptions & {
|
|
8
|
+
parquet?: {
|
|
9
|
+
type?: 'arrow-table';
|
|
10
|
+
wasmUrl?: string;
|
|
11
|
+
};
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
export async function parseParquet(
|
|
15
|
+
arrayBuffer: ArrayBuffer,
|
|
16
|
+
options?: ParquetLoaderOptions
|
|
17
|
+
): Promise<Table> {
|
|
18
|
+
const wasmUrl = options?.parquet?.wasmUrl;
|
|
19
|
+
const wasm = await loadWasm(wasmUrl);
|
|
20
|
+
|
|
21
|
+
const arr = new Uint8Array(arrayBuffer);
|
|
22
|
+
const arrowIPCUint8Arr = wasm.readParquet(arr);
|
|
23
|
+
const arrowIPCBuffer = arrowIPCUint8Arr.buffer.slice(
|
|
24
|
+
arrowIPCUint8Arr.byteOffset,
|
|
25
|
+
arrowIPCUint8Arr.byteLength + arrowIPCUint8Arr.byteOffset
|
|
26
|
+
);
|
|
27
|
+
const arrowTable = tableFromIPC(arrowIPCBuffer);
|
|
28
|
+
return arrowTable;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Deserialize the IPC format into a {@link Table}. This function is a
|
|
33
|
+
* convenience wrapper for {@link RecordBatchReader}. Opposite of {@link tableToIPC}.
|
|
34
|
+
*/
|
|
35
|
+
function tableFromIPC(input: ArrayBuffer): Table {
|
|
36
|
+
const reader = RecordBatchStreamReader.from(input);
|
|
37
|
+
const recordBatches: RecordBatch[] = [];
|
|
38
|
+
for (const recordBatch of reader) {
|
|
39
|
+
recordBatches.push(recordBatch);
|
|
40
|
+
}
|
|
41
|
+
return new Table(recordBatches);
|
|
42
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
2
|
+
|
|
3
|
+
// __VERSION__ is injected by babel-plugin-version-inline
|
|
4
|
+
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
5
|
+
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
6
|
+
|
|
7
|
+
export type ParquetLoaderOptions = LoaderOptions & {
|
|
8
|
+
parquet?: {
|
|
9
|
+
type?: 'arrow-table';
|
|
10
|
+
wasmUrl?: string;
|
|
11
|
+
};
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
const DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {
|
|
15
|
+
parquet: {
|
|
16
|
+
type: 'arrow-table',
|
|
17
|
+
wasmUrl: 'https://unpkg.com/parquet-wasm@0.3.1/esm2/arrow1_bg.wasm'
|
|
18
|
+
}
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
/** ParquetJS table loader */
|
|
22
|
+
export const ParquetWasmLoader = {
|
|
23
|
+
name: 'Apache Parquet',
|
|
24
|
+
id: 'parquet-wasm',
|
|
25
|
+
module: 'parquet',
|
|
26
|
+
version: VERSION,
|
|
27
|
+
worker: false,
|
|
28
|
+
category: 'table',
|
|
29
|
+
extensions: ['parquet'],
|
|
30
|
+
mimeTypes: ['application/octet-stream'],
|
|
31
|
+
binary: true,
|
|
32
|
+
tests: ['PAR1', 'PARE'],
|
|
33
|
+
options: DEFAULT_PARQUET_LOADER_OPTIONS
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
export const _typecheckParquetLoader: Loader = ParquetWasmLoader;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type {Writer} from '@loaders.gl/loader-utils';
|
|
2
|
+
import {encode, ParquetWriterOptions} from './lib/wasm/encode-parquet-wasm';
|
|
3
|
+
|
|
4
|
+
// __VERSION__ is injected by babel-plugin-version-inline
|
|
5
|
+
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
6
|
+
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
7
|
+
|
|
8
|
+
const DEFAULT_PARQUET_WRITER_OPTIONS: ParquetWriterOptions = {
|
|
9
|
+
parquet: {
|
|
10
|
+
wasmUrl: 'https://unpkg.com/parquet-wasm@0.3.1/esm2/arrow1_bg.wasm'
|
|
11
|
+
}
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
export const ParquetWasmWriter: Writer = {
|
|
15
|
+
name: 'Apache Parquet',
|
|
16
|
+
id: 'parquet-wasm',
|
|
17
|
+
module: 'parquet',
|
|
18
|
+
version: VERSION,
|
|
19
|
+
extensions: ['parquet'],
|
|
20
|
+
mimeTypes: ['application/octet-stream'],
|
|
21
|
+
encode,
|
|
22
|
+
binary: true,
|
|
23
|
+
options: DEFAULT_PARQUET_WRITER_OPTIONS
|
|
24
|
+
};
|