@loaders.gl/parquet 4.2.1 → 4.3.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +112 -24
- package/dist/index.cjs.map +4 -4
- package/dist/lib/constants.d.ts +1 -1
- package/dist/lib/constants.d.ts.map +1 -1
- package/dist/lib/constants.js +2 -2
- package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
- package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +10 -0
- package/dist/lib/utils/load-wasm.d.ts +3 -0
- package/dist/lib/utils/load-wasm.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
- package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.js +67 -0
- package/dist/parquet-loader.js +1 -1
- package/dist/parquet-wasm-loader.d.ts +17 -5
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +19 -4
- package/dist/parquet-wasm-writer.js +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquet_wasm_bg.wasm +0 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +1 -0
- package/package.json +13 -12
- package/src/lib/constants.ts +2 -1
- package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
- package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
- package/src/lib/parsers/parse-parquet.ts +10 -0
- package/src/lib/utils/make-stream-iterator.ts +87 -0
- package/src/parquet-wasm-loader.ts +36 -9
- package/src/parquet-wasm-writer.ts +1 -1
- package/src/parquetjs/codecs/rle.ts +3 -1
- package/dist/arrow1_bg.wasm +0 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm.d.ts +0 -3
- package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
- package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
- package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
- package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
- /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
- /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
- /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
|
@@ -2,16 +2,26 @@
|
|
|
2
2
|
// SPDX-License-Identifier: MIT
|
|
3
3
|
// Copyright (c) vis.gl contributors
|
|
4
4
|
|
|
5
|
+
import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
|
|
5
6
|
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
6
|
-
import
|
|
7
|
+
import {ReadableFile, BlobFile} from '@loaders.gl/loader-utils';
|
|
7
8
|
|
|
8
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
parseParquetFileWasm,
|
|
11
|
+
parseParquetFileInBatchesWasm
|
|
12
|
+
} from './lib/parsers/parse-parquet-wasm';
|
|
9
13
|
import {VERSION, PARQUET_WASM_URL} from './lib/constants';
|
|
10
14
|
|
|
11
15
|
/** Parquet WASM loader options */
|
|
12
16
|
export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
13
17
|
parquet?: {
|
|
14
|
-
|
|
18
|
+
shape?: 'arrow-table';
|
|
19
|
+
limit?: number; // Provide a limit to the number of rows to be read.
|
|
20
|
+
offset?: number; // Provide an offset to skip over the given number of rows.
|
|
21
|
+
batchSize?: number; // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
22
|
+
columns?: string[]; // The column names from the file to read.
|
|
23
|
+
rowGroups?: number[]; // Only read data from the provided row group indexes.
|
|
24
|
+
concurrency?: number; // The number of concurrent requests to make
|
|
15
25
|
wasmUrl?: string;
|
|
16
26
|
};
|
|
17
27
|
};
|
|
@@ -19,7 +29,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
|
19
29
|
/** Parquet WASM table loader */
|
|
20
30
|
export const ParquetWasmWorkerLoader = {
|
|
21
31
|
dataType: null as unknown as ArrowTable,
|
|
22
|
-
batchType: null as
|
|
32
|
+
batchType: null as unknown as ArrowTableBatch,
|
|
23
33
|
|
|
24
34
|
name: 'Apache Parquet',
|
|
25
35
|
id: 'parquet-wasm',
|
|
@@ -33,17 +43,34 @@ export const ParquetWasmWorkerLoader = {
|
|
|
33
43
|
tests: ['PAR1', 'PARE'],
|
|
34
44
|
options: {
|
|
35
45
|
parquet: {
|
|
36
|
-
|
|
46
|
+
shape: 'arrow-table',
|
|
47
|
+
limit: undefined, // Provide a limit to the number of rows to be read.
|
|
48
|
+
offset: 0, // Provide an offset to skip over the given number of rows.
|
|
49
|
+
batchSize: undefined, // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
50
|
+
columns: undefined, // The column names from the file to read.
|
|
51
|
+
rowGroups: undefined, // Only read data from the provided row group indexes.
|
|
52
|
+
concurrency: undefined, // The number of concurrent requests to make
|
|
37
53
|
wasmUrl: PARQUET_WASM_URL
|
|
38
54
|
}
|
|
39
55
|
}
|
|
40
|
-
} as const satisfies Loader<ArrowTable,
|
|
56
|
+
} as const satisfies Loader<ArrowTable, ArrowTableBatch, ParquetWasmLoaderOptions>;
|
|
41
57
|
|
|
42
58
|
/** Parquet WASM table loader */
|
|
43
59
|
export const ParquetWasmLoader = {
|
|
44
60
|
...ParquetWasmWorkerLoader,
|
|
61
|
+
|
|
45
62
|
parse(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions) {
|
|
46
|
-
|
|
47
|
-
return
|
|
63
|
+
const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
|
|
64
|
+
return parseParquetFileWasm(new BlobFile(arrayBuffer), wasmOptions);
|
|
65
|
+
},
|
|
66
|
+
|
|
67
|
+
parseFile(file: ReadableFile, options?: ParquetWasmLoaderOptions) {
|
|
68
|
+
const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
|
|
69
|
+
return parseParquetFileWasm(file, wasmOptions);
|
|
70
|
+
},
|
|
71
|
+
|
|
72
|
+
parseFileInBatches(file: ReadableFile, options?: ParquetWasmLoaderOptions) {
|
|
73
|
+
const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
|
|
74
|
+
return parseParquetFileInBatchesWasm(file, wasmOptions);
|
|
48
75
|
}
|
|
49
|
-
} as const satisfies LoaderWithParser<ArrowTable,
|
|
76
|
+
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, ParquetWasmLoaderOptions>;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import type {WriterWithEncoder} from '@loaders.gl/loader-utils';
|
|
6
6
|
import type {ArrowTable} from '@loaders.gl/arrow';
|
|
7
|
-
import {encode} from './lib/
|
|
7
|
+
import {encode} from './lib/encoders/encode-parquet-wasm';
|
|
8
8
|
import type {WriterOptions} from '@loaders.gl/loader-utils';
|
|
9
9
|
|
|
10
10
|
import {VERSION, PARQUET_WASM_URL} from './lib/constants';
|
|
@@ -97,7 +97,9 @@ export function decodeValues(
|
|
|
97
97
|
const count = header >> 1;
|
|
98
98
|
decodedValues = decodeRunRepeated(cursor, count, opts);
|
|
99
99
|
}
|
|
100
|
-
|
|
100
|
+
|
|
101
|
+
// strange failure in docusaurus / webpack if we don't cast the type here
|
|
102
|
+
for (const value of decodedValues as any[]) {
|
|
101
103
|
values.push(value);
|
|
102
104
|
}
|
|
103
105
|
}
|
package/dist/arrow1_bg.wasm
DELETED
|
Binary file
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"encode-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/encode-parquet-wasm.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAKlD,OAAO,KAAK,EAAC,oBAAoB,EAAC,qCAAkC;AAEpE;;GAEG;AACH,wBAAsB,MAAM,CAC1B,KAAK,EAAE,UAAU,EACjB,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,WAAW,CAAC,CAsBtB"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"load-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/load-wasm.ts"],"names":[],"mappings":"AAKA,OAAO,QAAQ,MAAM,cAAc,CAAC;AAMpC,wBAAsB,QAAQ,CAAC,OAAO,GAAE,MAAyB,4BAUhE"}
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
import type { ArrowTable } from '@loaders.gl/arrow';
|
|
2
|
-
import type { ParquetWasmLoaderOptions } from "../../parquet-wasm-loader.js";
|
|
3
|
-
export declare function parseParquetWasm(arrayBuffer: ArrayBuffer, options: ParquetWasmLoaderOptions): Promise<ArrowTable>;
|
|
4
|
-
//# sourceMappingURL=parse-parquet-wasm.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"parse-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/parse-parquet-wasm.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAElD,OAAO,KAAK,EAAC,wBAAwB,EAAC,qCAAkC;AAIxE,wBAAsB,gBAAgB,CACpC,WAAW,EAAE,WAAW,EACxB,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,UAAU,CAAC,CAkBrB"}
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
// loaders.gl
|
|
2
|
-
// SPDX-License-Identifier: MIT
|
|
3
|
-
// Copyright (c) vis.gl contributors
|
|
4
|
-
import { serializeArrowSchema } from '@loaders.gl/arrow';
|
|
5
|
-
import { loadWasm } from "./load-wasm.js";
|
|
6
|
-
import * as arrow from 'apache-arrow';
|
|
7
|
-
export async function parseParquetWasm(arrayBuffer, options) {
|
|
8
|
-
const arr = new Uint8Array(arrayBuffer);
|
|
9
|
-
const wasmUrl = options?.parquet?.wasmUrl;
|
|
10
|
-
const wasm = await loadWasm(wasmUrl);
|
|
11
|
-
const wasmTable = wasm.readParquet(arr);
|
|
12
|
-
try {
|
|
13
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
14
|
-
const arrowTable = arrow.tableFromIPC(ipcStream);
|
|
15
|
-
return {
|
|
16
|
-
shape: 'arrow-table',
|
|
17
|
-
schema: serializeArrowSchema(arrowTable.schema),
|
|
18
|
-
data: arrowTable
|
|
19
|
-
};
|
|
20
|
-
}
|
|
21
|
-
finally {
|
|
22
|
-
// wasmTable.free();
|
|
23
|
-
}
|
|
24
|
-
}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
// loaders.gl
|
|
2
|
-
// SPDX-License-Identifier: MIT
|
|
3
|
-
// Copyright (c) vis.gl contributors
|
|
4
|
-
|
|
5
|
-
// eslint-disable
|
|
6
|
-
import type {ArrowTable} from '@loaders.gl/arrow';
|
|
7
|
-
import {serializeArrowSchema} from '@loaders.gl/arrow';
|
|
8
|
-
import type {ParquetWasmLoaderOptions} from '../../parquet-wasm-loader';
|
|
9
|
-
import {loadWasm} from './load-wasm';
|
|
10
|
-
import * as arrow from 'apache-arrow';
|
|
11
|
-
|
|
12
|
-
export async function parseParquetWasm(
|
|
13
|
-
arrayBuffer: ArrayBuffer,
|
|
14
|
-
options: ParquetWasmLoaderOptions
|
|
15
|
-
): Promise<ArrowTable> {
|
|
16
|
-
const arr = new Uint8Array(arrayBuffer);
|
|
17
|
-
|
|
18
|
-
const wasmUrl = options?.parquet?.wasmUrl;
|
|
19
|
-
const wasm = await loadWasm(wasmUrl);
|
|
20
|
-
const wasmTable = wasm.readParquet(arr);
|
|
21
|
-
try {
|
|
22
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
23
|
-
const arrowTable = arrow.tableFromIPC(ipcStream);
|
|
24
|
-
|
|
25
|
-
return {
|
|
26
|
-
shape: 'arrow-table',
|
|
27
|
-
schema: serializeArrowSchema(arrowTable.schema),
|
|
28
|
-
data: arrowTable
|
|
29
|
-
};
|
|
30
|
-
} finally {
|
|
31
|
-
// wasmTable.free();
|
|
32
|
-
}
|
|
33
|
-
}
|