@loaders.gl/parquet 4.0.4 → 4.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js.map +1 -1
- package/dist/index.cjs +138 -66
- package/dist/index.d.ts +1 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -21
- package/dist/index.js.map +1 -1
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -1
- package/dist/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
- package/dist/lib/parsers/get-parquet-schema.js.map +1 -1
- package/dist/lib/parsers/parse-geoparquet.d.ts +6 -0
- package/dist/lib/parsers/parse-geoparquet.d.ts.map +1 -0
- package/dist/lib/parsers/parse-geoparquet.js +56 -0
- package/dist/lib/parsers/parse-geoparquet.js.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +6 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/lib/parsers/parse-parquet.d.ts +17 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -0
- package/dist/lib/parsers/{parse-parquet-to-rows.js → parse-parquet.js} +2 -8
- package/dist/lib/parsers/parse-parquet.js.map +1 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/lib/wasm/load-wasm-browser.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm-browser.js.map +1 -1
- package/dist/lib/wasm/load-wasm-node.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm-node.js.map +1 -1
- package/dist/lib/wasm/load-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm.js.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/parquet-loader.d.ts +14 -4
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +51 -5
- package/dist/parquet-loader.js.map +1 -1
- package/dist/parquet-wasm-loader.d.ts +4 -2
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +7 -2
- package/dist/parquet-wasm-loader.js.map +1 -1
- package/dist/parquet-wasm-writer.d.ts.map +1 -1
- package/dist/parquet-wasm-writer.js +1 -1
- package/dist/parquet-wasm-writer.js.map +1 -1
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquet-writer.js.map +1 -1
- package/dist/parquetjs/modules.d.ts +21 -0
- package/dist/polyfills/buffer/buffer-polyfill.browser.d.ts.map +1 -1
- package/dist/polyfills/buffer/buffer-polyfill.browser.js.map +1 -1
- package/dist/polyfills/buffer/buffer-polyfill.node.d.ts.map +1 -1
- package/dist/polyfills/buffer/buffer-polyfill.node.js.map +1 -1
- package/dist/polyfills/buffer/buffer.d.ts.map +1 -1
- package/dist/polyfills/buffer/buffer.js.map +1 -1
- package/dist/polyfills/buffer/index.d.ts.map +1 -1
- package/dist/polyfills/buffer/index.js.map +1 -1
- package/dist/polyfills/util.d.ts.map +1 -1
- package/dist/polyfills/util.js.map +1 -1
- package/dist/workers/parquet-worker.js +1 -1
- package/dist/workers/parquet-worker.js.map +1 -1
- package/package.json +17 -14
- package/src/constants.ts +2 -1
- package/src/index.ts +9 -61
- package/src/lib/arrow/convert-row-group-to-columns.ts +2 -1
- package/src/lib/arrow/convert-schema-from-parquet.ts +2 -1
- package/src/lib/arrow/convert-schema-to-parquet.ts +2 -1
- package/src/lib/parsers/get-parquet-schema.ts +4 -0
- package/src/lib/parsers/parse-geoparquet.ts +88 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +8 -1
- package/src/lib/parsers/{parse-parquet-to-rows.ts → parse-parquet.ts} +21 -21
- package/src/lib/wasm/encode-parquet-wasm.ts +4 -0
- package/src/lib/wasm/load-wasm-browser.ts +4 -0
- package/src/lib/wasm/load-wasm-node.ts +4 -0
- package/src/lib/wasm/load-wasm.ts +4 -0
- package/src/lib/wasm/parse-parquet-wasm.ts +4 -0
- package/src/parquet-loader.ts +91 -10
- package/src/parquet-wasm-loader.ts +12 -3
- package/src/parquet-wasm-writer.ts +2 -1
- package/src/parquet-writer.ts +2 -1
- package/src/polyfills/buffer/buffer-polyfill.browser.ts +3 -1
- package/src/polyfills/buffer/buffer-polyfill.node.ts +3 -1
- package/src/polyfills/buffer/buffer.ts +2 -1
- package/src/polyfills/buffer/index.ts +2 -1
- package/src/polyfills/util.js +2 -1
- package/src/workers/parquet-worker.ts +3 -2
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +0 -6
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +0 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js.map +0 -1
- package/dist/parquetjs/modules.d.js +0 -2
- package/dist/parquetjs/modules.d.js.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/polyfills/buffer/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,MAAM,IAAI,cAAc,EAAC,MAAM,UAAU,CAAC;AAClD,OAAO,EAAC,MAAM,EAAC,MAAM,2BAA2B,CAAC;AACjD,OAAO,EAAC,qBAAqB,EAAC,MAAM,wBAAwB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","names":["Buffer","BufferPolyfill","installBufferPolyfill"],"sources":["../../../src/polyfills/buffer/index.ts"],"sourcesContent":["// loaders.gl
|
|
1
|
+
{"version":3,"file":"index.js","names":["Buffer","BufferPolyfill","installBufferPolyfill"],"sources":["../../../src/polyfills/buffer/index.ts"],"sourcesContent":["// loaders.gl\n// SPDX-License-Identifier: MIT\n// Copyright (c) vis.gl contributors\nexport {Buffer as BufferPolyfill} from './buffer';\nexport {Buffer} from './install-buffer-polyfill';\nexport {installBufferPolyfill} from './buffer-polyfill.node';\n\n// import { installBufferPolyfill } from \"./buffer-polyfill.node\";\n\n// installBufferPolyfill();\n"],"mappings":"SAGQA,MAAM,IAAIC,cAAc;AAAA,SACxBD,MAAM;AAAA,SACNE,qBAAqB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.d.ts","sourceRoot":"","sources":["../../src/polyfills/util.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"util.d.ts","sourceRoot":"","sources":["../../src/polyfills/util.js"],"names":[],"mappings":"AAMA;;;EAAkD;AAClD;;;EAAkD"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","names":["TextEncoder","globalThis","TextDecoder"],"sources":["../../src/polyfills/util.js"],"sourcesContent":["// loaders.gl
|
|
1
|
+
{"version":3,"file":"util.js","names":["TextEncoder","globalThis","TextDecoder"],"sources":["../../src/polyfills/util.js"],"sourcesContent":["// loaders.gl\n// SPDX-License-Identifier: MIT\n// Copyright (c) vis.gl contributors\n\n// Polyfill for Node.js util library\n\nexport const TextEncoder = globalThis.TextEncoder;\nexport const TextDecoder = globalThis.TextDecoder;\n"],"mappings":"AAMA,OAAO,MAAMA,WAAW,GAAGC,UAAU,CAACD,WAAW;AACjD,OAAO,MAAME,WAAW,GAAGD,UAAU,CAACC,WAAW"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-worker.js","names":["createLoaderWorker","ParquetLoader"],"sources":["../../src/workers/parquet-worker.ts"],"sourcesContent":["// loaders.gl
|
|
1
|
+
{"version":3,"file":"parquet-worker.js","names":["createLoaderWorker","ParquetLoader"],"sources":["../../src/workers/parquet-worker.ts"],"sourcesContent":["// loaders.gl\n// SPDX-License-Identifier: MIT\n// Copyright (c) vis.gl contributors\n\nimport {createLoaderWorker} from '@loaders.gl/loader-utils';\nimport {ParquetLoader} from '../parquet-loader';\n\ncreateLoaderWorker(ParquetLoader);\n"],"mappings":"AAIA,SAAQA,kBAAkB,QAAO,0BAA0B;AAAC,SACpDC,aAAa;AAErBD,kBAAkB,CAACC,aAAa,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/parquet",
|
|
3
|
-
"version": "4.0.
|
|
3
|
+
"version": "4.1.0-alpha.10",
|
|
4
4
|
"description": "Framework-independent loader for Apache Parquet files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -12,21 +12,24 @@
|
|
|
12
12
|
"url": "https://github.com/visgl/loaders.gl"
|
|
13
13
|
},
|
|
14
14
|
"keywords": [
|
|
15
|
-
"webgl",
|
|
16
15
|
"loader",
|
|
17
16
|
"parser",
|
|
18
17
|
"table",
|
|
19
|
-
"
|
|
20
|
-
"
|
|
18
|
+
"parquet",
|
|
19
|
+
"streaming",
|
|
20
|
+
"cloud native",
|
|
21
|
+
"geoparquet",
|
|
22
|
+
"Apache Parquet",
|
|
23
|
+
"apache-parquet"
|
|
21
24
|
],
|
|
22
25
|
"types": "dist/index.d.ts",
|
|
23
26
|
"main": "dist/index.cjs",
|
|
24
27
|
"module": "dist/index.js",
|
|
25
28
|
"exports": {
|
|
26
29
|
".": {
|
|
30
|
+
"types": "./dist/index.d.ts",
|
|
27
31
|
"import": "./dist/index.js",
|
|
28
|
-
"require": "./dist/index.cjs"
|
|
29
|
-
"types": "./dist/index.d.ts"
|
|
32
|
+
"require": "./dist/index.cjs"
|
|
30
33
|
}
|
|
31
34
|
},
|
|
32
35
|
"sideEffects": false,
|
|
@@ -58,13 +61,13 @@
|
|
|
58
61
|
"base64-js and ieee754 are used by buffer polyfill"
|
|
59
62
|
],
|
|
60
63
|
"dependencies": {
|
|
61
|
-
"@loaders.gl/arrow": "4.0.
|
|
62
|
-
"@loaders.gl/bson": "4.0.
|
|
63
|
-
"@loaders.gl/compression": "4.0.
|
|
64
|
-
"@loaders.gl/gis": "4.0.
|
|
65
|
-
"@loaders.gl/loader-utils": "4.0.
|
|
66
|
-
"@loaders.gl/schema": "4.0.
|
|
67
|
-
"@loaders.gl/wkt": "4.0.
|
|
64
|
+
"@loaders.gl/arrow": "4.1.0-alpha.10",
|
|
65
|
+
"@loaders.gl/bson": "4.1.0-alpha.10",
|
|
66
|
+
"@loaders.gl/compression": "4.1.0-alpha.10",
|
|
67
|
+
"@loaders.gl/gis": "4.1.0-alpha.10",
|
|
68
|
+
"@loaders.gl/loader-utils": "4.1.0-alpha.10",
|
|
69
|
+
"@loaders.gl/schema": "4.1.0-alpha.10",
|
|
70
|
+
"@loaders.gl/wkt": "4.1.0-alpha.10",
|
|
68
71
|
"async-mutex": "^0.2.2",
|
|
69
72
|
"base64-js": "^1.3.1",
|
|
70
73
|
"brotli": "^1.3.2",
|
|
@@ -90,5 +93,5 @@
|
|
|
90
93
|
"@types/varint": "^5.0.0",
|
|
91
94
|
"apache-arrow": "^13.0.0"
|
|
92
95
|
},
|
|
93
|
-
"gitHead": "
|
|
96
|
+
"gitHead": "19f43c2d90d8b50860c3f8e487429779a386287d"
|
|
94
97
|
}
|
package/src/constants.ts
CHANGED
package/src/index.ts
CHANGED
|
@@ -1,76 +1,24 @@
|
|
|
1
|
-
// loaders.gl
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
2
3
|
// Copyright (c) vis.gl contributors
|
|
3
4
|
|
|
4
5
|
export {Buffer} from './polyfills/buffer/install-buffer-polyfill';
|
|
5
6
|
|
|
6
|
-
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
|
|
7
|
-
import type {
|
|
8
|
-
ObjectRowTable,
|
|
9
|
-
ObjectRowTableBatch,
|
|
10
|
-
ColumnarTable,
|
|
11
|
-
ColumnarTableBatch,
|
|
12
|
-
GeoJSONTable,
|
|
13
|
-
GeoJSONTableBatch
|
|
14
|
-
} from '@loaders.gl/schema';
|
|
15
|
-
|
|
16
7
|
// import {ArrowTable, ArrowTableBatch} from '@loaders.gl/arrow';
|
|
17
8
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
9
|
+
export {
|
|
10
|
+
ParquetWorkerLoader,
|
|
11
|
+
ParquetLoader,
|
|
12
|
+
GeoParquetWorkerLoader,
|
|
13
|
+
GeoParquetLoader,
|
|
14
|
+
ParquetColumnarWorkerLoader,
|
|
15
|
+
ParquetColumnarLoader
|
|
25
16
|
} from './parquet-loader';
|
|
26
|
-
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';
|
|
27
|
-
import {
|
|
28
|
-
parseParquetFileInColumns,
|
|
29
|
-
parseParquetFileInColumnarBatches
|
|
30
|
-
} from './lib/parsers/parse-parquet-to-columns';
|
|
31
17
|
|
|
32
18
|
// import type {ParquetWasmLoaderOptions} from './lib/wasm/parse-parquet-wasm';
|
|
33
19
|
// import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
|
|
34
20
|
// import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
|
|
35
21
|
|
|
36
|
-
export {ParquetWorkerLoader};
|
|
37
|
-
// export {ParquetWasmWorkerLoader};
|
|
38
|
-
|
|
39
|
-
/** ParquetJS table loader */
|
|
40
|
-
export const ParquetLoader: LoaderWithParser<
|
|
41
|
-
ObjectRowTable | GeoJSONTable,
|
|
42
|
-
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
43
|
-
ParquetLoaderOptions
|
|
44
|
-
> = {
|
|
45
|
-
...ParquetWorkerLoader,
|
|
46
|
-
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
47
|
-
return parseParquetFile(new BlobFile(arrayBuffer), options);
|
|
48
|
-
},
|
|
49
|
-
parseFile: parseParquetFile,
|
|
50
|
-
parseFileInBatches: parseParquetFileInBatches
|
|
51
|
-
};
|
|
52
|
-
|
|
53
|
-
/** ParquetJS table loader */
|
|
54
|
-
export const ParquetColumnarLoader: LoaderWithParser<
|
|
55
|
-
ColumnarTable,
|
|
56
|
-
ColumnarTableBatch,
|
|
57
|
-
ParquetLoaderOptions
|
|
58
|
-
> = {
|
|
59
|
-
...ParquetColumnarWorkerLoader,
|
|
60
|
-
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
61
|
-
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
|
|
62
|
-
},
|
|
63
|
-
parseFile: parseParquetFileInColumns,
|
|
64
|
-
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
// export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
68
|
-
// ...ParquetWasmWorkerLoader,
|
|
69
|
-
// parse: parseParquetWasm
|
|
70
|
-
// };
|
|
71
|
-
|
|
72
|
-
// ParquetWriter
|
|
73
|
-
|
|
74
22
|
export {ParquetWriter as _ParquetWriter} from './parquet-writer';
|
|
75
23
|
// export {ParquetWasmWriter} from './parquet-wasm-writer';
|
|
76
24
|
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {ReadableFile} from '@loaders.gl/loader-utils';
|
|
6
|
+
import type {
|
|
7
|
+
GeoJSONTable,
|
|
8
|
+
GeoJSONTableBatch,
|
|
9
|
+
ObjectRowTable,
|
|
10
|
+
ObjectRowTableBatch
|
|
11
|
+
} from '@loaders.gl/schema';
|
|
12
|
+
import {convertWKBTableToGeoJSON} from '@loaders.gl/gis';
|
|
13
|
+
import {WKTLoader, WKBLoader} from '@loaders.gl/wkt';
|
|
14
|
+
|
|
15
|
+
import type {ParquetLoaderOptions} from '../../parquet-loader';
|
|
16
|
+
|
|
17
|
+
import {parseParquetFile, parseParquetFileInBatches} from './parse-parquet';
|
|
18
|
+
|
|
19
|
+
export async function parseGeoParquetFile(
|
|
20
|
+
file: ReadableFile,
|
|
21
|
+
options?: ParquetLoaderOptions
|
|
22
|
+
): Promise<ObjectRowTable | GeoJSONTable> {
|
|
23
|
+
const table = await parseParquetFile(file, {...options, shape: 'object-row-table'});
|
|
24
|
+
const shape = options?.parquet?.shape;
|
|
25
|
+
return convertTable(table, shape);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export async function* parseGeoParquetFileInBatches(
|
|
29
|
+
file: ReadableFile,
|
|
30
|
+
options?: ParquetLoaderOptions
|
|
31
|
+
): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch> {
|
|
32
|
+
const tableBatches = parseParquetFileInBatches(file, {...options, shape: 'object-row-table'});
|
|
33
|
+
|
|
34
|
+
for await (const batch of tableBatches) {
|
|
35
|
+
const shape = options?.parquet?.shape;
|
|
36
|
+
yield convertBatch(batch, shape);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function convertTable(
|
|
41
|
+
objectRowTable: ObjectRowTable,
|
|
42
|
+
shape?: 'object-row-table' | 'geojson-table'
|
|
43
|
+
): ObjectRowTable | GeoJSONTable {
|
|
44
|
+
switch (shape) {
|
|
45
|
+
case 'object-row-table':
|
|
46
|
+
return objectRowTable;
|
|
47
|
+
|
|
48
|
+
case 'geojson-table':
|
|
49
|
+
try {
|
|
50
|
+
return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [
|
|
51
|
+
WKTLoader,
|
|
52
|
+
WKBLoader
|
|
53
|
+
]);
|
|
54
|
+
} catch (error) {
|
|
55
|
+
return objectRowTable;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
default:
|
|
59
|
+
throw new Error(shape);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function convertBatch(
|
|
64
|
+
objectRowBatch: ObjectRowTableBatch,
|
|
65
|
+
shape?: 'object-row-table' | 'geojson-table'
|
|
66
|
+
): ObjectRowTableBatch | GeoJSONTableBatch {
|
|
67
|
+
switch (shape) {
|
|
68
|
+
case 'object-row-table':
|
|
69
|
+
return objectRowBatch;
|
|
70
|
+
|
|
71
|
+
case 'geojson-table':
|
|
72
|
+
try {
|
|
73
|
+
const geojsonTable = convertWKBTableToGeoJSON(objectRowBatch, objectRowBatch.schema!, [
|
|
74
|
+
WKTLoader,
|
|
75
|
+
WKBLoader
|
|
76
|
+
]);
|
|
77
|
+
return {
|
|
78
|
+
...objectRowBatch,
|
|
79
|
+
...geojsonTable
|
|
80
|
+
};
|
|
81
|
+
} catch (error) {
|
|
82
|
+
return objectRowBatch;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
default:
|
|
86
|
+
throw new Error(shape);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
// loaders.gl
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
2
3
|
// Copyright (c) vis.gl contributors
|
|
3
4
|
|
|
4
5
|
import type {ColumnarTable, ColumnarTableBatch, Schema} from '@loaders.gl/schema';
|
|
@@ -11,6 +12,9 @@ import {materializeColumns} from '../../parquetjs/schema/shred';
|
|
|
11
12
|
import {getSchemaFromParquetReader} from './get-parquet-schema';
|
|
12
13
|
import {installBufferPolyfill} from '../../polyfills/buffer';
|
|
13
14
|
|
|
15
|
+
/**
|
|
16
|
+
* @deprecated
|
|
17
|
+
*/
|
|
14
18
|
export async function parseParquetFileInColumns(
|
|
15
19
|
file: ReadableFile,
|
|
16
20
|
options?: ParquetLoaderOptions
|
|
@@ -26,6 +30,9 @@ export async function parseParquetFileInColumns(
|
|
|
26
30
|
throw new Error('empty table');
|
|
27
31
|
}
|
|
28
32
|
|
|
33
|
+
/**
|
|
34
|
+
* @deprecated
|
|
35
|
+
*/
|
|
29
36
|
export async function* parseParquetFileInColumnarBatches(
|
|
30
37
|
file: ReadableFile,
|
|
31
38
|
options?: ParquetLoaderOptions
|
|
@@ -1,14 +1,9 @@
|
|
|
1
|
-
//
|
|
2
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
3
5
|
import type {ReadableFile} from '@loaders.gl/loader-utils';
|
|
4
|
-
import type {
|
|
5
|
-
GeoJSONTable,
|
|
6
|
-
GeoJSONTableBatch,
|
|
7
|
-
ObjectRowTable,
|
|
8
|
-
ObjectRowTableBatch
|
|
9
|
-
} from '@loaders.gl/schema';
|
|
10
|
-
import {convertWKBTableToGeoJSON} from '@loaders.gl/gis';
|
|
11
|
-
import {WKTLoader, WKBLoader} from '@loaders.gl/wkt';
|
|
6
|
+
import type {ObjectRowTable, ObjectRowTableBatch} from '@loaders.gl/schema';
|
|
12
7
|
|
|
13
8
|
import type {ParquetLoaderOptions} from '../../parquet-loader';
|
|
14
9
|
import type {ParquetRow} from '../../parquetjs/schema/declare';
|
|
@@ -16,10 +11,16 @@ import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
|
|
|
16
11
|
import {getSchemaFromParquetReader} from './get-parquet-schema';
|
|
17
12
|
import {installBufferPolyfill} from '../../polyfills/buffer';
|
|
18
13
|
|
|
14
|
+
/**
|
|
15
|
+
* * Parse a parquet file using parquetjs
|
|
16
|
+
* @param file
|
|
17
|
+
* @param options
|
|
18
|
+
* @returns
|
|
19
|
+
*/
|
|
19
20
|
export async function parseParquetFile(
|
|
20
21
|
file: ReadableFile,
|
|
21
22
|
options?: ParquetLoaderOptions
|
|
22
|
-
): Promise<ObjectRowTable
|
|
23
|
+
): Promise<ObjectRowTable> {
|
|
23
24
|
installBufferPolyfill();
|
|
24
25
|
|
|
25
26
|
const reader = new ParquetReader(file, {
|
|
@@ -47,10 +48,15 @@ export async function parseParquetFile(
|
|
|
47
48
|
return convertTable(objectRowTable, shape);
|
|
48
49
|
}
|
|
49
50
|
|
|
51
|
+
/**
|
|
52
|
+
* Parse a parquet file in batches using parquetjs
|
|
53
|
+
* @param file
|
|
54
|
+
* @param options
|
|
55
|
+
*/
|
|
50
56
|
export async function* parseParquetFileInBatches(
|
|
51
57
|
file: ReadableFile,
|
|
52
58
|
options?: ParquetLoaderOptions
|
|
53
|
-
): AsyncIterable<ObjectRowTableBatch
|
|
59
|
+
): AsyncIterable<ObjectRowTableBatch> {
|
|
54
60
|
const reader = new ParquetReader(file, {
|
|
55
61
|
preserveBinary: options?.parquet?.preserveBinary
|
|
56
62
|
});
|
|
@@ -78,20 +84,14 @@ export async function* parseParquetFileInBatches(
|
|
|
78
84
|
function convertTable(
|
|
79
85
|
objectRowTable: ObjectRowTable,
|
|
80
86
|
shape?: 'object-row-table' | 'geojson-table'
|
|
81
|
-
): ObjectRowTable
|
|
87
|
+
): ObjectRowTable {
|
|
82
88
|
switch (shape) {
|
|
83
89
|
case 'object-row-table':
|
|
84
90
|
return objectRowTable;
|
|
85
91
|
|
|
92
|
+
// Hack until geoparquet fixes up forwarded shape
|
|
86
93
|
case 'geojson-table':
|
|
87
|
-
|
|
88
|
-
return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [
|
|
89
|
-
WKTLoader,
|
|
90
|
-
WKBLoader
|
|
91
|
-
]);
|
|
92
|
-
} catch (error) {
|
|
93
|
-
return objectRowTable;
|
|
94
|
-
}
|
|
94
|
+
return objectRowTable;
|
|
95
95
|
|
|
96
96
|
default:
|
|
97
97
|
throw new Error(shape);
|
package/src/parquet-loader.ts
CHANGED
|
@@ -1,13 +1,24 @@
|
|
|
1
|
-
// loaders.gl
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
2
3
|
// Copyright (c) vis.gl contributors
|
|
3
4
|
|
|
4
|
-
import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
|
+
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
6
|
import type {
|
|
6
7
|
ObjectRowTable,
|
|
7
8
|
ObjectRowTableBatch,
|
|
9
|
+
GeoJSONTable,
|
|
10
|
+
GeoJSONTableBatch,
|
|
8
11
|
ColumnarTable,
|
|
9
12
|
ColumnarTableBatch
|
|
10
13
|
} from '@loaders.gl/schema';
|
|
14
|
+
import {BlobFile} from '@loaders.gl/loader-utils';
|
|
15
|
+
|
|
16
|
+
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet';
|
|
17
|
+
import {parseGeoParquetFile, parseGeoParquetFileInBatches} from './lib/parsers/parse-geoparquet';
|
|
18
|
+
import {
|
|
19
|
+
parseParquetFileInColumns,
|
|
20
|
+
parseParquetFileInColumnarBatches
|
|
21
|
+
} from './lib/parsers/parse-parquet-to-columns';
|
|
11
22
|
|
|
12
23
|
export {Buffer} from './polyfills/buffer/install-buffer-polyfill';
|
|
13
24
|
|
|
@@ -32,8 +43,14 @@ export type ParquetLoaderOptions = LoaderOptions & {
|
|
|
32
43
|
};
|
|
33
44
|
};
|
|
34
45
|
|
|
35
|
-
/**
|
|
36
|
-
|
|
46
|
+
/**
|
|
47
|
+
* ParquetJS table loader
|
|
48
|
+
*/
|
|
49
|
+
export const ParquetWorkerLoader: Loader<
|
|
50
|
+
ObjectRowTable,
|
|
51
|
+
ObjectRowTableBatch,
|
|
52
|
+
ParquetLoaderOptions
|
|
53
|
+
> = {
|
|
37
54
|
name: 'Apache Parquet',
|
|
38
55
|
id: 'parquet',
|
|
39
56
|
module: 'parquet',
|
|
@@ -55,7 +72,63 @@ export const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetL
|
|
|
55
72
|
}
|
|
56
73
|
};
|
|
57
74
|
|
|
58
|
-
|
|
75
|
+
/** ParquetJS table loader */
|
|
76
|
+
export const ParquetLoader: LoaderWithParser<
|
|
77
|
+
ObjectRowTable | GeoJSONTable,
|
|
78
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
79
|
+
ParquetLoaderOptions
|
|
80
|
+
> = {
|
|
81
|
+
...ParquetWorkerLoader,
|
|
82
|
+
parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
|
|
83
|
+
parseParquetFile(new BlobFile(arrayBuffer), options),
|
|
84
|
+
|
|
85
|
+
parseFile: parseParquetFile,
|
|
86
|
+
parseFileInBatches: parseParquetFileInBatches
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
// Defeat tree shaking
|
|
90
|
+
// @ts-ignore
|
|
91
|
+
ParquetLoader.Buffer = Buffer;
|
|
92
|
+
|
|
93
|
+
export const GeoParquetWorkerLoader: Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions> =
|
|
94
|
+
{
|
|
95
|
+
name: 'Apache Parquet',
|
|
96
|
+
id: 'parquet',
|
|
97
|
+
module: 'parquet',
|
|
98
|
+
version: VERSION,
|
|
99
|
+
worker: true,
|
|
100
|
+
category: 'table',
|
|
101
|
+
extensions: ['parquet'],
|
|
102
|
+
mimeTypes: ['application/octet-stream'],
|
|
103
|
+
binary: true,
|
|
104
|
+
tests: ['PAR1', 'PARE'],
|
|
105
|
+
options: {
|
|
106
|
+
parquet: {
|
|
107
|
+
shape: 'geojson-table',
|
|
108
|
+
columnList: [],
|
|
109
|
+
geoparquet: true,
|
|
110
|
+
url: undefined,
|
|
111
|
+
preserveBinary: false
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
/** ParquetJS table loader */
|
|
117
|
+
export const GeoParquetLoader: LoaderWithParser<
|
|
118
|
+
ObjectRowTable | GeoJSONTable,
|
|
119
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
120
|
+
ParquetLoaderOptions
|
|
121
|
+
> = {
|
|
122
|
+
...GeoParquetWorkerLoader,
|
|
123
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
124
|
+
return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
|
|
125
|
+
},
|
|
126
|
+
parseFile: parseGeoParquetFile,
|
|
127
|
+
parseFileInBatches: parseGeoParquetFileInBatches
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
131
|
+
export const ParquetColumnarWorkerLoader: Loader<
|
|
59
132
|
ColumnarTable,
|
|
60
133
|
ColumnarTableBatch,
|
|
61
134
|
ParquetLoaderOptions
|
|
@@ -73,8 +146,16 @@ export const ParquetColumnarLoader: Loader<
|
|
|
73
146
|
options: ParquetLoader.options
|
|
74
147
|
};
|
|
75
148
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
149
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
150
|
+
export const ParquetColumnarLoader: LoaderWithParser<
|
|
151
|
+
ColumnarTable,
|
|
152
|
+
ColumnarTableBatch,
|
|
153
|
+
ParquetLoaderOptions
|
|
154
|
+
> = {
|
|
155
|
+
...ParquetColumnarWorkerLoader,
|
|
156
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
157
|
+
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
|
|
158
|
+
},
|
|
159
|
+
parseFile: parseParquetFileInColumns,
|
|
160
|
+
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
161
|
+
};
|
|
@@ -1,9 +1,12 @@
|
|
|
1
|
-
// loaders.gl
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
2
3
|
// Copyright (c) vis.gl contributors
|
|
3
4
|
|
|
4
|
-
import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
|
+
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
6
|
import type {ArrowTable} from '@loaders.gl/arrow';
|
|
6
7
|
|
|
8
|
+
import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
|
|
9
|
+
|
|
7
10
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
8
11
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
9
12
|
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
@@ -17,7 +20,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
|
17
20
|
};
|
|
18
21
|
|
|
19
22
|
/** Parquet WASM table loader */
|
|
20
|
-
export const
|
|
23
|
+
export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
21
24
|
name: 'Apache Parquet',
|
|
22
25
|
id: 'parquet-wasm',
|
|
23
26
|
module: 'parquet',
|
|
@@ -35,3 +38,9 @@ export const ParquetWasmLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptio
|
|
|
35
38
|
}
|
|
36
39
|
}
|
|
37
40
|
};
|
|
41
|
+
|
|
42
|
+
/** Parquet WASM table loader */
|
|
43
|
+
export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
44
|
+
...ParquetWasmWorkerLoader,
|
|
45
|
+
parse: parseParquetWasm
|
|
46
|
+
};
|
package/src/parquet-writer.ts
CHANGED