@loaders.gl/parquet 3.3.2 → 3.4.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +16 -16
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +46 -5
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +2 -2
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.ts.disabled +976 -0
- package/dist/es5/parquet-loader.js +3 -2
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/esm/index.js +12 -2
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +2 -2
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.ts.disabled +976 -0
- package/dist/esm/parquet-loader.js +3 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/index.d.ts +23 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +24 -6
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +73 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +1 -1
- package/dist/parquet-loader.d.ts +1 -0
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +2 -1
- package/dist/parquet-worker.js +19 -19
- package/dist/parquet-worker.js.map +3 -3
- package/package.json +5 -5
- package/src/index.ts +22 -2
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
- package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
- package/src/lib/geo/decode-geo-metadata.ts +99 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
- package/src/lib/{parse-parquet.ts → parsers/parse-parquet-to-rows.ts} +2 -2
- package/src/lib/wip/convert-schema-deep.ts.disabled +976 -0
- package/src/parquet-loader.ts +3 -1
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -43
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -10
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -29
- package/src/lib/read-array-buffer.ts +0 -31
- /package/dist/es5/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.ts.disabled} +0 -0
- /package/dist/esm/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.ts.disabled} +0 -0
- /package/src/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.ts.disabled} +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/parquet",
|
|
3
|
-
"version": "3.3.2",
|
|
3
|
+
"version": "3.4.0-alpha.2",
|
|
4
4
|
"description": "Framework-independent loader for Apache Parquet files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"publishConfig": {
|
|
@@ -42,9 +42,9 @@
|
|
|
42
42
|
"./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
|
|
43
43
|
},
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@loaders.gl/compression": "3.3.2",
|
|
46
|
-
"@loaders.gl/loader-utils": "3.3.2",
|
|
47
|
-
"@loaders.gl/schema": "3.3.2",
|
|
45
|
+
"@loaders.gl/compression": "3.4.0-alpha.2",
|
|
46
|
+
"@loaders.gl/loader-utils": "3.4.0-alpha.2",
|
|
47
|
+
"@loaders.gl/schema": "3.4.0-alpha.2",
|
|
48
48
|
"async-mutex": "^0.2.2",
|
|
49
49
|
"brotli": "^1.3.2",
|
|
50
50
|
"bson": "^1.0.4",
|
|
@@ -70,5 +70,5 @@
|
|
|
70
70
|
"@types/varint": "^5.0.0",
|
|
71
71
|
"apache-arrow": "^4.0.0"
|
|
72
72
|
},
|
|
73
|
-
"gitHead": "
|
|
73
|
+
"gitHead": "f1c00c124d8d0c41a138ff40afb0d1a00711bf2e"
|
|
74
74
|
}
|
package/src/index.ts
CHANGED
|
@@ -4,7 +4,11 @@ import type {LoaderWithParser} from '@loaders.gl/loader-utils';
|
|
|
4
4
|
|
|
5
5
|
import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
|
|
6
6
|
import {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';
|
|
7
|
-
import {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';
|
|
7
|
+
import {parseParquet, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';
|
|
8
|
+
import {
|
|
9
|
+
parseParquetInColumns,
|
|
10
|
+
parseParquetFileInColumnarBatches
|
|
11
|
+
} from './lib/parsers/parse-parquet-to-columns';
|
|
8
12
|
import {parseParquet as parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
|
|
9
13
|
|
|
10
14
|
export {ParquetWorkerLoader, ParquetWasmWorkerLoader};
|
|
@@ -16,6 +20,13 @@ export const ParquetLoader = {
|
|
|
16
20
|
parseFileInBatches: parseParquetFileInBatches
|
|
17
21
|
};
|
|
18
22
|
|
|
23
|
+
/** ParquetJS table loader */
|
|
24
|
+
export const ParquetColumnarLoader = {
|
|
25
|
+
...ParquetWorkerLoader,
|
|
26
|
+
parse: parseParquetInColumns,
|
|
27
|
+
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
28
|
+
};
|
|
29
|
+
|
|
19
30
|
export const ParquetWasmLoader = {
|
|
20
31
|
...ParquetWasmWorkerLoader,
|
|
21
32
|
parse: parseParquetWasm
|
|
@@ -34,7 +45,16 @@ export {ParquetSchema} from './parquetjs/schema/schema';
|
|
|
34
45
|
export {ParquetReader} from './parquetjs/parser/parquet-reader';
|
|
35
46
|
export {ParquetEncoder} from './parquetjs/encoder/parquet-encoder';
|
|
36
47
|
|
|
37
|
-
export {
|
|
48
|
+
export {
|
|
49
|
+
convertSchemaFromParquet,
|
|
50
|
+
convertSchemaFromParquet as convertParquetToArrowSchema
|
|
51
|
+
} from './lib/arrow/convert-schema-from-parquet';
|
|
38
52
|
|
|
39
53
|
// TESTS
|
|
40
54
|
export const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;
|
|
55
|
+
|
|
56
|
+
// Geo Metadata
|
|
57
|
+
export {default as geoJSONSchema} from './lib/geo/geoparquet-schema';
|
|
58
|
+
|
|
59
|
+
export type {GeoMetadata} from './lib/geo/decode-geo-metadata';
|
|
60
|
+
export {getGeoMetadata, setGeoMetadata, unpackGeoMetadata} from './lib/geo/decode-geo-metadata';
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
3
|
+
import {Schema} from '@loaders.gl/schema';
|
|
4
|
+
import {ParquetBuffer} from '@loaders.gl/parquet/parquetjs/schema/declare';
|
|
5
|
+
|
|
6
|
+
/**
 * Extracts the decoded value arrays of a parquet row group, keyed by column name.
 *
 * @param schema Schema of the table. It is not consulted by the conversion; it is part of
 *   the signature callers already use.
 * @param rowGroup Row group whose `columnData` maps each column name to its decoded data.
 * @returns A record mapping every column name in `rowGroup.columnData` to its `values` array.
 */
export function convertParquetRowGroupToColumns(
  schema: Schema,
  rowGroup: ParquetBuffer
): Record<string, any[]> {
  // Build the record from entries instead of reading `columns[name]` first: a lookup on a
  // plain object also sees inherited members, so a column called `toString` or `constructor`
  // would otherwise be "already present" and end up holding a function instead of its values.
  return Object.fromEntries(
    Object.entries(rowGroup.columnData).map(([columnName, data]) => [columnName, data.values])
  );
}
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
3
|
+
import type {ParquetSchema} from '../../parquetjs/schema/schema';
|
|
4
|
+
import type {FieldDefinition, ParquetField, ParquetType} from '../../parquetjs/schema/declare';
|
|
5
|
+
import {FileMetaData} from '@loaders.gl/parquet/parquetjs/parquet-thrift';
|
|
3
6
|
|
|
4
7
|
import {
|
|
5
8
|
Schema,
|
|
@@ -45,7 +48,7 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
|
|
|
45
48
|
INT_64: Int64,
|
|
46
49
|
JSON: Binary,
|
|
47
50
|
BSON: Binary,
|
|
48
|
-
// TODO check
|
|
51
|
+
// TODO check interval type
|
|
49
52
|
INTERVAL: Binary,
|
|
50
53
|
DECIMAL_INT32: Float32,
|
|
51
54
|
DECIMAL_INT64: Float64,
|
|
@@ -53,24 +56,13 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
|
|
|
53
56
|
DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
|
|
54
57
|
};
|
|
55
58
|
|
|
56
|
-
export function
|
|
59
|
+
export function convertSchemaFromParquet(
|
|
60
|
+
parquetSchema: ParquetSchema,
|
|
61
|
+
parquetMetadata?: FileMetaData
|
|
62
|
+
): Schema {
|
|
57
63
|
const fields = getFields(parquetSchema.schema);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
return new Schema(fields);
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function getFieldMetadata(field: ParquetField): Map<string, string> {
|
|
64
|
-
const metadata = new Map();
|
|
65
|
-
|
|
66
|
-
for (const key in field) {
|
|
67
|
-
if (key !== 'name') {
|
|
68
|
-
const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
|
|
69
|
-
metadata.set(key, value);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
return metadata;
|
|
64
|
+
const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
|
|
65
|
+
return new Schema(fields, metadata);
|
|
74
66
|
}
|
|
75
67
|
|
|
76
68
|
function getFields(schema: FieldDefinition): Field[] {
|
|
@@ -80,8 +72,8 @@ function getFields(schema: FieldDefinition): Field[] {
|
|
|
80
72
|
const field = schema[name];
|
|
81
73
|
|
|
82
74
|
if (field.fields) {
|
|
83
|
-
const
|
|
84
|
-
const nestedField = new Field(name, new Struct(
|
|
75
|
+
const childFields = getFields(field.fields);
|
|
76
|
+
const nestedField = new Field(name, new Struct(childFields), field.optional);
|
|
85
77
|
fields.push(nestedField);
|
|
86
78
|
} else {
|
|
87
79
|
const FieldType = PARQUET_TYPE_MAPPING[field.type];
|
|
@@ -93,3 +85,30 @@ function getFields(schema: FieldDefinition): Field[] {
|
|
|
93
85
|
|
|
94
86
|
return fields;
|
|
95
87
|
}
|
|
88
|
+
|
|
89
|
+
/**
 * Builds the string metadata map for one parquet field.
 *
 * Every property of the field except `name` becomes one entry. String values are stored
 * unchanged; all other values are stored JSON-encoded.
 *
 * @param field Parquet field description to convert.
 * @returns Map from property name to its string representation.
 */
function getFieldMetadata(field: ParquetField): Map<string, string> {
  const metadata = new Map<string, string>();

  for (const [key, value] of Object.entries(field)) {
    if (key === 'name') {
      continue;
    }

    if (typeof value === 'string') {
      metadata.set(key, value);
      continue;
    }

    // JSON.stringify yields `undefined` (not a string) for values such as `undefined`;
    // store an empty string so the map only ever contains strings.
    const encoded = JSON.stringify(value);
    metadata.set(key, encoded === undefined ? '' : encoded);
  }

  return metadata;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Copies the key/value metadata of a parquet file into a map.
 * A missing `key_value_metadata` list is treated as empty, and entries whose value is not
 * a string are left out.
 */
function getSchemaMetadata(parquetMetadata: FileMetaData): Map<string, string> {
  const result = new Map();

  for (const entry of parquetMetadata.key_value_metadata || []) {
    const {key, value} = entry;
    if (typeof value !== 'string') {
      continue;
    }
    result.set(key, value);
  }

  return result;
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
3
|
+
// import type {ParquetSchema} from '../../parquetjs/schema/schema';
|
|
4
|
+
import type {
|
|
5
|
+
// FieldDefinition, ParquetField,
|
|
6
|
+
ParquetType
|
|
7
|
+
} from '../../parquetjs/schema/declare';
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
Schema,
|
|
11
|
+
// Struct,
|
|
12
|
+
// Field,
|
|
13
|
+
DataType,
|
|
14
|
+
Bool,
|
|
15
|
+
Float64,
|
|
16
|
+
Int32,
|
|
17
|
+
Float32,
|
|
18
|
+
Binary,
|
|
19
|
+
Utf8,
|
|
20
|
+
Int64,
|
|
21
|
+
Uint16,
|
|
22
|
+
Uint32,
|
|
23
|
+
Uint64,
|
|
24
|
+
Int8,
|
|
25
|
+
Int16
|
|
26
|
+
} from '@loaders.gl/schema';
|
|
27
|
+
|
|
28
|
+
export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
|
|
29
|
+
BOOLEAN: Bool,
|
|
30
|
+
INT32: Int32,
|
|
31
|
+
INT64: Float64,
|
|
32
|
+
INT96: Float64,
|
|
33
|
+
FLOAT: Float32,
|
|
34
|
+
DOUBLE: Float64,
|
|
35
|
+
BYTE_ARRAY: Binary,
|
|
36
|
+
FIXED_LEN_BYTE_ARRAY: Binary,
|
|
37
|
+
UTF8: Utf8,
|
|
38
|
+
DATE: Int32,
|
|
39
|
+
TIME_MILLIS: Int64,
|
|
40
|
+
TIME_MICROS: Int64,
|
|
41
|
+
TIMESTAMP_MILLIS: Int64,
|
|
42
|
+
TIMESTAMP_MICROS: Int64,
|
|
43
|
+
UINT_8: Int32,
|
|
44
|
+
UINT_16: Uint16,
|
|
45
|
+
UINT_32: Uint32,
|
|
46
|
+
UINT_64: Uint64,
|
|
47
|
+
INT_8: Int8,
|
|
48
|
+
INT_16: Int16,
|
|
49
|
+
INT_32: Int32,
|
|
50
|
+
INT_64: Int64,
|
|
51
|
+
JSON: Binary,
|
|
52
|
+
BSON: Binary,
|
|
53
|
+
// TODO check interval type
|
|
54
|
+
INTERVAL: Binary,
|
|
55
|
+
DECIMAL_INT32: Float32,
|
|
56
|
+
DECIMAL_INT64: Float64,
|
|
57
|
+
DECIMAL_BYTE_ARRAY: Float64,
|
|
58
|
+
DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
/**
 * Placeholder for the schema-to-parquet conversion.
 * The input schema is not read yet: the result is always a schema with no fields.
 */
export function convertToParquetSchema(schema: Schema): Schema {
  // TODO convert `schema.fields` and add metadata if needed.
  return new Schema([]);
}
|
|
67
|
+
|
|
68
|
+
// function getFields(schema: Field[]): Definition[] {
|
|
69
|
+
// const fields: Field[] = [];
|
|
70
|
+
|
|
71
|
+
// for (const name in schema) {
|
|
72
|
+
// const field = schema[name];
|
|
73
|
+
|
|
74
|
+
// // @ts-ignore
|
|
75
|
+
// const children = field.children as DataType[];
|
|
76
|
+
// if (children) {
|
|
77
|
+
// const childField = getFields(field.fields);
|
|
78
|
+
// const nestedField = new Field(name, new Struct(childField), field.optional);
|
|
79
|
+
// fields.push(nestedField);
|
|
80
|
+
// } else {
|
|
81
|
+
// const FieldType = PARQUET_TYPE_MAPPING[field.type];
|
|
82
|
+
// const metadata = getFieldMetadata(field);
|
|
83
|
+
// const arrowField = new Field(name, new FieldType(), field.optional, metadata);
|
|
84
|
+
// fields.push(arrowField);
|
|
85
|
+
// }
|
|
86
|
+
// }
|
|
87
|
+
|
|
88
|
+
// return fields;
|
|
89
|
+
// }
|
|
90
|
+
|
|
91
|
+
// function getFieldMetadata(field: ParquetField): Map<string, string> {
|
|
92
|
+
// const metadata = new Map();
|
|
93
|
+
|
|
94
|
+
// for (const key in field) {
|
|
95
|
+
// if (key !== 'name') {
|
|
96
|
+
// const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
|
|
97
|
+
// metadata.set(key, value);
|
|
98
|
+
// }
|
|
99
|
+
// }
|
|
100
|
+
|
|
101
|
+
// return metadata;
|
|
102
|
+
// }
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
import {Schema, Field} from '@loaders.gl/schema';
|
|
3
|
+
|
|
4
|
+
/* eslint-disable camelcase */
|
|
5
|
+
|
|
6
|
+
/** A geoarrow / geoparquet geo metadata object (stored in stringified form in the top level metadata 'geo' key) */
|
|
7
|
+
export type GeoMetadata = {
|
|
8
|
+
version?: string;
|
|
9
|
+
primary_column?: string;
|
|
10
|
+
columns: Record<string, GeoColumnMetadata>;
|
|
11
|
+
[key: string]: unknown;
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
/**
 * A geoarrow / geoparquet geo metadata for one geometry column.
 *
 * Both spellings of the bounding box and geometry type keys are declared: `bounding_box` /
 * `geometry_type` as originally typed here, and `bbox` / `geometry_types` as named by the
 * GeoParquet JSON schema shipped with this package. Any other key is kept as `unknown`.
 */
export type GeoColumnMetadata = {
  bounding_box?:
    | [number, number, number, number]
    | [number, number, number, number, number, number];
  /** Bounding box under the key used by the GeoParquet JSON schema */
  bbox?: [number, number, number, number] | [number, number, number, number, number, number];
  crs?: string;
  geometry_type?: string[];
  /** Geometry types under the key used by the GeoParquet JSON schema */
  geometry_types?: string[];
  /** Geometry encoding; the GeoParquet JSON schema only allows 'WKB' */
  encoding?: string;
  edges?: string;
  [key: string]: unknown;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Reads the GeoMetadata object from the metadata
 * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
 * @param schema Schema whose metadata map is searched for the 'geo' key
 * @returns The parsed metadata object, or `null` when the 'geo' entry is missing, empty,
 *   not valid JSON, or valid JSON that is not an object (number, string, array, null)
 */
export function getGeoMetadata(schema: Schema): GeoMetadata | null {
  const stringifiedGeoMetadata = schema.metadata.get('geo');
  if (!stringifiedGeoMetadata) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(stringifiedGeoMetadata);
  } catch {
    // Malformed geo metadata is treated the same as absent geo metadata
    return null;
  }

  // JSON.parse accepts any JSON value; only a plain object can be a GeoMetadata
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  return parsed as GeoMetadata;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Writes a geoarrow / geoparquet geo metadata object into the schema metadata
 * @note the object is serialized to JSON and stored under the single key 'geo',
 *   replacing any value already stored under that key
 */
export function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void {
  schema.metadata.set('geo', JSON.stringify(geoMetadata));
}
|
|
50
|
+
|
|
51
|
+
/**
 * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
 * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
 *
 * Schema-level keys written: 'geo.version' and 'geo.primary_column' (when present) and
 * 'geo.columns' (comma separated column names, always written once geo metadata was found).
 * For every geo column that matches a schema field by name, the column's metadata is unpacked
 * onto that field, and the primary column's field is additionally marked with
 * 'geo.primary_field' = 'true'. Does nothing when the schema has no readable geo metadata.
 */
export function unpackGeoMetadata(schema: Schema): void {
  const geoMetadata = getGeoMetadata(schema);
  if (!geoMetadata) {
    return;
  }

  // Store Parquet Schema Level Metadata

  const {version, primary_column, columns} = geoMetadata;
  if (version) {
    schema.metadata.set('geo.version', version);
  }

  if (primary_column) {
    schema.metadata.set('geo.primary_column', primary_column);
  }

  const columnEntries = Object.entries(columns || {});

  // store column names as comma separated list
  // (joining with ',' keeps the individual names recoverable; '' would fuse them together)
  schema.metadata.set('geo.columns', columnEntries.map(([columnName]) => columnName).join(','));

  for (const [columnName, columnMetadata] of columnEntries) {
    const field = schema.fields.find((candidate) => candidate.name === columnName);
    if (!field) {
      // Geo metadata may describe a column that is not part of this schema; skip it
      continue;
    }
    if (field.name === primary_column) {
      field.metadata.set('geo.primary_field', 'true');
    }
    unpackGeoFieldMetadata(field, columnMetadata);
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Copies the geo metadata of one column onto a field, one 'geo.<key>' entry per key.
 *
 * - 'geometry_type' / 'geometry_types': a list is stored as comma separated names; a single
 *   string is stored unchanged.
 * - every other key (e.g. 'bbox', 'crs', 'edges'): strings are stored unchanged, all other
 *   values are stored JSON-encoded.
 *
 * @param field Field whose metadata map receives the entries
 * @param columnMetadata Geo metadata of the column; a missing value is treated as empty
 */
function unpackGeoFieldMetadata(
  field: Field,
  columnMetadata: Record<string, unknown> | null | undefined
): void {
  for (const [key, value] of Object.entries(columnMetadata || {})) {
    const metadataKey = `geo.${key}`;
    switch (key) {
      case 'geometry_type':
      case 'geometry_types':
        if (Array.isArray(value)) {
          field.metadata.set(metadataKey, value.join(','));
          break;
        }
        // Not a list (e.g. a single type name): store it like any other value
        field.metadata.set(metadataKey, typeof value === 'string' ? value : JSON.stringify(value));
        break;
      default:
        field.metadata.set(metadataKey, typeof value === 'string' ? value : JSON.stringify(value));
    }
  }
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
3
|
+
/* eslint-disable camelcase */
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Geoparquet JSON schema for geo metadata
|
|
7
|
+
* @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
|
|
8
|
+
* @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
|
|
9
|
+
*/
|
|
10
|
+
export default {
|
|
11
|
+
$schema: 'http://json-schema.org/draft-07/schema#',
|
|
12
|
+
title: 'GeoParquet',
|
|
13
|
+
description: 'Parquet metadata included in the geo field.',
|
|
14
|
+
type: 'object',
|
|
15
|
+
required: ['version', 'primary_column', 'columns'],
|
|
16
|
+
properties: {
|
|
17
|
+
version: {type: 'string', const: '1.0.0-beta.1'},
|
|
18
|
+
primary_column: {type: 'string', minLength: 1},
|
|
19
|
+
columns: {
|
|
20
|
+
type: 'object',
|
|
21
|
+
minProperties: 1,
|
|
22
|
+
patternProperties: {
|
|
23
|
+
'.+': {
|
|
24
|
+
type: 'object',
|
|
25
|
+
required: ['encoding', 'geometry_types'],
|
|
26
|
+
properties: {
|
|
27
|
+
encoding: {type: 'string', const: 'WKB'},
|
|
28
|
+
geometry_types: {
|
|
29
|
+
type: 'array',
|
|
30
|
+
uniqueItems: true,
|
|
31
|
+
items: {
|
|
32
|
+
type: 'string',
|
|
33
|
+
pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
crs: {
|
|
37
|
+
oneOf: [
|
|
38
|
+
{
|
|
39
|
+
$ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
|
|
40
|
+
},
|
|
41
|
+
{type: 'null'}
|
|
42
|
+
]
|
|
43
|
+
},
|
|
44
|
+
edges: {type: 'string', enum: ['planar', 'spherical']},
|
|
45
|
+
orientation: {type: 'string', const: 'counterclockwise'},
|
|
46
|
+
bbox: {
|
|
47
|
+
type: 'array',
|
|
48
|
+
items: {type: 'number'},
|
|
49
|
+
oneOf: [
|
|
50
|
+
{
|
|
51
|
+
description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
|
|
52
|
+
minItems: 4,
|
|
53
|
+
maxItems: 4
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
|
|
57
|
+
minItems: 6,
|
|
58
|
+
maxItems: 6
|
|
59
|
+
}
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
epoch: {type: 'number'}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
additionalProperties: false
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
3
|
+
// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
4
|
+
import {ColumnarTableBatch, Schema} from '@loaders.gl/schema';
|
|
5
|
+
import {makeReadableFile} from '@loaders.gl/loader-utils';
|
|
6
|
+
import type {ParquetLoaderOptions} from '../../parquet-loader';
|
|
7
|
+
import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
|
|
8
|
+
import {ParquetBuffer} from '../../parquetjs/schema/declare';
|
|
9
|
+
import {convertSchemaFromParquet} from '../arrow/convert-schema-from-parquet';
|
|
10
|
+
import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
|
|
11
|
+
import {unpackGeoMetadata} from '../geo/decode-geo-metadata';
|
|
12
|
+
|
|
13
|
+
/**
 * Parses an in-memory parquet file into a columnar table batch.
 *
 * NOTE: resolves with the first batch yielded by `parseParquetFileInColumnarBatches` and
 * stops iterating there; any later batches are not read. Resolves with `null` when no batch
 * is yielded at all.
 *
 * @param arrayBuffer Bytes of the parquet file
 * @param options Loader options, passed through to the batch parser
 */
export async function parseParquetInColumns(
  arrayBuffer: ArrayBuffer,
  options?: ParquetLoaderOptions
) {
  const batches = parseParquetFileInColumnarBatches(new Blob([arrayBuffer]), options);
  for await (const firstBatch of batches) {
    return firstBatch;
  }
  return null;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Streams a parquet file as columnar table batches, one batch per item produced by the
 * reader's row group iterator.
 *
 * The schema is converted once, before any batch is yielded (parquet schema plus file
 * metadata, then geo metadata unpacked onto it), and the same schema object is attached to
 * every batch.
 *
 * @param blob Parquet file contents
 * @param options Loader options; `options.parquet` is handed to the row group iterator
 */
export async function* parseParquetFileInColumnarBatches(
  blob: Blob,
  options?: ParquetLoaderOptions
): AsyncIterable<ColumnarTableBatch> {
  const reader = new ParquetReader(makeReadableFile(blob));

  // Schema first, then file metadata: keep the reads sequential, in this order
  const parquetSchema = await reader.getSchema();
  const parquetMetadata = await reader.getFileMetadata();

  const schema = convertSchemaFromParquet(parquetSchema, parquetMetadata);
  unpackGeoMetadata(schema);

  for await (const rowGroup of reader.rowGroupIterator(options?.parquet)) {
    yield convertRowGroupToTableBatch(schema, rowGroup);
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Wraps one parquet row group as a 'columnar-table' data batch.
 * The batch carries the given schema, the row group's columns and its row count as `length`.
 */
function convertRowGroupToTableBatch(schema: Schema, rowGroup: ParquetBuffer): ColumnarTableBatch {
  const batch: ColumnarTableBatch = {
    shape: 'columnar-table',
    batchType: 'data',
    schema,
    data: convertParquetRowGroupToColumns(schema, rowGroup),
    length: rowGroup.rowCount
  };
  return batch;
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
2
2
|
// import {ColumnarTableBatch} from '@loaders.gl/schema';
|
|
3
3
|
import {makeReadableFile} from '@loaders.gl/loader-utils';
|
|
4
|
-
import type {ParquetLoaderOptions} from '
|
|
5
|
-
import {ParquetReader} from '
|
|
4
|
+
import type {ParquetLoaderOptions} from '../../parquet-loader';
|
|
5
|
+
import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
|
|
6
6
|
|
|
7
7
|
export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
8
8
|
const blob = new Blob([arrayBuffer]);
|