@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +19 -19
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +49 -8
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/es5/parquet-loader.js +4 -2
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquetjs/compression.js +15 -5
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +3 -1
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +39 -33
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +2 -3
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/esm/index.js +13 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/esm/parquet-loader.js +4 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquetjs/compression.js +10 -1
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +42 -34
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/index.d.ts +24 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +26 -9
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +73 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
- package/dist/parquet-loader.d.ts +2 -0
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +3 -1
- package/dist/parquet-worker.js +20 -20
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +16 -5
- package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
- package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +168 -102
- package/dist/parquetjs/schema/declare.d.ts +14 -7
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +2 -0
- package/dist/parquetjs/schema/shred.d.ts +115 -0
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +161 -43
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.d.ts +3 -4
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +2 -5
- package/package.json +7 -5
- package/src/index.ts +24 -4
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
- package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
- package/src/lib/geo/decode-geo-metadata.ts +99 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
- package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
- package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/src/parquet-loader.ts +5 -1
- package/src/parquetjs/compression.ts +14 -1
- package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
- package/src/parquetjs/parser/parquet-reader.ts +239 -122
- package/src/parquetjs/schema/declare.ts +17 -9
- package/src/parquetjs/schema/shred.ts +157 -28
- package/src/parquetjs/schema/types.ts +21 -27
- package/src/parquetjs/utils/file-utils.ts +3 -4
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -43
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
- package/dist/es5/parquetjs/file.js +0 -94
- package/dist/es5/parquetjs/file.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js +0 -25
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -10
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
- package/dist/esm/parquetjs/file.js +0 -81
- package/dist/esm/parquetjs/file.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/parse-parquet.js +0 -28
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -29
- package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
- package/dist/parquetjs/file.d.ts +0 -10
- package/dist/parquetjs/file.d.ts.map +0 -1
- package/dist/parquetjs/file.js +0 -99
- package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.js +0 -74
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
- package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.js +0 -22
- package/src/lib/parse-parquet.ts +0 -27
- package/src/lib/read-array-buffer.ts +0 -31
- package/src/parquetjs/file.ts +0 -90
- package/src/parquetjs/parser/parquet-cursor.ts +0 -94
- package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
- package/src/parquetjs/utils/buffer-utils.ts +0 -18
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ParquetType } from '../../parquetjs/schema/declare';
|
|
2
|
+
import { Schema, DataType } from '@loaders.gl/schema';
|
|
3
|
+
export declare const PARQUET_TYPE_MAPPING: {
|
|
4
|
+
[type in ParquetType]: typeof DataType;
|
|
5
|
+
};
|
|
6
|
+
export declare function convertToParquetSchema(schema: Schema): Schema;
|
|
7
|
+
//# sourceMappingURL=convert-schema-to-parquet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convert-schema-to-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-to-parquet.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAEV,WAAW,EACZ,MAAM,gCAAgC,CAAC;AAExC,OAAO,EACL,MAAM,EAGN,QAAQ,EAaT,MAAM,oBAAoB,CAAC;AAE5B,eAAO,MAAM,oBAAoB,EAAE;KAAE,IAAI,IAAI,WAAW,GAAG,OAAO,QAAQ;CA+BzE,CAAC;AAEF,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAK7D"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// loaders.gl, MIT license
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.convertToParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
|
|
5
|
+
const schema_1 = require("@loaders.gl/schema");
|
|
6
|
+
exports.PARQUET_TYPE_MAPPING = {
|
|
7
|
+
BOOLEAN: schema_1.Bool,
|
|
8
|
+
INT32: schema_1.Int32,
|
|
9
|
+
INT64: schema_1.Float64,
|
|
10
|
+
INT96: schema_1.Float64,
|
|
11
|
+
FLOAT: schema_1.Float32,
|
|
12
|
+
DOUBLE: schema_1.Float64,
|
|
13
|
+
BYTE_ARRAY: schema_1.Binary,
|
|
14
|
+
FIXED_LEN_BYTE_ARRAY: schema_1.Binary,
|
|
15
|
+
UTF8: schema_1.Utf8,
|
|
16
|
+
DATE: schema_1.Int32,
|
|
17
|
+
TIME_MILLIS: schema_1.Int64,
|
|
18
|
+
TIME_MICROS: schema_1.Int64,
|
|
19
|
+
TIMESTAMP_MILLIS: schema_1.Int64,
|
|
20
|
+
TIMESTAMP_MICROS: schema_1.Int64,
|
|
21
|
+
UINT_8: schema_1.Int32,
|
|
22
|
+
UINT_16: schema_1.Uint16,
|
|
23
|
+
UINT_32: schema_1.Uint32,
|
|
24
|
+
UINT_64: schema_1.Uint64,
|
|
25
|
+
INT_8: schema_1.Int8,
|
|
26
|
+
INT_16: schema_1.Int16,
|
|
27
|
+
INT_32: schema_1.Int32,
|
|
28
|
+
INT_64: schema_1.Int64,
|
|
29
|
+
JSON: schema_1.Binary,
|
|
30
|
+
BSON: schema_1.Binary,
|
|
31
|
+
// TODO check interval type
|
|
32
|
+
INTERVAL: schema_1.Binary,
|
|
33
|
+
DECIMAL_INT32: schema_1.Float32,
|
|
34
|
+
DECIMAL_INT64: schema_1.Float64,
|
|
35
|
+
DECIMAL_BYTE_ARRAY: schema_1.Float64,
|
|
36
|
+
DECIMAL_FIXED_LEN_BYTE_ARRAY: schema_1.Float64
|
|
37
|
+
};
|
|
38
|
+
/**
 * Builds the Parquet-side schema for a loaders.gl schema.
 *
 * NOTE: this is currently a stub. The incoming `schema` is never inspected
 * and the result is always a Schema constructed from an empty field list
 * (the field conversion helpers in this file are still commented out).
 *
 * @param schema schema to convert (currently unused)
 * @returns a new Schema created from an empty array of fields
 */
function convertToParquetSchema(schema) {
  // TODO add metadata if needed.
  return new schema_1.Schema([]);
}
|
|
43
|
+
exports.convertToParquetSchema = convertToParquetSchema;
|
|
44
|
+
// function getFields(schema: Field[]): Definition[] {
|
|
45
|
+
// const fields: Field[] = [];
|
|
46
|
+
// for (const name in schema) {
|
|
47
|
+
// const field = schema[name];
|
|
48
|
+
// // @ts-ignore
|
|
49
|
+
// const children = field.children as DataType[];
|
|
50
|
+
// if (children) {
|
|
51
|
+
// const childField = getFields(field.fields);
|
|
52
|
+
// const nestedField = new Field(name, new Struct(childField), field.optional);
|
|
53
|
+
// fields.push(nestedField);
|
|
54
|
+
// } else {
|
|
55
|
+
// const FieldType = PARQUET_TYPE_MAPPING[field.type];
|
|
56
|
+
// const metadata = getFieldMetadata(field);
|
|
57
|
+
// const arrowField = new Field(name, new FieldType(), field.optional, metadata);
|
|
58
|
+
// fields.push(arrowField);
|
|
59
|
+
// }
|
|
60
|
+
// }
|
|
61
|
+
// return fields;
|
|
62
|
+
// }
|
|
63
|
+
// function getFieldMetadata(field: ParquetField): Map<string, string> {
|
|
64
|
+
// const metadata = new Map();
|
|
65
|
+
// for (const key in field) {
|
|
66
|
+
// if (key !== 'name') {
|
|
67
|
+
// const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
|
|
68
|
+
// metadata.set(key, value);
|
|
69
|
+
// }
|
|
70
|
+
// }
|
|
71
|
+
// return metadata;
|
|
72
|
+
// }
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { Schema } from '@loaders.gl/schema';
|
|
2
|
+
/** A geoarrow / geoparquet geo metadata object (stored in stringified form in the top level metadata 'geo' key) */
|
|
3
|
+
export type GeoMetadata = {
|
|
4
|
+
version?: string;
|
|
5
|
+
primary_column?: string;
|
|
6
|
+
columns: Record<string, GeoColumnMetadata>;
|
|
7
|
+
[key: string]: unknown;
|
|
8
|
+
};
|
|
9
|
+
/** A geoarrow / geoparquet geo metadata for one geometry column */
|
|
10
|
+
export type GeoColumnMetadata = {
  /**
   * Bounding box under the `bbox` key, which is the key the GeoParquet JSON schema
   * and the metadata unpacking code in this package use.
   * 4 numbers for a 2D box, 6 numbers for a 3D box.
   */
  bbox?: [number, number, number, number] | [number, number, number, number, number, number];
  /** Alternative spelling of the bounding box key, kept for backward compatibility */
  bounding_box?: [number, number, number, number] | [number, number, number, number, number, number];
  crs?: string;
  /** Geometry types under the singular key (the key special-cased when unpacking field metadata) */
  geometry_type?: string[];
  /** Geometry types under the plural key required by the GeoParquet JSON schema in this package */
  geometry_types?: string[];
  edges?: string;
  /** Any other per-column entries are passed through untyped */
  [key: string]: unknown;
};
|
|
17
|
+
/**
|
|
18
|
+
* Reads the GeoMetadata object from the metadata
|
|
19
|
+
* @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
|
|
20
|
+
export declare function getGeoMetadata(schema: Schema): GeoMetadata | null;
|
|
21
|
+
/**
|
|
22
|
+
* Stores a geoarrow / geoparquet geo metadata object in the schema
|
|
23
|
+
* @note geoarrow / geoparquet geo metadata is a single stringified JSON field
|
|
24
|
+
*/
|
|
25
|
+
export declare function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void;
|
|
26
|
+
/**
|
|
27
|
+
* Unpacks geo metadata into separate metadata fields (parses the long JSON string)
|
|
28
|
+
* @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
|
|
29
|
+
*/
|
|
30
|
+
export declare function unpackGeoMetadata(schema: Schema): void;
|
|
31
|
+
//# sourceMappingURL=decode-geo-metadata.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"decode-geo-metadata.d.ts","sourceRoot":"","sources":["../../../src/lib/geo/decode-geo-metadata.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,MAAM,EAAQ,MAAM,oBAAoB,CAAC;AAIjD,mHAAmH;AACnH,MAAM,MAAM,WAAW,GAAG;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC3C,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB,CAAC;AAEF,oEAAoE;AACpE,MAAM,MAAM,iBAAiB,GAAG;IAC9B,YAAY,CAAC,EACT,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,GAChC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB,CAAC;AAEF;;yGAEyG;AACzG,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,IAAI,CAYjE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,GAAG,IAAI,CAG7E;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CA6BtD"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = void 0;
|
|
4
|
+
/**
 * Reads the GeoMetadata object from the schema metadata
 * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
 * @param schema object whose `metadata` map may contain a stringified JSON value under the 'geo' key
 * @returns the parsed geo metadata object, or null when the 'geo' entry is missing or empty,
 *   is not valid JSON, or parses to something that is not a JSON object
 */
function getGeoMetadata(schema) {
  const stringifiedGeoMetadata = schema.metadata.get('geo');
  if (!stringifiedGeoMetadata) {
    return null;
  }
  let geoMetadata;
  try {
    geoMetadata = JSON.parse(stringifiedGeoMetadata);
  } catch {
    // Malformed JSON is treated the same as "no geo metadata"
    return null;
  }
  // JSON.parse also accepts bare strings, numbers, booleans, null and arrays;
  // none of those can be a geo metadata object, so report them as absent
  const isJsonObject =
    geoMetadata !== null && typeof geoMetadata === 'object' && !Array.isArray(geoMetadata);
  return isJsonObject ? geoMetadata : null;
}
|
|
20
|
+
exports.getGeoMetadata = getGeoMetadata;
|
|
21
|
+
/**
 * Writes a geoarrow / geoparquet geo metadata object into the schema metadata
 * @note the object is serialized with JSON.stringify and stored as one string under the 'geo' key
 * @param schema object whose `metadata` map receives the entry
 * @param geoMetadata geo metadata object to serialize
 */
function setGeoMetadata(schema, geoMetadata) {
  schema.metadata.set('geo', JSON.stringify(geoMetadata));
}
|
|
29
|
+
exports.setGeoMetadata = setGeoMetadata;
|
|
30
|
+
/**
 * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
 * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
 *
 * Schema-level entries written: 'geo.version' and 'geo.primary_column' (only when truthy)
 * and 'geo.columns' (comma separated column names, empty string when there are none).
 * For every geo column that matches a schema field by name, the per-column metadata is
 * unpacked onto that field, and the primary column's field is tagged with
 * 'geo.primary_field' = 'true'.
 *
 * @param schema object with a `metadata` map and a `fields` array; modified in place.
 *   Nothing is written when the schema has no usable geo metadata.
 */
function unpackGeoMetadata(schema) {
  const geoMetadata = getGeoMetadata(schema);
  if (!geoMetadata) {
    return;
  }
  // Store Parquet Schema Level Metadata
  const { version, primary_column, columns } = geoMetadata;
  if (version) {
    schema.metadata.set('geo.version', version);
  }
  if (primary_column) {
    schema.metadata.set('geo.primary_column', primary_column);
  }
  // store column names as comma separated list
  // (joining with '' would fuse the names together, so they could not be split apart again)
  schema.metadata.set('geo.columns', Object.keys(columns || {}).join(','));
  for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
    const field = schema.fields.find((candidate) => candidate.name === columnName);
    // Geo columns that have no matching field in the schema are skipped
    if (field) {
      if (field.name === primary_column) {
        field.metadata.set('geo.primary_field', 'true');
      }
      unpackGeoFieldMetadata(field, columnMetadata);
    }
  }
}
|
|
59
|
+
exports.unpackGeoMetadata = unpackGeoMetadata;
|
|
60
|
+
/**
 * Copies the metadata of one geo column onto a field as 'geo.<key>' string entries.
 *
 * - 'geometry_type' given as an array is stored as a comma separated list.
 * - Every other value (including 'bbox', 'crs', 'edges' and a non-array 'geometry_type')
 *   is stored as is when it already is a string, otherwise as its JSON.stringify form.
 *
 * @param field object with a `metadata` map; modified in place
 * @param columnMetadata metadata object for the column; a missing value is treated as empty
 */
function unpackGeoFieldMetadata(field, columnMetadata) {
  for (const [key, value] of Object.entries(columnMetadata || {})) {
    if (key === 'geometry_type' && Array.isArray(value)) {
      field.metadata.set(`geo.${key}`, value.join(','));
    } else {
      // Also reached for a 'geometry_type' that is not an array (e.g. a single string),
      // where calling .join() unconditionally would throw a TypeError
      field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
    }
  }
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Geoparquet JSON schema for geo metadata
|
|
3
|
+
* @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
|
|
4
|
+
* @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
|
|
5
|
+
*/
|
|
6
|
+
declare const _default: {
|
|
7
|
+
$schema: string;
|
|
8
|
+
title: string;
|
|
9
|
+
description: string;
|
|
10
|
+
type: string;
|
|
11
|
+
required: string[];
|
|
12
|
+
properties: {
|
|
13
|
+
version: {
|
|
14
|
+
type: string;
|
|
15
|
+
const: string;
|
|
16
|
+
};
|
|
17
|
+
primary_column: {
|
|
18
|
+
type: string;
|
|
19
|
+
minLength: number;
|
|
20
|
+
};
|
|
21
|
+
columns: {
|
|
22
|
+
type: string;
|
|
23
|
+
minProperties: number;
|
|
24
|
+
patternProperties: {
|
|
25
|
+
'.+': {
|
|
26
|
+
type: string;
|
|
27
|
+
required: string[];
|
|
28
|
+
properties: {
|
|
29
|
+
encoding: {
|
|
30
|
+
type: string;
|
|
31
|
+
const: string;
|
|
32
|
+
};
|
|
33
|
+
geometry_types: {
|
|
34
|
+
type: string;
|
|
35
|
+
uniqueItems: boolean;
|
|
36
|
+
items: {
|
|
37
|
+
type: string;
|
|
38
|
+
pattern: string;
|
|
39
|
+
};
|
|
40
|
+
};
|
|
41
|
+
crs: {
|
|
42
|
+
oneOf: ({
|
|
43
|
+
$ref: string;
|
|
44
|
+
type?: undefined;
|
|
45
|
+
} | {
|
|
46
|
+
type: string;
|
|
47
|
+
$ref?: undefined;
|
|
48
|
+
})[];
|
|
49
|
+
};
|
|
50
|
+
edges: {
|
|
51
|
+
type: string;
|
|
52
|
+
enum: string[];
|
|
53
|
+
};
|
|
54
|
+
orientation: {
|
|
55
|
+
type: string;
|
|
56
|
+
const: string;
|
|
57
|
+
};
|
|
58
|
+
bbox: {
|
|
59
|
+
type: string;
|
|
60
|
+
items: {
|
|
61
|
+
type: string;
|
|
62
|
+
};
|
|
63
|
+
oneOf: {
|
|
64
|
+
description: string;
|
|
65
|
+
minItems: number;
|
|
66
|
+
maxItems: number;
|
|
67
|
+
}[];
|
|
68
|
+
};
|
|
69
|
+
epoch: {
|
|
70
|
+
type: string;
|
|
71
|
+
};
|
|
72
|
+
};
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
additionalProperties: boolean;
|
|
76
|
+
};
|
|
77
|
+
};
|
|
78
|
+
};
|
|
79
|
+
export default _default;
|
|
80
|
+
//# sourceMappingURL=geoparquet-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"geoparquet-schema.d.ts","sourceRoot":"","sources":["../../../src/lib/geo/geoparquet-schema.ts"],"names":[],"mappings":"AAIA;;;;GAIG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACH,wBA2DE"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// loaders.gl, MIT license
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
/* eslint-disable camelcase */
|
|
5
|
+
/**
|
|
6
|
+
* Geoparquet JSON schema for geo metadata
|
|
7
|
+
* @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
|
|
8
|
+
* @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
|
|
9
|
+
*/
|
|
10
|
+
exports.default = {
|
|
11
|
+
$schema: 'http://json-schema.org/draft-07/schema#',
|
|
12
|
+
title: 'GeoParquet',
|
|
13
|
+
description: 'Parquet metadata included in the geo field.',
|
|
14
|
+
type: 'object',
|
|
15
|
+
required: ['version', 'primary_column', 'columns'],
|
|
16
|
+
properties: {
|
|
17
|
+
version: { type: 'string', const: '1.0.0-beta.1' },
|
|
18
|
+
primary_column: { type: 'string', minLength: 1 },
|
|
19
|
+
columns: {
|
|
20
|
+
type: 'object',
|
|
21
|
+
minProperties: 1,
|
|
22
|
+
patternProperties: {
|
|
23
|
+
'.+': {
|
|
24
|
+
type: 'object',
|
|
25
|
+
required: ['encoding', 'geometry_types'],
|
|
26
|
+
properties: {
|
|
27
|
+
encoding: { type: 'string', const: 'WKB' },
|
|
28
|
+
geometry_types: {
|
|
29
|
+
type: 'array',
|
|
30
|
+
uniqueItems: true,
|
|
31
|
+
items: {
|
|
32
|
+
type: 'string',
|
|
33
|
+
pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
crs: {
|
|
37
|
+
oneOf: [
|
|
38
|
+
{
|
|
39
|
+
$ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
|
|
40
|
+
},
|
|
41
|
+
{ type: 'null' }
|
|
42
|
+
]
|
|
43
|
+
},
|
|
44
|
+
edges: { type: 'string', enum: ['planar', 'spherical'] },
|
|
45
|
+
orientation: { type: 'string', const: 'counterclockwise' },
|
|
46
|
+
bbox: {
|
|
47
|
+
type: 'array',
|
|
48
|
+
items: { type: 'number' },
|
|
49
|
+
oneOf: [
|
|
50
|
+
{
|
|
51
|
+
description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
|
|
52
|
+
minItems: 4,
|
|
53
|
+
maxItems: 4
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
|
|
57
|
+
minItems: 6,
|
|
58
|
+
maxItems: 6
|
|
59
|
+
}
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
epoch: { type: 'number' }
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
additionalProperties: false
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
};
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { ColumnarTableBatch } from '@loaders.gl/schema';
|
|
2
|
+
import type { ParquetLoaderOptions } from '../../parquet-loader';
|
|
3
|
+
export declare function parseParquetInColumns(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<ColumnarTableBatch | null>;
|
|
4
|
+
export declare function parseParquetFileInColumnarBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncIterable<ColumnarTableBatch>;
|
|
5
|
+
//# sourceMappingURL=parse-parquet-to-columns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-parquet-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-columns.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,kBAAkB,EAAS,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAO/D,wBAAsB,qBAAqB,CACzC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,sCAO/B;AAED,wBAAuB,iCAAiC,CACtD,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,kBAAkB,CAAC,CAWnC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// loaders.gl, MIT license
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.parseParquetFileInColumnarBatches = exports.parseParquetInColumns = void 0;
|
|
5
|
+
const loader_utils_1 = require("@loaders.gl/loader-utils");
|
|
6
|
+
const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
|
|
7
|
+
const convert_schema_from_parquet_1 = require("../arrow/convert-schema-from-parquet");
|
|
8
|
+
const convert_row_group_to_columns_1 = require("../arrow/convert-row-group-to-columns");
|
|
9
|
+
const decode_geo_metadata_1 = require("../geo/decode-geo-metadata");
|
|
10
|
+
/**
 * Parses an in-memory parquet file into a columnar table batch.
 *
 * NOTE: only the FIRST batch produced by parseParquetFileInColumnarBatches is
 * returned; iteration stops as soon as that batch arrives.
 *
 * @param arrayBuffer parquet file contents
 * @param options loader options, forwarded unchanged to the batch parser
 * @returns the first columnar batch, or null when the batch parser yields nothing
 */
async function parseParquetInColumns(arrayBuffer, options) {
  const batches = parseParquetFileInColumnarBatches(new Blob([arrayBuffer]), options);
  for await (const firstBatch of batches) {
    return firstBatch;
  }
  return null;
}
|
|
17
|
+
exports.parseParquetInColumns = parseParquetInColumns;
|
|
18
|
+
/**
 * Streams a parquet file as columnar table batches, one batch per row group.
 *
 * The schema is converted from the parquet schema and file metadata once, up front,
 * and geo metadata is unpacked into it unconditionally; every yielded batch shares
 * that schema object.
 *
 * @param blob parquet file contents
 * @param options loader options; `options.parquet` is passed to the reader's row group iterator
 */
async function* parseParquetFileInColumnarBatches(blob, options) {
  const { makeReadableFile } = loader_utils_1;
  const reader = new parquet_reader_1.ParquetReader(makeReadableFile(blob));

  // Sequential awaits: parquet schema first, then the file metadata
  const parquetSchema = await reader.getSchema();
  const parquetMetadata = await reader.getFileMetadata();

  const { convertSchemaFromParquet } = convert_schema_from_parquet_1;
  const schema = convertSchemaFromParquet(parquetSchema, parquetMetadata);
  const { unpackGeoMetadata: unpackGeo } = decode_geo_metadata_1;
  unpackGeo(schema);

  for await (const rowGroup of reader.rowGroupIterator(options?.parquet)) {
    yield convertRowGroupToTableBatch(schema, rowGroup);
  }
}
|
|
30
|
+
exports.parseParquetFileInColumnarBatches = parseParquetFileInColumnarBatches;
|
|
31
|
+
/**
 * Wraps one parquet row group as a 'columnar-table' data batch.
 *
 * @param schema schema attached to the batch and passed to the column conversion
 * @param rowGroup row group to convert; its `rowCount` becomes the batch length
 * @returns batch object with shape, batchType, schema, data and length
 */
function convertRowGroupToTableBatch(schema, rowGroup) {
  const { convertParquetRowGroupToColumns } = convert_row_group_to_columns_1;
  return {
    shape: 'columnar-table',
    batchType: 'data',
    schema,
    data: convertParquetRowGroupToColumns(schema, rowGroup),
    length: rowGroup.rowCount
  };
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ParquetLoaderOptions } from '../../parquet-loader';
|
|
2
|
+
export declare function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<import("../../parquetjs/schema/declare").ParquetRecord[] | null>;
|
|
3
|
+
export declare function parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncGenerator<import("../../parquetjs/schema/declare").ParquetRecord[], void, unknown>;
|
|
4
|
+
//# sourceMappingURL=parse-parquet-to-rows.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAG/D,wBAAsB,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,oBAAoB,4EAM1F;AAED,wBAAuB,yBAAyB,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,oBAAoB,2FAO1F"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parseParquetFileInBatches = exports.parseParquet = void 0;
|
|
4
|
+
// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
|
+
// import {ColumnarTableBatch} from '@loaders.gl/schema';
|
|
6
|
+
const loader_utils_1 = require("@loaders.gl/loader-utils");
|
|
7
|
+
const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
|
|
8
|
+
/**
 * Parses an in-memory parquet file into an array of row records.
 *
 * All row batches produced by parseParquetFileInBatches are collected. Returning
 * from inside the loop on the first batch would silently drop the rows of every
 * later batch.
 *
 * @param arrayBuffer parquet file contents
 * @param options loader options, forwarded unchanged to the batch parser
 * @returns all rows of the file; the first batch itself when there is exactly one batch;
 *   null when the batch parser yields nothing
 */
async function parseParquet(arrayBuffer, options) {
  const blob = new Blob([arrayBuffer]);
  const batches = [];
  for await (const batch of parseParquetFileInBatches(blob, options)) {
    batches.push(batch);
  }
  if (batches.length === 0) {
    return null;
  }
  // Single batch: hand back the same array instance instead of copying it.
  // Multiple batches: flatten exactly one level (array of batches -> array of rows).
  return batches.length === 1 ? batches[0] : batches.flat();
}
|
|
15
|
+
exports.parseParquet = parseParquet;
|
|
16
|
+
/**
 * Streams a parquet file as batches of rows.
 *
 * Each value produced by the reader's row batch iterator is yielded unchanged.
 *
 * @param blob parquet file contents
 * @param options loader options; `options.parquet` is passed to the reader's row batch iterator
 */
async function* parseParquetFileInBatches(blob, options) {
  const { makeReadableFile } = loader_utils_1;
  const reader = new parquet_reader_1.ParquetReader(makeReadableFile(blob));
  for await (const batch of reader.rowBatchIterator(options?.parquet)) {
    yield batch;
  }
}
|
|
24
|
+
exports.parseParquetFileInBatches = parseParquetFileInBatches;
|
|
25
|
+
// export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
|
|
26
|
+
// const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
|
|
27
|
+
// try {
|
|
28
|
+
// for await (const rowGroup of rowGroupReader) {
|
|
29
|
+
// yield convertRowGroupToTableBatch(rowGroup);
|
|
30
|
+
// }
|
|
31
|
+
// } finally {
|
|
32
|
+
// await rowGroupReader.close();
|
|
33
|
+
// }
|
|
34
|
+
// }
|
|
35
|
+
// function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
|
|
36
|
+
// // @ts-expect-error
|
|
37
|
+
// return {
|
|
38
|
+
// data: rowGroup
|
|
39
|
+
// };
|
|
40
|
+
// }
|
package/dist/parquet-loader.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAMpE,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAMpE,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;QACnC,UAAU,CAAC,EAAE,OAAO,CAAC;KACtB,CAAC;CACH,CAAC;AAWF,6BAA6B;AAC7B,eAAO,MAAM,aAAa;;;;;;;;;;;;CAYzB,CAAC;AAEF,eAAO,MAAM,uBAAuB,EAAE,MAAsB,CAAC"}
|
package/dist/parquet-loader.js
CHANGED
|
@@ -7,7 +7,9 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
|
7
7
|
const DEFAULT_PARQUET_LOADER_OPTIONS = {
|
|
8
8
|
parquet: {
|
|
9
9
|
type: 'object-row-table',
|
|
10
|
-
url: undefined
|
|
10
|
+
url: undefined,
|
|
11
|
+
columnList: [],
|
|
12
|
+
geoparquet: true
|
|
11
13
|
}
|
|
12
14
|
};
|
|
13
15
|
/** ParquetJS table loader */
|