@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +19 -19
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +49 -8
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/es5/parquet-loader.js +4 -2
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquetjs/compression.js +15 -5
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +3 -1
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +39 -33
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +2 -3
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/esm/index.js +13 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/esm/parquet-loader.js +4 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquetjs/compression.js +10 -1
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +42 -34
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/index.d.ts +24 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +26 -9
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +73 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
- package/dist/parquet-loader.d.ts +2 -0
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +3 -1
- package/dist/parquet-worker.js +20 -20
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +16 -5
- package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
- package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +168 -102
- package/dist/parquetjs/schema/declare.d.ts +14 -7
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +2 -0
- package/dist/parquetjs/schema/shred.d.ts +115 -0
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +161 -43
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.d.ts +3 -4
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +2 -5
- package/package.json +7 -5
- package/src/index.ts +24 -4
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
- package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
- package/src/lib/geo/decode-geo-metadata.ts +99 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
- package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
- package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/src/parquet-loader.ts +5 -1
- package/src/parquetjs/compression.ts +14 -1
- package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
- package/src/parquetjs/parser/parquet-reader.ts +239 -122
- package/src/parquetjs/schema/declare.ts +17 -9
- package/src/parquetjs/schema/shred.ts +157 -28
- package/src/parquetjs/schema/types.ts +21 -27
- package/src/parquetjs/utils/file-utils.ts +3 -4
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -43
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
- package/dist/es5/parquetjs/file.js +0 -94
- package/dist/es5/parquetjs/file.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js +0 -25
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -10
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
- package/dist/esm/parquetjs/file.js +0 -81
- package/dist/esm/parquetjs/file.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/parse-parquet.js +0 -28
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -29
- package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
- package/dist/parquetjs/file.d.ts +0 -10
- package/dist/parquetjs/file.d.ts.map +0 -1
- package/dist/parquetjs/file.js +0 -99
- package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.js +0 -74
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
- package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.js +0 -22
- package/src/lib/parse-parquet.ts +0 -27
- package/src/lib/read-array-buffer.ts +0 -31
- package/src/parquetjs/file.ts +0 -90
- package/src/parquetjs/parser/parquet-cursor.ts +0 -94
- package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
- package/src/parquetjs/utils/buffer-utils.ts +0 -18
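
The bulk of this release is a source reorganization: `lib/parse-parquet.ts` and `lib/convert-schema.ts` are split into `lib/parsers/` and `lib/arrow/` modules, a columnar parsing path and GeoParquet metadata helpers are added, and `parquetjs/encoder/writer.ts` becomes `parquet-encoder.ts`. The four new source files appear in full below. For orientation, a minimal consumer-side sketch of loading a Parquet file, assuming the existing `ParquetLoader` export and the standard `load()` API from `@loaders.gl/core` (neither is shown in this diff):

```ts
// Minimal usage sketch; ParquetLoader is the package's loader object and load()
// comes from @loaders.gl/core. Options under the 'parquet' key are the
// loader-specific options referenced as options?.parquet in the new parsers.
import {load} from '@loaders.gl/core';
import {ParquetLoader} from '@loaders.gl/parquet';

async function loadParquetTable(url: string) {
  const table = await load(url, ParquetLoader, {parquet: {}});
  return table;
}
```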
package/src/lib/geo/decode-geo-metadata.ts
```diff
@@ -0,0 +1,99 @@
+// loaders.gl, MIT license
+import {Schema, Field} from '@loaders.gl/schema';
+
+/* eslint-disable camelcase */
+
+/** A geoarrow / geoparquet geo metadata object (stored in stringified form in the top level metadata 'geo' key) */
+export type GeoMetadata = {
+  version?: string;
+  primary_column?: string;
+  columns: Record<string, GeoColumnMetadata>;
+  [key: string]: unknown;
+};
+
+/** A geoarrow / geoparquet geo metadata for one geometry column */
+export type GeoColumnMetadata = {
+  bounding_box?:
+    | [number, number, number, number]
+    | [number, number, number, number, number, number];
+  crs?: string;
+  geometry_type?: string[];
+  edges?: string;
+  [key: string]: unknown;
+};
+
+/**
+ * Reads the GeoMetadata object from the metadata
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
+export function getGeoMetadata(schema: Schema): GeoMetadata | null {
+  const stringifiedGeoMetadata = schema.metadata.get('geo');
+  if (!stringifiedGeoMetadata) {
+    return null;
+  }
+
+  try {
+    const geoMetadata = JSON.parse(stringifiedGeoMetadata) as GeoMetadata;
+    return geoMetadata;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Stores a geoarrow / geoparquet geo metadata object in the schema
+ * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
+ */
+export function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void {
+  const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
+  schema.metadata.set('geo', stringifiedGeoMetadata);
+}
+
+/**
+ * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
+ */
+export function unpackGeoMetadata(schema: Schema): void {
+  const geoMetadata = getGeoMetadata(schema);
+  if (!geoMetadata) {
+    return;
+  }
+
+  // Store Parquet Schema Level Metadata
+
+  const {version, primary_column, columns} = geoMetadata;
+  if (version) {
+    schema.metadata.set('geo.version', version);
+  }
+
+  if (primary_column) {
+    schema.metadata.set('geo.primary_column', primary_column);
+  }
+
+  // store column names as comma separated list
+  schema.metadata.set('geo.columns', Object.keys(columns || {}).join(''));
+
+  for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
+    const field = schema.fields.find((field) => field.name === columnName);
+    if (field) {
+      if (field.name === primary_column) {
+        field.metadata.set('geo.primary_field', 'true');
+      }
+      unpackGeoFieldMetadata(field, columnMetadata);
+    }
+  }
+}
+
+function unpackGeoFieldMetadata(field: Field, columnMetadata): void {
+  for (const [key, value] of Object.entries(columnMetadata || {})) {
+    switch (key) {
+      case 'geometry_type':
+        field.metadata.set(`geo.${key}`, (value as string[]).join(','));
+        break;
+      case 'bbox':
+      case 'crs':
+      case 'edges':
+      default:
+        field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
+    }
+  }
+}
```
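
A sketch of how the helpers in this file might be driven. The `schema` object below is a hand-built stand-in with just the shape the code above relies on (a string-to-string `metadata` Map on the schema and on each field); it is not the exact `@loaders.gl/schema` `Schema` class, and the relative import mirrors the source tree rather than a documented package entry point:

```ts
// Hypothetical stand-in for a Schema: only the members used above are provided.
import {getGeoMetadata, unpackGeoMetadata} from './lib/geo/decode-geo-metadata';

const schema = {
  fields: [{name: 'geometry', metadata: new Map<string, string>()}],
  metadata: new Map<string, string>([
    [
      'geo',
      JSON.stringify({
        version: '1.0.0-beta.1',
        primary_column: 'geometry',
        columns: {geometry: {encoding: 'WKB', geometry_types: ['Point']}}
      })
    ]
  ])
};

const geo = getGeoMetadata(schema as any); // parsed GeoMetadata, or null if absent/invalid
unpackGeoMetadata(schema as any); // flattens into 'geo.*' keys on schema and field metadata
console.log(geo?.primary_column); // 'geometry'
console.log(schema.fields[0].metadata.get('geo.primary_field')); // 'true'
```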
package/src/lib/geo/geoparquet-schema.ts
```diff
@@ -0,0 +1,69 @@
+// loaders.gl, MIT license
+
+/* eslint-disable camelcase */
+
+/**
+ * Geoparquet JSON schema for geo metadata
+ * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
+ * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
+ */
+export default {
+  $schema: 'http://json-schema.org/draft-07/schema#',
+  title: 'GeoParquet',
+  description: 'Parquet metadata included in the geo field.',
+  type: 'object',
+  required: ['version', 'primary_column', 'columns'],
+  properties: {
+    version: {type: 'string', const: '1.0.0-beta.1'},
+    primary_column: {type: 'string', minLength: 1},
+    columns: {
+      type: 'object',
+      minProperties: 1,
+      patternProperties: {
+        '.+': {
+          type: 'object',
+          required: ['encoding', 'geometry_types'],
+          properties: {
+            encoding: {type: 'string', const: 'WKB'},
+            geometry_types: {
+              type: 'array',
+              uniqueItems: true,
+              items: {
+                type: 'string',
+                pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
+              }
+            },
+            crs: {
+              oneOf: [
+                {
+                  $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
+                },
+                {type: 'null'}
+              ]
+            },
+            edges: {type: 'string', enum: ['planar', 'spherical']},
+            orientation: {type: 'string', const: 'counterclockwise'},
+            bbox: {
+              type: 'array',
+              items: {type: 'number'},
+              oneOf: [
+                {
+                  description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
+                  minItems: 4,
+                  maxItems: 4
+                },
+                {
+                  description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
+                  minItems: 6,
+                  maxItems: 6
+                }
+              ]
+            },
+            epoch: {type: 'number'}
+          }
+        }
+      },
+      additionalProperties: false
+    }
+  }
+};
```
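
The exported object is plain data (a JSON Schema), not a validator, and this diff does not show how or whether the package wires it into validation. A sketch of a `'geo'` metadata value that conforms to it, stored the same way `getGeoMetadata` above reads it back:

```ts
// A 'geo' value satisfying the GeoParquet schema above: version, primary_column
// and columns are required, encoding must be 'WKB', geometry_types is a list.
const geoMetadata = {
  version: '1.0.0-beta.1',
  primary_column: 'geometry',
  columns: {
    geometry: {
      encoding: 'WKB',
      geometry_types: ['Polygon', 'MultiPolygon'],
      edges: 'planar',
      bbox: [-180, -90, 180, 90] // 2D bbox: xmin, ymin, xmax, ymax
    }
  }
};

// Stored as the single stringified top-level metadata entry under 'geo'
const metadata = new Map<string, string>();
metadata.set('geo', JSON.stringify(geoMetadata));
```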
package/src/lib/parsers/parse-parquet-to-columns.ts
```diff
@@ -0,0 +1,49 @@
+// loaders.gl, MIT license
+
+// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
+import {ColumnarTableBatch, Schema} from '@loaders.gl/schema';
+import {makeReadableFile} from '@loaders.gl/loader-utils';
+import type {ParquetLoaderOptions} from '../../parquet-loader';
+import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
+import {ParquetBuffer} from '../../parquetjs/schema/declare';
+import {convertSchemaFromParquet} from '../arrow/convert-schema-from-parquet';
+import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
+import {unpackGeoMetadata} from '../geo/decode-geo-metadata';
+
+export async function parseParquetInColumns(
+  arrayBuffer: ArrayBuffer,
+  options?: ParquetLoaderOptions
+) {
+  const blob = new Blob([arrayBuffer]);
+  for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
+    return batch;
+  }
+  return null;
+}
+
+export async function* parseParquetFileInColumnarBatches(
+  blob: Blob,
+  options?: ParquetLoaderOptions
+): AsyncIterable<ColumnarTableBatch> {
+  const file = makeReadableFile(blob);
+  const reader = new ParquetReader(file);
+  const parquetSchema = await reader.getSchema();
+  const parquetMetadata = await reader.getFileMetadata();
+  const schema = convertSchemaFromParquet(parquetSchema, parquetMetadata);
+  unpackGeoMetadata(schema);
+  const rowGroups = reader.rowGroupIterator(options?.parquet);
+  for await (const rowGroup of rowGroups) {
+    yield convertRowGroupToTableBatch(schema, rowGroup);
+  }
+}
+
+function convertRowGroupToTableBatch(schema: Schema, rowGroup: ParquetBuffer): ColumnarTableBatch {
+  const data = convertParquetRowGroupToColumns(schema, rowGroup);
+  return {
+    shape: 'columnar-table',
+    batchType: 'data',
+    schema,
+    data,
+    length: rowGroup.rowCount
+  };
+}
```
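
A streaming usage sketch for the new columnar path. The relative import mirrors the source tree (this diff does not show whether these functions are re-exported from the package index), and the exact shape of `batch.data` depends on `convertParquetRowGroupToColumns`, which is not included above:

```ts
import {parseParquetFileInColumnarBatches} from './lib/parsers/parse-parquet-to-columns';

// Iterate a Parquet blob one row group at a time as columnar-table batches
async function logColumnarBatches(blob: Blob): Promise<void> {
  for await (const batch of parseParquetFileInColumnarBatches(blob)) {
    // Each batch carries the converted schema, the row group's column data
    // and the row count of that row group
    console.log(batch.shape, batch.length, batch.schema.fields.length);
  }
}
```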
package/src/lib/parsers/parse-parquet-to-rows.ts
```diff
@@ -0,0 +1,40 @@
+// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
+// import {ColumnarTableBatch} from '@loaders.gl/schema';
+import {makeReadableFile} from '@loaders.gl/loader-utils';
+import type {ParquetLoaderOptions} from '../../parquet-loader';
+import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
+
+export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
+  const blob = new Blob([arrayBuffer]);
+  for await (const batch of parseParquetFileInBatches(blob, options)) {
+    return batch;
+  }
+  return null;
+}
+
+export async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {
+  const file = makeReadableFile(blob);
+  const reader = new ParquetReader(file);
+  const rowBatches = reader.rowBatchIterator(options?.parquet);
+  for await (const rows of rowBatches) {
+    yield rows;
+  }
+}
+
+// export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
+//   const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
+//   try {
+//     for await (const rowGroup of rowGroupReader) {
+//       yield convertRowGroupToTableBatch(rowGroup);
+//     }
+//   } finally {
+//     await rowGroupReader.close();
+//   }
+// }
+
+// function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
+//   // @ts-expect-error
+//   return {
+//     data: rowGroup
+//   };
+// }
```
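
And the matching sketch for the row-oriented path, which this release moves from `lib/parse-parquet.ts` to `lib/parsers/parse-parquet-to-rows.ts` (same relative-import caveat as above):

```ts
import {parseParquet, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';

async function readRows(arrayBuffer: ArrayBuffer) {
  // One-shot: resolves with the first row batch produced by the reader
  const firstBatch = await parseParquet(arrayBuffer);

  // Streaming: yields row batches as ParquetReader.rowBatchIterator produces them
  for await (const rows of parseParquetFileInBatches(new Blob([arrayBuffer]))) {
    console.log(rows);
  }

  return firstBatch;
}
```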