@loaders.gl/parquet 4.3.1 → 4.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +76 -286
- package/dist/index.cjs.map +4 -4
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -4
- package/dist/lib/constants.js +1 -1
- package/dist/lib/encoders/encode-arrow-to-parquet.d.ts +7 -0
- package/dist/lib/encoders/encode-arrow-to-parquet.d.ts.map +1 -0
- package/dist/lib/encoders/{encode-parquet-wasm.js → encode-arrow-to-parquet.js} +1 -1
- package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
- package/dist/lib/parsers/get-parquet-schema.js +3 -3
- package/dist/lib/parsers/parse-geoparquet-to-geojson.d.ts +6 -0
- package/dist/lib/parsers/parse-geoparquet-to-geojson.d.ts.map +1 -0
- package/dist/lib/parsers/parse-geoparquet-to-geojson.js +15 -0
- package/dist/lib/parsers/{parse-parquet-wasm.d.ts → parse-parquet-to-arrow.d.ts} +3 -3
- package/dist/lib/parsers/parse-parquet-to-arrow.d.ts.map +1 -0
- package/dist/lib/parsers/{parse-parquet-wasm.js → parse-parquet-to-arrow.js} +8 -8
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +4 -4
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +1 -1
- package/dist/lib/parsers/{parse-parquet.d.ts → parse-parquet-to-json.d.ts} +4 -4
- package/dist/lib/parsers/parse-parquet-to-json.d.ts.map +1 -0
- package/dist/lib/parsers/{parse-parquet.js → parse-parquet-to-json.js} +2 -16
- package/dist/{parquet-wasm-loader.d.ts → parquet-arrow-loader.d.ts} +4 -6
- package/dist/parquet-arrow-loader.d.ts.map +1 -0
- package/dist/{parquet-wasm-loader.js → parquet-arrow-loader.js} +10 -11
- package/dist/parquet-arrow-writer.d.ts +9 -0
- package/dist/parquet-arrow-writer.d.ts.map +1 -0
- package/dist/{parquet-wasm-writer.js → parquet-arrow-writer.js} +6 -8
- package/dist/parquet-format.d.ts +9 -0
- package/dist/parquet-format.d.ts.map +1 -0
- package/dist/parquet-format.js +11 -0
- package/dist/{parquet-loader.d.ts → parquet-json-loader.d.ts} +57 -87
- package/dist/parquet-json-loader.d.ts.map +1 -0
- package/dist/{parquet-loader.js → parquet-json-loader.js} +42 -47
- package/dist/parquet-json-writer.d.ts +19 -0
- package/dist/parquet-json-writer.d.ts.map +1 -0
- package/dist/{parquet-writer.js → parquet-json-writer.js} +4 -6
- package/dist/parquetjs/codecs/declare.d.ts.map +1 -1
- package/dist/parquetjs/codecs/declare.js +5 -0
- package/dist/parquetjs/codecs/dictionary.d.ts.map +1 -1
- package/dist/parquetjs/codecs/dictionary.js +5 -0
- package/dist/parquetjs/codecs/index.d.ts.map +1 -1
- package/dist/parquetjs/codecs/index.js +5 -0
- package/dist/parquetjs/codecs/plain.d.ts.map +1 -1
- package/dist/parquetjs/codecs/plain.js +5 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +5 -1
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +5 -2
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
- package/dist/parquetjs/encoder/parquet-encoder.js +5 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +5 -0
- package/dist/parquetjs/parquet-thrift/BsonType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/BsonType.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +5 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +5 -0
- package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +5 -0
- package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +5 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +5 -0
- package/dist/parquetjs/parquet-thrift/DateType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/DateType.js +5 -0
- package/dist/parquetjs/parquet-thrift/DecimalType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js +5 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/Encoding.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/Encoding.js +5 -0
- package/dist/parquetjs/parquet-thrift/EnumType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/EnumType.js +5 -0
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +5 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +5 -0
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/IntType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/IntType.js +5 -0
- package/dist/parquetjs/parquet-thrift/JsonType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/JsonType.js +5 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/KeyValue.js +5 -0
- package/dist/parquetjs/parquet-thrift/ListType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/ListType.js +5 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/LogicalType.js +5 -0
- package/dist/parquetjs/parquet-thrift/MapType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/MapType.js +5 -0
- package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +5 -0
- package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +5 -0
- package/dist/parquetjs/parquet-thrift/NullType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/NullType.js +5 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +5 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +5 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/PageHeader.js +5 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/PageLocation.js +5 -0
- package/dist/parquetjs/parquet-thrift/PageType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/PageType.js +5 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/RowGroup.js +5 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +5 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +5 -0
- package/dist/parquetjs/parquet-thrift/Statistics.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/Statistics.js +5 -0
- package/dist/parquetjs/parquet-thrift/StringType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/StringType.js +5 -0
- package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/TimeType.js +5 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +5 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/TimestampType.js +5 -0
- package/dist/parquetjs/parquet-thrift/Type.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/Type.js +5 -0
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +5 -0
- package/dist/parquetjs/parquet-thrift/UUIDType.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/UUIDType.js +5 -0
- package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -1
- package/dist/parquetjs/parquet-thrift/index.js +5 -0
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.js +5 -0
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +5 -0
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +5 -0
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +5 -1
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +5 -1
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/schema/types.js +5 -1
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +5 -0
- package/dist/parquetjs/utils/read-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/read-utils.js +5 -0
- package/dist/polyfills/buffer/buffer.d.ts.map +1 -1
- package/dist/polyfills/buffer/buffer.js +0 -1
- package/dist/polyfills/buffer/index.d.ts.map +1 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.js +3 -0
- package/dist/workers/parquet-json-worker.d.ts +2 -0
- package/dist/workers/parquet-json-worker.d.ts.map +1 -0
- package/dist/workers/{parquet-worker.js → parquet-json-worker.js} +2 -2
- package/package.json +13 -13
- package/src/index.ts +16 -9
- package/src/lib/encoders/{encode-parquet-wasm.ts → encode-arrow-to-parquet.ts} +4 -4
- package/src/lib/parsers/get-parquet-schema.ts +3 -3
- package/src/lib/parsers/parse-geoparquet-to-geojson.ts +36 -0
- package/src/lib/parsers/{parse-parquet-wasm.ts → parse-parquet-to-arrow.ts} +9 -9
- package/src/lib/parsers/parse-parquet-to-columns.ts +4 -4
- package/src/lib/parsers/{parse-parquet.ts → parse-parquet-to-json.ts} +5 -25
- package/src/{parquet-wasm-loader.ts → parquet-arrow-loader.ts} +18 -20
- package/src/parquet-arrow-writer.ts +33 -0
- package/src/parquet-format.ts +14 -0
- package/src/{parquet-loader.ts → parquet-json-loader.ts} +42 -48
- package/src/{parquet-writer.ts → parquet-json-writer.ts} +6 -8
- package/src/parquetjs/codecs/declare.ts +6 -1
- package/src/parquetjs/codecs/dictionary.ts +6 -0
- package/src/parquetjs/codecs/index.ts +6 -1
- package/src/parquetjs/codecs/plain.ts +6 -1
- package/src/parquetjs/codecs/rle.ts +5 -1
- package/src/parquetjs/compression.ts +5 -2
- package/src/parquetjs/encoder/parquet-encoder.ts +6 -1
- package/src/parquetjs/modules.d.ts +5 -1
- package/src/parquetjs/parquet-thrift/BoundaryOrder.ts +6 -0
- package/src/parquetjs/parquet-thrift/BsonType.ts +6 -0
- package/src/parquetjs/parquet-thrift/ColumnChunk.ts +6 -0
- package/src/parquetjs/parquet-thrift/ColumnIndex.ts +6 -0
- package/src/parquetjs/parquet-thrift/ColumnMetaData.ts +6 -0
- package/src/parquetjs/parquet-thrift/ColumnOrder.ts +6 -0
- package/src/parquetjs/parquet-thrift/CompressionCodec.ts +6 -0
- package/src/parquetjs/parquet-thrift/ConvertedType.ts +6 -0
- package/src/parquetjs/parquet-thrift/DataPageHeader.ts +6 -0
- package/src/parquetjs/parquet-thrift/DataPageHeaderV2.ts +6 -0
- package/src/parquetjs/parquet-thrift/DateType.ts +6 -0
- package/src/parquetjs/parquet-thrift/DecimalType.ts +6 -0
- package/src/parquetjs/parquet-thrift/DictionaryPageHeader.ts +6 -0
- package/src/parquetjs/parquet-thrift/Encoding.ts +6 -0
- package/src/parquetjs/parquet-thrift/EnumType.ts +6 -0
- package/src/parquetjs/parquet-thrift/FieldRepetitionType.ts +6 -0
- package/src/parquetjs/parquet-thrift/FileMetaData.ts +6 -0
- package/src/parquetjs/parquet-thrift/IndexPageHeader.ts +6 -0
- package/src/parquetjs/parquet-thrift/IntType.ts +6 -0
- package/src/parquetjs/parquet-thrift/JsonType.ts +6 -0
- package/src/parquetjs/parquet-thrift/KeyValue.ts +6 -0
- package/src/parquetjs/parquet-thrift/ListType.ts +6 -0
- package/src/parquetjs/parquet-thrift/LogicalType.ts +6 -0
- package/src/parquetjs/parquet-thrift/MapType.ts +6 -0
- package/src/parquetjs/parquet-thrift/MicroSeconds.ts +6 -0
- package/src/parquetjs/parquet-thrift/MilliSeconds.ts +6 -0
- package/src/parquetjs/parquet-thrift/NullType.ts +6 -0
- package/src/parquetjs/parquet-thrift/OffsetIndex.ts +6 -0
- package/src/parquetjs/parquet-thrift/PageEncodingStats.ts +6 -0
- package/src/parquetjs/parquet-thrift/PageHeader.ts +6 -0
- package/src/parquetjs/parquet-thrift/PageLocation.ts +6 -0
- package/src/parquetjs/parquet-thrift/PageType.ts +6 -0
- package/src/parquetjs/parquet-thrift/RowGroup.ts +6 -0
- package/src/parquetjs/parquet-thrift/SchemaElement.ts +6 -0
- package/src/parquetjs/parquet-thrift/SortingColumn.ts +6 -0
- package/src/parquetjs/parquet-thrift/Statistics.ts +6 -0
- package/src/parquetjs/parquet-thrift/StringType.ts +6 -0
- package/src/parquetjs/parquet-thrift/TimeType.ts +6 -0
- package/src/parquetjs/parquet-thrift/TimeUnit.ts +6 -0
- package/src/parquetjs/parquet-thrift/TimestampType.ts +6 -0
- package/src/parquetjs/parquet-thrift/Type.ts +6 -0
- package/src/parquetjs/parquet-thrift/TypeDefinedOrder.ts +6 -0
- package/src/parquetjs/parquet-thrift/UUIDType.ts +6 -0
- package/src/parquetjs/parquet-thrift/index.ts +6 -0
- package/src/parquetjs/parser/decoders.ts +6 -1
- package/src/parquetjs/parser/parquet-reader.ts +6 -1
- package/src/parquetjs/schema/declare.ts +6 -1
- package/src/parquetjs/schema/schema.ts +5 -1
- package/src/parquetjs/schema/shred.ts +5 -1
- package/src/parquetjs/schema/types.ts +6 -1
- package/src/parquetjs/utils/file-utils.ts +6 -1
- package/src/parquetjs/utils/read-utils.ts +6 -0
- package/src/polyfills/buffer/buffer.ts +0 -1
- package/src/polyfills/buffer/index.ts +1 -0
- package/src/polyfills/buffer/install-buffer-polyfill.ts +4 -0
- package/src/workers/{parquet-worker.ts → parquet-json-worker.ts} +2 -2
- package/dist/lib/encoders/encode-parquet-wasm.d.ts +0 -7
- package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/parsers/parse-geoparquet.d.ts +0 -6
- package/dist/lib/parsers/parse-geoparquet.d.ts.map +0 -1
- package/dist/lib/parsers/parse-geoparquet.js +0 -58
- package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/parsers/parse-parquet.d.ts.map +0 -1
- package/dist/parquet-loader.d.ts.map +0 -1
- package/dist/parquet-wasm-loader.d.ts.map +0 -1
- package/dist/parquet-wasm-writer.d.ts +0 -9
- package/dist/parquet-wasm-writer.d.ts.map +0 -1
- package/dist/parquet-writer.d.ts +0 -17
- package/dist/parquet-writer.d.ts.map +0 -1
- package/dist/workers/parquet-worker.d.ts +0 -2
- package/dist/workers/parquet-worker.d.ts.map +0 -1
- package/src/lib/parsers/parse-geoparquet.ts +0 -88
- package/src/parquet-wasm-writer.ts +0 -35
|
@@ -7,15 +7,14 @@ import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-u
|
|
|
7
7
|
import {ReadableFile, BlobFile} from '@loaders.gl/loader-utils';
|
|
8
8
|
|
|
9
9
|
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
} from './lib/parsers/parse-parquet-
|
|
10
|
+
parseParquetFileToArrow,
|
|
11
|
+
parseParquetFileToArrowInBatches
|
|
12
|
+
} from './lib/parsers/parse-parquet-to-arrow';
|
|
13
13
|
import {VERSION, PARQUET_WASM_URL} from './lib/constants';
|
|
14
14
|
|
|
15
15
|
/** Parquet WASM loader options */
|
|
16
|
-
export type
|
|
16
|
+
export type ParquetArrowLoaderOptions = LoaderOptions & {
|
|
17
17
|
parquet?: {
|
|
18
|
-
shape?: 'arrow-table';
|
|
19
18
|
limit?: number; // Provide a limit to the number of rows to be read.
|
|
20
19
|
offset?: number; // Provide an offset to skip over the given number of rows.
|
|
21
20
|
batchSize?: number; // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
@@ -27,7 +26,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
|
27
26
|
};
|
|
28
27
|
|
|
29
28
|
/** Parquet WASM table loader */
|
|
30
|
-
export const
|
|
29
|
+
export const ParquetArrowWorkerLoader = {
|
|
31
30
|
dataType: null as unknown as ArrowTable,
|
|
32
31
|
batchType: null as unknown as ArrowTableBatch,
|
|
33
32
|
|
|
@@ -43,7 +42,6 @@ export const ParquetWasmWorkerLoader = {
|
|
|
43
42
|
tests: ['PAR1', 'PARE'],
|
|
44
43
|
options: {
|
|
45
44
|
parquet: {
|
|
46
|
-
shape: 'arrow-table',
|
|
47
45
|
limit: undefined, // Provide a limit to the number of rows to be read.
|
|
48
46
|
offset: 0, // Provide an offset to skip over the given number of rows.
|
|
49
47
|
batchSize: undefined, // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
|
|
@@ -53,24 +51,24 @@ export const ParquetWasmWorkerLoader = {
|
|
|
53
51
|
wasmUrl: PARQUET_WASM_URL
|
|
54
52
|
}
|
|
55
53
|
}
|
|
56
|
-
} as const satisfies Loader<ArrowTable, ArrowTableBatch,
|
|
54
|
+
} as const satisfies Loader<ArrowTable, ArrowTableBatch, ParquetArrowLoaderOptions>;
|
|
57
55
|
|
|
58
56
|
/** Parquet WASM table loader */
|
|
59
|
-
export const
|
|
60
|
-
...
|
|
57
|
+
export const ParquetArrowLoader = {
|
|
58
|
+
...ParquetArrowWorkerLoader,
|
|
61
59
|
|
|
62
|
-
parse(arrayBuffer: ArrayBuffer, options?:
|
|
63
|
-
const wasmOptions = {...
|
|
64
|
-
return
|
|
60
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetArrowLoaderOptions) {
|
|
61
|
+
const wasmOptions = {...ParquetArrowLoader.options.parquet, ...options?.parquet};
|
|
62
|
+
return parseParquetFileToArrow(new BlobFile(arrayBuffer), wasmOptions);
|
|
65
63
|
},
|
|
66
64
|
|
|
67
|
-
parseFile(file: ReadableFile, options?:
|
|
68
|
-
const wasmOptions = {...
|
|
69
|
-
return
|
|
65
|
+
parseFile(file: ReadableFile, options?: ParquetArrowLoaderOptions) {
|
|
66
|
+
const wasmOptions = {...ParquetArrowLoader.options.parquet, ...options?.parquet};
|
|
67
|
+
return parseParquetFileToArrow(file, wasmOptions);
|
|
70
68
|
},
|
|
71
69
|
|
|
72
|
-
parseFileInBatches(file: ReadableFile, options?:
|
|
73
|
-
const wasmOptions = {...
|
|
74
|
-
return
|
|
70
|
+
parseFileInBatches(file: ReadableFile, options?: ParquetArrowLoaderOptions) {
|
|
71
|
+
const wasmOptions = {...ParquetArrowLoader.options.parquet, ...options?.parquet};
|
|
72
|
+
return parseParquetFileToArrowInBatches(file, wasmOptions);
|
|
75
73
|
}
|
|
76
|
-
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch,
|
|
74
|
+
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, ParquetArrowLoaderOptions>;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
|
|
6
|
+
import type {ArrowTable} from '@loaders.gl/schema';
|
|
7
|
+
import {encodeArrowToParquet} from './lib/encoders/encode-arrow-to-parquet';
|
|
8
|
+
import {ParquetFormat} from './parquet-format';
|
|
9
|
+
|
|
10
|
+
import {VERSION, PARQUET_WASM_URL} from './lib/constants';
|
|
11
|
+
|
|
12
|
+
export type ParquetArrowWriterOptions = WriterOptions & {
|
|
13
|
+
parquet?: {
|
|
14
|
+
wasmUrl?: string;
|
|
15
|
+
};
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
/** Parquet WASM writer */
|
|
19
|
+
export const ParquetArrowWriter = {
|
|
20
|
+
...ParquetFormat,
|
|
21
|
+
id: 'parquet-wasm',
|
|
22
|
+
module: 'parquet',
|
|
23
|
+
version: VERSION,
|
|
24
|
+
options: {
|
|
25
|
+
parquet: {
|
|
26
|
+
wasmUrl: PARQUET_WASM_URL
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
encode(arrowTable: ArrowTable, options?: ParquetArrowWriterOptions) {
|
|
30
|
+
options = {parquet: {...ParquetArrowWriter.options.parquet, ...options?.parquet}, ...options};
|
|
31
|
+
return encodeArrowToParquet(arrowTable, options);
|
|
32
|
+
}
|
|
33
|
+
} as const satisfies WriterWithEncoder<ArrowTable, never, ParquetArrowWriterOptions>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Format} from '@loaders.gl/loader-utils';
|
|
6
|
+
|
|
7
|
+
export const ParquetFormat = {
|
|
8
|
+
name: 'Apache Parquet',
|
|
9
|
+
category: 'table',
|
|
10
|
+
extensions: ['parquet'],
|
|
11
|
+
mimeTypes: ['application/octet-stream'],
|
|
12
|
+
binary: true,
|
|
13
|
+
tests: ['PAR1', 'PARE']
|
|
14
|
+
} as const satisfies Partial<Format>;
|
|
@@ -7,18 +7,22 @@ import type {
|
|
|
7
7
|
ObjectRowTable,
|
|
8
8
|
ObjectRowTableBatch,
|
|
9
9
|
GeoJSONTable,
|
|
10
|
-
GeoJSONTableBatch
|
|
11
|
-
ColumnarTable,
|
|
12
|
-
ColumnarTableBatch
|
|
10
|
+
GeoJSONTableBatch
|
|
11
|
+
// ColumnarTable,
|
|
12
|
+
// ColumnarTableBatch
|
|
13
13
|
} from '@loaders.gl/schema';
|
|
14
14
|
import {BlobFile} from '@loaders.gl/loader-utils';
|
|
15
15
|
|
|
16
|
-
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet';
|
|
17
|
-
import {parseGeoParquetFile, parseGeoParquetFileInBatches} from './lib/parsers/parse-geoparquet';
|
|
16
|
+
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-json';
|
|
18
17
|
import {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
} from './lib/parsers/parse-
|
|
18
|
+
parseGeoParquetFile,
|
|
19
|
+
parseGeoParquetFileInBatches
|
|
20
|
+
} from './lib/parsers/parse-geoparquet-to-geojson';
|
|
21
|
+
// import {
|
|
22
|
+
// parseParquetFileInColumns,
|
|
23
|
+
// parseParquetFileInColumnarBatches
|
|
24
|
+
// } from './lib/parsers/parse-parquet-to-columns';
|
|
25
|
+
import {ParquetFormat} from './parquet-format';
|
|
22
26
|
|
|
23
27
|
// Note: The Buffer polyfill is quite fragile
|
|
24
28
|
// For some reason, just exporting directly fails with some bundlers
|
|
@@ -31,11 +35,9 @@ export {Buffer};
|
|
|
31
35
|
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
32
36
|
|
|
33
37
|
/** Options for the parquet loader */
|
|
34
|
-
export type
|
|
38
|
+
export type ParquetJSONLoaderOptions = LoaderOptions & {
|
|
35
39
|
/** Options for the parquet loader */
|
|
36
40
|
parquet?: {
|
|
37
|
-
/** Format of returned parsed data */
|
|
38
|
-
shape?: 'object-row-table' | 'geojson-table';
|
|
39
41
|
/** Restrict which columns that are parsed from the table. Can save significant memory. */
|
|
40
42
|
columnList?: string[] | string[][];
|
|
41
43
|
/** If true, binary values are not converted to strings */
|
|
@@ -50,83 +52,74 @@ export type ParquetLoaderOptions = LoaderOptions & {
|
|
|
50
52
|
/**
|
|
51
53
|
* ParquetJS table loader
|
|
52
54
|
*/
|
|
53
|
-
export const
|
|
55
|
+
export const ParquetJSONWorkerLoader = {
|
|
56
|
+
...ParquetFormat,
|
|
57
|
+
|
|
54
58
|
dataType: null as unknown as ObjectRowTable,
|
|
55
59
|
batchType: null as unknown as ObjectRowTableBatch,
|
|
56
60
|
|
|
57
|
-
name: 'Apache Parquet',
|
|
58
61
|
id: 'parquet',
|
|
59
62
|
module: 'parquet',
|
|
60
63
|
version: VERSION,
|
|
61
64
|
worker: false,
|
|
62
|
-
category: 'table',
|
|
63
|
-
extensions: ['parquet'],
|
|
64
|
-
mimeTypes: ['application/octet-stream'],
|
|
65
|
-
binary: true,
|
|
66
|
-
tests: ['PAR1', 'PARE'],
|
|
67
65
|
options: {
|
|
68
66
|
parquet: {
|
|
69
|
-
shape: 'object-row-table',
|
|
70
67
|
columnList: [],
|
|
71
68
|
geoparquet: true,
|
|
72
69
|
url: undefined,
|
|
73
70
|
preserveBinary: false
|
|
74
71
|
}
|
|
75
72
|
}
|
|
76
|
-
} as const satisfies Loader<ObjectRowTable, ObjectRowTableBatch,
|
|
73
|
+
} as const satisfies Loader<ObjectRowTable, ObjectRowTableBatch, ParquetJSONLoaderOptions>;
|
|
77
74
|
|
|
78
75
|
/** ParquetJS table loader */
|
|
79
|
-
export const
|
|
80
|
-
...
|
|
76
|
+
export const ParquetJSONLoader = {
|
|
77
|
+
...ParquetJSONWorkerLoader,
|
|
81
78
|
|
|
82
|
-
dataType: null as unknown as ObjectRowTable
|
|
83
|
-
batchType: null as unknown as ObjectRowTableBatch
|
|
79
|
+
dataType: null as unknown as ObjectRowTable,
|
|
80
|
+
batchType: null as unknown as ObjectRowTableBatch,
|
|
84
81
|
|
|
85
|
-
parse: (arrayBuffer: ArrayBuffer, options?:
|
|
82
|
+
parse: (arrayBuffer: ArrayBuffer, options?: ParquetJSONLoaderOptions) =>
|
|
86
83
|
parseParquetFile(new BlobFile(arrayBuffer), options),
|
|
87
84
|
|
|
88
85
|
parseFile: parseParquetFile,
|
|
89
86
|
parseFileInBatches: parseParquetFileInBatches
|
|
90
87
|
} as const satisfies LoaderWithParser<
|
|
91
|
-
ObjectRowTable
|
|
92
|
-
ObjectRowTableBatch
|
|
93
|
-
|
|
88
|
+
ObjectRowTable,
|
|
89
|
+
ObjectRowTableBatch,
|
|
90
|
+
ParquetJSONLoaderOptions
|
|
94
91
|
>;
|
|
95
92
|
|
|
96
93
|
// Defeat tree shaking
|
|
97
94
|
// @ts-ignore
|
|
98
|
-
|
|
95
|
+
ParquetJSONLoader.Buffer = Buffer;
|
|
99
96
|
|
|
100
97
|
export const GeoParquetWorkerLoader = {
|
|
98
|
+
...ParquetFormat,
|
|
99
|
+
|
|
101
100
|
dataType: null as unknown as GeoJSONTable,
|
|
102
101
|
batchType: null as unknown as GeoJSONTableBatch,
|
|
103
102
|
|
|
104
|
-
name: 'Apache Parquet',
|
|
105
103
|
id: 'parquet',
|
|
106
104
|
module: 'parquet',
|
|
107
105
|
version: VERSION,
|
|
108
106
|
worker: true,
|
|
109
|
-
|
|
110
|
-
extensions: ['parquet'],
|
|
111
|
-
mimeTypes: ['application/octet-stream'],
|
|
112
|
-
binary: true,
|
|
113
|
-
tests: ['PAR1', 'PARE'],
|
|
107
|
+
|
|
114
108
|
options: {
|
|
115
109
|
parquet: {
|
|
116
|
-
shape: 'geojson-table',
|
|
117
110
|
columnList: [],
|
|
118
111
|
geoparquet: true,
|
|
119
112
|
url: undefined,
|
|
120
113
|
preserveBinary: false
|
|
121
114
|
}
|
|
122
115
|
}
|
|
123
|
-
} as const satisfies Loader<GeoJSONTable, GeoJSONTableBatch,
|
|
116
|
+
} as const satisfies Loader<GeoJSONTable, GeoJSONTableBatch, ParquetJSONLoaderOptions>;
|
|
124
117
|
|
|
125
118
|
/** ParquetJS table loader */
|
|
126
119
|
export const GeoParquetLoader = {
|
|
127
120
|
...GeoParquetWorkerLoader,
|
|
128
121
|
|
|
129
|
-
parse(arrayBuffer: ArrayBuffer, options?:
|
|
122
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetJSONLoaderOptions) {
|
|
130
123
|
return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
|
|
131
124
|
},
|
|
132
125
|
parseFile: parseGeoParquetFile,
|
|
@@ -134,11 +127,11 @@ export const GeoParquetLoader = {
|
|
|
134
127
|
} as const satisfies LoaderWithParser<
|
|
135
128
|
ObjectRowTable | GeoJSONTable,
|
|
136
129
|
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
137
|
-
|
|
130
|
+
ParquetJSONLoaderOptions
|
|
138
131
|
>;
|
|
139
132
|
|
|
140
|
-
/** @deprecated Test to see if we can improve perf of parquetjs loader
|
|
141
|
-
export const
|
|
133
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader *
|
|
134
|
+
export const ParquetJSONColumnarWorkerLoader = {
|
|
142
135
|
dataType: null as any as ColumnarTable,
|
|
143
136
|
batchType: null as any as ColumnarTableBatch,
|
|
144
137
|
|
|
@@ -152,15 +145,16 @@ export const ParquetColumnarWorkerLoader = {
|
|
|
152
145
|
mimeTypes: ['application/octet-stream'],
|
|
153
146
|
binary: true,
|
|
154
147
|
tests: ['PAR1', 'PARE'],
|
|
155
|
-
options:
|
|
156
|
-
} as const satisfies Loader<ColumnarTable, ColumnarTableBatch,
|
|
148
|
+
options: ParquetJSONLoader.options
|
|
149
|
+
} as const satisfies Loader<ColumnarTable, ColumnarTableBatch, ParquetJSONLoaderOptions>;
|
|
157
150
|
|
|
158
|
-
/** @deprecated Test to see if we can improve perf of parquetjs loader
|
|
159
|
-
export const
|
|
160
|
-
...
|
|
161
|
-
parse(arrayBuffer: ArrayBuffer, options?:
|
|
151
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader *
|
|
152
|
+
export const ParquetJSONColumnarLoader = {
|
|
153
|
+
...ParquetJSONColumnarWorkerLoader,
|
|
154
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetJSONLoaderOptions) {
|
|
162
155
|
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
|
|
163
156
|
},
|
|
164
157
|
parseFile: parseParquetFileInColumns,
|
|
165
158
|
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
166
|
-
} as const satisfies LoaderWithParser<ColumnarTable, ColumnarTableBatch,
|
|
159
|
+
} as const satisfies LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetJSONLoaderOptions>;
|
|
160
|
+
*/
|
|
@@ -4,26 +4,24 @@
|
|
|
4
4
|
|
|
5
5
|
import type {WriterWithEncoder} from '@loaders.gl/loader-utils';
|
|
6
6
|
import {Table, TableBatch} from '@loaders.gl/schema';
|
|
7
|
+
import {ParquetFormat} from './parquet-format';
|
|
7
8
|
|
|
8
9
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
9
10
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
10
11
|
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
11
12
|
|
|
12
|
-
export type
|
|
13
|
+
export type ParquetJSONWriterOptions = {};
|
|
13
14
|
|
|
14
|
-
export const
|
|
15
|
-
|
|
15
|
+
export const ParquetJSONWriter = {
|
|
16
|
+
...ParquetFormat,
|
|
16
17
|
id: 'parquet',
|
|
17
18
|
module: 'parquet',
|
|
18
19
|
version: VERSION,
|
|
19
|
-
extensions: ['parquet'],
|
|
20
|
-
mimeTypes: ['application/octet-stream'],
|
|
21
|
-
binary: true,
|
|
22
20
|
options: {},
|
|
23
21
|
encode: async (data, options) => encodeSync(data, options),
|
|
24
22
|
encodeSync
|
|
25
|
-
} as const satisfies WriterWithEncoder<Table, TableBatch,
|
|
23
|
+
} as const satisfies WriterWithEncoder<Table, TableBatch, ParquetJSONWriterOptions>;
|
|
26
24
|
|
|
27
|
-
function encodeSync(data, options?:
|
|
25
|
+
function encodeSync(data, options?: ParquetJSONWriterOptions) {
|
|
28
26
|
return new ArrayBuffer(0);
|
|
29
27
|
}
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
6
|
+
|
|
2
7
|
import {PrimitiveType} from '../schema/declare';
|
|
3
8
|
|
|
4
9
|
export interface CursorBuffer {
|
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
6
|
+
|
|
1
7
|
import {decodeValues as decodeRleValues} from './rle';
|
|
2
8
|
|
|
3
9
|
export function decodeValues(type, cursor, count, opts) {
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
6
|
+
|
|
2
7
|
import type {ParquetCodec} from '../schema/declare';
|
|
3
8
|
import type {ParquetCodecKit} from './declare';
|
|
4
9
|
import * as PLAIN from './plain';
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
6
|
+
|
|
2
7
|
/* eslint-disable camelcase */
|
|
3
8
|
import type {PrimitiveType} from '../schema/declare';
|
|
4
9
|
import type {CursorBuffer, ParquetCodecOptions} from './declare';
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
2
6
|
|
|
3
7
|
import type {PrimitiveType} from '../schema/declare';
|
|
4
8
|
import type {CursorBuffer, ParquetCodecOptions} from './declare';
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
-
//
|
|
2
|
-
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
3
6
|
// Forked from https://github.com/ironSource/parquetjs under MIT license
|
|
4
7
|
|
|
5
8
|
import {
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
6
|
+
|
|
2
7
|
/* eslint-disable camelcase */
|
|
3
8
|
import {stream} from '@loaders.gl/loader-utils';
|
|
4
9
|
import {ParquetCodecOptions, PARQUET_CODECS} from '../codecs/index';
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2017 ironSource Ltd.
|
|
5
|
+
// Forked from https://github.com/kbajalc/parquets under MIT license
|
|
2
6
|
|
|
3
7
|
declare module 'int53' {
|
|
4
8
|
declare function readInt64BE(buffer: Buffer, offset?: number): number;
|