@loaders.gl/parquet 4.0.0-alpha.5 → 4.0.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/constants.js +18 -6
- package/dist/dist.min.js +27 -25
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/constants.js +17 -0
- package/dist/es5/constants.js.map +1 -0
- package/dist/es5/index.js +128 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +114 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +47 -0
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +81 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +172 -0
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +43 -0
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/index.js +13 -0
- package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +42 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +31 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +60 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/es5/parquet-loader.js +44 -0
- package/dist/es5/parquet-loader.js.map +1 -0
- package/dist/es5/parquet-wasm-loader.js +30 -0
- package/dist/es5/parquet-wasm-loader.js.map +1 -0
- package/dist/es5/parquet-wasm-writer.js +26 -0
- package/dist/es5/parquet-wasm-writer.js.map +1 -0
- package/dist/es5/parquet-writer.js +24 -0
- package/dist/es5/parquet-writer.js.map +1 -0
- package/dist/es5/parquetjs/codecs/declare.js +2 -0
- package/dist/es5/parquetjs/codecs/declare.js.map +1 -0
- package/dist/es5/parquetjs/codecs/dictionary.js +23 -0
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/es5/parquetjs/codecs/index.js +47 -0
- package/dist/es5/parquetjs/codecs/index.js.map +1 -0
- package/dist/es5/parquetjs/codecs/plain.js +208 -0
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -0
- package/dist/es5/parquetjs/codecs/rle.js +132 -0
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -0
- package/dist/es5/parquetjs/compression.js +137 -0
- package/dist/es5/parquetjs/compression.js.map +1 -0
- package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +14 -0
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +193 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +198 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +367 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +99 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +33 -0
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +152 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +207 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +96 -0
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -0
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -0
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +239 -0
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +96 -0
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +94 -0
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +423 -0
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +89 -0
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +115 -0
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +204 -0
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +124 -0
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +15 -0
- package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +165 -0
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +231 -0
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +115 -0
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +165 -0
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +97 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +126 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +97 -0
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Type.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/index.js +479 -0
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/es5/parquetjs/parser/decoders.js +393 -0
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +610 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/es5/parquetjs/schema/declare.js +21 -0
- package/dist/es5/parquetjs/schema/declare.js.map +1 -0
- package/dist/es5/parquetjs/schema/schema.js +165 -0
- package/dist/es5/parquetjs/schema/schema.js.map +1 -0
- package/dist/es5/parquetjs/schema/shred.js +282 -0
- package/dist/es5/parquetjs/schema/shred.js.map +1 -0
- package/dist/es5/parquetjs/schema/types.js +406 -0
- package/dist/es5/parquetjs/schema/types.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +47 -0
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/read-utils.js +120 -0
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/es5/workers/parquet-worker.js +6 -0
- package/dist/es5/workers/parquet-worker.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/index.js +31 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js +95 -0
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +39 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +62 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +39 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +29 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wasm/encode-parquet-wasm.js +15 -0
- package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
- package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +11 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +21 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/esm/parquet-loader.js +36 -0
- package/dist/esm/parquet-loader.js.map +1 -0
- package/dist/esm/parquet-wasm-loader.js +22 -0
- package/dist/esm/parquet-wasm-loader.js.map +1 -0
- package/dist/esm/parquet-wasm-writer.js +19 -0
- package/dist/esm/parquet-wasm-writer.js.map +1 -0
- package/dist/esm/parquet-writer.js +17 -0
- package/dist/esm/parquet-writer.js.map +1 -0
- package/dist/esm/parquetjs/LICENSE +20 -0
- package/dist/esm/parquetjs/codecs/declare.js +2 -0
- package/dist/esm/parquetjs/codecs/declare.js.map +1 -0
- package/dist/esm/parquetjs/codecs/dictionary.js +13 -0
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/esm/parquetjs/codecs/index.js +23 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -0
- package/dist/esm/parquetjs/codecs/plain.js +200 -0
- package/dist/esm/parquetjs/codecs/plain.js.map +1 -0
- package/dist/esm/parquetjs/codecs/rle.js +119 -0
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -0
- package/dist/esm/parquetjs/compression.js +61 -0
- package/dist/esm/parquetjs/compression.js.map +1 -0
- package/dist/{parquetjs/encoder/writer.js → esm/parquetjs/encoder/parquet-encoder.js} +8 -106
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/esm/parquetjs/modules.d.ts +21 -0
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +7 -0
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +173 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +176 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +347 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +26 -0
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +132 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +187 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DateType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +76 -0
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +93 -0
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -0
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +219 -0
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/IntType.js +76 -0
- package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +74 -0
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ListType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +377 -0
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MapType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/NullType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +69 -0
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +95 -0
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +184 -0
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +104 -0
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageType.js +8 -0
- package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +145 -0
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +211 -0
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +95 -0
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js +145 -0
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/StringType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +102 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Type.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js +44 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/esm/parquetjs/parser/decoders.js +253 -0
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
- package/dist/{parquetjs/parser/parquet-envelope-reader.js → esm/parquetjs/parser/parquet-reader.js} +95 -74
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/esm/parquetjs/schema/declare.js +12 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -0
- package/dist/esm/parquetjs/schema/schema.js +140 -0
- package/dist/esm/parquetjs/schema/schema.js.map +1 -0
- package/dist/esm/parquetjs/schema/shred.js +228 -0
- package/dist/esm/parquetjs/schema/shred.js.map +1 -0
- package/dist/esm/parquetjs/schema/types.js +397 -0
- package/dist/esm/parquetjs/schema/types.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +34 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/read-utils.js +90 -0
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/esm/workers/parquet-worker.js +4 -0
- package/dist/esm/workers/parquet-worker.js.map +1 -0
- package/dist/index.d.ts +16 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +58 -15
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-from-parquet.js +86 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +71 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +77 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +46 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-rows.js +37 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
- package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
- package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/index.js +5 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.js +38 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.js +31 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
- package/dist/parquet-loader.d.ts +6 -15
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +38 -19
- package/dist/parquet-wasm-loader.d.ts +23 -0
- package/dist/parquet-wasm-loader.d.ts.map +1 -0
- package/dist/parquet-wasm-loader.js +27 -0
- package/dist/parquet-wasm-writer.d.ts +3 -0
- package/dist/parquet-wasm-writer.d.ts.map +1 -0
- package/dist/parquet-wasm-writer.js +23 -0
- package/dist/parquet-worker.js +27 -25
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.d.ts +3 -2
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +18 -14
- package/dist/parquetjs/codecs/declare.js +2 -2
- package/dist/parquetjs/codecs/dictionary.js +12 -10
- package/dist/parquetjs/codecs/index.js +54 -22
- package/dist/parquetjs/codecs/plain.js +173 -232
- package/dist/parquetjs/codecs/rle.js +134 -140
- package/dist/parquetjs/compression.d.ts +3 -0
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +169 -48
- package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +15 -23
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.js +484 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +14 -7
- package/dist/parquetjs/parquet-thrift/BsonType.js +60 -37
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +209 -215
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +210 -211
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +394 -421
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +102 -89
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +19 -12
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +33 -26
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +165 -161
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +228 -234
- package/dist/parquetjs/parquet-thrift/DateType.js +60 -37
- package/dist/parquetjs/parquet-thrift/DecimalType.js +104 -90
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +121 -112
- package/dist/parquetjs/parquet-thrift/Encoding.js +19 -12
- package/dist/parquetjs/parquet-thrift/EnumType.js +60 -37
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -7
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +253 -263
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +60 -37
- package/dist/parquetjs/parquet-thrift/IntType.js +104 -90
- package/dist/parquetjs/parquet-thrift/JsonType.js +60 -37
- package/dist/parquetjs/parquet-thrift/KeyValue.js +101 -88
- package/dist/parquetjs/parquet-thrift/ListType.js +60 -37
- package/dist/parquetjs/parquet-thrift/LogicalType.js +366 -449
- package/dist/parquetjs/parquet-thrift/MapType.js +60 -37
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +60 -37
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +60 -37
- package/dist/parquetjs/parquet-thrift/NullType.js +60 -37
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +96 -80
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +126 -114
- package/dist/parquetjs/parquet-thrift/PageHeader.js +218 -231
- package/dist/parquetjs/parquet-thrift/PageLocation.js +140 -123
- package/dist/parquetjs/parquet-thrift/PageType.js +15 -8
- package/dist/parquetjs/parquet-thrift/RowGroup.js +179 -171
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +241 -268
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +126 -114
- package/dist/parquetjs/parquet-thrift/Statistics.js +175 -178
- package/dist/parquetjs/parquet-thrift/StringType.js +60 -37
- package/dist/parquetjs/parquet-thrift/TimeType.js +105 -91
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +124 -119
- package/dist/parquetjs/parquet-thrift/TimestampType.js +105 -91
- package/dist/parquetjs/parquet-thrift/Type.js +19 -12
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +60 -37
- package/dist/parquetjs/parquet-thrift/UUIDType.js +60 -37
- package/dist/parquetjs/parquet-thrift/index.js +65 -44
- package/dist/parquetjs/parser/decoders.d.ts +2 -2
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.js +301 -283
- package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +193 -113
- package/dist/parquetjs/schema/declare.d.ts +26 -18
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +11 -12
- package/dist/parquetjs/schema/schema.d.ts +4 -4
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +148 -162
- package/dist/parquetjs/schema/shred.d.ts +33 -12
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +340 -147
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/schema/types.js +355 -415
- package/dist/parquetjs/utils/file-utils.d.ts +5 -4
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +37 -28
- package/dist/parquetjs/utils/read-utils.js +99 -95
- package/dist/workers/parquet-worker.js +5 -4
- package/package.json +17 -12
- package/src/index.ts +58 -7
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/arrow/convert-schema-from-parquet.ts +104 -0
- package/src/lib/arrow/convert-schema-to-parquet.ts +90 -0
- package/src/lib/geo/decode-geo-metadata.ts +108 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +60 -0
- package/src/lib/parsers/parse-parquet-to-rows.ts +45 -0
- package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
- package/src/lib/wasm/load-wasm/index.ts +1 -0
- package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
- package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
- package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
- package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/src/parquet-loader.ts +30 -3
- package/src/parquet-wasm-loader.ts +36 -0
- package/src/parquet-wasm-writer.ts +24 -0
- package/src/parquet-writer.ts +4 -1
- package/src/parquetjs/compression.ts +24 -7
- package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +33 -38
- package/src/parquetjs/parser/decoders.ts +3 -3
- package/src/parquetjs/parser/parquet-reader.ts +239 -122
- package/src/parquetjs/schema/declare.ts +22 -13
- package/src/parquetjs/schema/schema.ts +8 -8
- package/src/parquetjs/schema/shred.ts +239 -71
- package/src/parquetjs/schema/types.ts +25 -30
- package/src/parquetjs/utils/file-utils.ts +3 -4
- package/dist/bundle.js.map +0 -1
- package/dist/constants.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/convert-schema.js +0 -71
- package/dist/lib/convert-schema.js.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/parse-parquet.js +0 -28
- package/dist/lib/parse-parquet.js.map +0 -1
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -9
- package/dist/lib/read-array-buffer.js.map +0 -1
- package/dist/parquet-loader.js.map +0 -1
- package/dist/parquet-writer.js.map +0 -1
- package/dist/parquetjs/codecs/declare.js.map +0 -1
- package/dist/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/parquetjs/codecs/index.js.map +0 -1
- package/dist/parquetjs/codecs/plain.js.map +0 -1
- package/dist/parquetjs/codecs/rle.js.map +0 -1
- package/dist/parquetjs/compression.js.map +0 -1
- package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
- package/dist/parquetjs/encoder/writer.js.map +0 -1
- package/dist/parquetjs/file.d.ts +0 -10
- package/dist/parquetjs/file.d.ts.map +0 -1
- package/dist/parquetjs/file.js +0 -80
- package/dist/parquetjs/file.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/parquetjs/parser/decoders.js.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.js +0 -90
- package/dist/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/parquetjs/schema/declare.js.map +0 -1
- package/dist/parquetjs/schema/schema.js.map +0 -1
- package/dist/parquetjs/schema/shred.js.map +0 -1
- package/dist/parquetjs/schema/types.js.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.js +0 -12
- package/dist/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/workers/parquet-worker.js.map +0 -1
- package/src/lib/convert-schema.ts +0 -95
- package/src/lib/parse-parquet.ts +0 -27
- package/src/lib/read-array-buffer.ts +0 -31
- package/src/parquetjs/file.ts +0 -90
- package/src/parquetjs/parser/parquet-cursor.ts +0 -94
- package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
- package/src/parquetjs/utils/buffer-utils.ts +0 -18
- /package/dist/{parquetjs → es5/parquetjs}/LICENSE +0 -0
- /package/dist/{parquetjs → es5/parquetjs}/modules.d.ts +0 -0
|
@@ -1,163 +1,280 @@
|
|
|
1
1
|
// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
|
|
2
|
-
import {
|
|
3
|
-
|
|
2
|
+
import type {ReadableFile} from '@loaders.gl/loader-utils';
|
|
3
|
+
|
|
4
4
|
import {ParquetSchema} from '../schema/schema';
|
|
5
|
-
import {ParquetCursor} from './parquet-cursor';
|
|
6
|
-
import {PARQUET_VERSION} from '../../constants';
|
|
7
5
|
import {decodeSchema} from './decoders';
|
|
6
|
+
import {materializeRows} from '../schema/shred';
|
|
7
|
+
|
|
8
|
+
import {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';
|
|
9
|
+
import {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';
|
|
10
|
+
import {
|
|
11
|
+
ParquetRowGroup,
|
|
12
|
+
ParquetCompression,
|
|
13
|
+
ParquetColumnChunk,
|
|
14
|
+
PrimitiveType,
|
|
15
|
+
ParquetOptions
|
|
16
|
+
} from '../schema/declare';
|
|
17
|
+
import {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';
|
|
18
|
+
import {decodeDataPages, decodePage} from './decoders';
|
|
19
|
+
|
|
20
|
+
export type ParquetReaderProps = {
|
|
21
|
+
defaultDictionarySize?: number;
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/** Properties for initializing a ParquetRowGroupReader */
|
|
25
|
+
export type ParquetIterationProps = {
|
|
26
|
+
/** Filter allowing some columns to be dropped */
|
|
27
|
+
columnList?: string[] | string[][];
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
const DEFAULT_PROPS: Required<ParquetReaderProps> = {
|
|
31
|
+
defaultDictionarySize: 1e6
|
|
32
|
+
};
|
|
8
33
|
|
|
9
34
|
/**
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* avoid leaking file descriptors.
|
|
35
|
+
* The parquet envelope reader allows direct, unbuffered access to the individual
|
|
36
|
+
* sections of the parquet file, namely the header, footer and the row groups.
|
|
37
|
+
* This class is intended for advanced/internal users; if you just want to retrieve
|
|
38
|
+
* rows from a parquet file use the ParquetReader instead
|
|
15
39
|
*/
|
|
16
|
-
export class ParquetReader
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
};
|
|
25
|
-
const closeFn = async () => {};
|
|
26
|
-
const size = blob.size;
|
|
27
|
-
const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
|
|
28
|
-
try {
|
|
29
|
-
await envelopeReader.readHeader();
|
|
30
|
-
const metadata = await envelopeReader.readFooter();
|
|
31
|
-
return new ParquetReader(metadata, envelopeReader);
|
|
32
|
-
} catch (err) {
|
|
33
|
-
await envelopeReader.close();
|
|
34
|
-
throw err;
|
|
35
|
-
}
|
|
40
|
+
export class ParquetReader {
|
|
41
|
+
props: Required<ParquetReaderProps>;
|
|
42
|
+
file: ReadableFile;
|
|
43
|
+
metadata: Promise<FileMetaData> | null = null;
|
|
44
|
+
|
|
45
|
+
constructor(file: ReadableFile, props?: ParquetReaderProps) {
|
|
46
|
+
this.file = file;
|
|
47
|
+
this.props = {...DEFAULT_PROPS, ...props};
|
|
36
48
|
}
|
|
37
49
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
throw err;
|
|
50
|
+
close(): void {
|
|
51
|
+
// eslint-disable-next-line @typescript-eslint/no-floating-promises
|
|
52
|
+
this.file.close();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// HIGH LEVEL METHODS
|
|
56
|
+
|
|
57
|
+
/** Yield one row at a time */
|
|
58
|
+
async *rowIterator(props?: ParquetIterationProps) {
|
|
59
|
+
for await (const rows of this.rowBatchIterator(props)) {
|
|
60
|
+
// yield *rows
|
|
61
|
+
for (const row of rows) {
|
|
62
|
+
yield row;
|
|
63
|
+
}
|
|
53
64
|
}
|
|
54
65
|
}
|
|
55
66
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return new ParquetReader<T>(metadata, envelopeReader);
|
|
62
|
-
} catch (err) {
|
|
63
|
-
await envelopeReader.close();
|
|
64
|
-
throw err;
|
|
67
|
+
/** Yield one batch of rows at a time */
|
|
68
|
+
async *rowBatchIterator(props?: ParquetIterationProps) {
|
|
69
|
+
const schema = await this.getSchema();
|
|
70
|
+
for await (const rowGroup of this.rowGroupIterator(props)) {
|
|
71
|
+
yield materializeRows(schema, rowGroup);
|
|
65
72
|
}
|
|
66
73
|
}
|
|
67
74
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
75
|
+
/** Iterate over the raw row groups */
|
|
76
|
+
async *rowGroupIterator(props?: ParquetIterationProps) {
|
|
77
|
+
// Ensure strings are nested in arrays
|
|
78
|
+
const columnList: string[][] = (props?.columnList || []).map((x) =>
|
|
79
|
+
Array.isArray(x) ? x : [x]
|
|
80
|
+
);
|
|
71
81
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
82
|
+
const metadata = await this.getFileMetadata();
|
|
83
|
+
const schema = await this.getSchema();
|
|
84
|
+
|
|
85
|
+
const rowGroupCount = metadata?.row_groups.length || 0;
|
|
86
|
+
|
|
87
|
+
for (let rowGroupIndex = 0; rowGroupIndex < rowGroupCount; rowGroupIndex++) {
|
|
88
|
+
const rowGroup = await this.readRowGroup(
|
|
89
|
+
schema,
|
|
90
|
+
metadata.row_groups[rowGroupIndex],
|
|
91
|
+
columnList
|
|
92
|
+
);
|
|
93
|
+
yield rowGroup;
|
|
81
94
|
}
|
|
95
|
+
}
|
|
82
96
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);
|
|
87
|
-
this.schema = new ParquetSchema(schema);
|
|
97
|
+
async getRowCount(): Promise<number> {
|
|
98
|
+
const metadata = await this.getFileMetadata();
|
|
99
|
+
return Number(metadata.num_rows);
|
|
88
100
|
}
|
|
89
101
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
// this.envelopeReader = null;
|
|
97
|
-
// this.metadata = null;
|
|
102
|
+
async getSchema(): Promise<ParquetSchema> {
|
|
103
|
+
const metadata = await this.getFileMetadata();
|
|
104
|
+
const root = metadata.schema[0];
|
|
105
|
+
const {schema: schemaDefinition} = decodeSchema(metadata.schema, 1, root.num_children!);
|
|
106
|
+
const schema = new ParquetSchema(schemaDefinition);
|
|
107
|
+
return schema;
|
|
98
108
|
}
|
|
99
109
|
|
|
100
110
|
/**
|
|
101
|
-
*
|
|
102
|
-
*
|
|
103
|
-
* the reader object.
|
|
104
|
-
*
|
|
105
|
-
* The required_columns parameter controls which columns are actually read
|
|
106
|
-
* from disk. An empty array or no value implies all columns. A list of column
|
|
107
|
-
* names means that only those columns should be loaded from disk.
|
|
111
|
+
* Returns the user (key/value) metadata for this file
|
|
112
|
+
* In parquet this is not stored on the schema like it is in arrow
|
|
108
113
|
*/
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
if (!columnList) {
|
|
115
|
-
// tslint:disable-next-line:no-parameter-reassignment
|
|
116
|
-
columnList = [];
|
|
114
|
+
async getSchemaMetadata(): Promise<Record<string, string>> {
|
|
115
|
+
const metadata = await this.getFileMetadata();
|
|
116
|
+
const md: Record<string, string> = {};
|
|
117
|
+
for (const kv of metadata.key_value_metadata!) {
|
|
118
|
+
md[kv.key] = kv.value!;
|
|
117
119
|
}
|
|
120
|
+
return md;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
async getFileMetadata(): Promise<FileMetaData> {
|
|
124
|
+
if (!this.metadata) {
|
|
125
|
+
await this.readHeader();
|
|
126
|
+
this.metadata = this.readFooter();
|
|
127
|
+
}
|
|
128
|
+
return this.metadata;
|
|
129
|
+
}
|
|
118
130
|
|
|
119
|
-
|
|
120
|
-
columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));
|
|
131
|
+
// LOW LEVEL METHODS
|
|
121
132
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
133
|
+
/** Metadata is stored in the footer */
|
|
134
|
+
async readHeader(): Promise<void> {
|
|
135
|
+
const buffer = await this.file.read(0, PARQUET_MAGIC.length);
|
|
136
|
+
const magic = buffer.toString();
|
|
137
|
+
switch (magic) {
|
|
138
|
+
case PARQUET_MAGIC:
|
|
139
|
+
break;
|
|
140
|
+
case PARQUET_MAGIC_ENCRYPTED:
|
|
141
|
+
throw new Error('Encrypted parquet file not supported');
|
|
142
|
+
default:
|
|
143
|
+
throw new Error(`Invalid parquet file (magic=${magic})`);
|
|
144
|
+
}
|
|
128
145
|
}
|
|
129
146
|
|
|
130
|
-
/**
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
147
|
+
/** Metadata is stored in the footer */
|
|
148
|
+
async readFooter(): Promise<FileMetaData> {
|
|
149
|
+
const trailerLen = PARQUET_MAGIC.length + 4;
|
|
150
|
+
const trailerBuf = await this.file.read(this.file.size - trailerLen, trailerLen);
|
|
151
|
+
|
|
152
|
+
const magic = trailerBuf.slice(4).toString();
|
|
153
|
+
if (magic !== PARQUET_MAGIC) {
|
|
154
|
+
throw new Error(`Not a valid parquet file (magic="${magic})`);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const metadataSize = trailerBuf.readUInt32LE(0);
|
|
158
|
+
const metadataOffset = this.file.size - metadataSize - trailerLen;
|
|
159
|
+
if (metadataOffset < PARQUET_MAGIC.length) {
|
|
160
|
+
throw new Error(`Invalid metadata size ${metadataOffset}`);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const metadataBuf = await this.file.read(metadataOffset, metadataSize);
|
|
164
|
+
// let metadata = new parquet_thrift.FileMetaData();
|
|
165
|
+
// parquet_util.decodeThrift(metadata, metadataBuf);
|
|
166
|
+
const {metadata} = decodeFileMetadata(metadataBuf);
|
|
167
|
+
return metadata;
|
|
136
168
|
}
|
|
137
169
|
|
|
138
|
-
/**
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
170
|
+
/** Data is stored in row groups (similar to Apache Arrow record batches) */
|
|
171
|
+
async readRowGroup(
|
|
172
|
+
schema: ParquetSchema,
|
|
173
|
+
rowGroup: RowGroup,
|
|
174
|
+
columnList: string[][]
|
|
175
|
+
): Promise<ParquetRowGroup> {
|
|
176
|
+
const buffer: ParquetRowGroup = {
|
|
177
|
+
rowCount: Number(rowGroup.num_rows),
|
|
178
|
+
columnData: {}
|
|
179
|
+
};
|
|
180
|
+
for (const colChunk of rowGroup.columns) {
|
|
181
|
+
const colMetadata = colChunk.meta_data;
|
|
182
|
+
const colKey = colMetadata?.path_in_schema;
|
|
183
|
+
if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {
|
|
184
|
+
continue; // eslint-disable-line no-continue
|
|
185
|
+
}
|
|
186
|
+
buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);
|
|
187
|
+
}
|
|
188
|
+
return buffer;
|
|
143
189
|
}
|
|
144
190
|
|
|
145
191
|
/**
|
|
146
|
-
*
|
|
192
|
+
* Each row group contains column chunks for all the columns.
|
|
147
193
|
*/
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
md[kv.key] = kv.value!;
|
|
194
|
+
async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetColumnChunk> {
|
|
195
|
+
if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
|
|
196
|
+
throw new Error('external references are not supported');
|
|
152
197
|
}
|
|
153
|
-
|
|
198
|
+
|
|
199
|
+
const field = schema.findField(colChunk.meta_data?.path_in_schema!);
|
|
200
|
+
const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;
|
|
201
|
+
|
|
202
|
+
if (type !== field.primitiveType) {
|
|
203
|
+
throw new Error(`chunk type not matching schema: ${type}`);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const compression: ParquetCompression = getThriftEnum(
|
|
207
|
+
CompressionCodec,
|
|
208
|
+
colChunk.meta_data?.codec!
|
|
209
|
+
) as any;
|
|
210
|
+
|
|
211
|
+
const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);
|
|
212
|
+
let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);
|
|
213
|
+
|
|
214
|
+
if (!colChunk.file_path) {
|
|
215
|
+
pagesSize = Math.min(
|
|
216
|
+
this.file.size - pagesOffset,
|
|
217
|
+
Number(colChunk.meta_data?.total_compressed_size)
|
|
218
|
+
);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
const options: ParquetOptions = {
|
|
222
|
+
type,
|
|
223
|
+
rLevelMax: field.rLevelMax,
|
|
224
|
+
dLevelMax: field.dLevelMax,
|
|
225
|
+
compression,
|
|
226
|
+
column: field,
|
|
227
|
+
numValues: colChunk.meta_data?.num_values,
|
|
228
|
+
dictionary: []
|
|
229
|
+
};
|
|
230
|
+
|
|
231
|
+
let dictionary;
|
|
232
|
+
|
|
233
|
+
const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;
|
|
234
|
+
|
|
235
|
+
if (dictionaryPageOffset) {
|
|
236
|
+
const dictionaryOffset = Number(dictionaryPageOffset);
|
|
237
|
+
// Getting dictionary from column chunk to iterate all over indexes to get dataPage values.
|
|
238
|
+
dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
dictionary = options.dictionary?.length ? options.dictionary : dictionary;
|
|
242
|
+
const pagesBuf = await this.file.read(pagesOffset, pagesSize);
|
|
243
|
+
return await decodeDataPages(pagesBuf, {...options, dictionary});
|
|
154
244
|
}
|
|
155
245
|
|
|
156
246
|
/**
|
|
157
|
-
*
|
|
247
|
+
* Getting dictionary for allows to flatten values by indices.
|
|
248
|
+
* @param dictionaryPageOffset
|
|
249
|
+
* @param options
|
|
250
|
+
* @param pagesOffset
|
|
251
|
+
* @returns
|
|
158
252
|
*/
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
253
|
+
async getDictionary(
|
|
254
|
+
dictionaryPageOffset: number,
|
|
255
|
+
options: ParquetOptions,
|
|
256
|
+
pagesOffset: number
|
|
257
|
+
): Promise<string[]> {
|
|
258
|
+
if (dictionaryPageOffset === 0) {
|
|
259
|
+
// dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);
|
|
260
|
+
// pagesBuf = await this.read(pagesOffset, dictionarySize);
|
|
261
|
+
|
|
262
|
+
// In this case we are working with parquet-mr files format. Problem is described below:
|
|
263
|
+
// https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding
|
|
264
|
+
// We need to get dictionary page from column chunk if it exists.
|
|
265
|
+
// Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.
|
|
266
|
+
return [];
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
const dictionarySize = Math.min(
|
|
270
|
+
this.file.size - dictionaryPageOffset,
|
|
271
|
+
this.props.defaultDictionarySize
|
|
272
|
+
);
|
|
273
|
+
const pagesBuf = await this.file.read(dictionaryPageOffset, dictionarySize);
|
|
274
|
+
|
|
275
|
+
const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};
|
|
276
|
+
const decodedPage = await decodePage(cursor, options);
|
|
277
|
+
|
|
278
|
+
return decodedPage.dictionary!;
|
|
162
279
|
}
|
|
163
280
|
}
|
|
@@ -98,6 +98,7 @@ export interface ParquetField {
|
|
|
98
98
|
fields?: Record<string, ParquetField>;
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
/** @todo better name, this is an internal type? */
|
|
101
102
|
export interface ParquetOptions {
|
|
102
103
|
type: ParquetType;
|
|
103
104
|
rLevelMax: number;
|
|
@@ -108,32 +109,40 @@ export interface ParquetOptions {
|
|
|
108
109
|
dictionary?: ParquetDictionary;
|
|
109
110
|
}
|
|
110
111
|
|
|
111
|
-
export interface ParquetData {
|
|
112
|
-
dlevels: number[];
|
|
113
|
-
rlevels: number[];
|
|
114
|
-
values: any[];
|
|
115
|
-
count: number;
|
|
116
|
-
pageHeaders: PageHeader[];
|
|
117
|
-
}
|
|
118
|
-
|
|
119
112
|
export interface ParquetPageData {
|
|
120
113
|
dlevels: number[];
|
|
121
114
|
rlevels: number[];
|
|
122
|
-
|
|
115
|
+
/** Actual column chunks */
|
|
116
|
+
values: any[]; // ArrayLike<any>;
|
|
123
117
|
count: number;
|
|
124
118
|
dictionary?: ParquetDictionary;
|
|
119
|
+
/** The "raw" page header from the file */
|
|
125
120
|
pageHeader: PageHeader;
|
|
126
121
|
}
|
|
127
122
|
|
|
128
|
-
export interface
|
|
123
|
+
export interface ParquetRow {
|
|
129
124
|
[key: string]: any;
|
|
130
125
|
}
|
|
131
126
|
|
|
132
|
-
|
|
127
|
+
/** @
|
|
128
|
+
* Holds data for one row group (column chunks) */
|
|
129
|
+
export class ParquetRowGroup {
|
|
130
|
+
/** Number of rows in this page */
|
|
133
131
|
rowCount: number;
|
|
134
|
-
|
|
135
|
-
|
|
132
|
+
/** Map of Column chunks */
|
|
133
|
+
columnData: Record<string, ParquetColumnChunk>;
|
|
134
|
+
|
|
135
|
+
constructor(rowCount: number = 0, columnData: Record<string, ParquetColumnChunk> = {}) {
|
|
136
136
|
this.rowCount = rowCount;
|
|
137
137
|
this.columnData = columnData;
|
|
138
138
|
}
|
|
139
139
|
}
|
|
140
|
+
|
|
141
|
+
/** Holds the data for one column chunk */
|
|
142
|
+
export interface ParquetColumnChunk {
|
|
143
|
+
dlevels: number[];
|
|
144
|
+
rlevels: number[];
|
|
145
|
+
values: any[];
|
|
146
|
+
count: number;
|
|
147
|
+
pageHeaders: PageHeader[];
|
|
148
|
+
}
|
|
@@ -4,14 +4,14 @@ import {PARQUET_CODECS} from '../codecs';
|
|
|
4
4
|
import {PARQUET_COMPRESSION_METHODS} from '../compression';
|
|
5
5
|
import {
|
|
6
6
|
FieldDefinition,
|
|
7
|
-
|
|
7
|
+
ParquetRowGroup,
|
|
8
8
|
ParquetCompression,
|
|
9
9
|
ParquetField,
|
|
10
|
-
|
|
10
|
+
ParquetRow,
|
|
11
11
|
RepetitionType,
|
|
12
12
|
SchemaDefinition
|
|
13
13
|
} from './declare';
|
|
14
|
-
import {
|
|
14
|
+
import {materializeRows, shredBuffer, shredRecord} from './shred';
|
|
15
15
|
import {PARQUET_LOGICAL_TYPES} from './types';
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -70,12 +70,12 @@ export class ParquetSchema {
|
|
|
70
70
|
return branch;
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
shredRecord(
|
|
74
|
-
shredRecord(this,
|
|
73
|
+
shredRecord(row: ParquetRow, rowGroup: ParquetRowGroup): void {
|
|
74
|
+
shredRecord(this, row, rowGroup);
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
return
|
|
77
|
+
materializeRows(rowGroup: ParquetRowGroup): ParquetRow[] {
|
|
78
|
+
return materializeRows(this, rowGroup);
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
compress(type: ParquetCompression): this {
|
|
@@ -84,7 +84,7 @@ export class ParquetSchema {
|
|
|
84
84
|
return this;
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
-
|
|
87
|
+
rowGroup(): ParquetRowGroup {
|
|
88
88
|
return shredBuffer(this);
|
|
89
89
|
}
|
|
90
90
|
}
|