@loaders.gl/parquet 4.0.0-alpha.5 → 4.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/constants.js +18 -6
- package/dist/dist.min.js +27 -25
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/constants.js +17 -0
- package/dist/es5/constants.js.map +1 -0
- package/dist/es5/index.js +128 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +114 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +47 -0
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +81 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +172 -0
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +43 -0
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/index.js +13 -0
- package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +42 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +31 -0
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +60 -0
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/es5/parquet-loader.js +44 -0
- package/dist/es5/parquet-loader.js.map +1 -0
- package/dist/es5/parquet-wasm-loader.js +30 -0
- package/dist/es5/parquet-wasm-loader.js.map +1 -0
- package/dist/es5/parquet-wasm-writer.js +26 -0
- package/dist/es5/parquet-wasm-writer.js.map +1 -0
- package/dist/es5/parquet-writer.js +24 -0
- package/dist/es5/parquet-writer.js.map +1 -0
- package/dist/es5/parquetjs/codecs/declare.js +2 -0
- package/dist/es5/parquetjs/codecs/declare.js.map +1 -0
- package/dist/es5/parquetjs/codecs/dictionary.js +23 -0
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/es5/parquetjs/codecs/index.js +47 -0
- package/dist/es5/parquetjs/codecs/index.js.map +1 -0
- package/dist/es5/parquetjs/codecs/plain.js +208 -0
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -0
- package/dist/es5/parquetjs/codecs/rle.js +132 -0
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -0
- package/dist/es5/parquetjs/compression.js +137 -0
- package/dist/es5/parquetjs/compression.js.map +1 -0
- package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +14 -0
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +193 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +198 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +367 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +99 -0
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +33 -0
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +152 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +207 -0
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +96 -0
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -0
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -0
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +239 -0
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +96 -0
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +94 -0
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +423 -0
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +89 -0
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +115 -0
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +204 -0
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +124 -0
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +15 -0
- package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +165 -0
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +231 -0
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +115 -0
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +165 -0
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +97 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +126 -0
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +97 -0
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/Type.js +19 -0
- package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +52 -0
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/index.js +479 -0
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/es5/parquetjs/parser/decoders.js +393 -0
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +610 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/es5/parquetjs/schema/declare.js +21 -0
- package/dist/es5/parquetjs/schema/declare.js.map +1 -0
- package/dist/es5/parquetjs/schema/schema.js +165 -0
- package/dist/es5/parquetjs/schema/schema.js.map +1 -0
- package/dist/es5/parquetjs/schema/shred.js +282 -0
- package/dist/es5/parquetjs/schema/shred.js.map +1 -0
- package/dist/es5/parquetjs/schema/types.js +406 -0
- package/dist/es5/parquetjs/schema/types.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +47 -0
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/read-utils.js +120 -0
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/es5/workers/parquet-worker.js +6 -0
- package/dist/es5/workers/parquet-worker.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/index.js +31 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js +95 -0
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +39 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +62 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +39 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +29 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wasm/encode-parquet-wasm.js +15 -0
- package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
- package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +11 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +21 -0
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/esm/parquet-loader.js +36 -0
- package/dist/esm/parquet-loader.js.map +1 -0
- package/dist/esm/parquet-wasm-loader.js +22 -0
- package/dist/esm/parquet-wasm-loader.js.map +1 -0
- package/dist/esm/parquet-wasm-writer.js +19 -0
- package/dist/esm/parquet-wasm-writer.js.map +1 -0
- package/dist/esm/parquet-writer.js +17 -0
- package/dist/esm/parquet-writer.js.map +1 -0
- package/dist/esm/parquetjs/LICENSE +20 -0
- package/dist/esm/parquetjs/codecs/declare.js +2 -0
- package/dist/esm/parquetjs/codecs/declare.js.map +1 -0
- package/dist/esm/parquetjs/codecs/dictionary.js +13 -0
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/esm/parquetjs/codecs/index.js +23 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -0
- package/dist/esm/parquetjs/codecs/plain.js +200 -0
- package/dist/esm/parquetjs/codecs/plain.js.map +1 -0
- package/dist/esm/parquetjs/codecs/rle.js +119 -0
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -0
- package/dist/esm/parquetjs/compression.js +61 -0
- package/dist/esm/parquetjs/compression.js.map +1 -0
- package/dist/{parquetjs/encoder/writer.js → esm/parquetjs/encoder/parquet-encoder.js} +8 -106
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/esm/parquetjs/modules.d.ts +21 -0
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +7 -0
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +173 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +176 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +347 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +26 -0
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +132 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +187 -0
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DateType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +76 -0
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +93 -0
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -0
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +219 -0
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/IntType.js +76 -0
- package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +74 -0
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/ListType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +377 -0
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MapType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/NullType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +69 -0
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +95 -0
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +184 -0
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +104 -0
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/PageType.js +8 -0
- package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +145 -0
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +211 -0
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +95 -0
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js +145 -0
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/StringType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +102 -0
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +77 -0
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/Type.js +12 -0
- package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +31 -0
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js +44 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/esm/parquetjs/parser/decoders.js +253 -0
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
- package/dist/{parquetjs/parser/parquet-envelope-reader.js → esm/parquetjs/parser/parquet-reader.js} +95 -74
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/esm/parquetjs/schema/declare.js +12 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -0
- package/dist/esm/parquetjs/schema/schema.js +140 -0
- package/dist/esm/parquetjs/schema/schema.js.map +1 -0
- package/dist/esm/parquetjs/schema/shred.js +228 -0
- package/dist/esm/parquetjs/schema/shred.js.map +1 -0
- package/dist/esm/parquetjs/schema/types.js +397 -0
- package/dist/esm/parquetjs/schema/types.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +34 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/read-utils.js +90 -0
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/esm/workers/parquet-worker.js +4 -0
- package/dist/esm/workers/parquet-worker.js.map +1 -0
- package/dist/index.d.ts +16 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +58 -15
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-from-parquet.js +86 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +71 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +77 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +46 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-rows.js +37 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
- package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
- package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/index.js +5 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-browser.js +38 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
- package/dist/lib/wasm/load-wasm/load-wasm-node.js +31 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
- package/dist/parquet-loader.d.ts +6 -15
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +38 -19
- package/dist/parquet-wasm-loader.d.ts +23 -0
- package/dist/parquet-wasm-loader.d.ts.map +1 -0
- package/dist/parquet-wasm-loader.js +27 -0
- package/dist/parquet-wasm-writer.d.ts +3 -0
- package/dist/parquet-wasm-writer.d.ts.map +1 -0
- package/dist/parquet-wasm-writer.js +23 -0
- package/dist/parquet-worker.js +27 -25
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.d.ts +3 -2
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +18 -14
- package/dist/parquetjs/codecs/declare.js +2 -2
- package/dist/parquetjs/codecs/dictionary.js +12 -10
- package/dist/parquetjs/codecs/index.js +54 -22
- package/dist/parquetjs/codecs/plain.js +173 -232
- package/dist/parquetjs/codecs/rle.js +134 -140
- package/dist/parquetjs/compression.d.ts +3 -0
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +169 -48
- package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +15 -23
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.js +484 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +14 -7
- package/dist/parquetjs/parquet-thrift/BsonType.js +60 -37
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +209 -215
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +210 -211
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +394 -421
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +102 -89
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +19 -12
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +33 -26
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +165 -161
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +228 -234
- package/dist/parquetjs/parquet-thrift/DateType.js +60 -37
- package/dist/parquetjs/parquet-thrift/DecimalType.js +104 -90
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +121 -112
- package/dist/parquetjs/parquet-thrift/Encoding.js +19 -12
- package/dist/parquetjs/parquet-thrift/EnumType.js +60 -37
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -7
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +253 -263
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +60 -37
- package/dist/parquetjs/parquet-thrift/IntType.js +104 -90
- package/dist/parquetjs/parquet-thrift/JsonType.js +60 -37
- package/dist/parquetjs/parquet-thrift/KeyValue.js +101 -88
- package/dist/parquetjs/parquet-thrift/ListType.js +60 -37
- package/dist/parquetjs/parquet-thrift/LogicalType.js +366 -449
- package/dist/parquetjs/parquet-thrift/MapType.js +60 -37
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +60 -37
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +60 -37
- package/dist/parquetjs/parquet-thrift/NullType.js +60 -37
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +96 -80
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +126 -114
- package/dist/parquetjs/parquet-thrift/PageHeader.js +218 -231
- package/dist/parquetjs/parquet-thrift/PageLocation.js +140 -123
- package/dist/parquetjs/parquet-thrift/PageType.js +15 -8
- package/dist/parquetjs/parquet-thrift/RowGroup.js +179 -171
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +241 -268
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +126 -114
- package/dist/parquetjs/parquet-thrift/Statistics.js +175 -178
- package/dist/parquetjs/parquet-thrift/StringType.js +60 -37
- package/dist/parquetjs/parquet-thrift/TimeType.js +105 -91
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +124 -119
- package/dist/parquetjs/parquet-thrift/TimestampType.js +105 -91
- package/dist/parquetjs/parquet-thrift/Type.js +19 -12
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +60 -37
- package/dist/parquetjs/parquet-thrift/UUIDType.js +60 -37
- package/dist/parquetjs/parquet-thrift/index.js +65 -44
- package/dist/parquetjs/parser/decoders.d.ts +2 -2
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.js +301 -283
- package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +193 -113
- package/dist/parquetjs/schema/declare.d.ts +26 -18
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +11 -12
- package/dist/parquetjs/schema/schema.d.ts +4 -4
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +148 -162
- package/dist/parquetjs/schema/shred.d.ts +33 -12
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +340 -147
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/schema/types.js +355 -415
- package/dist/parquetjs/utils/file-utils.d.ts +5 -4
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +37 -28
- package/dist/parquetjs/utils/read-utils.js +99 -95
- package/dist/workers/parquet-worker.js +5 -4
- package/package.json +17 -12
- package/src/index.ts +58 -7
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/arrow/convert-schema-from-parquet.ts +104 -0
- package/src/lib/arrow/convert-schema-to-parquet.ts +90 -0
- package/src/lib/geo/decode-geo-metadata.ts +108 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +60 -0
- package/src/lib/parsers/parse-parquet-to-rows.ts +45 -0
- package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
- package/src/lib/wasm/load-wasm/index.ts +1 -0
- package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
- package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
- package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
- package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/src/parquet-loader.ts +30 -3
- package/src/parquet-wasm-loader.ts +36 -0
- package/src/parquet-wasm-writer.ts +24 -0
- package/src/parquet-writer.ts +4 -1
- package/src/parquetjs/compression.ts +24 -7
- package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +33 -38
- package/src/parquetjs/parser/decoders.ts +3 -3
- package/src/parquetjs/parser/parquet-reader.ts +239 -122
- package/src/parquetjs/schema/declare.ts +22 -13
- package/src/parquetjs/schema/schema.ts +8 -8
- package/src/parquetjs/schema/shred.ts +239 -71
- package/src/parquetjs/schema/types.ts +25 -30
- package/src/parquetjs/utils/file-utils.ts +3 -4
- package/dist/bundle.js.map +0 -1
- package/dist/constants.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/convert-schema.js +0 -71
- package/dist/lib/convert-schema.js.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/parse-parquet.js +0 -28
- package/dist/lib/parse-parquet.js.map +0 -1
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -9
- package/dist/lib/read-array-buffer.js.map +0 -1
- package/dist/parquet-loader.js.map +0 -1
- package/dist/parquet-writer.js.map +0 -1
- package/dist/parquetjs/codecs/declare.js.map +0 -1
- package/dist/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/parquetjs/codecs/index.js.map +0 -1
- package/dist/parquetjs/codecs/plain.js.map +0 -1
- package/dist/parquetjs/codecs/rle.js.map +0 -1
- package/dist/parquetjs/compression.js.map +0 -1
- package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
- package/dist/parquetjs/encoder/writer.js.map +0 -1
- package/dist/parquetjs/file.d.ts +0 -10
- package/dist/parquetjs/file.d.ts.map +0 -1
- package/dist/parquetjs/file.js +0 -80
- package/dist/parquetjs/file.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/parquetjs/parser/decoders.js.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.js +0 -90
- package/dist/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/parquetjs/schema/declare.js.map +0 -1
- package/dist/parquetjs/schema/schema.js.map +0 -1
- package/dist/parquetjs/schema/shred.js.map +0 -1
- package/dist/parquetjs/schema/types.js.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.js +0 -12
- package/dist/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/workers/parquet-worker.js.map +0 -1
- package/src/lib/convert-schema.ts +0 -95
- package/src/lib/parse-parquet.ts +0 -27
- package/src/lib/read-array-buffer.ts +0 -31
- package/src/parquetjs/file.ts +0 -90
- package/src/parquetjs/parser/parquet-cursor.ts +0 -94
- package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
- package/src/parquetjs/utils/buffer-utils.ts +0 -18
- /package/dist/{parquetjs → es5/parquetjs}/LICENSE +0 -0
- /package/dist/{parquetjs → es5/parquetjs}/modules.d.ts +0 -0
|
@@ -0,0 +1,976 @@
|
|
|
1
|
+
//! This module has a single entry point, [`parquet_to_arrow_schema`].
|
|
2
|
+
use parquet2::schema::{
|
|
3
|
+
types::{
|
|
4
|
+
FieldInfo, GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType,
|
|
5
|
+
PrimitiveConvertedType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit,
|
|
6
|
+
},
|
|
7
|
+
Repetition,
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
|
|
11
|
+
|
|
12
|
+
/// Converts [`ParquetType`]s to a [`Field`], ignoring parquet fields that do not contain
|
|
13
|
+
/// any physical column.
|
|
14
|
+
pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec<Field> {
|
|
15
|
+
fields.iter().filter_map(to_field).collect::<Vec<_>>()
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
fn from_int32(
|
|
19
|
+
logical_type: Option<PrimitiveLogicalType>,
|
|
20
|
+
converted_type: Option<PrimitiveConvertedType>,
|
|
21
|
+
) -> DataType {
|
|
22
|
+
use PrimitiveLogicalType::*;
|
|
23
|
+
match (logical_type, converted_type) {
|
|
24
|
+
// handle logical types first
|
|
25
|
+
(Some(Integer(t)), _) => match t {
|
|
26
|
+
IntegerType::Int8 => DataType::Int8,
|
|
27
|
+
IntegerType::Int16 => DataType::Int16,
|
|
28
|
+
IntegerType::Int32 => DataType::Int32,
|
|
29
|
+
IntegerType::UInt8 => DataType::UInt8,
|
|
30
|
+
IntegerType::UInt16 => DataType::UInt16,
|
|
31
|
+
IntegerType::UInt32 => DataType::UInt32,
|
|
32
|
+
// The above are the only possible annotations for parquet's int32. Anything else
|
|
33
|
+
// is a deviation to the parquet specification and we ignore
|
|
34
|
+
_ => DataType::Int32,
|
|
35
|
+
},
|
|
36
|
+
(Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale),
|
|
37
|
+
(Some(Date), _) => DataType::Date32,
|
|
38
|
+
(Some(Time { unit, .. }), _) => match unit {
|
|
39
|
+
ParquetTimeUnit::Milliseconds => DataType::Time32(TimeUnit::Millisecond),
|
|
40
|
+
// MILLIS is the only possible annotation for parquet's int32. Anything else
|
|
41
|
+
// is a deviation to the parquet specification and we ignore
|
|
42
|
+
_ => DataType::Int32,
|
|
43
|
+
},
|
|
44
|
+
// handle converted types:
|
|
45
|
+
(_, Some(PrimitiveConvertedType::Uint8)) => DataType::UInt8,
|
|
46
|
+
(_, Some(PrimitiveConvertedType::Uint16)) => DataType::UInt16,
|
|
47
|
+
(_, Some(PrimitiveConvertedType::Uint32)) => DataType::UInt32,
|
|
48
|
+
(_, Some(PrimitiveConvertedType::Int8)) => DataType::Int8,
|
|
49
|
+
(_, Some(PrimitiveConvertedType::Int16)) => DataType::Int16,
|
|
50
|
+
(_, Some(PrimitiveConvertedType::Int32)) => DataType::Int32,
|
|
51
|
+
(_, Some(PrimitiveConvertedType::Date)) => DataType::Date32,
|
|
52
|
+
(_, Some(PrimitiveConvertedType::TimeMillis)) => DataType::Time32(TimeUnit::Millisecond),
|
|
53
|
+
(_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => {
|
|
54
|
+
DataType::Decimal(precision, scale)
|
|
55
|
+
}
|
|
56
|
+
(_, _) => DataType::Int32,
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fn from_int64(
|
|
61
|
+
logical_type: Option<PrimitiveLogicalType>,
|
|
62
|
+
converted_type: Option<PrimitiveConvertedType>,
|
|
63
|
+
) -> DataType {
|
|
64
|
+
use PrimitiveLogicalType::*;
|
|
65
|
+
match (logical_type, converted_type) {
|
|
66
|
+
// handle logical types first
|
|
67
|
+
(Some(Integer(integer)), _) => match integer {
|
|
68
|
+
IntegerType::UInt64 => DataType::UInt64,
|
|
69
|
+
IntegerType::Int64 => DataType::Int64,
|
|
70
|
+
_ => DataType::Int64,
|
|
71
|
+
},
|
|
72
|
+
(
|
|
73
|
+
Some(Timestamp {
|
|
74
|
+
is_adjusted_to_utc,
|
|
75
|
+
unit,
|
|
76
|
+
}),
|
|
77
|
+
_,
|
|
78
|
+
) => {
|
|
79
|
+
let timezone = if is_adjusted_to_utc {
|
|
80
|
+
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
|
|
81
|
+
// A TIMESTAMP with isAdjustedToUTC=true is defined as [...] elapsed since the Unix epoch
|
|
82
|
+
Some("+00:00".to_string())
|
|
83
|
+
} else {
|
|
84
|
+
// PARQUET:
|
|
85
|
+
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
|
|
86
|
+
// A TIMESTAMP with isAdjustedToUTC=false represents [...] such
|
|
87
|
+
// timestamps should always be displayed the same way, regardless of the local time zone in effect
|
|
88
|
+
// ARROW:
|
|
89
|
+
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
|
|
90
|
+
// If the time zone is null or equal to an empty string, the data is "time
|
|
91
|
+
// zone naive" and shall be displayed *as is* to the user, not localized
|
|
92
|
+
// to the locale of the user.
|
|
93
|
+
None
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
match unit {
|
|
97
|
+
ParquetTimeUnit::Milliseconds => {
|
|
98
|
+
DataType::Timestamp(TimeUnit::Millisecond, timezone)
|
|
99
|
+
}
|
|
100
|
+
ParquetTimeUnit::Microseconds => {
|
|
101
|
+
DataType::Timestamp(TimeUnit::Microsecond, timezone)
|
|
102
|
+
}
|
|
103
|
+
ParquetTimeUnit::Nanoseconds => DataType::Timestamp(TimeUnit::Nanosecond, timezone),
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
(Some(Time { unit, .. }), _) => match unit {
|
|
107
|
+
ParquetTimeUnit::Microseconds => DataType::Time64(TimeUnit::Microsecond),
|
|
108
|
+
ParquetTimeUnit::Nanoseconds => DataType::Time64(TimeUnit::Nanosecond),
|
|
109
|
+
// MILLIS is only possible for int32. Appearing in int64 is a deviation
|
|
110
|
+
// to parquet's spec, which we ignore
|
|
111
|
+
_ => DataType::Int64,
|
|
112
|
+
},
|
|
113
|
+
(Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale),
|
|
114
|
+
// handle converted types:
|
|
115
|
+
(_, Some(PrimitiveConvertedType::TimeMicros)) => DataType::Time64(TimeUnit::Microsecond),
|
|
116
|
+
(_, Some(PrimitiveConvertedType::TimestampMillis)) => {
|
|
117
|
+
DataType::Timestamp(TimeUnit::Millisecond, None)
|
|
118
|
+
}
|
|
119
|
+
(_, Some(PrimitiveConvertedType::TimestampMicros)) => {
|
|
120
|
+
DataType::Timestamp(TimeUnit::Microsecond, None)
|
|
121
|
+
}
|
|
122
|
+
(_, Some(PrimitiveConvertedType::Int64)) => DataType::Int64,
|
|
123
|
+
(_, Some(PrimitiveConvertedType::Uint64)) => DataType::UInt64,
|
|
124
|
+
(_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => {
|
|
125
|
+
DataType::Decimal(precision, scale)
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
(_, _) => DataType::Int64,
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
fn from_byte_array(
|
|
133
|
+
logical_type: &Option<PrimitiveLogicalType>,
|
|
134
|
+
converted_type: &Option<PrimitiveConvertedType>,
|
|
135
|
+
) -> DataType {
|
|
136
|
+
match (logical_type, converted_type) {
|
|
137
|
+
(Some(PrimitiveLogicalType::String), _) => DataType::Utf8,
|
|
138
|
+
(Some(PrimitiveLogicalType::Json), _) => DataType::Binary,
|
|
139
|
+
(Some(PrimitiveLogicalType::Bson), _) => DataType::Binary,
|
|
140
|
+
(Some(PrimitiveLogicalType::Enum), _) => DataType::Binary,
|
|
141
|
+
(_, Some(PrimitiveConvertedType::Json)) => DataType::Binary,
|
|
142
|
+
(_, Some(PrimitiveConvertedType::Bson)) => DataType::Binary,
|
|
143
|
+
(_, Some(PrimitiveConvertedType::Enum)) => DataType::Binary,
|
|
144
|
+
(_, Some(PrimitiveConvertedType::Utf8)) => DataType::Utf8,
|
|
145
|
+
(_, _) => DataType::Binary,
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
fn from_fixed_len_byte_array(
|
|
150
|
+
length: usize,
|
|
151
|
+
logical_type: Option<PrimitiveLogicalType>,
|
|
152
|
+
converted_type: Option<PrimitiveConvertedType>,
|
|
153
|
+
) -> DataType {
|
|
154
|
+
match (logical_type, converted_type) {
|
|
155
|
+
(Some(PrimitiveLogicalType::Decimal(precision, scale)), _) => {
|
|
156
|
+
DataType::Decimal(precision, scale)
|
|
157
|
+
}
|
|
158
|
+
(None, Some(PrimitiveConvertedType::Decimal(precision, scale))) => {
|
|
159
|
+
DataType::Decimal(precision, scale)
|
|
160
|
+
}
|
|
161
|
+
(None, Some(PrimitiveConvertedType::Interval)) => {
|
|
162
|
+
// There is currently no reliable way of determining which IntervalUnit
|
|
163
|
+
// to return. Thus without the original Arrow schema, the results
|
|
164
|
+
// would be incorrect if all 12 bytes of the interval are populated
|
|
165
|
+
DataType::Interval(IntervalUnit::DayTime)
|
|
166
|
+
}
|
|
167
|
+
_ => DataType::FixedSizeBinary(length),
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/// Maps a [`PhysicalType`] with optional metadata to a [`DataType`]
|
|
172
|
+
fn to_primitive_type_inner(primitive_type: &PrimitiveType) -> DataType {
|
|
173
|
+
match primitive_type.physical_type {
|
|
174
|
+
PhysicalType::Boolean => DataType::Boolean,
|
|
175
|
+
PhysicalType::Int32 => {
|
|
176
|
+
from_int32(primitive_type.logical_type, primitive_type.converted_type)
|
|
177
|
+
}
|
|
178
|
+
PhysicalType::Int64 => {
|
|
179
|
+
from_int64(primitive_type.logical_type, primitive_type.converted_type)
|
|
180
|
+
}
|
|
181
|
+
PhysicalType::Int96 => DataType::Timestamp(TimeUnit::Nanosecond, None),
|
|
182
|
+
PhysicalType::Float => DataType::Float32,
|
|
183
|
+
PhysicalType::Double => DataType::Float64,
|
|
184
|
+
PhysicalType::ByteArray => {
|
|
185
|
+
from_byte_array(&primitive_type.logical_type, &primitive_type.converted_type)
|
|
186
|
+
}
|
|
187
|
+
PhysicalType::FixedLenByteArray(length) => from_fixed_len_byte_array(
|
|
188
|
+
length,
|
|
189
|
+
primitive_type.logical_type,
|
|
190
|
+
primitive_type.converted_type,
|
|
191
|
+
),
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
/// Entry point for converting parquet primitive type to arrow type.
|
|
196
|
+
///
|
|
197
|
+
/// This function takes care of repetition.
|
|
198
|
+
fn to_primitive_type(primitive_type: &PrimitiveType) -> DataType {
|
|
199
|
+
let base_type = to_primitive_type_inner(primitive_type);
|
|
200
|
+
|
|
201
|
+
if primitive_type.field_info.repetition == Repetition::Repeated {
|
|
202
|
+
DataType::List(Box::new(Field::new(
|
|
203
|
+
&primitive_type.field_info.name,
|
|
204
|
+
base_type,
|
|
205
|
+
is_nullable(&primitive_type.field_info),
|
|
206
|
+
)))
|
|
207
|
+
} else {
|
|
208
|
+
base_type
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
fn non_repeated_group(
|
|
213
|
+
logical_type: &Option<GroupLogicalType>,
|
|
214
|
+
converted_type: &Option<GroupConvertedType>,
|
|
215
|
+
fields: &[ParquetType],
|
|
216
|
+
parent_name: &str,
|
|
217
|
+
) -> Option<DataType> {
|
|
218
|
+
debug_assert!(!fields.is_empty());
|
|
219
|
+
match (logical_type, converted_type) {
|
|
220
|
+
(Some(GroupLogicalType::List), _) => to_list(fields, parent_name),
|
|
221
|
+
(None, Some(GroupConvertedType::List)) => to_list(fields, parent_name),
|
|
222
|
+
(Some(GroupLogicalType::Map), _) => to_list(fields, parent_name),
|
|
223
|
+
(None, Some(GroupConvertedType::Map) | Some(GroupConvertedType::MapKeyValue)) => {
|
|
224
|
+
to_map(fields)
|
|
225
|
+
}
|
|
226
|
+
_ => to_struct(fields),
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
/// Converts a parquet group type to an arrow [`DataType::Struct`].
|
|
231
|
+
/// Returns [`None`] if all its fields are empty
|
|
232
|
+
fn to_struct(fields: &[ParquetType]) -> Option<DataType> {
|
|
233
|
+
let fields = fields.iter().filter_map(to_field).collect::<Vec<Field>>();
|
|
234
|
+
if fields.is_empty() {
|
|
235
|
+
None
|
|
236
|
+
} else {
|
|
237
|
+
Some(DataType::Struct(fields))
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/// Converts a parquet group type to an arrow [`DataType::Struct`].
|
|
242
|
+
/// Returns [`None`] if all its fields are empty
|
|
243
|
+
fn to_map(fields: &[ParquetType]) -> Option<DataType> {
|
|
244
|
+
let inner = to_field(&fields[0])?;
|
|
245
|
+
Some(DataType::Map(Box::new(inner), false))
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/// Entry point for converting parquet group type.
|
|
249
|
+
///
|
|
250
|
+
/// This function takes care of logical type and repetition.
|
|
251
|
+
fn to_group_type(
|
|
252
|
+
field_info: &FieldInfo,
|
|
253
|
+
logical_type: &Option<GroupLogicalType>,
|
|
254
|
+
converted_type: &Option<GroupConvertedType>,
|
|
255
|
+
fields: &[ParquetType],
|
|
256
|
+
parent_name: &str,
|
|
257
|
+
) -> Option<DataType> {
|
|
258
|
+
debug_assert!(!fields.is_empty());
|
|
259
|
+
if field_info.repetition == Repetition::Repeated {
|
|
260
|
+
Some(DataType::List(Box::new(Field::new(
|
|
261
|
+
&field_info.name,
|
|
262
|
+
to_struct(fields)?,
|
|
263
|
+
is_nullable(field_info),
|
|
264
|
+
))))
|
|
265
|
+
} else {
|
|
266
|
+
non_repeated_group(logical_type, converted_type, fields, parent_name)
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
/// Checks whether this schema is nullable.
|
|
271
|
+
pub(crate) fn is_nullable(field_info: &FieldInfo) -> bool {
|
|
272
|
+
match field_info.repetition {
|
|
273
|
+
Repetition::Optional => true,
|
|
274
|
+
Repetition::Repeated => true,
|
|
275
|
+
Repetition::Required => false,
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/// Converts parquet schema to arrow field.
|
|
280
|
+
/// Returns `None` iff the parquet type has no associated primitive types,
|
|
281
|
+
/// i.e. if it is a column-less group type.
|
|
282
|
+
fn to_field(type_: &ParquetType) -> Option<Field> {
|
|
283
|
+
Some(Field::new(
|
|
284
|
+
&type_.get_field_info().name,
|
|
285
|
+
to_data_type(type_)?,
|
|
286
|
+
is_nullable(type_.get_field_info()),
|
|
287
|
+
))
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/// Converts a parquet list to arrow list.
|
|
291
|
+
///
|
|
292
|
+
/// To fully understand this algorithm, please refer to
|
|
293
|
+
/// [parquet doc](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md).
|
|
294
|
+
fn to_list(fields: &[ParquetType], parent_name: &str) -> Option<DataType> {
|
|
295
|
+
let item = fields.first().unwrap();
|
|
296
|
+
|
|
297
|
+
let item_type = match item {
|
|
298
|
+
ParquetType::PrimitiveType(primitive) => Some(to_primitive_type_inner(primitive)),
|
|
299
|
+
ParquetType::GroupType { fields, .. } => {
|
|
300
|
+
if fields.len() == 1
|
|
301
|
+
&& item.name() != "array"
|
|
302
|
+
&& item.name() != format!("{parent_name}_tuple")
|
|
303
|
+
{
|
|
304
|
+
// extract the repetition field
|
|
305
|
+
let nested_item = fields.first().unwrap();
|
|
306
|
+
to_data_type(nested_item)
|
|
307
|
+
} else {
|
|
308
|
+
to_struct(fields)
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}?;
|
|
312
|
+
|
|
313
|
+
// Check that the name of the list child is "list", in which case we
|
|
314
|
+
// get the child nullability and name (normally "element") from the nested
|
|
315
|
+
// group type.
|
|
316
|
+
// Without this step, the child incorrectly inherits the parent's optionality
|
|
317
|
+
let (list_item_name, item_is_optional) = match item {
|
|
318
|
+
ParquetType::GroupType {
|
|
319
|
+
field_info, fields, ..
|
|
320
|
+
} if field_info.name == "list" && fields.len() == 1 => {
|
|
321
|
+
let field = fields.first().unwrap();
|
|
322
|
+
(
|
|
323
|
+
&field.get_field_info().name,
|
|
324
|
+
field.get_field_info().repetition != Repetition::Required,
|
|
325
|
+
)
|
|
326
|
+
}
|
|
327
|
+
_ => (
|
|
328
|
+
&item.get_field_info().name,
|
|
329
|
+
item.get_field_info().repetition != Repetition::Required,
|
|
330
|
+
),
|
|
331
|
+
};
|
|
332
|
+
|
|
333
|
+
Some(DataType::List(Box::new(Field::new(
|
|
334
|
+
list_item_name,
|
|
335
|
+
item_type,
|
|
336
|
+
item_is_optional,
|
|
337
|
+
))))
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
/// Converts parquet schema to arrow data type.
|
|
341
|
+
///
|
|
342
|
+
/// This function discards schema name.
|
|
343
|
+
///
|
|
344
|
+
/// If this schema is a primitive type and not included in the leaves, the result is
|
|
345
|
+
/// Ok(None).
|
|
346
|
+
///
|
|
347
|
+
/// If this schema is a group type and none of its children is reserved in the
|
|
348
|
+
/// conversion, the result is Ok(None).
|
|
349
|
+
pub(crate) fn to_data_type(type_: &ParquetType) -> Option<DataType> {
|
|
350
|
+
match type_ {
|
|
351
|
+
ParquetType::PrimitiveType(primitive) => Some(to_primitive_type(primitive)),
|
|
352
|
+
ParquetType::GroupType {
|
|
353
|
+
field_info,
|
|
354
|
+
logical_type,
|
|
355
|
+
converted_type,
|
|
356
|
+
fields,
|
|
357
|
+
} => {
|
|
358
|
+
if fields.is_empty() {
|
|
359
|
+
None
|
|
360
|
+
} else {
|
|
361
|
+
to_group_type(
|
|
362
|
+
field_info,
|
|
363
|
+
logical_type,
|
|
364
|
+
converted_type,
|
|
365
|
+
fields,
|
|
366
|
+
&field_info.name,
|
|
367
|
+
)
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
#[cfg(test)]
|
|
374
|
+
mod tests {
|
|
375
|
+
use parquet2::metadata::SchemaDescriptor;
|
|
376
|
+
|
|
377
|
+
use super::*;
|
|
378
|
+
|
|
379
|
+
use crate::datatypes::{DataType, Field, TimeUnit};
|
|
380
|
+
use crate::error::Result;
|
|
381
|
+
|
|
382
|
+
#[test]
|
|
383
|
+
fn test_flat_primitives() -> Result<()> {
|
|
384
|
+
let message = "
|
|
385
|
+
message test_schema {
|
|
386
|
+
REQUIRED BOOLEAN boolean;
|
|
387
|
+
REQUIRED INT32 int8 (INT_8);
|
|
388
|
+
REQUIRED INT32 int16 (INT_16);
|
|
389
|
+
REQUIRED INT32 uint8 (INTEGER(8,false));
|
|
390
|
+
REQUIRED INT32 uint16 (INTEGER(16,false));
|
|
391
|
+
REQUIRED INT32 int32;
|
|
392
|
+
REQUIRED INT64 int64 ;
|
|
393
|
+
OPTIONAL DOUBLE double;
|
|
394
|
+
OPTIONAL FLOAT float;
|
|
395
|
+
OPTIONAL BINARY string (UTF8);
|
|
396
|
+
OPTIONAL BINARY string_2 (STRING);
|
|
397
|
+
}
|
|
398
|
+
";
|
|
399
|
+
let expected = &[
|
|
400
|
+
Field::new("boolean", DataType::Boolean, false),
|
|
401
|
+
Field::new("int8", DataType::Int8, false),
|
|
402
|
+
Field::new("int16", DataType::Int16, false),
|
|
403
|
+
Field::new("uint8", DataType::UInt8, false),
|
|
404
|
+
Field::new("uint16", DataType::UInt16, false),
|
|
405
|
+
Field::new("int32", DataType::Int32, false),
|
|
406
|
+
Field::new("int64", DataType::Int64, false),
|
|
407
|
+
Field::new("double", DataType::Float64, true),
|
|
408
|
+
Field::new("float", DataType::Float32, true),
|
|
409
|
+
Field::new("string", DataType::Utf8, true),
|
|
410
|
+
Field::new("string_2", DataType::Utf8, true),
|
|
411
|
+
];
|
|
412
|
+
|
|
413
|
+
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
|
|
414
|
+
let fields = parquet_to_arrow_schema(parquet_schema.fields());
|
|
415
|
+
|
|
416
|
+
assert_eq!(fields, expected);
|
|
417
|
+
Ok(())
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
#[test]
|
|
421
|
+
fn test_byte_array_fields() -> Result<()> {
|
|
422
|
+
let message = "
|
|
423
|
+
message test_schema {
|
|
424
|
+
REQUIRED BYTE_ARRAY binary;
|
|
425
|
+
REQUIRED FIXED_LEN_BYTE_ARRAY (20) fixed_binary;
|
|
426
|
+
}
|
|
427
|
+
";
|
|
428
|
+
let expected = vec![
|
|
429
|
+
Field::new("binary", DataType::Binary, false),
|
|
430
|
+
Field::new("fixed_binary", DataType::FixedSizeBinary(20), false),
|
|
431
|
+
];
|
|
432
|
+
|
|
433
|
+
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
|
|
434
|
+
let fields = parquet_to_arrow_schema(parquet_schema.fields());
|
|
435
|
+
|
|
436
|
+
assert_eq!(fields, expected);
|
|
437
|
+
Ok(())
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
#[test]
|
|
441
|
+
fn test_duplicate_fields() -> Result<()> {
|
|
442
|
+
let message = "
|
|
443
|
+
message test_schema {
|
|
444
|
+
REQUIRED BOOLEAN boolean;
|
|
445
|
+
REQUIRED INT32 int8 (INT_8);
|
|
446
|
+
}
|
|
447
|
+
";
|
|
448
|
+
let expected = &[
|
|
449
|
+
Field::new("boolean", DataType::Boolean, false),
|
|
450
|
+
Field::new("int8", DataType::Int8, false),
|
|
451
|
+
];
|
|
452
|
+
|
|
453
|
+
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
|
|
454
|
+
let fields = parquet_to_arrow_schema(parquet_schema.fields());
|
|
455
|
+
|
|
456
|
+
assert_eq!(fields, expected);
|
|
457
|
+
Ok(())
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
#[test]
|
|
461
|
+
fn test_parquet_lists() -> Result<()> {
|
|
462
|
+
let mut arrow_fields = Vec::new();
|
|
463
|
+
|
|
464
|
+
// LIST encoding example taken from parquet-format/LogicalTypes.md
|
|
465
|
+
let message_type = "
|
|
466
|
+
message test_schema {
|
|
467
|
+
REQUIRED GROUP my_list (LIST) {
|
|
468
|
+
REPEATED GROUP list {
|
|
469
|
+
OPTIONAL BINARY element (UTF8);
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
473
|
+
REPEATED GROUP list {
|
|
474
|
+
REQUIRED BINARY element (UTF8);
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
OPTIONAL GROUP array_of_arrays (LIST) {
|
|
478
|
+
REPEATED GROUP list {
|
|
479
|
+
REQUIRED GROUP element (LIST) {
|
|
480
|
+
REPEATED GROUP list {
|
|
481
|
+
REQUIRED INT32 element;
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
487
|
+
REPEATED GROUP element {
|
|
488
|
+
REQUIRED BINARY str (UTF8);
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
492
|
+
REPEATED INT32 element;
|
|
493
|
+
}
|
|
494
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
495
|
+
REPEATED GROUP element {
|
|
496
|
+
REQUIRED BINARY str (UTF8);
|
|
497
|
+
REQUIRED INT32 num;
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
501
|
+
REPEATED GROUP array {
|
|
502
|
+
REQUIRED BINARY str (UTF8);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
}
|
|
506
|
+
OPTIONAL GROUP my_list (LIST) {
|
|
507
|
+
REPEATED GROUP my_list_tuple {
|
|
508
|
+
REQUIRED BINARY str (UTF8);
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
REPEATED INT32 name;
|
|
512
|
+
}
|
|
513
|
+
";
|
|
514
|
+
|
|
515
|
+
// // List<String> (list non-null, elements nullable)
|
|
516
|
+
// required group my_list (LIST) {
|
|
517
|
+
// repeated group list {
|
|
518
|
+
// optional binary element (UTF8);
|
|
519
|
+
// }
|
|
520
|
+
// }
|
|
521
|
+
{
|
|
522
|
+
arrow_fields.push(Field::new(
|
|
523
|
+
"my_list",
|
|
524
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
|
|
525
|
+
false,
|
|
526
|
+
));
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
// // List<String> (list nullable, elements non-null)
|
|
530
|
+
// optional group my_list (LIST) {
|
|
531
|
+
// repeated group list {
|
|
532
|
+
// required binary element (UTF8);
|
|
533
|
+
// }
|
|
534
|
+
// }
|
|
535
|
+
{
|
|
536
|
+
arrow_fields.push(Field::new(
|
|
537
|
+
"my_list",
|
|
538
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
|
|
539
|
+
true,
|
|
540
|
+
));
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// Element types can be nested structures. For example, a list of lists:
|
|
544
|
+
//
|
|
545
|
+
// // List<List<Integer>>
|
|
546
|
+
// optional group array_of_arrays (LIST) {
|
|
547
|
+
// repeated group list {
|
|
548
|
+
// required group element (LIST) {
|
|
549
|
+
// repeated group list {
|
|
550
|
+
// required int32 element;
|
|
551
|
+
// }
|
|
552
|
+
// }
|
|
553
|
+
// }
|
|
554
|
+
// }
|
|
555
|
+
{
|
|
556
|
+
let arrow_inner_list =
|
|
557
|
+
DataType::List(Box::new(Field::new("element", DataType::Int32, false)));
|
|
558
|
+
arrow_fields.push(Field::new(
|
|
559
|
+
"array_of_arrays",
|
|
560
|
+
DataType::List(Box::new(Field::new("element", arrow_inner_list, false))),
|
|
561
|
+
true,
|
|
562
|
+
));
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
// // List<String> (list nullable, elements non-null)
|
|
566
|
+
// optional group my_list (LIST) {
|
|
567
|
+
// repeated group element {
|
|
568
|
+
// required binary str (UTF8);
|
|
569
|
+
// };
|
|
570
|
+
// }
|
|
571
|
+
{
|
|
572
|
+
arrow_fields.push(Field::new(
|
|
573
|
+
"my_list",
|
|
574
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
|
|
575
|
+
true,
|
|
576
|
+
));
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
// // List<Integer> (nullable list, non-null elements)
|
|
580
|
+
// optional group my_list (LIST) {
|
|
581
|
+
// repeated int32 element;
|
|
582
|
+
// }
|
|
583
|
+
{
|
|
584
|
+
arrow_fields.push(Field::new(
|
|
585
|
+
"my_list",
|
|
586
|
+
DataType::List(Box::new(Field::new("element", DataType::Int32, true))),
|
|
587
|
+
true,
|
|
588
|
+
));
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
// // List<Tuple<String, Integer>> (nullable list, non-null elements)
|
|
592
|
+
// optional group my_list (LIST) {
|
|
593
|
+
// repeated group element {
|
|
594
|
+
// required binary str (UTF8);
|
|
595
|
+
// required int32 num;
|
|
596
|
+
// };
|
|
597
|
+
// }
|
|
598
|
+
{
|
|
599
|
+
let arrow_struct = DataType::Struct(vec![
|
|
600
|
+
Field::new("str", DataType::Utf8, false),
|
|
601
|
+
Field::new("num", DataType::Int32, false),
|
|
602
|
+
]);
|
|
603
|
+
arrow_fields.push(Field::new(
|
|
604
|
+
"my_list",
|
|
605
|
+
DataType::List(Box::new(Field::new("element", arrow_struct, true))),
|
|
606
|
+
true,
|
|
607
|
+
));
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
// // List<OneTuple<String>> (nullable list, non-null elements)
|
|
611
|
+
// optional group my_list (LIST) {
|
|
612
|
+
// repeated group array {
|
|
613
|
+
// required binary str (UTF8);
|
|
614
|
+
// };
|
|
615
|
+
// }
|
|
616
|
+
// Special case: group is named array
|
|
617
|
+
{
|
|
618
|
+
let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
|
|
619
|
+
arrow_fields.push(Field::new(
|
|
620
|
+
"my_list",
|
|
621
|
+
DataType::List(Box::new(Field::new("array", arrow_struct, true))),
|
|
622
|
+
true,
|
|
623
|
+
));
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
// // List<OneTuple<String>> (nullable list, non-null elements)
|
|
627
|
+
// optional group my_list (LIST) {
|
|
628
|
+
// repeated group my_list_tuple {
|
|
629
|
+
// required binary str (UTF8);
|
|
630
|
+
// };
|
|
631
|
+
// }
|
|
632
|
+
// Special case: group named ends in _tuple
|
|
633
|
+
{
|
|
634
|
+
let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
|
|
635
|
+
arrow_fields.push(Field::new(
|
|
636
|
+
"my_list",
|
|
637
|
+
DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, true))),
|
|
638
|
+
true,
|
|
639
|
+
));
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
// One-level encoding: Only allows required lists with required cells
|
|
643
|
+
// repeated value_type name
|
|
644
|
+
{
|
|
645
|
+
arrow_fields.push(Field::new(
|
|
646
|
+
"name",
|
|
647
|
+
DataType::List(Box::new(Field::new("name", DataType::Int32, true))),
|
|
648
|
+
true,
|
|
649
|
+
));
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
|
|
653
|
+
let fields = parquet_to_arrow_schema(parquet_schema.fields());
|
|
654
|
+
|
|
655
|
+
assert_eq!(arrow_fields, fields);
|
|
656
|
+
Ok(())
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
#[test]
|
|
660
|
+
fn test_parquet_list_nullable() -> Result<()> {
|
|
661
|
+
let mut arrow_fields = Vec::new();
|
|
662
|
+
|
|
663
|
+
let message_type = "
|
|
664
|
+
message test_schema {
|
|
665
|
+
REQUIRED GROUP my_list1 (LIST) {
|
|
666
|
+
REPEATED GROUP list {
|
|
667
|
+
OPTIONAL BINARY element (UTF8);
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
OPTIONAL GROUP my_list2 (LIST) {
|
|
671
|
+
REPEATED GROUP list {
|
|
672
|
+
REQUIRED BINARY element (UTF8);
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
REQUIRED GROUP my_list3 (LIST) {
|
|
676
|
+
REPEATED GROUP list {
|
|
677
|
+
REQUIRED BINARY element (UTF8);
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
";
|
|
682
|
+
|
|
683
|
+
// // List<String> (list non-null, elements nullable)
|
|
684
|
+
// required group my_list1 (LIST) {
|
|
685
|
+
// repeated group list {
|
|
686
|
+
// optional binary element (UTF8);
|
|
687
|
+
// }
|
|
688
|
+
// }
|
|
689
|
+
{
|
|
690
|
+
arrow_fields.push(Field::new(
|
|
691
|
+
"my_list1",
|
|
692
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
|
|
693
|
+
false,
|
|
694
|
+
));
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// // List<String> (list nullable, elements non-null)
|
|
698
|
+
// optional group my_list2 (LIST) {
|
|
699
|
+
// repeated group list {
|
|
700
|
+
// required binary element (UTF8);
|
|
701
|
+
// }
|
|
702
|
+
// }
|
|
703
|
+
{
|
|
704
|
+
arrow_fields.push(Field::new(
|
|
705
|
+
"my_list2",
|
|
706
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
|
|
707
|
+
true,
|
|
708
|
+
));
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
// // List<String> (list non-null, elements non-null)
|
|
712
|
+
// required group my_list3 (LIST) {
|
|
713
|
+
// repeated group list {
|
|
714
|
+
// required binary element (UTF8);
|
|
715
|
+
// }
|
|
716
|
+
// }
|
|
717
|
+
{
|
|
718
|
+
arrow_fields.push(Field::new(
|
|
719
|
+
"my_list3",
|
|
720
|
+
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
|
|
721
|
+
false,
|
|
722
|
+
));
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
|
|
726
|
+
let fields = parquet_to_arrow_schema(parquet_schema.fields());
|
|
727
|
+
|
|
728
|
+
assert_eq!(arrow_fields, fields);
|
|
729
|
+
Ok(())
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
#[test]
fn test_nested_schema() -> Result<()> {
    // A required group with two required leaves, followed by a required
    // top-level leaf.
    let schema_text = "
    message test_schema {
      REQUIRED GROUP group1 {
        REQUIRED BOOLEAN leaf1;
        REQUIRED INT32 leaf2;
      }
      REQUIRED INT64 leaf3;
    }
    ";

    // The group is expected to become a non-nullable struct field holding
    // its leaves in declaration order.
    let expected = vec![
        Field::new(
            "group1",
            DataType::Struct(vec![
                Field::new("leaf1", DataType::Boolean, false),
                Field::new("leaf2", DataType::Int32, false),
            ]),
            false,
        ),
        Field::new("leaf3", DataType::Int64, false),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());

    assert_eq!(expected, converted);
    Ok(())
}
|
|
763
|
+
|
|
764
|
+
#[test]
fn test_repeated_nested_schema() -> Result<()> {
    // Two levels of REPEATED groups without a LIST annotation.
    let schema_text = "
    message test_schema {
      OPTIONAL INT32 leaf1;
      REPEATED GROUP outerGroup {
        OPTIONAL INT32 leaf2;
        REPEATED GROUP innerGroup {
          OPTIONAL INT32 leaf3;
        }
      }
    }
    ";

    // Assemble the expected type from the inside out. Each repeated group is
    // expected as a nullable list whose item field reuses the group's name
    // and wraps a struct of the group's children.
    let inner_struct = DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]);
    let inner_list = Field::new(
        "innerGroup",
        DataType::List(Box::new(Field::new("innerGroup", inner_struct, true))),
        true,
    );

    let outer_struct = DataType::Struct(vec![
        Field::new("leaf2", DataType::Int32, true),
        inner_list,
    ]);
    let outer_list = Field::new(
        "outerGroup",
        DataType::List(Box::new(Field::new("outerGroup", outer_struct, true))),
        true,
    );

    let expected = vec![Field::new("leaf1", DataType::Int32, true), outer_list];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());

    assert_eq!(expected, converted);
    Ok(())
}
|
|
813
|
+
|
|
814
|
+
#[test]
fn test_column_desc_to_field() -> Result<()> {
    // Shorthands for the expected fields: REQUIRED columns are expected as
    // non-nullable Arrow fields, OPTIONAL columns as nullable ones.
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let optional = |name: &str, data_type: DataType| Field::new(name, data_type, true);

    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
        required("uint8", DataType::UInt8),
        required("int16", DataType::Int16),
        required("uint16", DataType::UInt16),
        required("int32", DataType::Int32),
        required("int64", DataType::Int64),
        optional("double", DataType::Float64),
        optional("float", DataType::Float32),
        optional("string", DataType::Utf8),
        // A bare REPEATED primitive is expected as a nullable list whose item
        // field reuses the column name.
        optional(
            "bools",
            DataType::List(Box::new(optional("bools", DataType::Boolean))),
        ),
        optional("date", DataType::Date32),
        optional("time_milli", DataType::Time32(TimeUnit::Millisecond)),
        optional("time_micro", DataType::Time64(TimeUnit::Microsecond)),
        optional("time_nano", DataType::Time64(TimeUnit::Nanosecond)),
        optional("ts_milli", DataType::Timestamp(TimeUnit::Millisecond, None)),
        required("ts_micro", DataType::Timestamp(TimeUnit::Microsecond, None)),
        // TIMESTAMP(NANOS,true) is expected to carry an explicit "+00:00" zone.
        required(
            "ts_nano",
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
        ),
    ];

    let schema_text = "
    message test_schema {
        REQUIRED BOOLEAN boolean;
        REQUIRED INT32 int8 (INT_8);
        REQUIRED INT32 uint8 (INTEGER(8,false));
        REQUIRED INT32 int16 (INT_16);
        REQUIRED INT32 uint16 (INTEGER(16,false));
        REQUIRED INT32 int32;
        REQUIRED INT64 int64;
        OPTIONAL DOUBLE double;
        OPTIONAL FLOAT float;
        OPTIONAL BINARY string (UTF8);
        REPEATED BOOLEAN bools;
        OPTIONAL INT32 date (DATE);
        OPTIONAL INT32 time_milli (TIME_MILLIS);
        OPTIONAL INT64 time_micro (TIME_MICROS);
        OPTIONAL INT64 time_nano (TIME(NANOS,false));
        OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
        REQUIRED INT64 ts_micro (TIMESTAMP_MICROS);
        REQUIRED INT64 ts_nano (TIMESTAMP(NANOS,true));
    }
    ";

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());

    assert_eq!(expected, converted);
    Ok(())
}
|
|
881
|
+
|
|
882
|
+
#[test]
fn test_field_to_column_desc() -> Result<()> {
    // NOTE(review): despite its name, this test parses a Parquet message and
    // checks the result of `parquet_to_arrow_schema`, i.e. the Parquet -> Arrow
    // direction — confirm whether the reverse conversion was intended.

    // Shorthands for the expected fields: REQUIRED columns are expected as
    // non-nullable Arrow fields, OPTIONAL columns as nullable ones.
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let optional = |name: &str, data_type: DataType| Field::new(name, data_type, true);

    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
        required("int16", DataType::Int16),
        required("int32", DataType::Int32),
        required("int64", DataType::Int64),
        optional("double", DataType::Float64),
        optional("float", DataType::Float32),
        optional("string", DataType::Utf8),
        // LIST-annotated groups: the item field is expected to be named
        // "element" and to carry the element's own nullability.
        optional(
            "bools",
            DataType::List(Box::new(optional("element", DataType::Boolean))),
        ),
        required(
            "bools_non_null",
            DataType::List(Box::new(required("element", DataType::Boolean))),
        ),
        optional("date", DataType::Date32),
        optional("time_milli", DataType::Time32(TimeUnit::Millisecond)),
        optional("time_micro", DataType::Time64(TimeUnit::Microsecond)),
        optional("ts_milli", DataType::Timestamp(TimeUnit::Millisecond, None)),
        required("ts_micro", DataType::Timestamp(TimeUnit::Microsecond, None)),
        // A plain group is expected as a struct; it nests a list of its own.
        required(
            "struct",
            DataType::Struct(vec![
                required("bools", DataType::Boolean),
                required("uint32", DataType::UInt32),
                required(
                    "int32",
                    DataType::List(Box::new(optional("element", DataType::Int32))),
                ),
            ]),
        ),
        required("dictionary_strings", DataType::Utf8),
    ];

    let schema_text = "
    message arrow_schema {
        REQUIRED BOOLEAN boolean;
        REQUIRED INT32 int8 (INT_8);
        REQUIRED INT32 int16 (INTEGER(16,true));
        REQUIRED INT32 int32;
        REQUIRED INT64 int64;
        OPTIONAL DOUBLE double;
        OPTIONAL FLOAT float;
        OPTIONAL BINARY string (STRING);
        OPTIONAL GROUP bools (LIST) {
            REPEATED GROUP list {
                OPTIONAL BOOLEAN element;
            }
        }
        REQUIRED GROUP bools_non_null (LIST) {
            REPEATED GROUP list {
                REQUIRED BOOLEAN element;
            }
        }
        OPTIONAL INT32 date (DATE);
        OPTIONAL INT32 time_milli (TIME(MILLIS,false));
        OPTIONAL INT64 time_micro (TIME_MICROS);
        OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
        REQUIRED INT64 ts_micro (TIMESTAMP(MICROS,false));
        REQUIRED GROUP struct {
            REQUIRED BOOLEAN bools;
            REQUIRED INT32 uint32 (INTEGER(32,false));
            REQUIRED GROUP int32 (LIST) {
                REPEATED GROUP list {
                    OPTIONAL INT32 element;
                }
            }
        }
        REQUIRED BINARY dictionary_strings (STRING);
    }
    ";

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());

    assert_eq!(expected, converted);
    Ok(())
}
|
|
976
|
+
}
|