@loaders.gl/parquet 3.3.0-alpha.5 → 3.3.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +14 -14
- package/dist/dist.min.js.map +2 -2
- package/dist/es5/bundle.js +0 -1
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +3 -1
- package/dist/es5/constants.js.map +1 -1
- package/dist/es5/index.js +23 -39
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +2 -11
- package/dist/es5/lib/convert-schema.js.map +1 -1
- package/dist/es5/lib/parse-parquet.js +29 -72
- package/dist/es5/lib/parse-parquet.js.map +1 -1
- package/dist/es5/lib/read-array-buffer.js +0 -10
- package/dist/es5/lib/read-array-buffer.js.map +1 -1
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +0 -11
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/index.js +0 -1
- package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +0 -14
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +0 -10
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +1 -19
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +2 -1
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +2 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -3
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -2
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/declare.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +2 -9
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +0 -8
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +1 -77
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +1 -39
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +5 -30
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/writer.js +31 -149
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
- package/dist/es5/parquetjs/file.js +3 -12
- package/dist/es5/parquetjs/file.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +0 -48
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +0 -47
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +0 -82
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +0 -25
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +0 -39
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -51
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +0 -26
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -30
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +0 -59
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +0 -26
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +0 -26
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +0 -85
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +0 -25
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +0 -30
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +0 -54
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +0 -31
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +0 -41
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +0 -59
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +0 -30
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +0 -42
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +0 -27
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +0 -30
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +0 -27
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +0 -86
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +3 -82
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +5 -37
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +2 -88
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +14 -67
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +3 -7
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +6 -34
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +11 -41
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +3 -84
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -2
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +1 -7
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +6 -38
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/es5/workers/parquet-worker.js +0 -2
- package/dist/es5/workers/parquet-worker.js.map +1 -1
- package/dist/esm/bundle.js +1 -1
- package/dist/esm/bundle.js.map +1 -1
- package/dist/esm/constants.js +3 -0
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/index.js +10 -2
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/convert-schema.js +1 -7
- package/dist/esm/lib/convert-schema.js.map +1 -1
- package/dist/esm/lib/parse-parquet.js +2 -5
- package/dist/esm/lib/parse-parquet.js.map +1 -1
- package/dist/esm/lib/read-array-buffer.js +2 -1
- package/dist/esm/lib/read-array-buffer.js.map +1 -1
- package/dist/esm/lib/wasm/encode-parquet-wasm.js +1 -1
- package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -1
- package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
- package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +2 -3
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/esm/parquet-loader.js +3 -1
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +3 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +2 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +2 -2
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/codecs/declare.js.map +1 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +2 -1
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/esm/parquetjs/codecs/index.js +2 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -1
- package/dist/esm/parquetjs/codecs/plain.js +2 -68
- package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js +3 -29
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +9 -5
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/encoder/writer.js +21 -51
- package/dist/esm/parquetjs/encoder/writer.js.map +1 -1
- package/dist/esm/parquetjs/file.js +1 -0
- package/dist/esm/parquetjs/file.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +0 -44
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +0 -42
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +0 -82
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +0 -18
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +0 -34
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -49
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/DateType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +0 -19
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -24
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +2 -53
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/IntType.js +0 -19
- package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +0 -19
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/ListType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +0 -90
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/MapType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/NullType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +0 -16
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +0 -24
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +0 -49
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +0 -24
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +0 -33
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +0 -59
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +0 -24
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js +0 -38
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/StringType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js +0 -19
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +0 -24
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +0 -19
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +1 -8
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/index.js +1 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js +9 -39
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +1 -13
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +6 -32
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +1 -18
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +4 -4
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +3 -29
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +7 -22
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js +3 -78
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +2 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/esm/parquetjs/utils/read-utils.js +5 -12
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/workers/parquet-worker.js.map +1 -1
- package/dist/lib/wasm/load-wasm/load-wasm-browser.js +5 -1
- package/dist/lib/wasm/load-wasm/load-wasm-node.js +5 -1
- package/dist/parquet-worker.js +14 -14
- package/dist/parquet-worker.js.map +2 -2
- package/dist/parquetjs/codecs/index.js +5 -1
- package/dist/parquetjs/encoder/writer.d.ts +1 -0
- package/dist/parquetjs/encoder/writer.d.ts.map +1 -1
- package/dist/parquetjs/encoder/writer.js +5 -1
- package/dist/parquetjs/parquet-thrift/BsonType.js +5 -1
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +5 -1
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -1
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +5 -1
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +5 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +5 -1
- package/dist/parquetjs/parquet-thrift/DateType.js +5 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js +5 -1
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +5 -1
- package/dist/parquetjs/parquet-thrift/EnumType.js +5 -1
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +5 -1
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +5 -1
- package/dist/parquetjs/parquet-thrift/IntType.js +5 -1
- package/dist/parquetjs/parquet-thrift/JsonType.js +5 -1
- package/dist/parquetjs/parquet-thrift/KeyValue.js +5 -1
- package/dist/parquetjs/parquet-thrift/ListType.js +5 -1
- package/dist/parquetjs/parquet-thrift/LogicalType.js +5 -1
- package/dist/parquetjs/parquet-thrift/MapType.js +5 -1
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +5 -1
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +5 -1
- package/dist/parquetjs/parquet-thrift/NullType.js +5 -1
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +5 -1
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +5 -1
- package/dist/parquetjs/parquet-thrift/PageHeader.js +5 -1
- package/dist/parquetjs/parquet-thrift/PageLocation.js +5 -1
- package/dist/parquetjs/parquet-thrift/RowGroup.js +5 -1
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +5 -1
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +5 -1
- package/dist/parquetjs/parquet-thrift/Statistics.js +5 -1
- package/dist/parquetjs/parquet-thrift/StringType.js +5 -1
- package/dist/parquetjs/parquet-thrift/TimeType.js +5 -1
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +5 -1
- package/dist/parquetjs/parquet-thrift/TimestampType.js +5 -1
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +5 -1
- package/dist/parquetjs/parquet-thrift/UUIDType.js +5 -1
- package/dist/parquetjs/parquet-thrift/index.js +5 -1
- package/dist/parquetjs/schema/shred.js +5 -1
- package/dist/parquetjs/utils/file-utils.d.ts +2 -0
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/package.json +6 -6
|
@@ -1,31 +1,29 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
1
3
|
import varint from 'varint';
|
|
4
|
+
|
|
2
5
|
export function encodeValues(type, values, opts) {
|
|
3
6
|
if (!('bitWidth' in opts)) {
|
|
4
7
|
throw new Error('bitWidth is required');
|
|
5
8
|
}
|
|
6
|
-
|
|
7
9
|
switch (type) {
|
|
8
10
|
case 'BOOLEAN':
|
|
9
11
|
case 'INT32':
|
|
10
12
|
case 'INT64':
|
|
11
13
|
values = values.map(x => parseInt(x, 10));
|
|
12
14
|
break;
|
|
13
|
-
|
|
14
15
|
default:
|
|
15
16
|
throw new Error("unsupported type: ".concat(type));
|
|
16
17
|
}
|
|
17
|
-
|
|
18
18
|
let buf = Buffer.alloc(0);
|
|
19
19
|
let run = [];
|
|
20
20
|
let repeats = 0;
|
|
21
|
-
|
|
22
21
|
for (let i = 0; i < values.length; i++) {
|
|
23
22
|
if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
|
|
24
23
|
if (run.length) {
|
|
25
24
|
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
|
|
26
25
|
run = [];
|
|
27
26
|
}
|
|
28
|
-
|
|
29
27
|
repeats = 1;
|
|
30
28
|
} else if (repeats > 0 && values[i] === values[i - 1]) {
|
|
31
29
|
repeats += 1;
|
|
@@ -34,21 +32,17 @@ export function encodeValues(type, values, opts) {
|
|
|
34
32
|
buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);
|
|
35
33
|
repeats = 0;
|
|
36
34
|
}
|
|
37
|
-
|
|
38
35
|
run.push(values[i]);
|
|
39
36
|
}
|
|
40
37
|
}
|
|
41
|
-
|
|
42
38
|
if (repeats) {
|
|
43
39
|
buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);
|
|
44
40
|
} else if (run.length) {
|
|
45
41
|
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
|
|
46
42
|
}
|
|
47
|
-
|
|
48
43
|
if (opts.disableEnvelope) {
|
|
49
44
|
return buf;
|
|
50
45
|
}
|
|
51
|
-
|
|
52
46
|
const envelope = Buffer.alloc(buf.length + 4);
|
|
53
47
|
envelope.writeUInt32LE(buf.length, undefined);
|
|
54
48
|
buf.copy(envelope, 4);
|
|
@@ -58,17 +52,13 @@ export function decodeValues(type, cursor, count, opts) {
|
|
|
58
52
|
if (!('bitWidth' in opts)) {
|
|
59
53
|
throw new Error('bitWidth is required');
|
|
60
54
|
}
|
|
61
|
-
|
|
62
55
|
if (!opts.disableEnvelope) {
|
|
63
56
|
cursor.offset += 4;
|
|
64
57
|
}
|
|
65
|
-
|
|
66
58
|
let values = [];
|
|
67
|
-
|
|
68
59
|
while (values.length < count) {
|
|
69
60
|
const header = varint.decode(cursor.buffer, cursor.offset);
|
|
70
61
|
cursor.offset += varint.encodingLength(header);
|
|
71
|
-
|
|
72
62
|
if (header & 1) {
|
|
73
63
|
const count = (header >> 1) * 8;
|
|
74
64
|
values.push(...decodeRunBitpacked(cursor, count, opts));
|
|
@@ -77,39 +67,30 @@ export function decodeValues(type, cursor, count, opts) {
|
|
|
77
67
|
values.push(...decodeRunRepeated(cursor, count, opts));
|
|
78
68
|
}
|
|
79
69
|
}
|
|
80
|
-
|
|
81
70
|
values = values.slice(0, count);
|
|
82
|
-
|
|
83
71
|
if (values.length !== count) {
|
|
84
72
|
throw new Error('invalid RLE encoding');
|
|
85
73
|
}
|
|
86
|
-
|
|
87
74
|
return values;
|
|
88
75
|
}
|
|
89
|
-
|
|
90
76
|
function decodeRunBitpacked(cursor, count, opts) {
|
|
91
77
|
const bitWidth = opts.bitWidth;
|
|
92
|
-
|
|
93
78
|
if (count % 8 !== 0) {
|
|
94
79
|
throw new Error('must be a multiple of 8');
|
|
95
80
|
}
|
|
96
81
|
|
|
97
82
|
const values = new Array(count).fill(0);
|
|
98
|
-
|
|
99
83
|
for (let b = 0; b < bitWidth * count; b++) {
|
|
100
84
|
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & 1 << b % 8) {
|
|
101
85
|
values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;
|
|
102
86
|
}
|
|
103
87
|
}
|
|
104
|
-
|
|
105
88
|
cursor.offset += bitWidth * (count / 8);
|
|
106
89
|
return values;
|
|
107
90
|
}
|
|
108
|
-
|
|
109
91
|
function decodeRunRepeated(cursor, count, opts) {
|
|
110
92
|
const bitWidth = opts.bitWidth;
|
|
111
93
|
let value = 0;
|
|
112
|
-
|
|
113
94
|
for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {
|
|
114
95
|
value << 8;
|
|
115
96
|
value += cursor.buffer[cursor.offset];
|
|
@@ -118,29 +99,22 @@ function decodeRunRepeated(cursor, count, opts) {
|
|
|
118
99
|
|
|
119
100
|
return new Array(count).fill(value);
|
|
120
101
|
}
|
|
121
|
-
|
|
122
102
|
function encodeRunBitpacked(values, opts) {
|
|
123
103
|
const bitWidth = opts.bitWidth;
|
|
124
|
-
|
|
125
104
|
for (let i = 0; i < values.length % 8; i++) {
|
|
126
105
|
values.push(0);
|
|
127
106
|
}
|
|
128
|
-
|
|
129
107
|
const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));
|
|
130
|
-
|
|
131
108
|
for (let b = 0; b < bitWidth * values.length; b++) {
|
|
132
109
|
if ((values[Math.floor(b / bitWidth)] & 1 << b % bitWidth) > 0) {
|
|
133
110
|
buf[Math.floor(b / 8)] |= 1 << b % 8;
|
|
134
111
|
}
|
|
135
112
|
}
|
|
136
|
-
|
|
137
113
|
return Buffer.concat([Buffer.from(varint.encode(values.length / 8 << 1 | 1)), buf]);
|
|
138
114
|
}
|
|
139
|
-
|
|
140
115
|
function encodeRunRepeated(value, count, opts) {
|
|
141
116
|
const bitWidth = opts.bitWidth;
|
|
142
117
|
const buf = Buffer.alloc(Math.ceil(bitWidth / 8));
|
|
143
|
-
|
|
144
118
|
for (let i = 0; i < buf.length; i++) {
|
|
145
119
|
buf.writeUInt8(value & 0xff, i);
|
|
146
120
|
value >> 8;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":"AAIA,OAAOA,MAAP,MAAmB,QAAnB;AAGA,OAAO,SAASC,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAYC,CAAD,IAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAA1B,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,QAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;AAED,OAAO,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,UAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAP,CAAcJ,MAAM,CAACK,MAArB,EAA6BL,MAAM,CAACE,MAApC,CAAf;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB5B,MAAM,CAACgC,cAAP,CAAsBH,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AACd,YAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGc,kBAAkB,CAACP,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AACL,YAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAxB;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGe,iBAAiB,CAACR,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACgC,KAAP,CAAa,CAAb,EAAgBR,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS8B,kBAAT,CACEP,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,MAAIT,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,QAAMF,MAAM,GAAG,IAAIkC,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGT,KAA/B,EAAsCY,CAAC,EAAvC,EAA2C;AACzC,QAAIb,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAAP,GAAgBY,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnEpC,MAAAA,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDV,EAAAA,MAAM,CAACE,MAAP,IAAiBQ,QAAQ,IAAIT,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAAS+B,iBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI5B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG0B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CtB,CAAC,EAA9C,EAAkD;AAEhD4B,IAAAA,KAAK,IAAI,CAAT;AACAA,IAAAA,KAAK,IAAIhB,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIS,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAASzB,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,QAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIjC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIwB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAtC,EAA8CwB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE3B,MAAAA,GAAG,CAAC+B,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO7B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAgB1C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2BwB,KAA3B,EAA0Cf,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,QAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACqC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B5B,CAA7B;AAEA4B,IAAAA,KAAK,IAAI,CAAT;AACD;;AAED,SAAOhC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAclB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
|
|
1
|
+
{"version":3,"file":"rle.js","names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"sources":["../../../../src/parquetjs/codecs/rle.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"mappings":";;AAIA,OAAOA,MAAM,MAAM,QAAQ;;AAG3B,OAAO,SAASC,YAAY,CAC1BC,IAAmB,EACnBC,MAAa,EACbC,IAAyB,EACjB;EACR,IAAI,EAAE,UAAU,IAAIA,IAAI,CAAC,EAAE;IACzB,MAAM,IAAIC,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,QAAQH,IAAI;IACV,KAAK,SAAS;IACd,KAAK,OAAO;IACZ,KAAK,OAAO;MAEVC,MAAM,GAAGA,MAAM,CAACG,GAAG,CAAEC,CAAC,IAAKC,QAAQ,CAACD,CAAC,EAAE,EAAE,CAAC,CAAC;MAC3C;IAEF;MACE,MAAM,IAAIF,KAAK,6BAAsBH,IAAI,EAAG;EAAC;EAGjD,IAAIO,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC,CAAC,CAAC;EACzB,IAAIC,GAAU,GAAG,EAAE;EACnB,IAAIC,OAAO,GAAG,CAAC;EAEf,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGX,MAAM,CAACY,MAAM,EAAED,CAAC,EAAE,EAAE;IAGtC,IAAID,OAAO,KAAK,CAAC,IAAID,GAAG,CAACG,MAAM,GAAG,CAAC,KAAK,CAAC,IAAIZ,MAAM,CAACW,CAAC,CAAC,KAAKX,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAE;MAExE,IAAIF,GAAG,CAACG,MAAM,EAAE;QACdN,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAEQ,kBAAkB,CAACL,GAAG,EAAER,IAAI,CAAC,CAAC,CAAC;QACzDQ,GAAG,GAAG,EAAE;MACV;MACAC,OAAO,GAAG,CAAC;IACb,CAAC,MAAM,IAAIA,OAAO,GAAG,CAAC,IAAIV,MAAM,CAACW,CAAC,CAAC,KAAKX,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAE;MACrDD,OAAO,IAAI,CAAC;IACd,CAAC,MAAM;MAEL,IAAIA,OAAO,EAAE;QACXJ,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAES,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAED,OAAO,EAAET,IAAI,CAAC,CAAC,CAAC;QAC3ES,OAAO,GAAG,CAAC;MACb;MACAD,GAAG,CAACO,IAAI,CAAChB,MAAM,CAACW,CAAC,CAAC,CAAC;IACrB;EACF;EAEA,IAAID,OAAO,EAAE;IACXJ,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAES,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAM,GAAG,CAAC,CAAC,EAAEF,OAAO,EAAET,IAAI,CAAC,CAAC,CAAC;EACzF,CAAC,MAAM,IAAIQ,GAAG,CAACG,MAAM,EAAE;IACrBN,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAEQ,kBAAkB,CAACL,GAAG,EAAER,IAAI,CAAC,CAAC,CAAC;EAC3D;EAEA,IAAIA,IAAI,CAACgB,eAAe,EAAE;IACxB,OAAOX,GAAG;EACZ;EAEA,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAK,CAACF,GAAG,CAACM,MAAM,GAAG,CAAC,CAAC;EAC7CM,QAAQ,CAACC,aAAa,CAACb,GAAG,CAACM,MAAM,EAAEQ,SAAS,CAAC;EAC7Cd,GAAG,CAACe,IAAI,CAACH,QAAQ,EAAE,CAAC,CAAC;EAErB,OAAOA,QAAQ;AACjB;AAEA,OAAO,SAASI,YAAY,CAC1BvB,IAAmB,EACnBwB,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EACV,IAAI,EAAE,UAAU,IAAIA,IAAI,CAAC,EAAE;IACzB,MAAM,IAAIC,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,IAAI,CAACD,IAAI,CAACgB,eAAe,EAAE;IACzBM,MAAM,CAACE,MAAM,IAAI,CAAC;EACpB;EAEA,IAAIzB,MAAgB,GAAG,EAAE;EACzB,OAAOA,MAAM,CAACY,MAAM,GAAGY,KAAK,EAAE;IAC5B,MAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAM,CAACJ,MAAM,CAACK,MAAM,EAAEL,MAAM,CAACE,MAAM,CAAC;IAC1DF,MAAM,CAACE,MAAM,IAAI5B,MAAM,CAACgC,cAAc,CAACH,MAAM,CAAC;IAC9C,IAAIA,MAAM,GAAG,CAAC,EAAE;MACd,MAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAC,IAAI,CAAC;MAC/B1B,MAAM,CAACgB,IAAI,CAAC,GAAGc,kBAAkB,CAACP,MAAM,EAAEC,KAAK,EAAEvB,IAAI,CAAC,CAAC;IACzD,CAAC,MAAM;MACL,MAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAC;MACzB1B,MAAM,CAACgB,IAAI,CAAC,GAAGe,iBAAiB,CAACR,MAAM,EAAEC,KAAK,EAAEvB,IAAI,CAAC,CAAC;IACxD;EACF;EACAD,MAAM,GAAGA,MAAM,CAACgC,KAAK,CAAC,CAAC,EAAER,KAAK,CAAC;EAE/B,IAAIxB,MAAM,CAACY,MAAM,KAAKY,KAAK,EAAE;IAC3B,MAAM,IAAItB,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,OAAOF,MAAM;AACf;AAEA,SAAS8B,kBAAkB,CACzBP,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EAEV,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,IAAIT,KAAK,GAAG,CAAC,KAAK,CAAC,EAAE;IACnB,MAAM,IAAItB,KAAK,CAAC,yBAAyB,CAAC;EAC5C;;EAGA,MAAMF,MAAM,GAAG,IAAIkC,KAAK,CAACV,KAAK,CAAC,CAACW,IAAI,CAAC,CAAC,CAAC;EACvC,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,QAAQ,GAAGT,KAAK,EAAEY,CAAC,EAAE,EAAE;IACzC,IAAIb,MAAM,CAACK,MAAM,CAACL,MAAM,CAACE,MAAM,GAAGY,IAAI,CAACC,KAAK,CAACF,CAAC,GAAG,CAAC,CAAC,CAAC,GAAI,CAAC,IAAIA,CAAC,GAAG,CAAE,EAAE;MACnEpC,MAAM,CAACqC,IAAI,CAACC,KAAK,CAACF,CAAC,GAAGH,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAIG,CAAC,GAAGH,QAAQ;IACvD;EACF;EAEAV,MAAM,CAACE,MAAM,IAAIQ,QAAQ,IAAIT,KAAK,GAAG,CAAC,CAAC;EACvC,OAAOxB,MAAM;AACf;AAEA,SAAS+B,iBAAiB,CACxBR,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EAEV,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,IAAIM,KAAK,GAAG,CAAC;EACb,KAAK,IAAI5B,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG0B,IAAI,CAACG,IAAI,CAACP,QAAQ,GAAG,CAAC,CAAC,EAAEtB,CAAC,EAAE,EAAE;IAEhD4B,KAAK,IAAI,CAAC;IACVA,KAAK,IAAIhB,MAAM,CAACK,MAAM,CAACL,MAAM,CAACE,MAAM,CAAC;IACrCF,MAAM,CAACE,MAAM,IAAI,CAAC;EACpB;;EAGA,OAAO,IAAIS,KAAK,CAACV,KAAK,CAAC,CAACW,IAAI,CAACI,KAAK,CAAC;AACrC;AAEA,SAASzB,kBAAkB,CAACd,MAAgB,EAAEC,IAAyB,EAAU;EAE/E,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,KAAK,IAAItB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGX,MAAM,CAACY,MAAM,GAAG,CAAC,EAAED,CAAC,EAAE,EAAE;IAC1CX,MAAM,CAACgB,IAAI,CAAC,CAAC,CAAC;EAChB;EAEA,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC6B,IAAI,CAACG,IAAI,CAACP,QAAQ,IAAIjC,MAAM,CAACY,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;EACnE,KAAK,IAAIwB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAM,EAAEwB,CAAC,EAAE,EAAE;IACjD,IAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAK,CAACF,CAAC,GAAGH,QAAQ,CAAC,CAAC,GAAI,CAAC,IAAIG,CAAC,GAAGH,QAAS,IAAI,CAAC,EAAE;MAChE3B,GAAG,CAAC+B,IAAI,CAACC,KAAK,CAACF,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAIA,CAAC,GAAG,CAAC;IACtC;EACF;EAEA,OAAO7B,MAAM,CAACM,MAAM,CAAC,CAACN,MAAM,CAACkC,IAAI,CAAC5C,MAAM,CAAC6C,MAAM,CAAG1C,MAAM,CAACY,MAAM,GAAG,CAAC,IAAK,CAAC,GAAI,CAAC,CAAC,CAAC,EAAEN,GAAG,CAAC,CAAC;AACzF;AAEA,SAASS,iBAAiB,CAACwB,KAAa,EAAEf,KAAa,EAAEvB,IAAyB,EAAU;EAE1F,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,MAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC6B,IAAI,CAACG,IAAI,CAACP,QAAQ,GAAG,CAAC,CAAC,CAAC;EAEjD,KAAK,IAAItB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGL,GAAG,CAACM,MAAM,EAAED,CAAC,EAAE,EAAE;IACnCL,GAAG,CAACqC,UAAU,CAACJ,KAAK,GAAG,IAAI,EAAE5B,CAAC,CAAC;IAE/B4B,KAAK,IAAI,CAAC;EACZ;;EAEA,OAAOhC,MAAM,CAACM,MAAM,CAAC,CAACN,MAAM,CAACkC,IAAI,CAAC5C,MAAM,CAAC6C,MAAM,CAAClB,KAAK,IAAI,CAAC,CAAC,CAAC,EAAElB,GAAG,CAAC,CAAC;AACrE"}
|
|
@@ -1,11 +1,16 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
1
3
|
import { NoCompression, GZipCompression, SnappyCompression, BrotliCompression, LZOCompression, LZ4Compression, ZstdCompression } from '@loaders.gl/compression';
|
|
2
4
|
import { toArrayBuffer, toBuffer } from './utils/buffer-utils';
|
|
5
|
+
|
|
3
6
|
import lz4js from 'lz4js';
|
|
4
7
|
import lzo from 'lzo';
|
|
8
|
+
|
|
5
9
|
const modules = {
|
|
6
10
|
lz4js,
|
|
7
11
|
lzo
|
|
8
12
|
};
|
|
13
|
+
|
|
9
14
|
export const PARQUET_COMPRESSION_METHODS = {
|
|
10
15
|
UNCOMPRESSED: new NoCompression(),
|
|
11
16
|
GZIP: new GZipCompression(),
|
|
@@ -26,37 +31,36 @@ export const PARQUET_COMPRESSION_METHODS = {
|
|
|
26
31
|
modules
|
|
27
32
|
})
|
|
28
33
|
};
|
|
34
|
+
|
|
29
35
|
export async function preloadCompressions(options) {
|
|
30
36
|
const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
|
|
31
37
|
return await Promise.all(compressions.map(compression => compression.preload()));
|
|
32
38
|
}
|
|
39
|
+
|
|
33
40
|
export async function deflate(method, value) {
|
|
34
41
|
const compression = PARQUET_COMPRESSION_METHODS[method];
|
|
35
|
-
|
|
36
42
|
if (!compression) {
|
|
37
43
|
throw new Error("parquet: invalid compression method: ".concat(method));
|
|
38
44
|
}
|
|
39
|
-
|
|
40
45
|
const inputArrayBuffer = toArrayBuffer(value);
|
|
41
46
|
const compressedArrayBuffer = await compression.compress(inputArrayBuffer);
|
|
42
47
|
return toBuffer(compressedArrayBuffer);
|
|
43
48
|
}
|
|
49
|
+
|
|
44
50
|
export async function decompress(method, value, size) {
|
|
45
51
|
const compression = PARQUET_COMPRESSION_METHODS[method];
|
|
46
|
-
|
|
47
52
|
if (!compression) {
|
|
48
53
|
throw new Error("parquet: invalid compression method: ".concat(method));
|
|
49
54
|
}
|
|
50
|
-
|
|
51
55
|
const inputArrayBuffer = toArrayBuffer(value);
|
|
52
56
|
const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);
|
|
53
57
|
return toBuffer(compressedArrayBuffer);
|
|
54
58
|
}
|
|
59
|
+
|
|
55
60
|
export function inflate(method, value, size) {
|
|
56
61
|
if (!(method in PARQUET_COMPRESSION_METHODS)) {
|
|
57
62
|
throw new Error("invalid compression method: ".concat(method));
|
|
58
63
|
}
|
|
59
|
-
|
|
60
64
|
return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
|
|
61
65
|
}
|
|
62
66
|
//# sourceMappingURL=compression.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"file":"compression.js","names":["NoCompression","GZipCompression","SnappyCompression","BrotliCompression","LZOCompression","LZ4Compression","ZstdCompression","toArrayBuffer","toBuffer","lz4js","lzo","modules","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","GZIP","SNAPPY","BROTLI","LZ4","LZ4_RAW","LZO","ZSTD","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","Error","inputArrayBuffer","compressedArrayBuffer","compress","decompress","size","inflate"],"sources":["../../../src/parquetjs/compression.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\n// Forked from https://github.com/ironSource/parquetjs under MIT license\n\nimport {\n Compression,\n NoCompression,\n GZipCompression,\n SnappyCompression,\n BrotliCompression,\n LZOCompression,\n LZ4Compression,\n ZstdCompression\n} from '@loaders.gl/compression';\n\nimport {ParquetCompression} from './schema/declare';\nimport {toArrayBuffer, toBuffer} from './utils/buffer-utils';\n\n// TODO switch to worker compression to avoid bundling...\n\n// import brotli from 'brotli'; - brotli has problems with decompress in browsers\n// import brotliDecompress from 'brotli/decompress';\nimport lz4js from 'lz4js';\nimport lzo from 'lzo';\n// import {ZstdCodec} from 'zstd-codec';\n\n// Inject large dependencies through Compression constructor options\nconst modules = {\n // brotli has problems with decompress in browsers\n // brotli: {\n // decompress: brotliDecompress,\n // compress: () => {\n // throw new Error('brotli compress');\n // }\n // },\n lz4js,\n lzo\n // 'zstd-codec': ZstdCodec\n};\n\n// See https://github.com/apache/parquet-format/blob/master/Compression.md\nexport const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression> = {\n UNCOMPRESSED: new NoCompression(),\n GZIP: new GZipCompression(),\n SNAPPY: new SnappyCompression(),\n BROTLI: new BrotliCompression({modules}),\n // TODO: Understand difference between LZ4 and LZ4_RAW\n LZ4: new LZ4Compression({modules}),\n LZ4_RAW: new LZ4Compression({modules}),\n LZO: new LZOCompression({modules}),\n ZSTD: new ZstdCompression({modules})\n};\n\n/**\n * Register compressions that have big external libraries\n * @param options.modules External library dependencies\n */\nexport async function preloadCompressions(options?: {modules: {[key: string]: any}}) {\n const compressions = Object.values(PARQUET_COMPRESSION_METHODS);\n return await Promise.all(compressions.map((compression) => compression.preload()));\n}\n\n/**\n * Deflate a value using compression method `method`\n */\nexport async function deflate(method: ParquetCompression, value: Buffer): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.compress(inputArrayBuffer);\n return toBuffer(compressedArrayBuffer);\n}\n\n/**\n * Inflate a value using compression method `method`\n */\nexport async function decompress(\n method: ParquetCompression,\n value: Buffer,\n size: number\n): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);\n return toBuffer(compressedArrayBuffer);\n}\n\n/*\n * Inflate a value using compression method `method`\n */\nexport function inflate(method: ParquetCompression, value: Buffer, size: number): Buffer {\n if (!(method in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`invalid compression method: ${method}`);\n }\n // @ts-ignore\n return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);\n}\n\n/*\nfunction deflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction deflate_gzip(value: Buffer): Buffer {\n return zlib.gzipSync(value);\n}\n\nfunction deflate_snappy(value: Buffer): Buffer {\n return snappyjs.compress(value);\n}\n\nfunction deflate_lzo(value: Buffer): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.compress(value);\n}\n\nfunction deflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n const result = brotli.compress(value, {\n mode: 0,\n quality: 8,\n lgwin: 22\n });\n return result ? Buffer.from(result) : Buffer.alloc(0);\n}\n\nfunction deflate_lz4(value: Buffer): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(lz4js.encodeBound(value.length));\n // const compressedSize = lz4.encodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, compressedSize);\n // return result;\n return Buffer.from(lz4js.compress(value));\n } catch (err) {\n throw err;\n }\n}\nfunction inflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction inflate_gzip(value: Buffer): Buffer {\n return zlib.gunzipSync(value);\n}\n\nfunction inflate_snappy(value: Buffer): Buffer {\n return snappyjs.uncompress(value);\n}\n\nfunction inflate_lzo(value: Buffer, size: number): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.decompress(value, size);\n}\n\nfunction inflate_lz4(value: Buffer, size: number): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(size);\n // const uncompressedSize = lz4js.decodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, uncompressedSize);\n // return result;\n return Buffer.from(lz4js.decompress(value, size));\n } catch (err) {\n throw err;\n }\n}\n\nfunction inflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n if (!value.length) {\n return Buffer.alloc(0);\n }\n return Buffer.from(brotli.decompress(value));\n}\n*/\n"],"mappings":";;AAIA,SAEEA,aAAa,EACbC,eAAe,EACfC,iBAAiB,EACjBC,iBAAiB,EACjBC,cAAc,EACdC,cAAc,EACdC,eAAe,QACV,yBAAyB;AAGhC,SAAQC,aAAa,EAAEC,QAAQ,QAAO,sBAAsB;;AAM5D,OAAOC,KAAK,MAAM,OAAO;AACzB,OAAOC,GAAG,MAAM,KAAK;;AAIrB,MAAMC,OAAO,GAAG;EAQdF,KAAK;EACLC;AAEF,CAAC;;AAGD,OAAO,MAAME,2BAAoE,GAAG;EAClFC,YAAY,EAAE,IAAIb,aAAa,EAAE;EACjCc,IAAI,EAAE,IAAIb,eAAe,EAAE;EAC3Bc,MAAM,EAAE,IAAIb,iBAAiB,EAAE;EAC/Bc,MAAM,EAAE,IAAIb,iBAAiB,CAAC;IAACQ;EAAO,CAAC,CAAC;EAExCM,GAAG,EAAE,IAAIZ,cAAc,CAAC;IAACM;EAAO,CAAC,CAAC;EAClCO,OAAO,EAAE,IAAIb,cAAc,CAAC;IAACM;EAAO,CAAC,CAAC;EACtCQ,GAAG,EAAE,IAAIf,cAAc,CAAC;IAACO;EAAO,CAAC,CAAC;EAClCS,IAAI,EAAE,IAAId,eAAe,CAAC;IAACK;EAAO,CAAC;AACrC,CAAC;;AAMD,OAAO,eAAeU,mBAAmB,CAACC,OAAyC,EAAE;EACnF,MAAMC,YAAY,GAAGC,MAAM,CAACC,MAAM,CAACb,2BAA2B,CAAC;EAC/D,OAAO,MAAMc,OAAO,CAACC,GAAG,CAACJ,YAAY,CAACK,GAAG,CAAEC,WAAW,IAAKA,WAAW,CAACC,OAAO,EAAE,CAAC,CAAC;AACpF;;AAKA,OAAO,eAAeC,OAAO,CAACC,MAA0B,EAAEC,KAAa,EAAmB;EACxF,MAAMJ,WAAW,GAAGjB,2BAA2B,CAACoB,MAAM,CAAC;EACvD,IAAI,CAACH,WAAW,EAAE;IAChB,MAAM,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;EACnE;EACA,MAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAK,CAAC;EAC7C,MAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACQ,QAAQ,CAACF,gBAAgB,CAAC;EAC1E,OAAO3B,QAAQ,CAAC4B,qBAAqB,CAAC;AACxC;;AAKA,OAAO,eAAeE,UAAU,CAC9BN,MAA0B,EAC1BC,KAAa,EACbM,IAAY,EACK;EACjB,MAAMV,WAAW,GAAGjB,2BAA2B,CAACoB,MAAM,CAAC;EACvD,IAAI,CAACH,WAAW,EAAE;IAChB,MAAM,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;EACnE;EACA,MAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAK,CAAC;EAC7C,MAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACS,UAAU,CAACH,gBAAgB,EAAEI,IAAI,CAAC;EAClF,OAAO/B,QAAQ,CAAC4B,qBAAqB,CAAC;AACxC;;AAKA,OAAO,SAASI,OAAO,CAACR,MAA0B,EAAEC,KAAa,EAAEM,IAAY,EAAU;EACvF,IAAI,EAAEP,MAAM,IAAIpB,2BAA2B,CAAC,EAAE;IAC5C,MAAM,IAAIsB,KAAK,uCAAgCF,MAAM,EAAG;EAC1D;EAEA,OAAOpB,2BAA2B,CAACoB,MAAM,CAAC,CAACQ,OAAO,CAACP,KAAK,EAAEM,IAAI,CAAC;AACjE"}
|
|
@@ -7,10 +7,14 @@ import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageH
|
|
|
7
7
|
import { osopen, oswrite, osclose } from '../utils/file-utils';
|
|
8
8
|
import { getBitWidth, serializeThrift } from '../utils/read-utils';
|
|
9
9
|
import Int64 from 'node-int64';
|
|
10
|
+
|
|
10
11
|
const PARQUET_MAGIC = 'PAR1';
|
|
12
|
+
|
|
11
13
|
const PARQUET_VERSION = 1;
|
|
14
|
+
|
|
12
15
|
const PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
13
16
|
const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
17
|
+
|
|
14
18
|
const PARQUET_RDLVL_TYPE = 'INT32';
|
|
15
19
|
const PARQUET_RDLVL_ENCODING = 'RLE';
|
|
16
20
|
export class ParquetWriter {
|
|
@@ -23,33 +27,25 @@ export class ParquetWriter {
|
|
|
23
27
|
if (!opts) {
|
|
24
28
|
opts = {};
|
|
25
29
|
}
|
|
26
|
-
|
|
27
30
|
const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
28
31
|
return new ParquetWriter(schema, envelopeWriter, opts);
|
|
29
32
|
}
|
|
30
|
-
|
|
31
33
|
constructor(schema, envelopeWriter, opts) {
|
|
32
34
|
_defineProperty(this, "schema", void 0);
|
|
33
|
-
|
|
34
35
|
_defineProperty(this, "envelopeWriter", void 0);
|
|
35
|
-
|
|
36
36
|
_defineProperty(this, "rowBuffer", void 0);
|
|
37
|
-
|
|
38
37
|
_defineProperty(this, "rowGroupSize", void 0);
|
|
39
|
-
|
|
40
38
|
_defineProperty(this, "closed", void 0);
|
|
41
|
-
|
|
42
39
|
_defineProperty(this, "userMetadata", void 0);
|
|
43
|
-
|
|
44
40
|
this.schema = schema;
|
|
45
41
|
this.envelopeWriter = envelopeWriter;
|
|
46
42
|
this.rowBuffer = {};
|
|
47
43
|
this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
|
|
48
44
|
this.closed = false;
|
|
49
45
|
this.userMetadata = {};
|
|
46
|
+
|
|
50
47
|
this.writeHeader();
|
|
51
48
|
}
|
|
52
|
-
|
|
53
49
|
async writeHeader() {
|
|
54
50
|
try {
|
|
55
51
|
await this.envelopeWriter.writeHeader();
|
|
@@ -63,9 +59,7 @@ export class ParquetWriter {
|
|
|
63
59
|
if (this.closed) {
|
|
64
60
|
throw new Error('writer was closed');
|
|
65
61
|
}
|
|
66
|
-
|
|
67
62
|
Shred.shredRecord(this.schema, row, this.rowBuffer);
|
|
68
|
-
|
|
69
63
|
if (this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
70
64
|
this.rowBuffer = {};
|
|
71
65
|
}
|
|
@@ -75,13 +69,10 @@ export class ParquetWriter {
|
|
|
75
69
|
if (this.closed) {
|
|
76
70
|
throw new Error('writer was closed');
|
|
77
71
|
}
|
|
78
|
-
|
|
79
72
|
this.closed = true;
|
|
80
|
-
|
|
81
73
|
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
82
74
|
this.rowBuffer = {};
|
|
83
75
|
}
|
|
84
|
-
|
|
85
76
|
await this.envelopeWriter.writeFooter(this.userMetadata);
|
|
86
77
|
await this.envelopeWriter.close();
|
|
87
78
|
|
|
@@ -101,32 +92,23 @@ export class ParquetWriter {
|
|
|
101
92
|
setPageSize(cnt) {
|
|
102
93
|
this.envelopeWriter.setPageSize(cnt);
|
|
103
94
|
}
|
|
104
|
-
|
|
105
95
|
}
|
|
96
|
+
|
|
106
97
|
export class ParquetEnvelopeWriter {
|
|
107
98
|
static async openStream(schema, outputStream, opts) {
|
|
108
99
|
const writeFn = oswrite.bind(undefined, outputStream);
|
|
109
100
|
const closeFn = osclose.bind(undefined, outputStream);
|
|
110
101
|
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
|
|
111
102
|
}
|
|
112
|
-
|
|
113
103
|
constructor(schema, writeFn, closeFn, fileOffset, opts) {
|
|
114
104
|
_defineProperty(this, "schema", void 0);
|
|
115
|
-
|
|
116
105
|
_defineProperty(this, "write", void 0);
|
|
117
|
-
|
|
118
106
|
_defineProperty(this, "close", void 0);
|
|
119
|
-
|
|
120
107
|
_defineProperty(this, "offset", void 0);
|
|
121
|
-
|
|
122
108
|
_defineProperty(this, "rowCount", void 0);
|
|
123
|
-
|
|
124
109
|
_defineProperty(this, "rowGroups", void 0);
|
|
125
|
-
|
|
126
110
|
_defineProperty(this, "pageSize", void 0);
|
|
127
|
-
|
|
128
111
|
_defineProperty(this, "useDataPageV2", void 0);
|
|
129
|
-
|
|
130
112
|
this.schema = schema;
|
|
131
113
|
this.write = writeFn;
|
|
132
114
|
this.close = closeFn;
|
|
@@ -136,7 +118,6 @@ export class ParquetEnvelopeWriter {
|
|
|
136
118
|
this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
|
|
137
119
|
this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
|
|
138
120
|
}
|
|
139
|
-
|
|
140
121
|
writeSection(buf) {
|
|
141
122
|
this.offset += buf.length;
|
|
142
123
|
return this.write(buf);
|
|
@@ -161,29 +142,26 @@ export class ParquetEnvelopeWriter {
|
|
|
161
142
|
if (!userMetadata) {
|
|
162
143
|
userMetadata = {};
|
|
163
144
|
}
|
|
164
|
-
|
|
165
145
|
return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
|
|
166
146
|
}
|
|
167
147
|
|
|
168
148
|
setPageSize(cnt) {
|
|
169
149
|
this.pageSize = cnt;
|
|
170
150
|
}
|
|
171
|
-
|
|
172
151
|
}
|
|
152
|
+
|
|
173
153
|
export class ParquetTransformer extends Transform {
|
|
174
|
-
constructor(schema
|
|
154
|
+
constructor(schema) {
|
|
155
|
+
let opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
175
156
|
super({
|
|
176
157
|
objectMode: true
|
|
177
158
|
});
|
|
178
|
-
|
|
179
159
|
_defineProperty(this, "writer", void 0);
|
|
180
|
-
|
|
181
160
|
const writeProxy = function (t) {
|
|
182
161
|
return async function (b) {
|
|
183
162
|
t.push(b);
|
|
184
163
|
};
|
|
185
164
|
}(this);
|
|
186
|
-
|
|
187
165
|
this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts), opts);
|
|
188
166
|
}
|
|
189
167
|
|
|
@@ -191,7 +169,6 @@ export class ParquetTransformer extends Transform {
|
|
|
191
169
|
if (row) {
|
|
192
170
|
return this.writer.appendRow(row).then(callback);
|
|
193
171
|
}
|
|
194
|
-
|
|
195
172
|
callback();
|
|
196
173
|
return Promise.resolve();
|
|
197
174
|
}
|
|
@@ -199,20 +176,17 @@ export class ParquetTransformer extends Transform {
|
|
|
199
176
|
async _flush(callback) {
|
|
200
177
|
await this.writer.close(callback);
|
|
201
178
|
}
|
|
202
|
-
|
|
203
179
|
}
|
|
204
180
|
|
|
205
181
|
function encodeValues(type, encoding, values, opts) {
|
|
206
182
|
if (!(encoding in PARQUET_CODECS)) {
|
|
207
183
|
throw new Error("invalid encoding: ".concat(encoding));
|
|
208
184
|
}
|
|
209
|
-
|
|
210
185
|
return PARQUET_CODECS[encoding].encodeValues(type, values, opts);
|
|
211
186
|
}
|
|
212
187
|
|
|
213
188
|
async function encodeDataPage(column, data) {
|
|
214
189
|
let rLevelsBuf = Buffer.alloc(0);
|
|
215
|
-
|
|
216
190
|
if (column.rLevelMax > 0) {
|
|
217
191
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
218
192
|
bitWidth: getBitWidth(column.rLevelMax)
|
|
@@ -220,7 +194,6 @@ async function encodeDataPage(column, data) {
|
|
|
220
194
|
}
|
|
221
195
|
|
|
222
196
|
let dLevelsBuf = Buffer.alloc(0);
|
|
223
|
-
|
|
224
197
|
if (column.dLevelMax > 0) {
|
|
225
198
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
226
199
|
bitWidth: getBitWidth(column.dLevelMax)
|
|
@@ -232,7 +205,9 @@ async function encodeDataPage(column, data) {
|
|
|
232
205
|
bitWidth: column.typeLength
|
|
233
206
|
});
|
|
234
207
|
const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
208
|
+
|
|
235
209
|
const compressedBuf = await Compression.deflate(column.compression, dataBuf);
|
|
210
|
+
|
|
236
211
|
const header = new PageHeader({
|
|
237
212
|
type: PageType.DATA_PAGE,
|
|
238
213
|
data_page_header: new DataPageHeader({
|
|
@@ -241,9 +216,11 @@ async function encodeDataPage(column, data) {
|
|
|
241
216
|
definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING],
|
|
242
217
|
repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING]
|
|
243
218
|
}),
|
|
219
|
+
|
|
244
220
|
uncompressed_page_size: dataBuf.length,
|
|
245
221
|
compressed_page_size: compressedBuf.length
|
|
246
222
|
});
|
|
223
|
+
|
|
247
224
|
const headerBuf = serializeThrift(header);
|
|
248
225
|
const page = Buffer.concat([headerBuf, compressedBuf]);
|
|
249
226
|
return {
|
|
@@ -258,18 +235,17 @@ async function encodeDataPageV2(column, data, rowCount) {
|
|
|
258
235
|
typeLength: column.typeLength,
|
|
259
236
|
bitWidth: column.typeLength
|
|
260
237
|
});
|
|
238
|
+
|
|
261
239
|
const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
|
|
262
|
-
let rLevelsBuf = Buffer.alloc(0);
|
|
263
240
|
|
|
241
|
+
let rLevelsBuf = Buffer.alloc(0);
|
|
264
242
|
if (column.rLevelMax > 0) {
|
|
265
243
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
266
244
|
bitWidth: getBitWidth(column.rLevelMax),
|
|
267
245
|
disableEnvelope: true
|
|
268
246
|
});
|
|
269
247
|
}
|
|
270
|
-
|
|
271
248
|
let dLevelsBuf = Buffer.alloc(0);
|
|
272
|
-
|
|
273
249
|
if (column.dLevelMax > 0) {
|
|
274
250
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
275
251
|
bitWidth: getBitWidth(column.dLevelMax),
|
|
@@ -291,6 +267,7 @@ async function encodeDataPageV2(column, data, rowCount) {
|
|
|
291
267
|
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
|
|
292
268
|
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
|
|
293
269
|
});
|
|
270
|
+
|
|
294
271
|
const headerBuf = serializeThrift(header);
|
|
295
272
|
const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
296
273
|
return {
|
|
@@ -312,6 +289,7 @@ async function encodeColumnChunk(column, buffer, offset, opts) {
|
|
|
312
289
|
total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
|
|
313
290
|
total_compressed_size += result.header.compressed_page_size + result.headerSize;
|
|
314
291
|
}
|
|
292
|
+
|
|
315
293
|
const metadata = new ColumnMetaData({
|
|
316
294
|
path_in_schema: column.path,
|
|
317
295
|
num_values: data.count,
|
|
@@ -322,8 +300,10 @@ async function encodeColumnChunk(column, buffer, offset, opts) {
|
|
|
322
300
|
type: Type[column.primitiveType],
|
|
323
301
|
codec: CompressionCodec[column.compression]
|
|
324
302
|
});
|
|
303
|
+
|
|
325
304
|
metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);
|
|
326
305
|
metadata.encodings.push(Encoding[column.encoding]);
|
|
306
|
+
|
|
327
307
|
const metadataOffset = baseOffset + pageBuf.length;
|
|
328
308
|
const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
|
|
329
309
|
return {
|
|
@@ -340,7 +320,6 @@ async function encodeRowGroup(schema, data, opts) {
|
|
|
340
320
|
total_byte_size: 0
|
|
341
321
|
});
|
|
342
322
|
let body = Buffer.alloc(0);
|
|
343
|
-
|
|
344
323
|
for (const field of schema.fieldList) {
|
|
345
324
|
if (field.isNested) {
|
|
346
325
|
continue;
|
|
@@ -355,7 +334,6 @@ async function encodeRowGroup(schema, data, opts) {
|
|
|
355
334
|
metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
356
335
|
body = Buffer.concat([body, cchunkData.body]);
|
|
357
336
|
}
|
|
358
|
-
|
|
359
337
|
return {
|
|
360
338
|
body,
|
|
361
339
|
metadata
|
|
@@ -371,17 +349,14 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
|
371
349
|
schema: [],
|
|
372
350
|
key_value_metadata: []
|
|
373
351
|
});
|
|
374
|
-
|
|
375
352
|
for (const key in userMetadata) {
|
|
376
|
-
var _metadata$key_value_m, _metadata$key_value_m2;
|
|
377
|
-
|
|
353
|
+
var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
|
|
378
354
|
const kv = new KeyValue({
|
|
379
355
|
key,
|
|
380
356
|
value: userMetadata[key]
|
|
381
357
|
});
|
|
382
|
-
(_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$
|
|
358
|
+
(_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
|
|
383
359
|
}
|
|
384
|
-
|
|
385
360
|
{
|
|
386
361
|
const schemaRoot = new SchemaElement({
|
|
387
362
|
name: 'root',
|
|
@@ -389,28 +364,23 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
|
389
364
|
});
|
|
390
365
|
metadata.schema.push(schemaRoot);
|
|
391
366
|
}
|
|
392
|
-
|
|
393
367
|
for (const field of schema.fieldList) {
|
|
394
368
|
const relt = FieldRepetitionType[field.repetitionType];
|
|
395
369
|
const schemaElem = new SchemaElement({
|
|
396
370
|
name: field.name,
|
|
397
371
|
repetition_type: relt
|
|
398
372
|
});
|
|
399
|
-
|
|
400
373
|
if (field.isNested) {
|
|
401
374
|
schemaElem.num_children = field.fieldCount;
|
|
402
375
|
} else {
|
|
403
376
|
schemaElem.type = Type[field.primitiveType];
|
|
404
377
|
}
|
|
405
|
-
|
|
406
378
|
if (field.originalType) {
|
|
407
379
|
schemaElem.converted_type = ConvertedType[field.originalType];
|
|
408
380
|
}
|
|
409
|
-
|
|
410
381
|
schemaElem.type_length = field.typeLength;
|
|
411
382
|
metadata.schema.push(schemaElem);
|
|
412
383
|
}
|
|
413
|
-
|
|
414
384
|
const metadataEncoded = serializeThrift(metadata);
|
|
415
385
|
const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
|
|
416
386
|
metadataEncoded.copy(footerEncoded);
|