@loaders.gl/parquet 3.1.3 → 4.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/bundle.js.map +1 -0
- package/dist/constants.js +6 -18
- package/dist/constants.js.map +1 -0
- package/dist/dist.min.js +17 -8
- package/dist/dist.min.js.map +3 -3
- package/dist/index.js +14 -29
- package/dist/index.js.map +1 -0
- package/dist/lib/convert-schema.js +63 -62
- package/dist/lib/convert-schema.js.map +1 -0
- package/dist/lib/parse-parquet.js +25 -25
- package/dist/lib/parse-parquet.js.map +1 -0
- package/dist/lib/read-array-buffer.js +8 -28
- package/dist/lib/read-array-buffer.js.map +1 -0
- package/dist/parquet-loader.js +19 -24
- package/dist/parquet-loader.js.map +1 -0
- package/dist/parquet-worker.js +18 -9
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.js +14 -17
- package/dist/parquet-writer.js.map +1 -0
- package/dist/{es5/parquetjs → parquetjs}/LICENSE +0 -0
- package/dist/parquetjs/codecs/declare.js +2 -2
- package/dist/{es5/parquetjs → parquetjs}/codecs/declare.js.map +0 -0
- package/dist/parquetjs/codecs/dictionary.js +10 -12
- package/dist/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/parquetjs/codecs/index.js +22 -50
- package/dist/parquetjs/codecs/index.js.map +1 -0
- package/dist/parquetjs/codecs/plain.js +232 -173
- package/dist/parquetjs/codecs/plain.js.map +1 -0
- package/dist/parquetjs/codecs/rle.js +140 -134
- package/dist/parquetjs/codecs/rle.js.map +1 -0
- package/dist/parquetjs/compression.js +48 -154
- package/dist/parquetjs/compression.js.map +1 -0
- package/dist/parquetjs/encoder/writer.js +383 -440
- package/dist/parquetjs/encoder/writer.js.map +1 -0
- package/dist/parquetjs/file.js +66 -85
- package/dist/parquetjs/file.js.map +1 -0
- package/dist/{es5/parquetjs → parquetjs}/modules.d.ts +0 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +7 -14
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/BsonType.js +37 -56
- package/dist/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +215 -205
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +212 -207
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +422 -391
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +90 -99
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +12 -19
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +26 -33
- package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +162 -162
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +234 -224
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DateType.js +37 -56
- package/dist/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DecimalType.js +91 -101
- package/dist/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -118
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Encoding.js +12 -19
- package/dist/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/EnumType.js +37 -56
- package/dist/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -14
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +264 -250
- package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +37 -56
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IntType.js +91 -101
- package/dist/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/JsonType.js +37 -56
- package/dist/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js +89 -98
- package/dist/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ListType.js +37 -56
- package/dist/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js +450 -363
- package/dist/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MapType.js +37 -56
- package/dist/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +37 -56
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +37 -56
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/NullType.js +37 -56
- package/dist/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +80 -92
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +115 -123
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js +231 -214
- package/dist/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js +124 -137
- package/dist/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageType.js +8 -15
- package/dist/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js +172 -176
- package/dist/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +268 -237
- package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +115 -123
- package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js +179 -172
- package/dist/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/StringType.js +37 -56
- package/dist/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js +92 -102
- package/dist/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +120 -121
- package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js +92 -102
- package/dist/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Type.js +12 -19
- package/dist/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +37 -56
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/UUIDType.js +37 -56
- package/dist/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/index.js +44 -61
- package/dist/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/parquetjs/parser/decoders.js +283 -301
- package/dist/{es5/parquetjs → parquetjs}/parser/decoders.js.map +1 -1
- package/dist/parquetjs/parser/parquet-cursor.js +85 -69
- package/dist/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/parquetjs/parser/parquet-envelope-reader.js +146 -127
- package/dist/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/parquetjs/parser/parquet-reader.js +113 -127
- package/dist/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/parquetjs/schema/declare.js +12 -9
- package/dist/parquetjs/schema/declare.js.map +1 -0
- package/dist/parquetjs/schema/schema.js +162 -148
- package/dist/{es5/parquetjs → parquetjs}/schema/schema.js.map +1 -1
- package/dist/parquetjs/schema/shred.js +151 -214
- package/dist/parquetjs/schema/shred.js.map +1 -0
- package/dist/parquetjs/schema/types.js +415 -357
- package/dist/parquetjs/schema/types.js.map +1 -0
- package/dist/parquetjs/utils/buffer-utils.js +10 -20
- package/dist/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/parquetjs/utils/file-utils.js +28 -40
- package/dist/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/parquetjs/utils/read-utils.js +95 -99
- package/dist/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/workers/parquet-worker.js +4 -5
- package/dist/workers/parquet-worker.js.map +1 -0
- package/package.json +8 -8
- package/dist/es5/bundle.js +0 -7
- package/dist/es5/bundle.js.map +0 -1
- package/dist/es5/constants.js +0 -17
- package/dist/es5/constants.js.map +0 -1
- package/dist/es5/index.js +0 -82
- package/dist/es5/index.js.map +0 -1
- package/dist/es5/lib/convert-schema.js +0 -82
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js +0 -173
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -53
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/es5/parquet-loader.js +0 -30
- package/dist/es5/parquet-loader.js.map +0 -1
- package/dist/es5/parquet-writer.js +0 -25
- package/dist/es5/parquet-writer.js.map +0 -1
- package/dist/es5/parquetjs/codecs/declare.js +0 -2
- package/dist/es5/parquetjs/codecs/dictionary.js +0 -30
- package/dist/es5/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/es5/parquetjs/codecs/index.js +0 -56
- package/dist/es5/parquetjs/codecs/index.js.map +0 -1
- package/dist/es5/parquetjs/codecs/plain.js +0 -287
- package/dist/es5/parquetjs/codecs/plain.js.map +0 -1
- package/dist/es5/parquetjs/codecs/rle.js +0 -174
- package/dist/es5/parquetjs/codecs/rle.js.map +0 -1
- package/dist/es5/parquetjs/compression.js +0 -167
- package/dist/es5/parquetjs/compression.js.map +0 -1
- package/dist/es5/parquetjs/encoder/writer.js +0 -875
- package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
- package/dist/es5/parquetjs/file.js +0 -103
- package/dist/es5/parquetjs/file.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +0 -241
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +0 -245
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +0 -449
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +0 -34
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +0 -191
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -258
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +0 -122
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -143
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +0 -298
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +0 -122
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +0 -120
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +0 -508
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +0 -114
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +0 -145
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +0 -258
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +0 -155
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +0 -16
- package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +0 -206
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +0 -290
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +0 -145
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +0 -207
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +0 -156
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +0 -565
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/es5/parquetjs/parser/decoders.js +0 -489
- package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -215
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -413
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +0 -364
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/es5/parquetjs/schema/declare.js +0 -25
- package/dist/es5/parquetjs/schema/declare.js.map +0 -1
- package/dist/es5/parquetjs/schema/schema.js +0 -203
- package/dist/es5/parquetjs/schema/shred.js +0 -223
- package/dist/es5/parquetjs/schema/shred.js.map +0 -1
- package/dist/es5/parquetjs/schema/types.js +0 -492
- package/dist/es5/parquetjs/schema/types.js.map +0 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -21
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/es5/parquetjs/utils/file-utils.js +0 -55
- package/dist/es5/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/es5/parquetjs/utils/read-utils.js +0 -159
- package/dist/es5/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/es5/workers/parquet-worker.js +0 -8
- package/dist/es5/workers/parquet-worker.js.map +0 -1
- package/dist/esm/bundle.js +0 -5
- package/dist/esm/bundle.js.map +0 -1
- package/dist/esm/constants.js +0 -6
- package/dist/esm/constants.js.map +0 -1
- package/dist/esm/index.js +0 -15
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/lib/convert-schema.js +0 -71
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js +0 -28
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -9
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/parquet-loader.js +0 -22
- package/dist/esm/parquet-loader.js.map +0 -1
- package/dist/esm/parquet-writer.js +0 -18
- package/dist/esm/parquet-writer.js.map +0 -1
- package/dist/esm/parquetjs/LICENSE +0 -20
- package/dist/esm/parquetjs/codecs/declare.js +0 -2
- package/dist/esm/parquetjs/codecs/declare.js.map +0 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +0 -12
- package/dist/esm/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/esm/parquetjs/codecs/index.js +0 -23
- package/dist/esm/parquetjs/codecs/index.js.map +0 -1
- package/dist/esm/parquetjs/codecs/plain.js +0 -270
- package/dist/esm/parquetjs/codecs/plain.js.map +0 -1
- package/dist/esm/parquetjs/codecs/rle.js +0 -151
- package/dist/esm/parquetjs/codecs/rle.js.map +0 -1
- package/dist/esm/parquetjs/compression.js +0 -62
- package/dist/esm/parquetjs/compression.js.map +0 -1
- package/dist/esm/parquetjs/encoder/writer.js +0 -421
- package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
- package/dist/esm/parquetjs/file.js +0 -80
- package/dist/esm/parquetjs/file.js.map +0 -1
- package/dist/esm/parquetjs/modules.d.ts +0 -21
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +0 -8
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +0 -217
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +0 -218
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +0 -429
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +0 -27
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +0 -166
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -236
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -117
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -8
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +0 -270
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/IntType.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +0 -93
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +0 -467
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +0 -85
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +0 -119
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +0 -233
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +0 -128
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageType.js +0 -9
- package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +0 -178
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +0 -270
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +0 -119
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js +0 -183
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js +0 -96
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +0 -126
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +0 -96
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Type.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/index.js +0 -44
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/esm/parquetjs/parser/decoders.js +0 -300
- package/dist/esm/parquetjs/parser/decoders.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -90
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -155
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +0 -120
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/esm/parquetjs/schema/declare.js +0 -13
- package/dist/esm/parquetjs/schema/declare.js.map +0 -1
- package/dist/esm/parquetjs/schema/schema.js +0 -176
- package/dist/esm/parquetjs/schema/schema.js.map +0 -1
- package/dist/esm/parquetjs/schema/shred.js +0 -162
- package/dist/esm/parquetjs/schema/shred.js.map +0 -1
- package/dist/esm/parquetjs/schema/types.js +0 -476
- package/dist/esm/parquetjs/schema/types.js.map +0 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +0 -12
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/esm/parquetjs/utils/file-utils.js +0 -34
- package/dist/esm/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/esm/parquetjs/utils/read-utils.js +0 -105
- package/dist/esm/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/esm/workers/parquet-worker.js +0 -4
- package/dist/esm/workers/parquet-worker.js.map +0 -1
|
@@ -1,300 +0,0 @@
|
|
|
1
|
-
import { PARQUET_CODECS } from '../codecs';
|
|
2
|
-
import { ConvertedType, Encoding, FieldRepetitionType, PageType, Type } from '../parquet-thrift';
|
|
3
|
-
import { decompress } from '../compression';
|
|
4
|
-
import { PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING } from '../../constants';
|
|
5
|
-
import { decodePageHeader, getThriftEnum, getBitWidth } from '../utils/read-utils';
|
|
6
|
-
export async function decodeDataPages(buffer, options) {
|
|
7
|
-
const cursor = {
|
|
8
|
-
buffer,
|
|
9
|
-
offset: 0,
|
|
10
|
-
size: buffer.length
|
|
11
|
-
};
|
|
12
|
-
const data = {
|
|
13
|
-
rlevels: [],
|
|
14
|
-
dlevels: [],
|
|
15
|
-
values: [],
|
|
16
|
-
pageHeaders: [],
|
|
17
|
-
count: 0
|
|
18
|
-
};
|
|
19
|
-
let dictionary = options.dictionary || [];
|
|
20
|
-
|
|
21
|
-
while (cursor.offset < cursor.size && (!options.numValues || data.dlevels.length < Number(options.numValues))) {
|
|
22
|
-
const page = await decodePage(cursor, options);
|
|
23
|
-
|
|
24
|
-
if (page.dictionary) {
|
|
25
|
-
dictionary = page.dictionary;
|
|
26
|
-
continue;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
if (dictionary.length) {
|
|
30
|
-
page.values = page.values.map(value => dictionary[value]);
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
for (let index = 0; index < page.rlevels.length; index++) {
|
|
34
|
-
data.rlevels.push(page.rlevels[index]);
|
|
35
|
-
data.dlevels.push(page.dlevels[index]);
|
|
36
|
-
const value = page.values[index];
|
|
37
|
-
|
|
38
|
-
if (value !== undefined) {
|
|
39
|
-
data.values.push(value);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
data.count += page.count;
|
|
44
|
-
data.pageHeaders.push(page.pageHeader);
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
return data;
|
|
48
|
-
}
|
|
49
|
-
export async function decodePage(cursor, options) {
|
|
50
|
-
let page;
|
|
51
|
-
const {
|
|
52
|
-
pageHeader,
|
|
53
|
-
length
|
|
54
|
-
} = decodePageHeader(cursor.buffer, cursor.offset);
|
|
55
|
-
cursor.offset += length;
|
|
56
|
-
const pageType = getThriftEnum(PageType, pageHeader.type);
|
|
57
|
-
|
|
58
|
-
switch (pageType) {
|
|
59
|
-
case 'DATA_PAGE':
|
|
60
|
-
page = await decodeDataPage(cursor, pageHeader, options);
|
|
61
|
-
break;
|
|
62
|
-
|
|
63
|
-
case 'DATA_PAGE_V2':
|
|
64
|
-
page = await decodeDataPageV2(cursor, pageHeader, options);
|
|
65
|
-
break;
|
|
66
|
-
|
|
67
|
-
case 'DICTIONARY_PAGE':
|
|
68
|
-
page = {
|
|
69
|
-
dictionary: await decodeDictionaryPage(cursor, pageHeader, options),
|
|
70
|
-
pageHeader
|
|
71
|
-
};
|
|
72
|
-
break;
|
|
73
|
-
|
|
74
|
-
default:
|
|
75
|
-
throw new Error("invalid page type: ".concat(pageType));
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
return page;
|
|
79
|
-
}
|
|
80
|
-
export function decodeSchema(schemaElements, offset, len) {
|
|
81
|
-
const schema = {};
|
|
82
|
-
let next = offset;
|
|
83
|
-
|
|
84
|
-
for (let i = 0; i < len; i++) {
|
|
85
|
-
const schemaElement = schemaElements[next];
|
|
86
|
-
const repetitionType = next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
|
|
87
|
-
let optional = false;
|
|
88
|
-
let repeated = false;
|
|
89
|
-
|
|
90
|
-
switch (repetitionType) {
|
|
91
|
-
case 'REQUIRED':
|
|
92
|
-
break;
|
|
93
|
-
|
|
94
|
-
case 'OPTIONAL':
|
|
95
|
-
optional = true;
|
|
96
|
-
break;
|
|
97
|
-
|
|
98
|
-
case 'REPEATED':
|
|
99
|
-
repeated = true;
|
|
100
|
-
break;
|
|
101
|
-
|
|
102
|
-
default:
|
|
103
|
-
throw new Error('parquet: unknown repetition type');
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
if (schemaElement.num_children > 0) {
|
|
107
|
-
const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children);
|
|
108
|
-
next = res.next;
|
|
109
|
-
schema[schemaElement.name] = {
|
|
110
|
-
optional,
|
|
111
|
-
repeated,
|
|
112
|
-
fields: res.schema
|
|
113
|
-
};
|
|
114
|
-
} else {
|
|
115
|
-
const type = getThriftEnum(Type, schemaElement.type);
|
|
116
|
-
let logicalType = type;
|
|
117
|
-
|
|
118
|
-
if (schemaElement.converted_type) {
|
|
119
|
-
logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
switch (logicalType) {
|
|
123
|
-
case 'DECIMAL':
|
|
124
|
-
logicalType = "".concat(logicalType, "_").concat(type);
|
|
125
|
-
break;
|
|
126
|
-
|
|
127
|
-
default:
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
schema[schemaElement.name] = {
|
|
131
|
-
type: logicalType,
|
|
132
|
-
typeLength: schemaElement.type_length,
|
|
133
|
-
presision: schemaElement.precision,
|
|
134
|
-
scale: schemaElement.scale,
|
|
135
|
-
optional,
|
|
136
|
-
repeated
|
|
137
|
-
};
|
|
138
|
-
next++;
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
return {
|
|
143
|
-
schema,
|
|
144
|
-
offset,
|
|
145
|
-
next
|
|
146
|
-
};
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
function decodeValues(type, encoding, cursor, count, opts) {
|
|
150
|
-
if (!(encoding in PARQUET_CODECS)) {
|
|
151
|
-
throw new Error("invalid encoding: ".concat(encoding));
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
async function decodeDataPage(cursor, header, options) {
|
|
158
|
-
var _header$data_page_hea, _header$data_page_hea2, _header$data_page_hea3, _header$data_page_hea4;
|
|
159
|
-
|
|
160
|
-
const cursorEnd = cursor.offset + header.compressed_page_size;
|
|
161
|
-
const valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
|
|
162
|
-
let dataCursor = cursor;
|
|
163
|
-
|
|
164
|
-
if (options.compression !== 'UNCOMPRESSED') {
|
|
165
|
-
const valuesBuf = await decompress(options.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
166
|
-
dataCursor = {
|
|
167
|
-
buffer: valuesBuf,
|
|
168
|
-
offset: 0,
|
|
169
|
-
size: valuesBuf.length
|
|
170
|
-
};
|
|
171
|
-
cursor.offset = cursorEnd;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const rLevelEncoding = getThriftEnum(Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
|
|
175
|
-
let rLevels = new Array(valueCount);
|
|
176
|
-
|
|
177
|
-
if (options.column.rLevelMax > 0) {
|
|
178
|
-
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount, {
|
|
179
|
-
bitWidth: getBitWidth(options.column.rLevelMax),
|
|
180
|
-
disableEnvelope: false
|
|
181
|
-
});
|
|
182
|
-
} else {
|
|
183
|
-
rLevels.fill(0);
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
const dLevelEncoding = getThriftEnum(Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
|
|
187
|
-
let dLevels = new Array(valueCount);
|
|
188
|
-
|
|
189
|
-
if (options.column.dLevelMax > 0) {
|
|
190
|
-
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount, {
|
|
191
|
-
bitWidth: getBitWidth(options.column.dLevelMax),
|
|
192
|
-
disableEnvelope: false
|
|
193
|
-
});
|
|
194
|
-
} else {
|
|
195
|
-
dLevels.fill(0);
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
let valueCountNonNull = 0;
|
|
199
|
-
|
|
200
|
-
for (const dlvl of dLevels) {
|
|
201
|
-
if (dlvl === options.column.dLevelMax) {
|
|
202
|
-
valueCountNonNull++;
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
const valueEncoding = getThriftEnum(Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
|
|
207
|
-
const decodeOptions = {
|
|
208
|
-
typeLength: options.column.typeLength,
|
|
209
|
-
bitWidth: options.column.typeLength
|
|
210
|
-
};
|
|
211
|
-
const values = decodeValues(options.column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, decodeOptions);
|
|
212
|
-
return {
|
|
213
|
-
dlevels: dLevels,
|
|
214
|
-
rlevels: rLevels,
|
|
215
|
-
values,
|
|
216
|
-
count: valueCount,
|
|
217
|
-
pageHeader: header
|
|
218
|
-
};
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
async function decodeDataPageV2(cursor, header, opts) {
|
|
222
|
-
var _header$data_page_hea5, _header$data_page_hea6, _header$data_page_hea7, _header$data_page_hea8;
|
|
223
|
-
|
|
224
|
-
const cursorEnd = cursor.offset + header.compressed_page_size;
|
|
225
|
-
const valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
|
|
226
|
-
const valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
|
|
227
|
-
const valueEncoding = getThriftEnum(Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
|
|
228
|
-
let rLevels = new Array(valueCount);
|
|
229
|
-
|
|
230
|
-
if (opts.column.rLevelMax > 0) {
|
|
231
|
-
rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
232
|
-
bitWidth: getBitWidth(opts.column.rLevelMax),
|
|
233
|
-
disableEnvelope: true
|
|
234
|
-
});
|
|
235
|
-
} else {
|
|
236
|
-
rLevels.fill(0);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
let dLevels = new Array(valueCount);
|
|
240
|
-
|
|
241
|
-
if (opts.column.dLevelMax > 0) {
|
|
242
|
-
dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
243
|
-
bitWidth: getBitWidth(opts.column.dLevelMax),
|
|
244
|
-
disableEnvelope: true
|
|
245
|
-
});
|
|
246
|
-
} else {
|
|
247
|
-
dLevels.fill(0);
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
let valuesBufCursor = cursor;
|
|
251
|
-
|
|
252
|
-
if ((_header$data_page_hea8 = header.data_page_header_v2) !== null && _header$data_page_hea8 !== void 0 && _header$data_page_hea8.is_compressed) {
|
|
253
|
-
const valuesBuf = await decompress(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
254
|
-
valuesBufCursor = {
|
|
255
|
-
buffer: valuesBuf,
|
|
256
|
-
offset: 0,
|
|
257
|
-
size: valuesBuf.length
|
|
258
|
-
};
|
|
259
|
-
cursor.offset = cursorEnd;
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
const decodeOptions = {
|
|
263
|
-
typeLength: opts.column.typeLength,
|
|
264
|
-
bitWidth: opts.column.typeLength
|
|
265
|
-
};
|
|
266
|
-
const values = decodeValues(opts.column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, decodeOptions);
|
|
267
|
-
return {
|
|
268
|
-
dlevels: dLevels,
|
|
269
|
-
rlevels: rLevels,
|
|
270
|
-
values,
|
|
271
|
-
count: valueCount,
|
|
272
|
-
pageHeader: header
|
|
273
|
-
};
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
async function decodeDictionaryPage(cursor, pageHeader, options) {
|
|
277
|
-
var _pageHeader$dictionar;
|
|
278
|
-
|
|
279
|
-
const cursorEnd = cursor.offset + pageHeader.compressed_page_size;
|
|
280
|
-
let dictCursor = {
|
|
281
|
-
offset: 0,
|
|
282
|
-
buffer: cursor.buffer.slice(cursor.offset, cursorEnd),
|
|
283
|
-
size: cursorEnd - cursor.offset
|
|
284
|
-
};
|
|
285
|
-
cursor.offset = cursorEnd;
|
|
286
|
-
|
|
287
|
-
if (options.compression !== 'UNCOMPRESSED') {
|
|
288
|
-
const valuesBuf = await decompress(options.compression, dictCursor.buffer.slice(dictCursor.offset, cursorEnd), pageHeader.uncompressed_page_size);
|
|
289
|
-
dictCursor = {
|
|
290
|
-
buffer: valuesBuf,
|
|
291
|
-
offset: 0,
|
|
292
|
-
size: valuesBuf.length
|
|
293
|
-
};
|
|
294
|
-
cursor.offset = cursorEnd;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
const numValues = (pageHeader === null || pageHeader === void 0 ? void 0 : (_pageHeader$dictionar = pageHeader.dictionary_page_header) === null || _pageHeader$dictionar === void 0 ? void 0 : _pageHeader$dictionar.num_values) || 0;
|
|
298
|
-
return decodeValues(options.column.primitiveType, options.column.encoding, dictCursor, numValues, options).map(d => d.toString());
|
|
299
|
-
}
|
|
300
|
-
//# sourceMappingURL=decoders.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parser/decoders.ts"],"names":["PARQUET_CODECS","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","decompress","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","decodePageHeader","getThriftEnum","getBitWidth","decodeDataPages","buffer","options","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","page","decodePage","map","value","index","push","undefined","pageHeader","pageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","repetition_type","optional","repeated","num_children","res","name","fields","logicalType","converted_type","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","valuesBuf","slice","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","dictCursor","dictionary_page_header","d","toString"],"mappings":"AAUA,SAA2CA,cAA3C,QAAgE,WAAhE;AACA,SACEC,aADF,EAEEC,QAFF,EAGEC,mBAHF,EAKEC,QALF,EAOEC,IAPF,QAQO,mBARP;AASA,SAAQC,UAAR,QAAyB,gBAAzB;AACA,SAAQC,kBAAR,EAA4BC,sBAA5B,QAAyD,iBAAzD;AACA,SAAQC,gBAAR,EAA0BC,aAA1B,EAAyCC,WAAzC,QAA2D,qBAA3D;AASA,OAAO,eAAeC,eAAf,CACLC,MADK,EAELC,OAFK,EAGiB;AACtB,QAAMC,MAAoB,GAAG;AAC3BF,IAAAA,MAD2B;AAE3BG,IAAAA,MAAM,EAAE,CAFmB;AAG3BC,IAAAA,IAAI,EAAEJ,MAAM,CAACK;AAHc,GAA7B;AAMA,QAAMC,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBC,IAAAA,WAAW,EAAE,EAJW;AAKxBC,IAAAA,KAAK,EAAE;AALiB,GAA1B;AAQA,MAAIC,UAAU,GAAGX,OAAO,CAACW,UAAR,IAAsB,EAAvC;;AAEA,SAEEV,MAAM,CAACC,MAAP,GAAgBD,MAAM,CAACE,IAAvB,KACC,CAACH,OAAO,CAACY,SAAT,IAAsBP,IAAI,CAACE,OAAL,CAAaH,MAAb,GAAsBS,MAAM,CAACb,OAAO,CAACY,SAAT,CADnD,CAFF,EAIE;AAEA,UAAME,IAAI,GAAG,MAAMC,UAAU,CAACd,MAAD,EAASD,OAAT,CAA7B;;AAEA,QAAIc,IAAI,CAACH,UAAT,EAAqB;AACnBA,MAAAA,UAAU,GAAGG,IAAI,CAACH,UAAlB;AAEA;AACD;;AAED,QAAIA,UAAU,CAACP,MAAf,EAAuB;AAErBU,MAAAA,IAAI,CAACN,MAAL,GAAcM,IAAI,CAACN,MAAL,CAAYQ,GAAZ,CAAiBC,KAAD,IAAWN,UAAU,CAACM,KAAD,CAArC,CAAd;AACD;;AAED,SAAK,IAAIC,KAAK,GAAG,CAAjB,EAAoBA,KAAK,GAAGJ,IAAI,CAACR,OAAL,CAAaF,MAAzC,EAAiDc,KAAK,EAAtD,EAA0D;AACxDb,MAAAA,IAAI,CAACC,OAAL,CAAaa,IAAb,CAAkBL,IAAI,CAACR,OAAL,CAAaY,KAAb,CAAlB;AACAb,MAAAA,IAAI,CAACE,OAAL,CAAaY,IAAb,CAAkBL,IAAI,CAACP,OAAL,CAAaW,KAAb,CAAlB;AACA,YAAMD,KAAK,GAAGH,IAAI,CAACN,MAAL,CAAYU,KAAZ,CAAd;;AAEA,UAAID,KAAK,KAAKG,SAAd,EAAyB;AACvBf,QAAAA,IAAI,CAACG,MAAL,CAAYW,IAAZ,CAAiBF,KAAjB;AACD;AACF;;AAEDZ,IAAAA,IAAI,CAACK,KAAL,IAAcI,IAAI,CAACJ,KAAnB;AACAL,IAAAA,IAAI,CAACI,WAAL,CAAiBU,IAAjB,CAAsBL,IAAI,CAACO,UAA3B;AACD;;AAED,SAAOhB,IAAP;AACD;AAOD,OAAO,eAAeU,UAAf,CACLd,MADK,EAELD,OAFK,EAGqB;AAC1B,MAAIc,IAAJ;AACA,QAAM;AAACO,IAAAA,UAAD;AAAajB,IAAAA;AAAb,MAAuBT,gBAAgB,CAACM,MAAM,CAACF,MAAR,EAAgBE,MAAM,CAACC,MAAvB,CAA7C;AACAD,EAAAA,MAAM,CAACC,MAAP,IAAiBE,MAAjB;AAEA,QAAMkB,QAAQ,GAAG1B,aAAa,CAACN,QAAD,EAAW+B,UAAU,CAACE,IAAtB,CAA9B;;AAEA,UAAQD,QAAR;AACE,SAAK,WAAL;AACER,MAAAA,IAAI,GAAG,MAAMU,cAAc,CAACvB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA3B;AACA;;AACF,SAAK,cAAL;AACEc,MAAAA,IAAI,GAAG,MAAMW,gBAAgB,CAACxB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA7B;AACA;;AACF,SAAK,iBAAL;AACEc,MAAAA,IAAI,GAAG;AACLH,QAAAA,UAAU,EAAE,MAAMe,oBAAoB,CAACzB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CADjC;AAELqB,QAAAA;AAFK,OAAP;AAIA;;AACF;AACE,YAAM,IAAIM,KAAJ,8BAAgCL,QAAhC,EAAN;AAdJ;;AAiBA,SAAOR,IAAP;AACD;AAYD,OAAO,SAASc,YAAT,CACLC,cADK,EAEL3B,MAFK,EAGL4B,GAHK,EAQL;AACA,QAAMC,MAAwB,GAAG,EAAjC;AACA,MAAIC,IAAI,GAAG9B,MAAX;;AACA,OAAK,IAAI+B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,GAApB,EAAyBG,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGL,cAAc,CAACG,IAAD,CAApC;AAEA,UAAMG,cAAc,GAClBH,IAAI,GAAG,CAAP,GAAWpC,aAAa,CAACP,mBAAD,EAAsB6C,aAAa,CAACE,eAApC,CAAxB,GAAgF,MADlF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAIX,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIO,aAAa,CAACK,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMC,GAAG,GAAGZ,YAAY,CAACC,cAAD,EAAiBG,IAAI,GAAG,CAAxB,EAA2BE,aAAa,CAACK,YAAzC,CAAxB;AACAP,MAAAA,IAAI,GAAGQ,GAAG,CAACR,IAAX;AACAD,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAE3BJ,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BI,QAAAA,MAAM,EAAEF,GAAG,CAACT;AAJe,OAA7B;AAMD,KATD,MASO;AACL,YAAMR,IAAI,GAAG3B,aAAa,CAACL,IAAD,EAAO2C,aAAa,CAACX,IAArB,CAA1B;AACA,UAAIoB,WAAW,GAAGpB,IAAlB;;AAEA,UAAIW,aAAa,CAACU,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG/C,aAAa,CAACT,aAAD,EAAgB+C,aAAa,CAACU,cAA9B,CAA3B;AACD;;AAED,cAAQD,WAAR;AACE,aAAK,SAAL;AACEA,UAAAA,WAAW,aAAMA,WAAN,cAAqBpB,IAArB,CAAX;AACA;;AACF;AAJF;;AAOAQ,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAC3BlB,QAAAA,IAAI,EAAEoB,WADqB;AAE3BE,QAAAA,UAAU,EAAEX,aAAa,CAACY,WAFC;AAG3BC,QAAAA,SAAS,EAAEb,aAAa,CAACc,SAHE;AAI3BC,QAAAA,KAAK,EAAEf,aAAa,CAACe,KAJM;AAK3BZ,QAAAA,QAL2B;AAM3BC,QAAAA;AAN2B,OAA7B;AAQAN,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACD,IAAAA,MAAD;AAAS7B,IAAAA,MAAT;AAAiB8B,IAAAA;AAAjB,GAAP;AACD;;AAKD,SAASkB,YAAT,CACE3B,IADF,EAEE4B,QAFF,EAGElD,MAHF,EAIES,KAJF,EAKE0C,IALF,EAMS;AACP,MAAI,EAAED,QAAQ,IAAIjE,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyC,KAAJ,6BAA+BwB,QAA/B,EAAN;AACD;;AACD,SAAOjE,cAAc,CAACiE,QAAD,CAAd,CAAyBD,YAAzB,CAAsC3B,IAAtC,EAA4CtB,MAA5C,EAAoDS,KAApD,EAA2D0C,IAA3D,CAAP;AACD;;AAQD,eAAe5B,cAAf,CACEvB,MADF,EAEEoD,MAFF,EAGErD,OAHF,EAI4B;AAAA;;AAC1B,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAGA,MAAIC,UAAU,GAAG1D,MAAjB;;AAEA,MAAID,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAKAJ,IAAAA,UAAU,GAAG;AACX5D,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAKAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAGD,QAAMU,cAAc,GAAGpE,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBQ,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AAEA,MAAIxD,OAAO,CAACoE,MAAR,CAAeC,SAAf,GAA2B,CAA/B,EAAkC;AAChCH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBuE,cAArB,EAAqCL,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeC,SAAhB,CAD6D;AAElFE,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAG7E,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBiB,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIxD,OAAO,CAACoE,MAAR,CAAeQ,SAAf,GAA2B,CAA/B,EAAkC;AAChCD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBgF,cAArB,EAAqCd,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeQ,SAAhB,CAD6D;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAK9E,OAAO,CAACoE,MAAR,CAAeQ,SAA5B,EAAuC;AACrCC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAGnF,aAAa,CAACR,QAAD,4BAAWiE,MAAM,CAACI,gBAAlB,2DAAW,uBAAyBN,QAApC,CAAnC;AACA,QAAM6B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAE7C,OAAO,CAACoE,MAAR,CAAevB,UADP;AAEpByB,IAAAA,QAAQ,EAAEtE,OAAO,CAACoE,MAAR,CAAevB;AAFL,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBlD,OAAO,CAACoE,MAAR,CAAea,aADU,EAEzBF,aAFyB,EAGzBpB,UAHyB,EAIzBkB,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AASD,eAAe5B,gBAAf,CACExB,MADF,EAEEoD,MAFF,EAGED,IAHF,EAI4B;AAAA;;AAC1B,QAAME,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AAEA,QAAMC,UAAU,6BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BxB,UAA/C;AAEA,QAAMmB,iBAAiB,GAAGrB,UAAU,8BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMJ,aAAa,GAAGnF,aAAa,CACjCR,QADiC,4BAEjCiE,MAAM,CAAC6B,mBAF0B,2DAEjC,uBAA4B/B,QAFK,CAAnC;AAOA,MAAIe,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYC,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYC,SAAb,CADiE;AAEtFE,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYQ,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYQ,SAAb,CADiE;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIY,eAAe,GAAGnF,MAAtB;;AAEA,gCAAIoD,MAAM,CAAC6B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMxB,SAAS,GAAG,MAAMrE,UAAU,CAChC4D,IAAI,CAACQ,WAD2B,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAMAqB,IAAAA,eAAe,GAAG;AAChBrF,MAAAA,MAAM,EAAE8D,SADQ;AAEhB3D,MAAAA,MAAM,EAAE,CAFQ;AAGhBC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHA,KAAlB;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM0B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAEO,IAAI,CAACgB,MAAL,CAAYvB,UADJ;AAEpByB,IAAAA,QAAQ,EAAElB,IAAI,CAACgB,MAAL,CAAYvB;AAFF,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBE,IAAI,CAACgB,MAAL,CAAYa,aADa,EAEzBF,aAFyB,EAGzBK,eAHyB,EAIzBP,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AAQD,eAAe3B,oBAAf,CACEzB,MADF,EAEEoB,UAFF,EAGErB,OAHF,EAIqB;AAAA;;AACnB,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmB,UAAU,CAACkC,oBAA7C;AAEA,MAAI+B,UAAU,GAAG;AACfpF,IAAAA,MAAM,EAAE,CADO;AAEfH,IAAAA,MAAM,EAAEE,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFO;AAGfnD,IAAAA,IAAI,EAAEmD,SAAS,GAAGrD,MAAM,CAACC;AAHV,GAAjB;AAMAD,EAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;;AAEA,MAAItD,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC0B,UAAU,CAACvF,MAAX,CAAkB+D,KAAlB,CAAwBwB,UAAU,CAACpF,MAAnC,EAA2CoD,SAA3C,CAFgC,EAGhCjC,UAAU,CAAC0C,sBAHqB,CAAlC;AAMAuB,IAAAA,UAAU,GAAG;AACXvF,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM1C,SAAS,GAAG,CAAAS,UAAU,SAAV,IAAAA,UAAU,WAAV,qCAAAA,UAAU,CAAEkE,sBAAZ,gFAAoC7B,UAApC,KAAkD,CAApE;AAEA,SAAOR,YAAY,CACjBlD,OAAO,CAACoE,MAAR,CAAea,aADE,EAEjBjF,OAAO,CAACoE,MAAR,CAAejB,QAFE,EAGjBmC,UAHiB,EAIjB1E,SAJiB,EAKjBZ,OALiB,CAAZ,CAMLgB,GANK,CAMAwE,CAAD,IAAOA,CAAC,CAACC,QAAF,EANN,CAAP;AAOD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetData,\n ParquetOptions,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n options: ParquetOptions\n): Promise<ParquetData> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = options.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!options.numValues || data.dlevels.length < Number(options.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, options);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param options\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n let page;\n const {pageHeader, length} = decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, options);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, options);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, options),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (options.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (options.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === options.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) as ParquetCodec;\n const decodeOptions = {\n typeLength: options.column.typeLength,\n bitWidth: options.column.typeLength\n };\n\n const values = decodeValues(\n options.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n opts: any\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (opts.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (opts.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n opts.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: opts.column.typeLength,\n bitWidth: opts.column.typeLength\n };\n\n const values = decodeValues(\n opts.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param options\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n options: ParquetOptions\n): Promise<string[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n return decodeValues(\n options.column.primitiveType!,\n options.column.encoding!,\n dictCursor,\n numValues,\n options as ParquetCodecOptions\n ).map((d) => d.toString());\n}\n"],"file":"decoders.js"}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import _defineProperty from "@babel/runtime/helpers/esm/defineProperty";
|
|
2
|
-
|
|
3
|
-
let _Symbol$asyncIterator;
|
|
4
|
-
|
|
5
|
-
import { materializeRecords } from '../schema/shred';
|
|
6
|
-
_Symbol$asyncIterator = Symbol.asyncIterator;
|
|
7
|
-
export class ParquetCursor {
|
|
8
|
-
constructor(metadata, envelopeReader, schema, columnList) {
|
|
9
|
-
_defineProperty(this, "metadata", void 0);
|
|
10
|
-
|
|
11
|
-
_defineProperty(this, "envelopeReader", void 0);
|
|
12
|
-
|
|
13
|
-
_defineProperty(this, "schema", void 0);
|
|
14
|
-
|
|
15
|
-
_defineProperty(this, "columnList", void 0);
|
|
16
|
-
|
|
17
|
-
_defineProperty(this, "rowGroup", []);
|
|
18
|
-
|
|
19
|
-
_defineProperty(this, "rowGroupIndex", void 0);
|
|
20
|
-
|
|
21
|
-
this.metadata = metadata;
|
|
22
|
-
this.envelopeReader = envelopeReader;
|
|
23
|
-
this.schema = schema;
|
|
24
|
-
this.columnList = columnList;
|
|
25
|
-
this.rowGroupIndex = 0;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
async next() {
|
|
29
|
-
if (this.rowGroup.length === 0) {
|
|
30
|
-
if (this.rowGroupIndex >= this.metadata.row_groups.length) {
|
|
31
|
-
return null;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
const rowBuffer = await this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
|
|
35
|
-
this.rowGroup = materializeRecords(this.schema, rowBuffer);
|
|
36
|
-
this.rowGroupIndex++;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
return this.rowGroup.shift();
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
rewind() {
|
|
43
|
-
this.rowGroup = [];
|
|
44
|
-
this.rowGroupIndex = 0;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
[_Symbol$asyncIterator]() {
|
|
48
|
-
let done = false;
|
|
49
|
-
return {
|
|
50
|
-
next: async () => {
|
|
51
|
-
if (done) {
|
|
52
|
-
return {
|
|
53
|
-
done,
|
|
54
|
-
value: null
|
|
55
|
-
};
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
const value = await this.next();
|
|
59
|
-
|
|
60
|
-
if (value === null) {
|
|
61
|
-
return {
|
|
62
|
-
done: true,
|
|
63
|
-
value
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
return {
|
|
68
|
-
done: false,
|
|
69
|
-
value
|
|
70
|
-
};
|
|
71
|
-
},
|
|
72
|
-
return: async () => {
|
|
73
|
-
done = true;
|
|
74
|
-
return {
|
|
75
|
-
done,
|
|
76
|
-
value: null
|
|
77
|
-
};
|
|
78
|
-
},
|
|
79
|
-
throw: async () => {
|
|
80
|
-
done = true;
|
|
81
|
-
return {
|
|
82
|
-
done: true,
|
|
83
|
-
value: null
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
}
|
|
90
|
-
//# sourceMappingURL=parquet-cursor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parser/parquet-cursor.ts"],"names":["materializeRecords","Symbol","asyncIterator","ParquetCursor","constructor","metadata","envelopeReader","schema","columnList","rowGroupIndex","next","rowGroup","length","row_groups","rowBuffer","readRowGroup","shift","rewind","done","value","return","throw"],"mappings":";;;;AAKA,SAAQA,kBAAR,QAAiC,iBAAjC;wBAiEGC,MAAM,CAACC,a;AA5DV,OAAO,MAAMC,aAAN,CAAmD;AAcxDC,EAAAA,WAAW,CACTC,QADS,EAETC,cAFS,EAGTC,MAHS,EAITC,UAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA,sCAdiC,EAcjC;;AAAA;;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMS,QAAJC,IAAI,GAAwB;AAChC,QAAI,KAAKC,QAAL,CAAcC,MAAd,KAAyB,CAA7B,EAAgC;AAC9B,UAAI,KAAKH,aAAL,IAAsB,KAAKJ,QAAL,CAAcQ,UAAd,CAAyBD,MAAnD,EAA2D;AAEzD,eAAO,IAAP;AACD;;AACD,YAAME,SAAS,GAAG,MAAM,KAAKR,cAAL,CAAoBS,YAApB,CACtB,KAAKR,MADiB,EAEtB,KAAKF,QAAL,CAAcQ,UAAd,CAAyB,KAAKJ,aAA9B,CAFsB,EAGtB,KAAKD,UAHiB,CAAxB;AAKA,WAAKG,QAAL,GAAgBX,kBAAkB,CAAC,KAAKO,MAAN,EAAcO,SAAd,CAAlC;AACA,WAAKL,aAAL;AACD;;AACD,WAAO,KAAKE,QAAL,CAAcK,KAAd,EAAP;AACD;;AAKDC,EAAAA,MAAM,GAAS;AACb,SAAKN,QAAL,GAAgB,EAAhB;AACA,SAAKF,aAAL,GAAqB,CAArB;AACD;;AAMD,4BAA2C;AACzC,QAAIS,IAAI,GAAG,KAAX;AACA,WAAO;AACLR,MAAAA,IAAI,EAAE,YAAY;AAChB,YAAIQ,IAAJ,EAAU;AACR,iBAAO;AAACA,YAAAA,IAAD;AAAOC,YAAAA,KAAK,EAAE;AAAd,WAAP;AACD;;AACD,cAAMA,KAAK,GAAG,MAAM,KAAKT,IAAL,EAApB;;AACA,YAAIS,KAAK,KAAK,IAAd,EAAoB;AAClB,iBAAO;AAACD,YAAAA,IAAI,EAAE,IAAP;AAAaC,YAAAA;AAAb,WAAP;AACD;;AACD,eAAO;AAACD,UAAAA,IAAI,EAAE,KAAP;AAAcC,UAAAA;AAAd,SAAP;AACD,OAVI;AAWLC,MAAAA,MAAM,EAAE,YAAY;AAClBF,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAD;AAAOC,UAAAA,KAAK,EAAE;AAAd,SAAP;AACD,OAdI;AAeLE,MAAAA,KAAK,EAAE,YAAY;AACjBH,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAI,EAAE,IAAP;AAAaC,UAAAA,KAAK,EAAE;AAApB,SAAP;AACD;AAlBI,KAAP;AAoBD;;AAlFuD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {FileMetaData} from '../parquet-thrift';\nimport {ParquetEnvelopeReader} from './parquet-envelope-reader';\nimport {ParquetSchema} from '../schema/schema';\nimport {ParquetRecord} from '../schema/declare';\nimport {materializeRecords} from '../schema/shred';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[] = [];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n"],"file":"parquet-cursor.js"}
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
import _defineProperty from "@babel/runtime/helpers/esm/defineProperty";
|
|
2
|
-
import { PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED } from '../../constants';
|
|
3
|
-
import { CompressionCodec, Type } from '../parquet-thrift';
|
|
4
|
-
import { decodeFileMetadata, getThriftEnum, fieldIndexOf } from '../utils/read-utils';
|
|
5
|
-
import { decodeDataPages, decodePage } from './decoders';
|
|
6
|
-
const DEFAULT_DICTIONARY_SIZE = 1e6;
|
|
7
|
-
export class ParquetEnvelopeReader {
|
|
8
|
-
static async openBuffer(buffer) {
|
|
9
|
-
const readFn = (position, length) => Promise.resolve(buffer.slice(position, position + length));
|
|
10
|
-
|
|
11
|
-
const closeFn = () => Promise.resolve();
|
|
12
|
-
|
|
13
|
-
return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
constructor(read, close, fileSize, options) {
|
|
17
|
-
_defineProperty(this, "read", void 0);
|
|
18
|
-
|
|
19
|
-
_defineProperty(this, "close", void 0);
|
|
20
|
-
|
|
21
|
-
_defineProperty(this, "fileSize", void 0);
|
|
22
|
-
|
|
23
|
-
_defineProperty(this, "defaultDictionarySize", void 0);
|
|
24
|
-
|
|
25
|
-
this.read = read;
|
|
26
|
-
this.close = close;
|
|
27
|
-
this.fileSize = fileSize;
|
|
28
|
-
this.defaultDictionarySize = (options === null || options === void 0 ? void 0 : options.defaultDictionarySize) || DEFAULT_DICTIONARY_SIZE;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
async readHeader() {
|
|
32
|
-
const buffer = await this.read(0, PARQUET_MAGIC.length);
|
|
33
|
-
const magic = buffer.toString();
|
|
34
|
-
|
|
35
|
-
switch (magic) {
|
|
36
|
-
case PARQUET_MAGIC:
|
|
37
|
-
break;
|
|
38
|
-
|
|
39
|
-
case PARQUET_MAGIC_ENCRYPTED:
|
|
40
|
-
throw new Error('Encrypted parquet file not supported');
|
|
41
|
-
|
|
42
|
-
default:
|
|
43
|
-
throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
async readRowGroup(schema, rowGroup, columnList) {
|
|
48
|
-
const buffer = {
|
|
49
|
-
rowCount: Number(rowGroup.num_rows),
|
|
50
|
-
columnData: {}
|
|
51
|
-
};
|
|
52
|
-
|
|
53
|
-
for (const colChunk of rowGroup.columns) {
|
|
54
|
-
const colMetadata = colChunk.meta_data;
|
|
55
|
-
const colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
|
|
56
|
-
|
|
57
|
-
if (columnList.length > 0 && fieldIndexOf(columnList, colKey) < 0) {
|
|
58
|
-
continue;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
buffer.columnData[colKey.join()] = await this.readColumnChunk(schema, colChunk);
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
return buffer;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
async readColumnChunk(schema, colChunk) {
|
|
68
|
-
var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
|
|
69
|
-
|
|
70
|
-
if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
|
|
71
|
-
throw new Error('external references are not supported');
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
const field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
75
|
-
const type = getThriftEnum(Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
76
|
-
|
|
77
|
-
if (type !== field.primitiveType) {
|
|
78
|
-
throw new Error("chunk type not matching schema: ".concat(type));
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
const compression = getThriftEnum(CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
82
|
-
const pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
83
|
-
let pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
84
|
-
|
|
85
|
-
if (!colChunk.file_path) {
|
|
86
|
-
var _colChunk$meta_data6;
|
|
87
|
-
|
|
88
|
-
pagesSize = Math.min(this.fileSize - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
const options = {
|
|
92
|
-
type,
|
|
93
|
-
rLevelMax: field.rLevelMax,
|
|
94
|
-
dLevelMax: field.dLevelMax,
|
|
95
|
-
compression,
|
|
96
|
-
column: field,
|
|
97
|
-
numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
|
|
98
|
-
dictionary: []
|
|
99
|
-
};
|
|
100
|
-
let dictionary;
|
|
101
|
-
const dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
|
|
102
|
-
|
|
103
|
-
if (dictionaryPageOffset) {
|
|
104
|
-
const dictionaryOffset = Number(dictionaryPageOffset);
|
|
105
|
-
dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
|
|
109
|
-
const pagesBuf = await this.read(pagesOffset, pagesSize);
|
|
110
|
-
return await decodeDataPages(pagesBuf, { ...options,
|
|
111
|
-
dictionary
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
async getDictionary(dictionaryPageOffset, options, pagesOffset) {
|
|
116
|
-
if (dictionaryPageOffset === 0) {
|
|
117
|
-
return [];
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
const dictionarySize = Math.min(this.fileSize - dictionaryPageOffset, this.defaultDictionarySize);
|
|
121
|
-
const pagesBuf = await this.read(dictionaryPageOffset, dictionarySize);
|
|
122
|
-
const cursor = {
|
|
123
|
-
buffer: pagesBuf,
|
|
124
|
-
offset: 0,
|
|
125
|
-
size: pagesBuf.length
|
|
126
|
-
};
|
|
127
|
-
const decodedPage = await decodePage(cursor, options);
|
|
128
|
-
return decodedPage.dictionary;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
async readFooter() {
|
|
132
|
-
const trailerLen = PARQUET_MAGIC.length + 4;
|
|
133
|
-
const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);
|
|
134
|
-
const magic = trailerBuf.slice(4).toString();
|
|
135
|
-
|
|
136
|
-
if (magic !== PARQUET_MAGIC) {
|
|
137
|
-
throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
const metadataSize = trailerBuf.readUInt32LE(0);
|
|
141
|
-
const metadataOffset = this.fileSize - metadataSize - trailerLen;
|
|
142
|
-
|
|
143
|
-
if (metadataOffset < PARQUET_MAGIC.length) {
|
|
144
|
-
throw new Error("Invalid metadata size ".concat(metadataOffset));
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
const metadataBuf = await this.read(metadataOffset, metadataSize);
|
|
148
|
-
const {
|
|
149
|
-
metadata
|
|
150
|
-
} = decodeFileMetadata(metadataBuf);
|
|
151
|
-
return metadata;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
}
|
|
155
|
-
//# sourceMappingURL=parquet-envelope-reader.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parser/parquet-envelope-reader.ts"],"names":["PARQUET_MAGIC","PARQUET_MAGIC_ENCRYPTED","CompressionCodec","Type","decodeFileMetadata","getThriftEnum","fieldIndexOf","decodeDataPages","decodePage","DEFAULT_DICTIONARY_SIZE","ParquetEnvelopeReader","openBuffer","buffer","readFn","position","length","Promise","resolve","slice","closeFn","constructor","read","close","fileSize","options","defaultDictionarySize","readHeader","magic","toString","Error","readRowGroup","schema","rowGroup","columnList","rowCount","Number","num_rows","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","join","readColumnChunk","file_path","undefined","field","findField","type","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","Math","min","rLevelMax","dLevelMax","column","numValues","num_values","dictionary","dictionaryPageOffset","dictionary_page_offset","dictionaryOffset","getDictionary","pagesBuf","dictionarySize","cursor","offset","size","decodedPage","readFooter","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","metadata"],"mappings":";AAEA,SAAQA,aAAR,EAAuBC,uBAAvB,QAAqD,iBAArD;AACA,SAAqBC,gBAArB,EAA+DC,IAA/D,QAA0E,mBAA1E;AAQA,SAAQC,kBAAR,EAA4BC,aAA5B,EAA2CC,YAA3C,QAA8D,qBAA9D;AACA,SAAQC,eAAR,EAAyBC,UAAzB,QAA0C,YAA1C;AAEA,MAAMC,uBAAuB,GAAG,GAAhC;AAQA,OAAO,MAAMC,qBAAN,CAA4B;AAUV,eAAVC,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMC,MAAM,GAAG,CAACC,QAAD,EAAmBC,MAAnB,KACbC,OAAO,CAACC,OAAR,CAAgBL,MAAM,CAACM,KAAP,CAAaJ,QAAb,EAAuBA,QAAQ,GAAGC,MAAlC,CAAhB,CADF;;AAEA,UAAMI,OAAO,GAAG,MAAMH,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAIP,qBAAJ,CAA0BG,MAA1B,EAAkCM,OAAlC,EAA2CP,MAAM,CAACG,MAAlD,CAAP;AACD;;AAEDK,EAAAA,WAAW,CACTC,IADS,EAETC,KAFS,EAGTC,QAHS,EAITC,OAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,IAAL,GAAYA,IAAZ;AACA,SAAKC,KAAL,GAAaA,KAAb;AACA,SAAKC,QAAL,GAAgBA,QAAhB;AACA,SAAKE,qBAAL,GAA6B,CAAAD,OAAO,SAAP,IAAAA,OAAO,WAAP,YAAAA,OAAO,CAAEC,qBAAT,KAAkChB,uBAA/D;AACD;;AAEe,QAAViB,UAAU,GAAkB;AAChC,UAAMd,MAAM,GAAG,MAAM,KAAKS,IAAL,CAAU,CAAV,EAAarB,aAAa,CAACe,MAA3B,CAArB;AAEA,UAAMY,KAAK,GAAGf,MAAM,CAACgB,QAAP,EAAd;;AACA,YAAQD,KAAR;AACE,WAAK3B,aAAL;AACE;;AACF,WAAKC,uBAAL;AACE,cAAM,IAAI4B,KAAJ,CAAU,sCAAV,CAAN;;AACF;AACE,cAAM,IAAIA,KAAJ,uCAAyCF,KAAzC,OAAN;AANJ;AAQD;;AAEiB,QAAZG,YAAY,CAChBC,MADgB,EAEhBC,QAFgB,EAGhBC,UAHgB,EAIQ;AACxB,UAAMrB,MAAqB,GAAG;AAC5BsB,MAAAA,QAAQ,EAAEC,MAAM,CAACH,QAAQ,CAACI,QAAV,CADY;AAE5BC,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBN,QAAQ,CAACO,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAIV,UAAU,CAAClB,MAAX,GAAoB,CAApB,IAAyBT,YAAY,CAAC2B,UAAD,EAAaS,MAAb,CAAZ,GAAoC,CAAjE,EAAoE;AAClE;AACD;;AACD9B,MAAAA,MAAM,CAACyB,UAAP,CAAkBK,MAAM,CAAEE,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBd,MAArB,EAA6BO,QAA7B,CAA1C;AACD;;AACD,WAAO1B,MAAP;AACD;;AAOoB,QAAfiC,eAAe,CAACd,MAAD,EAAwBO,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACQ,SAAT,KAAuBC,SAAvB,IAAoCT,QAAQ,CAACQ,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAIjB,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAMmB,KAAK,GAAGjB,MAAM,CAACkB,SAAP,wBAAiBX,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMO,IAAmB,GAAG7C,aAAa,CAACF,IAAD,0BAAOmC,QAAQ,CAACG,SAAhB,yDAAO,qBAAoBS,IAA3B,CAAzC;;AAEA,QAAIA,IAAI,KAAKF,KAAK,CAACG,aAAnB,EAAkC;AAChC,YAAM,IAAItB,KAAJ,2CAA6CqB,IAA7C,EAAN;AACD;;AAED,UAAME,WAA+B,GAAG/C,aAAa,CACnDH,gBADmD,0BAEnDoC,QAAQ,CAACG,SAF0C,yDAEnD,qBAAoBY,KAF+B,CAArD;AAKA,UAAMC,WAAW,GAAGnB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBc,gBAArB,CAA1B;AACA,QAAIC,SAAS,GAAGrB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBgB,qBAArB,CAAtB;;AAEA,QAAI,CAACnB,QAAQ,CAACQ,SAAd,EAAyB;AAAA;;AACvBU,MAAAA,SAAS,GAAGE,IAAI,CAACC,GAAL,CACV,KAAKpC,QAAL,GAAgB+B,WADN,EAEVnB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBgB,qBAArB,CAFI,CAAZ;AAID;;AAED,UAAMjC,OAAuB,GAAG;AAC9B0B,MAAAA,IAD8B;AAE9BU,MAAAA,SAAS,EAAEZ,KAAK,CAACY,SAFa;AAG9BC,MAAAA,SAAS,EAAEb,KAAK,CAACa,SAHa;AAI9BT,MAAAA,WAJ8B;AAK9BU,MAAAA,MAAM,EAAEd,KALsB;AAM9Be,MAAAA,SAAS,0BAAEzB,QAAQ,CAACG,SAAX,yDAAE,qBAAoBuB,UAND;AAO9BC,MAAAA,UAAU,EAAE;AAPkB,KAAhC;AAUA,QAAIA,UAAJ;AAEA,UAAMC,oBAAoB,GAAG5B,QAAH,aAAGA,QAAH,+CAAGA,QAAQ,CAAEG,SAAb,yDAAG,qBAAqB0B,sBAAlD;;AAEA,QAAID,oBAAJ,EAA0B;AACxB,YAAME,gBAAgB,GAAGjC,MAAM,CAAC+B,oBAAD,CAA/B;AAEAD,MAAAA,UAAU,GAAG,MAAM,KAAKI,aAAL,CAAmBD,gBAAnB,EAAqC5C,OAArC,EAA8C8B,WAA9C,CAAnB;AACD;;AAEDW,IAAAA,UAAU,GAAG,uBAAAzC,OAAO,CAACyC,UAAR,oEAAoBlD,MAApB,GAA6BS,OAAO,CAACyC,UAArC,GAAkDA,UAA/D;AACA,UAAMK,QAAQ,GAAG,MAAM,KAAKjD,IAAL,CAAUiC,WAAV,EAAuBE,SAAvB,CAAvB;AACA,WAAO,MAAMjD,eAAe,CAAC+D,QAAD,EAAW,EAAC,GAAG9C,OAAJ;AAAayC,MAAAA;AAAb,KAAX,CAA5B;AACD;;AASkB,QAAbI,aAAa,CACjBH,oBADiB,EAEjB1C,OAFiB,EAGjB8B,WAHiB,EAIE;AACnB,QAAIY,oBAAoB,KAAK,CAA7B,EAAgC;AAQ9B,aAAO,EAAP;AACD;;AAED,UAAMK,cAAc,GAAGb,IAAI,CAACC,GAAL,CACrB,KAAKpC,QAAL,GAAgB2C,oBADK,EAErB,KAAKzC,qBAFgB,CAAvB;AAIA,UAAM6C,QAAQ,GAAG,MAAM,KAAKjD,IAAL,CAAU6C,oBAAV,EAAgCK,cAAhC,CAAvB;AAEA,UAAMC,MAAM,GAAG;AAAC5D,MAAAA,MAAM,EAAE0D,QAAT;AAAmBG,MAAAA,MAAM,EAAE,CAA3B;AAA8BC,MAAAA,IAAI,EAAEJ,QAAQ,CAACvD;AAA7C,KAAf;AACA,UAAM4D,WAAW,GAAG,MAAMnE,UAAU,CAACgE,MAAD,EAAShD,OAAT,CAApC;AAEA,WAAOmD,WAAW,CAACV,UAAnB;AACD;;AAEe,QAAVW,UAAU,GAA0B;AACxC,UAAMC,UAAU,GAAG7E,aAAa,CAACe,MAAd,GAAuB,CAA1C;AACA,UAAM+D,UAAU,GAAG,MAAM,KAAKzD,IAAL,CAAU,KAAKE,QAAL,GAAgBsD,UAA1B,EAAsCA,UAAtC,CAAzB;AAEA,UAAMlD,KAAK,GAAGmD,UAAU,CAAC5D,KAAX,CAAiB,CAAjB,EAAoBU,QAApB,EAAd;;AACA,QAAID,KAAK,KAAK3B,aAAd,EAA6B;AAC3B,YAAM,IAAI6B,KAAJ,6CAA8CF,KAA9C,OAAN;AACD;;AAED,UAAMoD,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAK1D,QAAL,GAAgBwD,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAGjF,aAAa,CAACe,MAAnC,EAA2C;AACzC,YAAM,IAAIc,KAAJ,iCAAmCoD,cAAnC,EAAN;AACD;;AAED,UAAMC,WAAW,GAAG,MAAM,KAAK7D,IAAL,CAAU4D,cAAV,EAA0BF,YAA1B,CAA1B;AAGA,UAAM;AAACI,MAAAA;AAAD,QAAa/E,kBAAkB,CAAC8E,WAAD,CAArC;AACA,WAAOC,QAAP;AACD;;AA/KgC","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetSchema} from '../schema/schema';\nimport {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';\nimport {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';\nimport {\n ParquetBuffer,\n ParquetCompression,\n ParquetData,\n PrimitiveType,\n ParquetOptions\n} from '../schema/declare';\nimport {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';\nimport {decodeDataPages, decodePage} from './decoders';\n\nconst DEFAULT_DICTIONARY_SIZE = 1e6;\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n public defaultDictionarySize: number;\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number,\n options?: any\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n this.defaultDictionarySize = options?.defaultDictionarySize || DEFAULT_DICTIONARY_SIZE;\n }\n\n async readHeader(): Promise<void> {\n const buffer = await this.read(0, PARQUET_MAGIC.length);\n\n const magic = buffer.toString();\n switch (magic) {\n case PARQUET_MAGIC:\n break;\n case PARQUET_MAGIC_ENCRYPTED:\n throw new Error('Encrypted parquet file not supported');\n default:\n throw new Error(`Invalid parquet file (magic=${magic})`);\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n /**\n * Do reading of parquet file's column chunk\n * @param schema\n * @param colChunk\n */\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n\n if (type !== field.primitiveType) {\n throw new Error(`chunk type not matching schema: ${type}`);\n }\n\n const compression: ParquetCompression = getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n\n if (!colChunk.file_path) {\n pagesSize = Math.min(\n this.fileSize - pagesOffset,\n Number(colChunk.meta_data?.total_compressed_size)\n );\n }\n\n const options: ParquetOptions = {\n type,\n rLevelMax: field.rLevelMax,\n dLevelMax: field.dLevelMax,\n compression,\n column: field,\n numValues: colChunk.meta_data?.num_values,\n dictionary: []\n };\n\n let dictionary;\n\n const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;\n\n if (dictionaryPageOffset) {\n const dictionaryOffset = Number(dictionaryPageOffset);\n // Getting dictionary from column chunk to iterate all over indexes to get dataPage values.\n dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);\n }\n\n dictionary = options.dictionary?.length ? options.dictionary : dictionary;\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n return await decodeDataPages(pagesBuf, {...options, dictionary});\n }\n\n /**\n * Getting dictionary for allows to flatten values by indices.\n * @param dictionaryPageOffset\n * @param options\n * @param pagesOffset\n * @returns\n */\n async getDictionary(\n dictionaryPageOffset: number,\n options: ParquetOptions,\n pagesOffset: number\n ): Promise<string[]> {\n if (dictionaryPageOffset === 0) {\n // dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);\n // pagesBuf = await this.read(pagesOffset, dictionarySize);\n\n // In this case we are working with parquet-mr files format. Problem is described below:\n // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding\n // We need to get dictionary page from column chunk if it exists.\n // Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.\n return [];\n }\n\n const dictionarySize = Math.min(\n this.fileSize - dictionaryPageOffset,\n this.defaultDictionarySize\n );\n const pagesBuf = await this.read(dictionaryPageOffset, dictionarySize);\n\n const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};\n const decodedPage = await decodePage(cursor, options);\n\n return decodedPage.dictionary!;\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n const magic = trailerBuf.slice(4).toString();\n if (magic !== PARQUET_MAGIC) {\n throw new Error(`Not a valid parquet file (magic=\"${magic})`);\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error(`Invalid metadata size ${metadataOffset}`);\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n"],"file":"parquet-envelope-reader.js"}
|