@loaders.gl/parquet 3.1.3 → 4.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/bundle.js.map +1 -0
- package/dist/constants.js +6 -18
- package/dist/constants.js.map +1 -0
- package/dist/dist.min.js +17 -8
- package/dist/dist.min.js.map +3 -3
- package/dist/index.js +14 -29
- package/dist/index.js.map +1 -0
- package/dist/lib/convert-schema.js +63 -62
- package/dist/lib/convert-schema.js.map +1 -0
- package/dist/lib/parse-parquet.js +25 -25
- package/dist/lib/parse-parquet.js.map +1 -0
- package/dist/lib/read-array-buffer.js +8 -28
- package/dist/lib/read-array-buffer.js.map +1 -0
- package/dist/parquet-loader.js +19 -24
- package/dist/parquet-loader.js.map +1 -0
- package/dist/parquet-worker.js +18 -9
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.js +14 -17
- package/dist/parquet-writer.js.map +1 -0
- package/dist/{es5/parquetjs → parquetjs}/LICENSE +0 -0
- package/dist/parquetjs/codecs/declare.js +2 -2
- package/dist/{es5/parquetjs → parquetjs}/codecs/declare.js.map +0 -0
- package/dist/parquetjs/codecs/dictionary.js +10 -12
- package/dist/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/parquetjs/codecs/index.js +22 -50
- package/dist/parquetjs/codecs/index.js.map +1 -0
- package/dist/parquetjs/codecs/plain.js +232 -173
- package/dist/parquetjs/codecs/plain.js.map +1 -0
- package/dist/parquetjs/codecs/rle.js +140 -134
- package/dist/parquetjs/codecs/rle.js.map +1 -0
- package/dist/parquetjs/compression.js +48 -154
- package/dist/parquetjs/compression.js.map +1 -0
- package/dist/parquetjs/encoder/writer.js +383 -440
- package/dist/parquetjs/encoder/writer.js.map +1 -0
- package/dist/parquetjs/file.js +66 -85
- package/dist/parquetjs/file.js.map +1 -0
- package/dist/{es5/parquetjs → parquetjs}/modules.d.ts +0 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +7 -14
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/BsonType.js +37 -56
- package/dist/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +215 -205
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +212 -207
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +422 -391
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +90 -99
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +12 -19
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +26 -33
- package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +162 -162
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +234 -224
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DateType.js +37 -56
- package/dist/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DecimalType.js +91 -101
- package/dist/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -118
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Encoding.js +12 -19
- package/dist/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/EnumType.js +37 -56
- package/dist/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -14
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +264 -250
- package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +37 -56
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IntType.js +91 -101
- package/dist/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/JsonType.js +37 -56
- package/dist/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js +89 -98
- package/dist/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ListType.js +37 -56
- package/dist/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js +450 -363
- package/dist/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MapType.js +37 -56
- package/dist/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +37 -56
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +37 -56
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/NullType.js +37 -56
- package/dist/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +80 -92
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +115 -123
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js +231 -214
- package/dist/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js +124 -137
- package/dist/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageType.js +8 -15
- package/dist/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js +172 -176
- package/dist/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +268 -237
- package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +115 -123
- package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js +179 -172
- package/dist/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/StringType.js +37 -56
- package/dist/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js +92 -102
- package/dist/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +120 -121
- package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js +92 -102
- package/dist/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Type.js +12 -19
- package/dist/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +37 -56
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/UUIDType.js +37 -56
- package/dist/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/index.js +44 -61
- package/dist/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/parquetjs/parser/decoders.js +283 -301
- package/dist/{es5/parquetjs → parquetjs}/parser/decoders.js.map +1 -1
- package/dist/parquetjs/parser/parquet-cursor.js +85 -69
- package/dist/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/parquetjs/parser/parquet-envelope-reader.js +146 -127
- package/dist/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/parquetjs/parser/parquet-reader.js +113 -127
- package/dist/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/parquetjs/schema/declare.js +12 -9
- package/dist/parquetjs/schema/declare.js.map +1 -0
- package/dist/parquetjs/schema/schema.js +162 -148
- package/dist/{es5/parquetjs → parquetjs}/schema/schema.js.map +1 -1
- package/dist/parquetjs/schema/shred.js +151 -214
- package/dist/parquetjs/schema/shred.js.map +1 -0
- package/dist/parquetjs/schema/types.js +415 -357
- package/dist/parquetjs/schema/types.js.map +1 -0
- package/dist/parquetjs/utils/buffer-utils.js +10 -20
- package/dist/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/parquetjs/utils/file-utils.js +28 -40
- package/dist/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/parquetjs/utils/read-utils.js +95 -99
- package/dist/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/workers/parquet-worker.js +4 -5
- package/dist/workers/parquet-worker.js.map +1 -0
- package/package.json +8 -8
- package/dist/es5/bundle.js +0 -7
- package/dist/es5/bundle.js.map +0 -1
- package/dist/es5/constants.js +0 -17
- package/dist/es5/constants.js.map +0 -1
- package/dist/es5/index.js +0 -82
- package/dist/es5/index.js.map +0 -1
- package/dist/es5/lib/convert-schema.js +0 -82
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js +0 -173
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -53
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/es5/parquet-loader.js +0 -30
- package/dist/es5/parquet-loader.js.map +0 -1
- package/dist/es5/parquet-writer.js +0 -25
- package/dist/es5/parquet-writer.js.map +0 -1
- package/dist/es5/parquetjs/codecs/declare.js +0 -2
- package/dist/es5/parquetjs/codecs/dictionary.js +0 -30
- package/dist/es5/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/es5/parquetjs/codecs/index.js +0 -56
- package/dist/es5/parquetjs/codecs/index.js.map +0 -1
- package/dist/es5/parquetjs/codecs/plain.js +0 -287
- package/dist/es5/parquetjs/codecs/plain.js.map +0 -1
- package/dist/es5/parquetjs/codecs/rle.js +0 -174
- package/dist/es5/parquetjs/codecs/rle.js.map +0 -1
- package/dist/es5/parquetjs/compression.js +0 -167
- package/dist/es5/parquetjs/compression.js.map +0 -1
- package/dist/es5/parquetjs/encoder/writer.js +0 -875
- package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
- package/dist/es5/parquetjs/file.js +0 -103
- package/dist/es5/parquetjs/file.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +0 -241
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +0 -245
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +0 -449
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +0 -34
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +0 -191
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -258
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +0 -122
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -143
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +0 -298
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +0 -122
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +0 -120
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +0 -508
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +0 -114
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +0 -145
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +0 -258
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +0 -155
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +0 -16
- package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +0 -206
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +0 -290
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +0 -145
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +0 -207
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +0 -156
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +0 -124
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +0 -20
- package/dist/es5/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +0 -67
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +0 -565
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/es5/parquetjs/parser/decoders.js +0 -489
- package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -215
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -413
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +0 -364
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/es5/parquetjs/schema/declare.js +0 -25
- package/dist/es5/parquetjs/schema/declare.js.map +0 -1
- package/dist/es5/parquetjs/schema/schema.js +0 -203
- package/dist/es5/parquetjs/schema/shred.js +0 -223
- package/dist/es5/parquetjs/schema/shred.js.map +0 -1
- package/dist/es5/parquetjs/schema/types.js +0 -492
- package/dist/es5/parquetjs/schema/types.js.map +0 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -21
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/es5/parquetjs/utils/file-utils.js +0 -55
- package/dist/es5/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/es5/parquetjs/utils/read-utils.js +0 -159
- package/dist/es5/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/es5/workers/parquet-worker.js +0 -8
- package/dist/es5/workers/parquet-worker.js.map +0 -1
- package/dist/esm/bundle.js +0 -5
- package/dist/esm/bundle.js.map +0 -1
- package/dist/esm/constants.js +0 -6
- package/dist/esm/constants.js.map +0 -1
- package/dist/esm/index.js +0 -15
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/lib/convert-schema.js +0 -71
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js +0 -28
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -9
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/parquet-loader.js +0 -22
- package/dist/esm/parquet-loader.js.map +0 -1
- package/dist/esm/parquet-writer.js +0 -18
- package/dist/esm/parquet-writer.js.map +0 -1
- package/dist/esm/parquetjs/LICENSE +0 -20
- package/dist/esm/parquetjs/codecs/declare.js +0 -2
- package/dist/esm/parquetjs/codecs/declare.js.map +0 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +0 -12
- package/dist/esm/parquetjs/codecs/dictionary.js.map +0 -1
- package/dist/esm/parquetjs/codecs/index.js +0 -23
- package/dist/esm/parquetjs/codecs/index.js.map +0 -1
- package/dist/esm/parquetjs/codecs/plain.js +0 -270
- package/dist/esm/parquetjs/codecs/plain.js.map +0 -1
- package/dist/esm/parquetjs/codecs/rle.js +0 -151
- package/dist/esm/parquetjs/codecs/rle.js.map +0 -1
- package/dist/esm/parquetjs/compression.js +0 -62
- package/dist/esm/parquetjs/compression.js.map +0 -1
- package/dist/esm/parquetjs/encoder/writer.js +0 -421
- package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
- package/dist/esm/parquetjs/file.js +0 -80
- package/dist/esm/parquetjs/file.js.map +0 -1
- package/dist/esm/parquetjs/modules.d.ts +0 -21
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +0 -8
- package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +0 -217
- package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +0 -218
- package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +0 -429
- package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +0 -27
- package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +0 -166
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -236
- package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -117
- package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -8
- package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +0 -270
- package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/IntType.js +0 -95
- package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +0 -93
- package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +0 -467
- package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +0 -85
- package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +0 -119
- package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +0 -233
- package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +0 -128
- package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/PageType.js +0 -9
- package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +0 -178
- package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +0 -270
- package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +0 -119
- package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js +0 -183
- package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js +0 -96
- package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +0 -126
- package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +0 -96
- package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/Type.js +0 -13
- package/dist/esm/parquetjs/parquet-thrift/Type.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -39
- package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
- package/dist/esm/parquetjs/parquet-thrift/index.js +0 -44
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +0 -1
- package/dist/esm/parquetjs/parser/decoders.js +0 -300
- package/dist/esm/parquetjs/parser/decoders.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -90
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -155
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +0 -120
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +0 -1
- package/dist/esm/parquetjs/schema/declare.js +0 -13
- package/dist/esm/parquetjs/schema/declare.js.map +0 -1
- package/dist/esm/parquetjs/schema/schema.js +0 -176
- package/dist/esm/parquetjs/schema/schema.js.map +0 -1
- package/dist/esm/parquetjs/schema/shred.js +0 -162
- package/dist/esm/parquetjs/schema/shred.js.map +0 -1
- package/dist/esm/parquetjs/schema/types.js +0 -476
- package/dist/esm/parquetjs/schema/types.js.map +0 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +0 -12
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/esm/parquetjs/utils/file-utils.js +0 -34
- package/dist/esm/parquetjs/utils/file-utils.js.map +0 -1
- package/dist/esm/parquetjs/utils/read-utils.js +0 -105
- package/dist/esm/parquetjs/utils/read-utils.js.map +0 -1
- package/dist/esm/workers/parquet-worker.js +0 -4
- package/dist/esm/workers/parquet-worker.js.map +0 -1
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parser/parquet-cursor.ts"],"names":["Symbol","asyncIterator","ParquetCursor","metadata","envelopeReader","schema","columnList","rowGroupIndex","rowGroup","length","row_groups","readRowGroup","rowBuffer","shift","done","next","value","return","throw"],"mappings":";;;;;;;;;;;;;;;;;;;AAKA;;;;wBAiEGA,MAAM,CAACC,a;;IA5DGC,a;AAcX,yBACEC,QADF,EAEEC,cAFF,EAGEC,MAHF,EAIEC,UAJF,EAKE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,oDAdiC,EAcjC;AAAA;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;;;;4EAMD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sBACM,KAAKC,QAAL,CAAcC,MAAd,KAAyB,CAD/B;AAAA;AAAA;AAAA;;AAAA,sBAEQ,KAAKF,aAAL,IAAsB,KAAKJ,QAAL,CAAcO,UAAd,CAAyBD,MAFvD;AAAA;AAAA;AAAA;;AAAA,iDAIa,IAJb;;AAAA;AAAA;AAAA,uBAM4B,KAAKL,cAAL,CAAoBO,YAApB,CACtB,KAAKN,MADiB,EAEtB,KAAKF,QAAL,CAAcO,UAAd,CAAyB,KAAKH,aAA9B,CAFsB,EAGtB,KAAKD,UAHiB,CAN5B;;AAAA;AAMUM,gBAAAA,SANV;AAWI,qBAAKJ,QAAL,GAAgB,+BAAmB,KAAKH,MAAxB,EAAgCO,SAAhC,CAAhB;AACA,qBAAKL,aAAL;;AAZJ;AAAA,iDAcS,KAAKC,QAAL,CAAcK,KAAd,EAdT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;WAoBA,kBAAe;AACb,WAAKL,QAAL,GAAgB,EAAhB;AACA,WAAKD,aAAL,GAAqB,CAArB;AACD;;;WAMD,iBAA2C;AAAA;;AACzC,UAAIO,IAAI,GAAG,KAAX;AACA,aAAO;AACLC,QAAAA,IAAI;AAAA,iFAAE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,yBACAD,IADA;AAAA;AAAA;AAAA;;AAAA,sDAEK;AAACA,sBAAAA,IAAI,EAAJA,IAAD;AAAOE,sBAAAA,KAAK,EAAE;AAAd,qBAFL;;AAAA;AAAA;AAAA,2BAIgB,KAAI,CAACD,IAAL,EAJhB;;AAAA;AAIEC,oBAAAA,KAJF;;AAAA,0BAKAA,KAAK,KAAK,IALV;AAAA;AAAA;AAAA;;AAAA,sDAMK;AAACF,sBAAAA,IAAI,EAAE,IAAP;AAAaE,sBAAAA,KAAK,EAALA;AAAb,qBANL;;AAAA;AAAA,sDAQG;AAACF,sBAAAA,IAAI,EAAE,KAAP;AAAcE,sBAAAA,KAAK,EAALA;AAAd,qBARH;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA,WADC;AAWLC,QAAAA,MAAM;AAAA,mFAAE;AAAA;AAAA;AAAA;AAAA;AACNH,oBAAAA,IAAI,GAAG,IAAP;AADM,sDAEC;AAACA,sBAAAA,IAAI,EAAJA,IAAD;AAAOE,sBAAAA,KAAK,EAAE;AAAd,qBAFD;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA,WAXD;AAeLE,QAAAA,KAAK;AAAA,kFAAE;AAAA;AAAA;AAAA;AAAA;AACLJ,oBAAAA,IAAI,GAAG,IAAP;AADK,sDAEE;AAACA,sBAAAA,IAAI,EAAE,IAAP;AAAaE,sBAAAA,KAAK,EAAE;AAApB,qBAFF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAfA,OAAP;AAoBD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {FileMetaData} from '../parquet-thrift';\nimport {ParquetEnvelopeReader} from './parquet-envelope-reader';\nimport {ParquetSchema} from '../schema/schema';\nimport {ParquetRecord} from '../schema/declare';\nimport {materializeRecords} from '../schema/shred';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[] = [];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n"],"file":"parquet-cursor.js"}
|
|
@@ -1,413 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
|
-
|
|
5
|
-
Object.defineProperty(exports, "__esModule", {
|
|
6
|
-
value: true
|
|
7
|
-
});
|
|
8
|
-
exports.ParquetEnvelopeReader = void 0;
|
|
9
|
-
|
|
10
|
-
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
11
|
-
|
|
12
|
-
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
13
|
-
|
|
14
|
-
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
15
|
-
|
|
16
|
-
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
17
|
-
|
|
18
|
-
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
19
|
-
|
|
20
|
-
var _constants = require("../../constants");
|
|
21
|
-
|
|
22
|
-
var _parquetThrift = require("../parquet-thrift");
|
|
23
|
-
|
|
24
|
-
var _readUtils = require("../utils/read-utils");
|
|
25
|
-
|
|
26
|
-
var _decoders = require("./decoders");
|
|
27
|
-
|
|
28
|
-
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
|
|
29
|
-
|
|
30
|
-
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
|
|
31
|
-
|
|
32
|
-
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
|
|
33
|
-
|
|
34
|
-
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
35
|
-
|
|
36
|
-
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
|
|
37
|
-
|
|
38
|
-
var DEFAULT_DICTIONARY_SIZE = 1e6;
|
|
39
|
-
|
|
40
|
-
var ParquetEnvelopeReader = function () {
|
|
41
|
-
function ParquetEnvelopeReader(read, close, fileSize, options) {
|
|
42
|
-
(0, _classCallCheck2.default)(this, ParquetEnvelopeReader);
|
|
43
|
-
(0, _defineProperty2.default)(this, "read", void 0);
|
|
44
|
-
(0, _defineProperty2.default)(this, "close", void 0);
|
|
45
|
-
(0, _defineProperty2.default)(this, "fileSize", void 0);
|
|
46
|
-
(0, _defineProperty2.default)(this, "defaultDictionarySize", void 0);
|
|
47
|
-
this.read = read;
|
|
48
|
-
this.close = close;
|
|
49
|
-
this.fileSize = fileSize;
|
|
50
|
-
this.defaultDictionarySize = (options === null || options === void 0 ? void 0 : options.defaultDictionarySize) || DEFAULT_DICTIONARY_SIZE;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
(0, _createClass2.default)(ParquetEnvelopeReader, [{
|
|
54
|
-
key: "readHeader",
|
|
55
|
-
value: function () {
|
|
56
|
-
var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
|
|
57
|
-
var buffer, magic;
|
|
58
|
-
return _regenerator.default.wrap(function _callee$(_context) {
|
|
59
|
-
while (1) {
|
|
60
|
-
switch (_context.prev = _context.next) {
|
|
61
|
-
case 0:
|
|
62
|
-
_context.next = 2;
|
|
63
|
-
return this.read(0, _constants.PARQUET_MAGIC.length);
|
|
64
|
-
|
|
65
|
-
case 2:
|
|
66
|
-
buffer = _context.sent;
|
|
67
|
-
magic = buffer.toString();
|
|
68
|
-
_context.t0 = magic;
|
|
69
|
-
_context.next = _context.t0 === _constants.PARQUET_MAGIC ? 7 : _context.t0 === _constants.PARQUET_MAGIC_ENCRYPTED ? 8 : 9;
|
|
70
|
-
break;
|
|
71
|
-
|
|
72
|
-
case 7:
|
|
73
|
-
return _context.abrupt("break", 10);
|
|
74
|
-
|
|
75
|
-
case 8:
|
|
76
|
-
throw new Error('Encrypted parquet file not supported');
|
|
77
|
-
|
|
78
|
-
case 9:
|
|
79
|
-
throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
|
|
80
|
-
|
|
81
|
-
case 10:
|
|
82
|
-
case "end":
|
|
83
|
-
return _context.stop();
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
}, _callee, this);
|
|
87
|
-
}));
|
|
88
|
-
|
|
89
|
-
function readHeader() {
|
|
90
|
-
return _readHeader.apply(this, arguments);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return readHeader;
|
|
94
|
-
}()
|
|
95
|
-
}, {
|
|
96
|
-
key: "readRowGroup",
|
|
97
|
-
value: function () {
|
|
98
|
-
var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(schema, rowGroup, columnList) {
|
|
99
|
-
var buffer, _iterator, _step, colChunk, colMetadata, colKey;
|
|
100
|
-
|
|
101
|
-
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
102
|
-
while (1) {
|
|
103
|
-
switch (_context2.prev = _context2.next) {
|
|
104
|
-
case 0:
|
|
105
|
-
buffer = {
|
|
106
|
-
rowCount: Number(rowGroup.num_rows),
|
|
107
|
-
columnData: {}
|
|
108
|
-
};
|
|
109
|
-
_iterator = _createForOfIteratorHelper(rowGroup.columns);
|
|
110
|
-
_context2.prev = 2;
|
|
111
|
-
|
|
112
|
-
_iterator.s();
|
|
113
|
-
|
|
114
|
-
case 4:
|
|
115
|
-
if ((_step = _iterator.n()).done) {
|
|
116
|
-
_context2.next = 15;
|
|
117
|
-
break;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
colChunk = _step.value;
|
|
121
|
-
colMetadata = colChunk.meta_data;
|
|
122
|
-
colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
|
|
123
|
-
|
|
124
|
-
if (!(columnList.length > 0 && (0, _readUtils.fieldIndexOf)(columnList, colKey) < 0)) {
|
|
125
|
-
_context2.next = 10;
|
|
126
|
-
break;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
return _context2.abrupt("continue", 13);
|
|
130
|
-
|
|
131
|
-
case 10:
|
|
132
|
-
_context2.next = 12;
|
|
133
|
-
return this.readColumnChunk(schema, colChunk);
|
|
134
|
-
|
|
135
|
-
case 12:
|
|
136
|
-
buffer.columnData[colKey.join()] = _context2.sent;
|
|
137
|
-
|
|
138
|
-
case 13:
|
|
139
|
-
_context2.next = 4;
|
|
140
|
-
break;
|
|
141
|
-
|
|
142
|
-
case 15:
|
|
143
|
-
_context2.next = 20;
|
|
144
|
-
break;
|
|
145
|
-
|
|
146
|
-
case 17:
|
|
147
|
-
_context2.prev = 17;
|
|
148
|
-
_context2.t0 = _context2["catch"](2);
|
|
149
|
-
|
|
150
|
-
_iterator.e(_context2.t0);
|
|
151
|
-
|
|
152
|
-
case 20:
|
|
153
|
-
_context2.prev = 20;
|
|
154
|
-
|
|
155
|
-
_iterator.f();
|
|
156
|
-
|
|
157
|
-
return _context2.finish(20);
|
|
158
|
-
|
|
159
|
-
case 23:
|
|
160
|
-
return _context2.abrupt("return", buffer);
|
|
161
|
-
|
|
162
|
-
case 24:
|
|
163
|
-
case "end":
|
|
164
|
-
return _context2.stop();
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}, _callee2, this, [[2, 17, 20, 23]]);
|
|
168
|
-
}));
|
|
169
|
-
|
|
170
|
-
function readRowGroup(_x, _x2, _x3) {
|
|
171
|
-
return _readRowGroup.apply(this, arguments);
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
return readRowGroup;
|
|
175
|
-
}()
|
|
176
|
-
}, {
|
|
177
|
-
key: "readColumnChunk",
|
|
178
|
-
value: function () {
|
|
179
|
-
var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(schema, colChunk) {
|
|
180
|
-
var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
|
|
181
|
-
|
|
182
|
-
var field, type, compression, pagesOffset, pagesSize, _colChunk$meta_data6, options, dictionary, dictionaryPageOffset, dictionaryOffset, pagesBuf;
|
|
183
|
-
|
|
184
|
-
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
185
|
-
while (1) {
|
|
186
|
-
switch (_context3.prev = _context3.next) {
|
|
187
|
-
case 0:
|
|
188
|
-
if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
|
|
189
|
-
_context3.next = 2;
|
|
190
|
-
break;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
throw new Error('external references are not supported');
|
|
194
|
-
|
|
195
|
-
case 2:
|
|
196
|
-
field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
197
|
-
type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
198
|
-
|
|
199
|
-
if (!(type !== field.primitiveType)) {
|
|
200
|
-
_context3.next = 6;
|
|
201
|
-
break;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
throw new Error("chunk type not matching schema: ".concat(type));
|
|
205
|
-
|
|
206
|
-
case 6:
|
|
207
|
-
compression = (0, _readUtils.getThriftEnum)(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
208
|
-
pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
209
|
-
pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
210
|
-
|
|
211
|
-
if (!colChunk.file_path) {
|
|
212
|
-
pagesSize = Math.min(this.fileSize - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
options = {
|
|
216
|
-
type: type,
|
|
217
|
-
rLevelMax: field.rLevelMax,
|
|
218
|
-
dLevelMax: field.dLevelMax,
|
|
219
|
-
compression: compression,
|
|
220
|
-
column: field,
|
|
221
|
-
numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
|
|
222
|
-
dictionary: []
|
|
223
|
-
};
|
|
224
|
-
dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
|
|
225
|
-
|
|
226
|
-
if (!dictionaryPageOffset) {
|
|
227
|
-
_context3.next = 17;
|
|
228
|
-
break;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
dictionaryOffset = Number(dictionaryPageOffset);
|
|
232
|
-
_context3.next = 16;
|
|
233
|
-
return this.getDictionary(dictionaryOffset, options, pagesOffset);
|
|
234
|
-
|
|
235
|
-
case 16:
|
|
236
|
-
dictionary = _context3.sent;
|
|
237
|
-
|
|
238
|
-
case 17:
|
|
239
|
-
dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
|
|
240
|
-
_context3.next = 20;
|
|
241
|
-
return this.read(pagesOffset, pagesSize);
|
|
242
|
-
|
|
243
|
-
case 20:
|
|
244
|
-
pagesBuf = _context3.sent;
|
|
245
|
-
_context3.next = 23;
|
|
246
|
-
return (0, _decoders.decodeDataPages)(pagesBuf, _objectSpread(_objectSpread({}, options), {}, {
|
|
247
|
-
dictionary: dictionary
|
|
248
|
-
}));
|
|
249
|
-
|
|
250
|
-
case 23:
|
|
251
|
-
return _context3.abrupt("return", _context3.sent);
|
|
252
|
-
|
|
253
|
-
case 24:
|
|
254
|
-
case "end":
|
|
255
|
-
return _context3.stop();
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
}, _callee3, this);
|
|
259
|
-
}));
|
|
260
|
-
|
|
261
|
-
function readColumnChunk(_x4, _x5) {
|
|
262
|
-
return _readColumnChunk.apply(this, arguments);
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
return readColumnChunk;
|
|
266
|
-
}()
|
|
267
|
-
}, {
|
|
268
|
-
key: "getDictionary",
|
|
269
|
-
value: function () {
|
|
270
|
-
var _getDictionary = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(dictionaryPageOffset, options, pagesOffset) {
|
|
271
|
-
var dictionarySize, pagesBuf, cursor, decodedPage;
|
|
272
|
-
return _regenerator.default.wrap(function _callee4$(_context4) {
|
|
273
|
-
while (1) {
|
|
274
|
-
switch (_context4.prev = _context4.next) {
|
|
275
|
-
case 0:
|
|
276
|
-
if (!(dictionaryPageOffset === 0)) {
|
|
277
|
-
_context4.next = 2;
|
|
278
|
-
break;
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
return _context4.abrupt("return", []);
|
|
282
|
-
|
|
283
|
-
case 2:
|
|
284
|
-
dictionarySize = Math.min(this.fileSize - dictionaryPageOffset, this.defaultDictionarySize);
|
|
285
|
-
_context4.next = 5;
|
|
286
|
-
return this.read(dictionaryPageOffset, dictionarySize);
|
|
287
|
-
|
|
288
|
-
case 5:
|
|
289
|
-
pagesBuf = _context4.sent;
|
|
290
|
-
cursor = {
|
|
291
|
-
buffer: pagesBuf,
|
|
292
|
-
offset: 0,
|
|
293
|
-
size: pagesBuf.length
|
|
294
|
-
};
|
|
295
|
-
_context4.next = 9;
|
|
296
|
-
return (0, _decoders.decodePage)(cursor, options);
|
|
297
|
-
|
|
298
|
-
case 9:
|
|
299
|
-
decodedPage = _context4.sent;
|
|
300
|
-
return _context4.abrupt("return", decodedPage.dictionary);
|
|
301
|
-
|
|
302
|
-
case 11:
|
|
303
|
-
case "end":
|
|
304
|
-
return _context4.stop();
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
}, _callee4, this);
|
|
308
|
-
}));
|
|
309
|
-
|
|
310
|
-
function getDictionary(_x6, _x7, _x8) {
|
|
311
|
-
return _getDictionary.apply(this, arguments);
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
return getDictionary;
|
|
315
|
-
}()
|
|
316
|
-
}, {
|
|
317
|
-
key: "readFooter",
|
|
318
|
-
value: function () {
|
|
319
|
-
var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5() {
|
|
320
|
-
var trailerLen, trailerBuf, magic, metadataSize, metadataOffset, metadataBuf, _decodeFileMetadata, metadata;
|
|
321
|
-
|
|
322
|
-
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
323
|
-
while (1) {
|
|
324
|
-
switch (_context5.prev = _context5.next) {
|
|
325
|
-
case 0:
|
|
326
|
-
trailerLen = _constants.PARQUET_MAGIC.length + 4;
|
|
327
|
-
_context5.next = 3;
|
|
328
|
-
return this.read(this.fileSize - trailerLen, trailerLen);
|
|
329
|
-
|
|
330
|
-
case 3:
|
|
331
|
-
trailerBuf = _context5.sent;
|
|
332
|
-
magic = trailerBuf.slice(4).toString();
|
|
333
|
-
|
|
334
|
-
if (!(magic !== _constants.PARQUET_MAGIC)) {
|
|
335
|
-
_context5.next = 7;
|
|
336
|
-
break;
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
|
|
340
|
-
|
|
341
|
-
case 7:
|
|
342
|
-
metadataSize = trailerBuf.readUInt32LE(0);
|
|
343
|
-
metadataOffset = this.fileSize - metadataSize - trailerLen;
|
|
344
|
-
|
|
345
|
-
if (!(metadataOffset < _constants.PARQUET_MAGIC.length)) {
|
|
346
|
-
_context5.next = 11;
|
|
347
|
-
break;
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
throw new Error("Invalid metadata size ".concat(metadataOffset));
|
|
351
|
-
|
|
352
|
-
case 11:
|
|
353
|
-
_context5.next = 13;
|
|
354
|
-
return this.read(metadataOffset, metadataSize);
|
|
355
|
-
|
|
356
|
-
case 13:
|
|
357
|
-
metadataBuf = _context5.sent;
|
|
358
|
-
_decodeFileMetadata = (0, _readUtils.decodeFileMetadata)(metadataBuf), metadata = _decodeFileMetadata.metadata;
|
|
359
|
-
return _context5.abrupt("return", metadata);
|
|
360
|
-
|
|
361
|
-
case 16:
|
|
362
|
-
case "end":
|
|
363
|
-
return _context5.stop();
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
}, _callee5, this);
|
|
367
|
-
}));
|
|
368
|
-
|
|
369
|
-
function readFooter() {
|
|
370
|
-
return _readFooter.apply(this, arguments);
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
return readFooter;
|
|
374
|
-
}()
|
|
375
|
-
}], [{
|
|
376
|
-
key: "openBuffer",
|
|
377
|
-
value: function () {
|
|
378
|
-
var _openBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(buffer) {
|
|
379
|
-
var readFn, closeFn;
|
|
380
|
-
return _regenerator.default.wrap(function _callee6$(_context6) {
|
|
381
|
-
while (1) {
|
|
382
|
-
switch (_context6.prev = _context6.next) {
|
|
383
|
-
case 0:
|
|
384
|
-
readFn = function readFn(position, length) {
|
|
385
|
-
return Promise.resolve(buffer.slice(position, position + length));
|
|
386
|
-
};
|
|
387
|
-
|
|
388
|
-
closeFn = function closeFn() {
|
|
389
|
-
return Promise.resolve();
|
|
390
|
-
};
|
|
391
|
-
|
|
392
|
-
return _context6.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, buffer.length));
|
|
393
|
-
|
|
394
|
-
case 3:
|
|
395
|
-
case "end":
|
|
396
|
-
return _context6.stop();
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
}, _callee6);
|
|
400
|
-
}));
|
|
401
|
-
|
|
402
|
-
function openBuffer(_x9) {
|
|
403
|
-
return _openBuffer.apply(this, arguments);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
return openBuffer;
|
|
407
|
-
}()
|
|
408
|
-
}]);
|
|
409
|
-
return ParquetEnvelopeReader;
|
|
410
|
-
}();
|
|
411
|
-
|
|
412
|
-
exports.ParquetEnvelopeReader = ParquetEnvelopeReader;
|
|
413
|
-
//# sourceMappingURL=parquet-envelope-reader.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parser/parquet-envelope-reader.ts"],"names":["DEFAULT_DICTIONARY_SIZE","ParquetEnvelopeReader","read","close","fileSize","options","defaultDictionarySize","PARQUET_MAGIC","length","buffer","magic","toString","PARQUET_MAGIC_ENCRYPTED","Error","schema","rowGroup","columnList","rowCount","Number","num_rows","columnData","columns","colChunk","colMetadata","meta_data","colKey","path_in_schema","readColumnChunk","join","file_path","undefined","field","findField","type","Type","primitiveType","compression","CompressionCodec","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","Math","min","rLevelMax","dLevelMax","column","numValues","num_values","dictionary","dictionaryPageOffset","dictionary_page_offset","dictionaryOffset","getDictionary","pagesBuf","dictionarySize","cursor","offset","size","decodedPage","trailerLen","trailerBuf","slice","metadataSize","readUInt32LE","metadataOffset","metadataBuf","metadata","readFn","position","Promise","resolve","closeFn"],"mappings":";;;;;;;;;;;;;;;;;;;AAEA;;AACA;;AAQA;;AACA;;;;;;;;;;;;AAEA,IAAMA,uBAAuB,GAAG,GAAhC;;IAQaC,qB;AAiBX,iCACEC,IADF,EAEEC,KAFF,EAGEC,QAHF,EAIEC,OAJF,EAKE;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,SAAKH,IAAL,GAAYA,IAAZ;AACA,SAAKC,KAAL,GAAaA,KAAb;AACA,SAAKC,QAAL,GAAgBA,QAAhB;AACA,SAAKE,qBAAL,GAA6B,CAAAD,OAAO,SAAP,IAAAA,OAAO,WAAP,YAAAA,OAAO,CAAEC,qBAAT,KAAkCN,uBAA/D;AACD;;;;;kFAED;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,uBACuB,KAAKE,IAAL,CAAU,CAAV,EAAaK,yBAAcC,MAA3B,CADvB;;AAAA;AACQC,gBAAAA,MADR;AAGQC,gBAAAA,KAHR,GAGgBD,MAAM,CAACE,QAAP,EAHhB;AAAA,8BAIUD,KAJV;AAAA,gDAKSH,wBALT,uBAOSK,kCAPT;AAAA;;AAAA;AAAA;;AAAA;AAAA,sBAQY,IAAIC,KAAJ,CAAU,sCAAV,CARZ;;AAAA;AAAA,sBAUY,IAAIA,KAAJ,uCAAyCH,KAAzC,OAVZ;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;oFAcA,kBACEI,MADF,EAEEC,QAFF,EAGEC,UAHF;AAAA;;AAAA;AAAA;AAAA;AAAA;AAKQP,gBAAAA,MALR,GAKgC;AAC5BQ,kBAAAA,QAAQ,EAAEC,MAAM,CAACH,QAAQ,CAACI,QAAV,CADY;AAE5BC,kBAAAA,UAAU,EAAE;AAFgB,iBALhC;AAAA,uDASyBL,QAAQ,CAACM,OATlC;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AASaC,gBAAAA,QATb;AAUUC,gBAAAA,WAVV,GAUwBD,QAAQ,CAACE,SAVjC;AAWUC,gBAAAA,MAXV,GAWmBF,WAXnB,aAWmBA,WAXnB,uBAWmBA,WAAW,CAAEG,cAXhC;;AAAA,sBAYQV,UAAU,CAACR,MAAX,GAAoB,CAApB,IAAyB,6BAAaQ,UAAb,EAAyBS,MAAzB,IAAoC,CAZrE;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA,uBAe8C,KAAKE,eAAL,CAAqBb,MAArB,EAA6BQ,QAA7B,CAf9C;;AAAA;AAeIb,gBAAAA,MAAM,CAACW,UAAP,CAAkBK,MAAM,CAAEG,IAAR,EAAlB,CAfJ;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;;AAAA;;AAAA;;AAAA;AAAA,kDAiBSnB,MAjBT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;uFAyBA,kBAAsBK,MAAtB,EAA6CQ,QAA7C;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA,sBACMA,QAAQ,CAACO,SAAT,KAAuBC,SAAvB,IAAoCR,QAAQ,CAACO,SAAT,KAAuB,IADjE;AAAA;AAAA;AAAA;;AAAA,sBAEU,IAAIhB,KAAJ,CAAU,uCAAV,CAFV;;AAAA;AAKQkB,gBAAAA,KALR,GAKgBjB,MAAM,CAACkB,SAAP,wBAAiBV,QAAQ,CAACE,SAA1B,wDAAiB,oBAAoBE,cAArC,CALhB;AAMQO,gBAAAA,IANR,GAM8B,8BAAcC,mBAAd,0BAAoBZ,QAAQ,CAACE,SAA7B,yDAAoB,qBAAoBS,IAAxC,CAN9B;;AAAA,sBAQMA,IAAI,KAAKF,KAAK,CAACI,aARrB;AAAA;AAAA;AAAA;;AAAA,sBASU,IAAItB,KAAJ,2CAA6CoB,IAA7C,EATV;;AAAA;AAYQG,gBAAAA,WAZR,GAY0C,8BACtCC,+BADsC,0BAEtCf,QAAQ,CAACE,SAF6B,yDAEtC,qBAAoBc,KAFkB,CAZ1C;AAiBQC,gBAAAA,WAjBR,GAiBsBrB,MAAM,yBAACI,QAAQ,CAACE,SAAV,yDAAC,qBAAoBgB,gBAArB,CAjB5B;AAkBMC,gBAAAA,SAlBN,GAkBkBvB,MAAM,yBAACI,QAAQ,CAACE,SAAV,yDAAC,qBAAoBkB,qBAArB,CAlBxB;;AAoBE,oBAAI,CAACpB,QAAQ,CAACO,SAAd,EAAyB;AACvBY,kBAAAA,SAAS,GAAGE,IAAI,CAACC,GAAL,CACV,KAAKxC,QAAL,GAAgBmC,WADN,EAEVrB,MAAM,yBAACI,QAAQ,CAACE,SAAV,yDAAC,qBAAoBkB,qBAArB,CAFI,CAAZ;AAID;;AAEKrC,gBAAAA,OA3BR,GA2BkC;AAC9B4B,kBAAAA,IAAI,EAAJA,IAD8B;AAE9BY,kBAAAA,SAAS,EAAEd,KAAK,CAACc,SAFa;AAG9BC,kBAAAA,SAAS,EAAEf,KAAK,CAACe,SAHa;AAI9BV,kBAAAA,WAAW,EAAXA,WAJ8B;AAK9BW,kBAAAA,MAAM,EAAEhB,KALsB;AAM9BiB,kBAAAA,SAAS,0BAAE1B,QAAQ,CAACE,SAAX,yDAAE,qBAAoByB,UAND;AAO9BC,kBAAAA,UAAU,EAAE;AAPkB,iBA3BlC;AAuCQC,gBAAAA,oBAvCR,GAuC+B7B,QAvC/B,aAuC+BA,QAvC/B,+CAuC+BA,QAAQ,CAAEE,SAvCzC,yDAuC+B,qBAAqB4B,sBAvCpD;;AAAA,qBAyCMD,oBAzCN;AAAA;AAAA;AAAA;;AA0CUE,gBAAAA,gBA1CV,GA0C6BnC,MAAM,CAACiC,oBAAD,CA1CnC;AAAA;AAAA,uBA4CuB,KAAKG,aAAL,CAAmBD,gBAAnB,EAAqChD,OAArC,EAA8CkC,WAA9C,CA5CvB;;AAAA;AA4CIW,gBAAAA,UA5CJ;;AAAA;AA+CEA,gBAAAA,UAAU,GAAG,uBAAA7C,OAAO,CAAC6C,UAAR,oEAAoB1C,MAApB,GAA6BH,OAAO,CAAC6C,UAArC,GAAkDA,UAA/D;AA/CF;AAAA,uBAgDyB,KAAKhD,IAAL,CAAUqC,WAAV,EAAuBE,SAAvB,CAhDzB;;AAAA;AAgDQc,gBAAAA,QAhDR;AAAA;AAAA,uBAiDe,+BAAgBA,QAAhB,kCAA8BlD,OAA9B;AAAuC6C,kBAAAA,UAAU,EAAVA;AAAvC,mBAjDf;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;qFA2DA,kBACEC,oBADF,EAEE9C,OAFF,EAGEkC,WAHF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sBAKMY,oBAAoB,KAAK,CAL/B;AAAA;AAAA;AAAA;;AAAA,kDAaW,EAbX;;AAAA;AAgBQK,gBAAAA,cAhBR,GAgByBb,IAAI,CAACC,GAAL,CACrB,KAAKxC,QAAL,GAAgB+C,oBADK,EAErB,KAAK7C,qBAFgB,CAhBzB;AAAA;AAAA,uBAoByB,KAAKJ,IAAL,CAAUiD,oBAAV,EAAgCK,cAAhC,CApBzB;;AAAA;AAoBQD,gBAAAA,QApBR;AAsBQE,gBAAAA,MAtBR,GAsBiB;AAAChD,kBAAAA,MAAM,EAAE8C,QAAT;AAAmBG,kBAAAA,MAAM,EAAE,CAA3B;AAA8BC,kBAAAA,IAAI,EAAEJ,QAAQ,CAAC/C;AAA7C,iBAtBjB;AAAA;AAAA,uBAuB4B,0BAAWiD,MAAX,EAAmBpD,OAAnB,CAvB5B;;AAAA;AAuBQuD,gBAAAA,WAvBR;AAAA,kDAyBSA,WAAW,CAACV,UAzBrB;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;kFA4BA;AAAA;;AAAA;AAAA;AAAA;AAAA;AACQW,gBAAAA,UADR,GACqBtD,yBAAcC,MAAd,GAAuB,CAD5C;AAAA;AAAA,uBAE2B,KAAKN,IAAL,CAAU,KAAKE,QAAL,GAAgByD,UAA1B,EAAsCA,UAAtC,CAF3B;;AAAA;AAEQC,gBAAAA,UAFR;AAIQpD,gBAAAA,KAJR,GAIgBoD,UAAU,CAACC,KAAX,CAAiB,CAAjB,EAAoBpD,QAApB,EAJhB;;AAAA,sBAKMD,KAAK,KAAKH,wBALhB;AAAA;AAAA;AAAA;;AAAA,sBAMU,IAAIM,KAAJ,6CAA8CH,KAA9C,OANV;;AAAA;AASQsD,gBAAAA,YATR,GASuBF,UAAU,CAACG,YAAX,CAAwB,CAAxB,CATvB;AAUQC,gBAAAA,cAVR,GAUyB,KAAK9D,QAAL,GAAgB4D,YAAhB,GAA+BH,UAVxD;;AAAA,sBAWMK,cAAc,GAAG3D,yBAAcC,MAXrC;AAAA;AAAA;AAAA;;AAAA,sBAYU,IAAIK,KAAJ,iCAAmCqD,cAAnC,EAZV;;AAAA;AAAA;AAAA,uBAe4B,KAAKhE,IAAL,CAAUgE,cAAV,EAA0BF,YAA1B,CAf5B;;AAAA;AAeQG,gBAAAA,WAfR;AAAA,sCAkBqB,mCAAmBA,WAAnB,CAlBrB,EAkBSC,QAlBT,uBAkBSA,QAlBT;AAAA,kDAmBSA,QAnBT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;kFAjJA,kBAAwB3D,MAAxB;AAAA;AAAA;AAAA;AAAA;AAAA;AACQ4D,gBAAAA,MADR,GACiB,SAATA,MAAS,CAACC,QAAD,EAAmB9D,MAAnB;AAAA,yBACb+D,OAAO,CAACC,OAAR,CAAgB/D,MAAM,CAACsD,KAAP,CAAaO,QAAb,EAAuBA,QAAQ,GAAG9D,MAAlC,CAAhB,CADa;AAAA,iBADjB;;AAGQiE,gBAAAA,OAHR,GAGkB,SAAVA,OAAU;AAAA,yBAAMF,OAAO,CAACC,OAAR,EAAN;AAAA,iBAHlB;;AAAA,kDAIS,IAAIvE,qBAAJ,CAA0BoE,MAA1B,EAAkCI,OAAlC,EAA2ChE,MAAM,CAACD,MAAlD,CAJT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetSchema} from '../schema/schema';\nimport {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';\nimport {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';\nimport {\n ParquetBuffer,\n ParquetCompression,\n ParquetData,\n PrimitiveType,\n ParquetOptions\n} from '../schema/declare';\nimport {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';\nimport {decodeDataPages, decodePage} from './decoders';\n\nconst DEFAULT_DICTIONARY_SIZE = 1e6;\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n public defaultDictionarySize: number;\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number,\n options?: any\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n this.defaultDictionarySize = options?.defaultDictionarySize || DEFAULT_DICTIONARY_SIZE;\n }\n\n async readHeader(): Promise<void> {\n const buffer = await this.read(0, PARQUET_MAGIC.length);\n\n const magic = buffer.toString();\n switch (magic) {\n case PARQUET_MAGIC:\n break;\n case PARQUET_MAGIC_ENCRYPTED:\n throw new Error('Encrypted parquet file not supported');\n default:\n throw new Error(`Invalid parquet file (magic=${magic})`);\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n /**\n * Do reading of parquet file's column chunk\n * @param schema\n * @param colChunk\n */\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n\n if (type !== field.primitiveType) {\n throw new Error(`chunk type not matching schema: ${type}`);\n }\n\n const compression: ParquetCompression = getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n\n if (!colChunk.file_path) {\n pagesSize = Math.min(\n this.fileSize - pagesOffset,\n Number(colChunk.meta_data?.total_compressed_size)\n );\n }\n\n const options: ParquetOptions = {\n type,\n rLevelMax: field.rLevelMax,\n dLevelMax: field.dLevelMax,\n compression,\n column: field,\n numValues: colChunk.meta_data?.num_values,\n dictionary: []\n };\n\n let dictionary;\n\n const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;\n\n if (dictionaryPageOffset) {\n const dictionaryOffset = Number(dictionaryPageOffset);\n // Getting dictionary from column chunk to iterate all over indexes to get dataPage values.\n dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);\n }\n\n dictionary = options.dictionary?.length ? options.dictionary : dictionary;\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n return await decodeDataPages(pagesBuf, {...options, dictionary});\n }\n\n /**\n * Getting dictionary for allows to flatten values by indices.\n * @param dictionaryPageOffset\n * @param options\n * @param pagesOffset\n * @returns\n */\n async getDictionary(\n dictionaryPageOffset: number,\n options: ParquetOptions,\n pagesOffset: number\n ): Promise<string[]> {\n if (dictionaryPageOffset === 0) {\n // dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);\n // pagesBuf = await this.read(pagesOffset, dictionarySize);\n\n // In this case we are working with parquet-mr files format. Problem is described below:\n // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding\n // We need to get dictionary page from column chunk if it exists.\n // Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.\n return [];\n }\n\n const dictionarySize = Math.min(\n this.fileSize - dictionaryPageOffset,\n this.defaultDictionarySize\n );\n const pagesBuf = await this.read(dictionaryPageOffset, dictionarySize);\n\n const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};\n const decodedPage = await decodePage(cursor, options);\n\n return decodedPage.dictionary!;\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n const magic = trailerBuf.slice(4).toString();\n if (magic !== PARQUET_MAGIC) {\n throw new Error(`Not a valid parquet file (magic=\"${magic})`);\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error(`Invalid metadata size ${metadataOffset}`);\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n"],"file":"parquet-envelope-reader.js"}
|