@loaders.gl/parquet 4.0.0-alpha.8 → 4.0.0-beta.1
This diff reflects the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
- package/dist/dist.min.js +21 -32
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/buffer-polyfill/buffer-polyfill.browser.js +12 -0
- package/dist/es5/buffer-polyfill/buffer-polyfill.browser.js.map +1 -0
- package/dist/es5/buffer-polyfill/buffer-polyfill.node.js +16 -0
- package/dist/es5/buffer-polyfill/buffer-polyfill.node.js.map +1 -0
- package/dist/es5/buffer-polyfill/buffer.js +1665 -0
- package/dist/es5/buffer-polyfill/buffer.js.map +1 -0
- package/dist/es5/buffer-polyfill/index.js +27 -0
- package/dist/es5/buffer-polyfill/index.js.map +1 -0
- package/dist/es5/buffer-polyfill/install-buffer-polyfill.js +10 -0
- package/dist/es5/buffer-polyfill/install-buffer-polyfill.js.map +1 -0
- package/dist/es5/index.js +25 -28
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-column.js +53 -0
- package/dist/es5/lib/geo/decode-geo-column.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +52 -16
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/geo/{geoparquet-schema.js → geoparquet-metadata-schema.js} +4 -4
- package/dist/es5/lib/geo/geoparquet-metadata-schema.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-metadata-schema.json +60 -0
- package/dist/es5/lib/parsers/get-parquet-schema.js +41 -0
- package/dist/es5/lib/parsers/get-parquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +71 -72
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +95 -62
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/parquet-loader.js +14 -14
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +8 -11
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +6 -7
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +2 -3
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +75 -0
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +53 -36
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +15 -13
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +6 -7
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/buffer-polyfill/buffer-polyfill.browser.js +6 -0
- package/dist/esm/buffer-polyfill/buffer-polyfill.browser.js.map +1 -0
- package/dist/esm/buffer-polyfill/buffer-polyfill.node.js +10 -0
- package/dist/esm/buffer-polyfill/buffer-polyfill.node.js.map +1 -0
- package/dist/esm/buffer-polyfill/buffer.js +1489 -0
- package/dist/esm/buffer-polyfill/buffer.js.map +1 -0
- package/dist/esm/buffer-polyfill/index.js +4 -0
- package/dist/esm/buffer-polyfill/index.js.map +1 -0
- package/dist/esm/buffer-polyfill/install-buffer-polyfill.js +3 -0
- package/dist/esm/buffer-polyfill/install-buffer-polyfill.js.map +1 -0
- package/dist/esm/index.js +4 -10
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/geo/decode-geo-column.js +47 -0
- package/dist/esm/lib/geo/decode-geo-column.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +32 -5
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/esm/lib/geo/{geoparquet-schema.js → geoparquet-metadata-schema.js} +2 -2
- package/dist/esm/lib/geo/geoparquet-metadata-schema.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-metadata-schema.json +60 -0
- package/dist/esm/lib/parsers/get-parquet-schema.js +11 -0
- package/dist/esm/lib/parsers/get-parquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +10 -11
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +32 -7
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/esm/parquet-loader.js +12 -12
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +7 -9
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +6 -7
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +2 -3
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/esm/parquetjs/parquet-thrift/index.js +2 -0
- package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js +48 -32
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +14 -12
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/utils/read-utils.js +1 -1
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/parquet-worker.js +23 -34
- package/dist/parquet-worker.js.map +3 -3
- package/dist/src/buffer-polyfill/buffer-polyfill.browser.d.ts +4 -0
- package/dist/src/buffer-polyfill/buffer-polyfill.browser.d.ts.map +1 -0
- package/dist/src/buffer-polyfill/buffer-polyfill.node.d.ts +4 -0
- package/dist/src/buffer-polyfill/buffer-polyfill.node.d.ts.map +1 -0
- package/dist/src/buffer-polyfill/buffer.d.ts +222 -0
- package/dist/src/buffer-polyfill/buffer.d.ts.map +1 -0
- package/dist/src/buffer-polyfill/index.d.ts +4 -0
- package/dist/src/buffer-polyfill/index.d.ts.map +1 -0
- package/dist/src/buffer-polyfill/install-buffer-polyfill.d.ts +30 -0
- package/dist/src/buffer-polyfill/install-buffer-polyfill.d.ts.map +1 -0
- package/dist/src/bundle.d.ts.map +1 -0
- package/dist/src/constants.d.ts.map +1 -0
- package/dist/{index.d.ts → src/index.d.ts} +6 -10
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/src/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/src/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/src/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/src/lib/geo/decode-geo-column.d.ts +4 -0
- package/dist/src/lib/geo/decode-geo-column.d.ts.map +1 -0
- package/dist/src/lib/geo/decode-geo-metadata.d.ts +44 -0
- package/dist/src/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/{lib/geo/geoparquet-schema.d.ts → src/lib/geo/geoparquet-metadata-schema.d.ts} +2 -3
- package/dist/src/lib/geo/geoparquet-metadata-schema.d.ts.map +1 -0
- package/dist/src/lib/parsers/get-parquet-schema.d.ts +4 -0
- package/dist/src/lib/parsers/get-parquet-schema.d.ts.map +1 -0
- package/dist/src/lib/parsers/parse-parquet-to-columns.d.ts +6 -0
- package/dist/src/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/src/lib/parsers/parse-parquet-to-rows.d.ts +6 -0
- package/dist/src/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/src/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/src/lib/wasm/load-wasm/index.d.ts.map +1 -0
- package/dist/src/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
- package/dist/src/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
- package/dist/src/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/src/parquet-loader.d.ts +22 -0
- package/dist/src/parquet-loader.d.ts.map +1 -0
- package/dist/src/parquet-wasm-loader.d.ts +12 -0
- package/dist/src/parquet-wasm-loader.d.ts.map +1 -0
- package/dist/src/parquet-wasm-writer.d.ts +6 -0
- package/dist/src/parquet-wasm-writer.d.ts.map +1 -0
- package/dist/src/parquet-writer.d.ts.map +1 -0
- package/dist/src/parquetjs/codecs/declare.d.ts.map +1 -0
- package/dist/src/parquetjs/codecs/dictionary.d.ts.map +1 -0
- package/dist/src/parquetjs/codecs/index.d.ts.map +1 -0
- package/dist/src/parquetjs/codecs/plain.d.ts.map +1 -0
- package/dist/src/parquetjs/codecs/rle.d.ts.map +1 -0
- package/dist/src/parquetjs/compression.d.ts.map +1 -0
- package/dist/src/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/BsonType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ConvertedType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/DateType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/DecimalType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/Encoding.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/EnumType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/IntType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/JsonType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/KeyValue.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/ListType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/MapType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/NullType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/PageLocation.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/PageType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/SortingColumn.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/Statistics.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/StringType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/Type.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +1 -0
- package/dist/src/parquetjs/parquet-thrift/UUIDType.d.ts.map +1 -0
- package/dist/{parquetjs → src/parquetjs}/parquet-thrift/index.d.ts +2 -0
- package/dist/src/parquetjs/parquet-thrift/index.d.ts.map +1 -0
- package/dist/{parquetjs → src/parquetjs}/parser/decoders.d.ts +4 -4
- package/dist/src/parquetjs/parser/decoders.d.ts.map +1 -0
- package/dist/{parquetjs → src/parquetjs}/parser/parquet-reader.d.ts +5 -3
- package/dist/src/parquetjs/parser/parquet-reader.d.ts.map +1 -0
- package/dist/{parquetjs → src/parquetjs}/schema/declare.d.ts +3 -1
- package/dist/src/parquetjs/schema/declare.d.ts.map +1 -0
- package/dist/src/parquetjs/schema/schema.d.ts.map +1 -0
- package/dist/src/parquetjs/schema/shred.d.ts.map +1 -0
- package/dist/src/parquetjs/schema/types.d.ts.map +1 -0
- package/dist/src/parquetjs/utils/file-utils.d.ts.map +1 -0
- package/dist/src/parquetjs/utils/read-utils.d.ts.map +1 -0
- package/dist/src/workers/parquet-worker.d.ts.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +21 -9
- package/src/buffer-polyfill/buffer-polyfill.browser.ts +11 -0
- package/src/buffer-polyfill/buffer-polyfill.node.ts +15 -0
- package/src/buffer-polyfill/buffer.ts +2207 -0
- package/src/buffer-polyfill/index.ts +8 -0
- package/src/buffer-polyfill/install-buffer-polyfill.ts +3 -0
- package/src/index.ts +31 -17
- package/src/lib/geo/decode-geo-column.ts +54 -0
- package/src/lib/geo/decode-geo-metadata.ts +81 -15
- package/src/lib/geo/geoparquet-metadata-schema.json +60 -0
- package/src/lib/geo/{geoparquet-schema.ts → geoparquet-metadata-schema.ts} +1 -1
- package/src/lib/parsers/get-parquet-schema.ts +14 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +15 -13
- package/src/lib/parsers/parse-parquet-to-rows.ts +39 -11
- package/src/parquet-loader.ts +29 -16
- package/src/parquet-wasm-loader.ts +13 -13
- package/src/parquet-wasm-writer.ts +10 -8
- package/src/parquet-writer.ts +1 -3
- package/src/parquetjs/codecs/plain.ts +1 -0
- package/src/parquetjs/codecs/rle.ts +2 -0
- package/src/parquetjs/encoder/parquet-encoder.ts +1 -0
- package/src/parquetjs/parquet-thrift/index.ts +4 -0
- package/src/parquetjs/parser/decoders.ts +63 -41
- package/src/parquetjs/parser/parquet-reader.ts +19 -14
- package/src/parquetjs/schema/declare.ts +3 -1
- package/src/parquetjs/utils/read-utils.ts +2 -2
- package/dist/bundle.d.ts.map +0 -1
- package/dist/bundle.js +0 -5
- package/dist/constants.d.ts.map +0 -1
- package/dist/constants.js +0 -18
- package/dist/es5/lib/geo/geoparquet-schema.js.map +0 -1
- package/dist/esm/lib/geo/geoparquet-schema.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -58
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +0 -1
- package/dist/lib/arrow/convert-columns-to-row-group.js +0 -1
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +0 -1
- package/dist/lib/arrow/convert-row-group-to-columns.js +0 -12
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +0 -1
- package/dist/lib/arrow/convert-schema-from-parquet.js +0 -86
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +0 -1
- package/dist/lib/arrow/convert-schema-to-parquet.js +0 -71
- package/dist/lib/geo/decode-geo-metadata.d.ts +0 -31
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +0 -1
- package/dist/lib/geo/decode-geo-metadata.js +0 -77
- package/dist/lib/geo/geoparquet-schema.d.ts.map +0 -1
- package/dist/lib/geo/geoparquet-schema.js +0 -69
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +0 -5
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +0 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +0 -46
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +0 -5
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +0 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js +0 -37
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/encode-parquet-wasm.js +0 -30
- package/dist/lib/wasm/load-wasm/index.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm/index.js +0 -5
- package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm/load-wasm-browser.js +0 -38
- package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm/load-wasm-node.js +0 -31
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +0 -27
- package/dist/parquet-loader.d.ts +0 -14
- package/dist/parquet-loader.d.ts.map +0 -1
- package/dist/parquet-loader.js +0 -41
- package/dist/parquet-wasm-loader.d.ts +0 -23
- package/dist/parquet-wasm-loader.d.ts.map +0 -1
- package/dist/parquet-wasm-loader.js +0 -27
- package/dist/parquet-wasm-writer.d.ts +0 -3
- package/dist/parquet-wasm-writer.d.ts.map +0 -1
- package/dist/parquet-wasm-writer.js +0 -23
- package/dist/parquet-writer.d.ts.map +0 -1
- package/dist/parquet-writer.js +0 -22
- package/dist/parquetjs/codecs/declare.d.ts.map +0 -1
- package/dist/parquetjs/codecs/declare.js +0 -2
- package/dist/parquetjs/codecs/dictionary.d.ts.map +0 -1
- package/dist/parquetjs/codecs/dictionary.js +0 -14
- package/dist/parquetjs/codecs/index.d.ts.map +0 -1
- package/dist/parquetjs/codecs/index.js +0 -55
- package/dist/parquetjs/codecs/plain.d.ts.map +0 -1
- package/dist/parquetjs/codecs/plain.js +0 -211
- package/dist/parquetjs/codecs/rle.d.ts.map +0 -1
- package/dist/parquetjs/codecs/rle.js +0 -145
- package/dist/parquetjs/compression.d.ts.map +0 -1
- package/dist/parquetjs/compression.js +0 -183
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +0 -1
- package/dist/parquetjs/encoder/parquet-encoder.js +0 -484
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
- package/dist/parquetjs/parquet-thrift/BsonType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/BsonType.js +0 -62
- package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +0 -211
- package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +0 -217
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +0 -402
- package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +0 -108
- package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
- package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +0 -34
- package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +0 -170
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -230
- package/dist/parquetjs/parquet-thrift/DateType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/DateType.js +0 -62
- package/dist/parquetjs/parquet-thrift/DecimalType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/DecimalType.js +0 -109
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -126
- package/dist/parquetjs/parquet-thrift/Encoding.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/Encoding.js +0 -20
- package/dist/parquetjs/parquet-thrift/EnumType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/EnumType.js +0 -62
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
- package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +0 -260
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +0 -62
- package/dist/parquetjs/parquet-thrift/IntType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/IntType.js +0 -109
- package/dist/parquetjs/parquet-thrift/JsonType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/JsonType.js +0 -62
- package/dist/parquetjs/parquet-thrift/KeyValue.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/KeyValue.js +0 -106
- package/dist/parquetjs/parquet-thrift/ListType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/ListType.js +0 -62
- package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/LogicalType.js +0 -384
- package/dist/parquetjs/parquet-thrift/MapType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/MapType.js +0 -62
- package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +0 -62
- package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +0 -62
- package/dist/parquetjs/parquet-thrift/NullType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/NullType.js +0 -62
- package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +0 -101
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +0 -131
- package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageHeader.js +0 -220
- package/dist/parquetjs/parquet-thrift/PageLocation.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageLocation.js +0 -145
- package/dist/parquetjs/parquet-thrift/PageType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/PageType.js +0 -16
- package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/RowGroup.js +0 -186
- package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +0 -243
- package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +0 -131
- package/dist/parquetjs/parquet-thrift/Statistics.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/Statistics.js +0 -180
- package/dist/parquetjs/parquet-thrift/StringType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/StringType.js +0 -62
- package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeType.js +0 -110
- package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +0 -131
- package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/TimestampType.js +0 -110
- package/dist/parquetjs/parquet-thrift/Type.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/Type.js +0 -20
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -62
- package/dist/parquetjs/parquet-thrift/UUIDType.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/UUIDType.js +0 -62
- package/dist/parquetjs/parquet-thrift/index.d.ts.map +0 -1
- package/dist/parquetjs/parquet-thrift/index.js +0 -65
- package/dist/parquetjs/parser/decoders.d.ts.map +0 -1
- package/dist/parquetjs/parser/decoders.js +0 -318
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-reader.js +0 -200
- package/dist/parquetjs/schema/declare.d.ts.map +0 -1
- package/dist/parquetjs/schema/declare.js +0 -12
- package/dist/parquetjs/schema/schema.d.ts.map +0 -1
- package/dist/parquetjs/schema/schema.js +0 -162
- package/dist/parquetjs/schema/shred.d.ts.map +0 -1
- package/dist/parquetjs/schema/shred.js +0 -355
- package/dist/parquetjs/schema/types.d.ts.map +0 -1
- package/dist/parquetjs/schema/types.js +0 -416
- package/dist/parquetjs/utils/file-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/file-utils.js +0 -43
- package/dist/parquetjs/utils/read-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/read-utils.js +0 -109
- package/dist/workers/parquet-worker.d.ts.map +0 -1
- package/dist/workers/parquet-worker.js +0 -5
- /package/dist/{bundle.d.ts → src/bundle.d.ts} +0 -0
- /package/dist/{constants.d.ts → src/constants.d.ts} +0 -0
- /package/dist/{lib → src/lib}/arrow/convert-columns-to-row-group.d.ts +0 -0
- /package/dist/{lib → src/lib}/arrow/convert-row-group-to-columns.d.ts +0 -0
- /package/dist/{lib → src/lib}/arrow/convert-schema-from-parquet.d.ts +0 -0
- /package/dist/{lib → src/lib}/arrow/convert-schema-to-parquet.d.ts +0 -0
- /package/dist/{lib → src/lib}/wasm/encode-parquet-wasm.d.ts +0 -0
- /package/dist/{lib → src/lib}/wasm/load-wasm/index.d.ts +0 -0
- /package/dist/{lib → src/lib}/wasm/load-wasm/load-wasm-browser.d.ts +0 -0
- /package/dist/{lib → src/lib}/wasm/load-wasm/load-wasm-node.d.ts +0 -0
- /package/dist/{lib → src/lib}/wasm/parse-parquet-wasm.d.ts +0 -0
- /package/dist/{parquet-writer.d.ts → src/parquet-writer.d.ts} +0 -0
- /package/dist/{parquetjs → src/parquetjs}/codecs/declare.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/codecs/dictionary.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/codecs/index.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/codecs/plain.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/codecs/rle.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/compression.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/encoder/parquet-encoder.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/BoundaryOrder.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/BsonType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ColumnChunk.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ColumnIndex.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ColumnMetaData.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ColumnOrder.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/CompressionCodec.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ConvertedType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/DataPageHeader.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/DataPageHeaderV2.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/DateType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/DecimalType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/DictionaryPageHeader.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/Encoding.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/EnumType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/FieldRepetitionType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/FileMetaData.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/IndexPageHeader.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/IntType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/JsonType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/KeyValue.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/ListType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/LogicalType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/MapType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/MicroSeconds.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/MilliSeconds.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/NullType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/OffsetIndex.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/PageEncodingStats.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/PageHeader.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/PageLocation.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/PageType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/RowGroup.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/SchemaElement.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/SortingColumn.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/Statistics.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/StringType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/TimeType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/TimeUnit.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/TimestampType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/Type.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/TypeDefinedOrder.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/parquet-thrift/UUIDType.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/schema/schema.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/schema/shred.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/schema/types.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/utils/file-utils.d.ts +0 -0
- /package/dist/{parquetjs → src/parquetjs}/utils/read-utils.d.ts +0 -0
- /package/dist/{workers → src/workers}/parquet-worker.d.ts +0 -0
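The new lib/geo modules listed above (decode-geo-column, decode-geo-metadata, and a geoparquet-metadata-schema.json presumably used for validation) point to expanded GeoParquet support. Per the public GeoParquet specification, geometry metadata travels as a JSON string under the `geo` key of the Parquet footer's key/value metadata. The sketch below illustrates that decoding step under that assumption only; the type and function names are illustrative and are not this package's actual exports.

```typescript
// Minimal sketch of GeoParquet metadata decoding, based on the public GeoParquet
// spec rather than this package's internals. Names here are illustrative assumptions.

/** Per-column geometry metadata as defined by the GeoParquet spec */
type GeoColumnMetadata = {
  encoding: string; // e.g. 'WKB'
  geometry_types: string[]; // e.g. ['Point', 'MultiPolygon']
  crs?: object | string | null;
  bbox?: number[];
};

/** Top-level "geo" metadata object */
type GeoParquetMetadata = {
  version: string; // e.g. '1.0.0'
  primary_column: string;
  columns: Record<string, GeoColumnMetadata>;
};

/**
 * Extract GeoParquet metadata from a Parquet file's key/value metadata map
 * (the footer's FileMetaData.key_value_metadata flattened into a record).
 * Returns null when the file carries no "geo" entry or the entry is malformed JSON.
 */
function decodeGeoParquetMetadata(
  keyValueMetadata: Record<string, string>
): GeoParquetMetadata | null {
  const json = keyValueMetadata['geo'];
  if (!json) {
    return null;
  }
  try {
    return JSON.parse(json) as GeoParquetMetadata;
  } catch {
    return null;
  }
}
```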
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-encoder.js","names":["PARQUET_CODECS","Compression","Shred","ColumnChunk","ColumnMetaData","CompressionCodec","ConvertedType","DataPageHeader","DataPageHeaderV2","Encoding","FieldRepetitionType","FileMetaData","KeyValue","PageHeader","PageType","RowGroup","SchemaElement","Type","osopen","oswrite","osclose","getBitWidth","serializeThrift","Int64","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetEncoder","openFile","schema","path","opts","outputStream","openStream","arguments","length","undefined","envelopeWriter","ParquetEnvelopeWriter","constructor","_defineProperty","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","err","close","appendRow","row","Error","shredRecord","rowCount","callback","writeFooter","setMetadata","key","value","String","setRowGroupSize","cnt","setPageSize","writeFn","bind","closeFn","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","writeSection","buf","Buffer","from","writeRowGroup","records","rgroup","encodeRowGroup","baseOffset","push","metadata","body","encodeFooter","encodeValues","type","encoding","values","concat","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","compressedBuf","deflate","compression","header","DATA_PAGE","data_page_header","num_values","count","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","pageBuf","total_uncompressed_size","total_compressed_size","result","path_in_schema","data_page_offset","encodings","codec","metadataOffset","columns","total_byte_size","field","fieldList","isNested","cchunkData","cchunk","file_offset","meta_data","Number","version","created_by","row_groups","key_value_metadata","_metadata$key_value_m","_metadata$key_value_m2","_metadata$key_value_m3","kv","call","schemaRoot","name","num_children","Object","keys","fields","relt","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"sources":["../../../../src/parquetjs/encoder/parquet-encoder.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {stream} from '@loaders.gl/loader-utils';\nimport {ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport * as Compression from '../compression';\nimport {\n ParquetRowGroup,\n ParquetCodec,\n ParquetColumnChunk,\n ParquetField,\n PrimitiveType,\n ParquetRow\n} from '../schema/declare';\nimport {ParquetSchema} from '../schema/schema';\nimport * as Shred from '../schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {osopen, oswrite, osclose} from '../utils/file-utils';\nimport {getBitWidth, serializeThrift} from '../utils/read-utils';\nimport Int64 from 
'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetEncoderOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. The ParquetEncoder will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetEncoder<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetEncoderOptions\n ): Promise<ParquetEncoder<T>> {\n const outputStream = await osopen(path, opts);\n return ParquetEncoder.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: stream.Writable,\n opts: ParquetEncoderOptions = {}\n ): Promise<ParquetEncoder<T>> {\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n return new ParquetEncoder(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetRowGroup;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetEncoderOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n this.writeHeader();\n }\n\n async writeHeader(): Promise<void> {\n // TODO - better not mess with promises in the constructor\n try {\n await this.envelopeWriter.writeHeader();\n } catch (err) {\n await this.envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T extends ParquetRow>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. 
You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: stream.Writable,\n opts: ParquetEncoderOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = oswrite.bind(undefined, outputStream);\n const closeFn = osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetEncoderOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. 
The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetRowGroup): Promise<void> {\n const rgroup = await encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return await this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\nexport class ParquetTransformer<T> extends stream.Transform {\n public writer: ParquetEncoder<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetEncoderOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetEncoder(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n */\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nasync function encodeDataPage(\n column: ParquetField,\n data: ParquetColumnChunk\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? 
(compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nasync function encodeDataPageV2(\n column: ParquetField,\n data: ParquetColumnChunk,\n rowCount: number\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nasync function encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetRowGroup,\n offset: number,\n opts: ParquetEncoderOptions\n): Promise<{\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n}> {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n const result = opts.useDataPageV2\n ? 
await encodeDataPageV2(column, data, buffer.rowCount)\n : await encodeDataPage(column, data);\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nasync function encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetRowGroup,\n opts: ParquetEncoderOptions\n): Promise<{\n body: Buffer;\n metadata: RowGroup;\n}> {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = await encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] 
as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"mappings":";AAGA,SAA6BA,cAAc,QAAO,WAAW;AAC7D,OAAO,KAAKC,WAAW,MAAM,gBAAgB;AAU7C,OAAO,KAAKC,KAAK,MAAM,iBAAiB;AACxC,SACEC,WAAW,EACXC,cAAc,EACdC,gBAAgB,EAChBC,aAAa,EACbC,cAAc,EACdC,gBAAgB,EAChBC,QAAQ,EACRC,mBAAmB,EACnBC,YAAY,EACZC,QAAQ,EACRC,UAAU,EACVC,QAAQ,EACRC,QAAQ,EACRC,aAAa,EACbC,IAAI,QACC,mBAAmB;AAC1B,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,qBAAqB;AAC5D,SAAQC,WAAW,EAAEC,eAAe,QAAO,qBAAqB;AAChE,OAAOC,KAAK,MAAM,YAAY;AAK9B,MAAMC,aAAa,GAAG,MAAM;AAK5B,MAAMC,eAAe,GAAG,CAAC;AAKzB,MAAMC,yBAAyB,GAAG,IAAI;AACtC,MAAMC,8BAA8B,GAAG,IAAI;AAK3C,MAAMC,kBAAkB,GAAG,OAAO;AAClC,MAAMC,sBAAsB,GAAG,KAAK;AAuBpC,OAAO,MAAMC,cAAc,CAAI;EAK7B,aAAaC,QAAQA,CACnBC,MAAqB,EACrBC,IAAY,EACZC,IAA4B,EACA;IAC5B,MAAMC,YAAY,GAAG,MAAMjB,MAAM,CAACe,IAAI,EAAEC,IAAI,CAAC;IAC7C,OAAOJ,cAAc,CAACM,UAAU,CAACJ,MAAM,EAAEG,YAAY,EAAED,IAAI,CAAC;EAC9D;EAMA,aAAaE,UAAUA,CACrBJ,MAAqB,EACrBG,YAA6B,EAED;IAAA,IAD5BD,IAA2B,GAAAG,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAEhC,MAAMG,cAAc,GAAG,MAAMC,qBAAqB,CAACL,UAAU,CAACJ,MAAM,EAAEG,YAAY,EAAED,IAAI,CAAC;IACzF,OAAO,IAAIJ,cAAc,CAACE,MAAM,EAAEQ,cAAc,EAAEN,IAAI,CAAC;EACzD;EAYAQ,WAAWA,CACTV,MAAqB,EACrBQ,cAAqC,EACrCN,IAA2B,EAC3B;IAAAS,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IACA,IAAI,CAACX,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACQ,cAAc,GAAGA,cAAc;IAEpC,IAAI,CAACI,SAAS,GAAG,CAAC,CAAC;IACnB,IAAI,CAACC,YAAY,GAAGX,IAAI,CAACW,YAAY,IAAIlB,8BAA8B;IACvE,IAAI,CAACmB,MAAM,GAAG,KAAK;IACnB,IAAI,CAACC,YAAY,GAAG,CAAC,CAAC;IAGtB,IAAI,CAACC,WAAW,CAAC,CAAC;EACpB;EAEA,MAAMA,WAAWA,CAAA,EAAkB;IAEjC,IAAI;MACF,MAAM,IAAI,CAACR,cAAc,CAACQ,WAAW,CAAC,CAAC;IACzC,CAAC,CAAC,OAAOC,GAAG,EAAE;MACZ,MAAM,IAAI,CAACT,cAAc,CAACU,KAAK,CAAC,CAAC;MACjC,MAAMD,GAAG;IACX;EACF;EAMA,MAAME,SAASA,CAAuBC,GAAM,EAAiB;IAC3D,IAAI,IAAI,CAACN,MAAM,EAAE;MACf,MAAM,IAAIO,KAAK,CAAC,mBAAmB,CAAC;IACtC;IACAnD,KAAK,CAACoD,WAAW,CAAC,IAAI,CAACtB,MAAM,EAAEoB,GAAG,EAAE,IAAI,CAACR,SAAS,CAAC;IACnD,IAAI,IAAI,CAACA,SAAS,CAACW,QAAQ,IAAI,IAAI,CAACV,YAAY,EAAE;MAEhD,IAAI,CAACD,SAAS,GAAG,CAAC,CAAC;IACrB;EACF;EAQA,MAAMM,KAAKA,CAACM,QAAqB,EAAiB;IAChD,IAAI,IAAI,CAACV,MAAM,EAAE;MACf,MAAM,IAAIO,KAAK,CAAC,mBAAmB,CAAC;IACtC;IAEA,IAAI,CAACP,MAAM,GAAG,IAAI;IAElB,IAAI,IAAI,CAACF,SAAS,CAACW,QAAQ,GAAG,CAAC,IAAI,IAAI,CAACX,SAAS,CAACW,QAAQ,IAAI,IAAI,CAACV,YAAY,EAAE;MAE/E,IAAI,CAACD,SAAS,GAAG,CAAC,CAAC;IACrB;IAEA,MAAM,IAAI,CAACJ,cAAc,CAACiB,WAAW,CAAC,IAAI,CAACV,YAAY,CAAC;IACxD,MAAM,IAAI,CAACP,cAAc,CAACU,KAAK,CAAC,CAAC;IAGjC,IAAIM,QAAQ,EAAE;MACZA,QAAQ,CAAC,CAAC;IACZ;EACF;EAKAE,WAAWA,CAACC,GAAW,EAAEC,KAAa,EAAQ;IAE5C,IAAI,CAACb,YAAY,CAACc,MAAM,CAACF,GAAG,CAAC,CAAC,GAAGE,MAAM,CAACD,KAAK,CAAC;EAChD;EAQAE,eAAeA,CAACC,GAAW,EAAQ;IACjC,IAAI,CAAClB,YAAY,GAAGkB,GAAG;EACzB;EAMAC,WAAWA,CAACD,GAAW,EAAQ;IAC7B,IAAI,CAACvB,cAAc,CAACwB,WAAW,CAACD,GAAG,CAAC;EACtC;AACF;AAQA,OAAO,MAAMtB,qBAAqB,CAAC;EAIjC,aAAaL,UAAUA,CACrBJ,MAAqB,EACrBG,YAA6B,EAC7BD,IAA2B,EACK;IAChC,MAAM+B,OAAO,GAAG9C,OAAO,CAAC+C,IAAI,CAAC3B,SAAS,EAAEJ,YAAY,CAAC;IACrD,MAAMgC,OAAO,GAAG/C,OAAO,CAAC8C,IAAI,CAAC3B,SAAS,EAAEJ,YAAY,CAAC;IACrD,OAAO,IAAIM,qBAAqB,CAACT,MAAM,EAAEiC,OAA
O,EAAEE,OAAO,EAAE,CAAC,EAAEjC,IAAI,CAAC;EACrE;EAWAQ,WAAWA,CACTV,MAAqB,EACrBiC,OAAuC,EACvCE,OAA4B,EAC5BC,UAAkB,EAClBlC,IAA2B,EAC3B;IAAAS,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IACA,IAAI,CAACX,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACqC,KAAK,GAAGJ,OAAO;IACpB,IAAI,CAACf,KAAK,GAAGiB,OAAO;IACpB,IAAI,CAACG,MAAM,GAAGF,UAAU;IACxB,IAAI,CAACb,QAAQ,GAAG,CAAC;IACjB,IAAI,CAACgB,SAAS,GAAG,EAAE;IACnB,IAAI,CAACC,QAAQ,GAAGtC,IAAI,CAACsC,QAAQ,IAAI9C,yBAAyB;IAC1D,IAAI,CAAC+C,aAAa,GAAG,eAAe,IAAIvC,IAAI,GAAGwC,OAAO,CAACxC,IAAI,CAACuC,aAAa,CAAC,GAAG,KAAK;EACpF;EAEAE,YAAYA,CAACC,GAAW,EAAiB;IACvC,IAAI,CAACN,MAAM,IAAIM,GAAG,CAACtC,MAAM;IACzB,OAAO,IAAI,CAAC+B,KAAK,CAACO,GAAG,CAAC;EACxB;EAKA5B,WAAWA,CAAA,EAAkB;IAC3B,OAAO,IAAI,CAAC2B,YAAY,CAACE,MAAM,CAACC,IAAI,CAACtD,aAAa,CAAC,CAAC;EACtD;EAMA,MAAMuD,aAAaA,CAACC,OAAwB,EAAiB;IAC3D,MAAMC,MAAM,GAAG,MAAMC,cAAc,CAAC,IAAI,CAAClD,MAAM,EAAEgD,OAAO,EAAE;MACxDG,UAAU,EAAE,IAAI,CAACb,MAAM;MACvBE,QAAQ,EAAE,IAAI,CAACA,QAAQ;MACvBC,aAAa,EAAE,IAAI,CAACA;IACtB,CAAC,CAAC;IAEF,IAAI,CAAClB,QAAQ,IAAIyB,OAAO,CAACzB,QAAQ;IACjC,IAAI,CAACgB,SAAS,CAACa,IAAI,CAACH,MAAM,CAACI,QAAQ,CAAC;IACpC,OAAO,MAAM,IAAI,CAACV,YAAY,CAACM,MAAM,CAACK,IAAI,CAAC;EAC7C;EAKA7B,WAAWA,CAACV,YAAoC,EAAiB;IAC/D,IAAI,CAACA,YAAY,EAAE;MAEjBA,YAAY,GAAG,CAAC,CAAC;IACnB;IAEA,OAAO,IAAI,CAAC4B,YAAY,CACtBY,YAAY,CAAC,IAAI,CAACvD,MAAM,EAAE,IAAI,CAACuB,QAAQ,EAAE,IAAI,CAACgB,SAAS,EAAExB,YAAY,CACvE,CAAC;EACH;EAMAiB,WAAWA,CAACD,GAAW,EAAQ;IAC7B,IAAI,CAACS,QAAQ,GAAGT,GAAG;EACrB;AACF;AA0CA,SAASyB,YAAYA,CACnBC,IAAmB,EACnBC,QAAsB,EACtBC,MAAa,EACbzD,IAAyB,EACzB;EACA,IAAI,EAAEwD,QAAQ,IAAI1F,cAAc,CAAC,EAAE;IACjC,MAAM,IAAIqD,KAAK,sBAAAuC,MAAA,CAAsBF,QAAQ,CAAE,CAAC;EAClD;EACA,OAAO1F,cAAc,CAAC0F,QAAQ,CAAC,CAACF,YAAY,CAACC,IAAI,EAAEE,MAAM,EAAEzD,IAAI,CAAC;AAClE;AAKA,eAAe2D,cAAcA,CAC3BC,MAAoB,EACpBC,IAAwB,EAKvB;EAED,IAAIC,UAAU,GAAGnB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACI,SAAS,GAAG,CAAC,EAAE;IACxBF,UAAU,GAAGR,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACI,OAAO,EAAE;MAClFC,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACI,SAAS;IAExC,CAAC,CAAC;EACJ;EAEA,IAAIG,UAAU,GAAGxB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IACxBD,UAAU,GAAGb,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACQ,OAAO,EAAE;MAClFH,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACQ,SAAS;IAExC,CAAC,CAAC;EACJ;EAGA,MAAME,SAAS,GAAGhB,YAAY,CAACM,MAAM,CAACW,aAAa,EAAGX,MAAM,CAACJ,QAAQ,EAAGK,IAAI,CAACJ,MAAM,EAAE;IACnFe,UAAU,EAAEZ,MAAM,CAACY,UAAU;IAC7BN,QAAQ,EAAEN,MAAM,CAACY;EACnB,CAAC,CAAC;EAEF,MAAMC,OAAO,GAAG9B,MAAM,CAACe,MAAM,CAAC,CAACI,UAAU,EAAEK,UAAU,EAAEG,SAAS,CAAC,CAAC;EAGlE,MAAMI,aAAa,GAAG,MAAM3G,WAAW,CAAC4G,OAAO,CAACf,MAAM,CAACgB,WAAW,EAAGH,OAAO,CAAC;EAG7E,MAAMI,MAAM,GAAG,IAAIlG,UAAU,CAAC;IAC5B4E,IAAI,EAAE3E,QAAQ,CAACkG,SAAS;IACxBC,gBAAgB,EAAE,IAAI1G,cAAc,CAAC;MACnC2G,UAAU,EAAEnB,IAAI,CAACoB,KAAK;MACtBzB,QAAQ,EAAEjF,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAS;MAC3C0B,yBAAyB,EAAE3G,QAAQ,CAACoB,sBAAsB,CAAC;MAC3DwF,yBAAyB,EAAE5G,QAAQ,CAACoB,sBAAsB;IAC5D,CAAC,CAAC;IACFyF,sBAAsB,EAAEX,OAAO,CAACrE,MAAM;IACtCiF,oBAAoB,EAAEX,aAAa,CAACtE;EACtC,CAAC,CAAC;EAGF,MAAMkF,SAAS,GAAGlG,eAAe,CAACyF,MAAM,CAAC;EACzC,MAAMU,IAAI,GAAG5C,MAAM,CAACe,MAAM,CAAC,CAAC4B,SAAS,EAAEZ,aAAa,CAAC,CAAC;EAEtD,OAAO;IAACG,MAAM;IAAEW,UAAU,EAAEF,SAAS,CAAClF,MAAM;IAAEmF;EAAI,CAAC;AACrD;AAKA,eAAeE,gBAAgBA,CAC7B7B,MAAoB,EACpBC,IAAwB,EACxBxC,QAAgB,EAKf;EAED,MAAMiD,SAAS,GAAGhB,YAAY,CAACM,MAAM,CAACW,aAAa,EAAGX,MAAM,CAACJ,QAAQ,EAAGK,IAAI,CAACJ,MAAM,EAAE;IACnFe,UAAU,EAAEZ,MAAM,CAACY,UAAU;IAC7BN,QAAQ,EAAEN,MAAM,CAACY;EACnB,CAAC,CAAC;EAGF,MAAME,aAAa,GAAG,MAAM3G,WAAW,CAAC4G,OAAO,CAACf,MAAM,CAACgB,W
AAW,EAAGN,SAAS,CAAC;EAG/E,IAAIR,UAAU,GAAGnB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACI,SAAS,GAAG,CAAC,EAAE;IACxBF,UAAU,GAAGR,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACI,OAAO,EAAE;MAClFC,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACI,SAAS,CAAC;MACvC0B,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ;EAEA,IAAIvB,UAAU,GAAGxB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IACxBD,UAAU,GAAGb,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACQ,OAAO,EAAE;MAClFH,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACQ,SAAS,CAAC;MACvCsB,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ;EAGA,MAAMb,MAAM,GAAG,IAAIlG,UAAU,CAAC;IAC5B4E,IAAI,EAAE3E,QAAQ,CAAC+G,YAAY;IAC3BC,mBAAmB,EAAE,IAAItH,gBAAgB,CAAC;MACxC0G,UAAU,EAAEnB,IAAI,CAACoB,KAAK;MACtBY,SAAS,EAAEhC,IAAI,CAACoB,KAAK,GAAGpB,IAAI,CAACJ,MAAM,CAACrD,MAAM;MAC1C0F,QAAQ,EAAEzE,QAAQ;MAClBmC,QAAQ,EAAEjF,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAS;MAC3CuC,6BAA6B,EAAE5B,UAAU,CAAC/D,MAAM;MAChD4F,6BAA6B,EAAElC,UAAU,CAAC1D,MAAM;MAChD6F,aAAa,EAAErC,MAAM,CAACgB,WAAW,KAAK;IACxC,CAAC,CAAC;IACFQ,sBAAsB,EAAEtB,UAAU,CAAC1D,MAAM,GAAG+D,UAAU,CAAC/D,MAAM,GAAGkE,SAAS,CAAClE,MAAM;IAChFiF,oBAAoB,EAAEvB,UAAU,CAAC1D,MAAM,GAAG+D,UAAU,CAAC/D,MAAM,GAAGsE,aAAa,CAACtE;EAC9E,CAAC,CAAC;EAGF,MAAMkF,SAAS,GAAGlG,eAAe,CAACyF,MAAM,CAAC;EACzC,MAAMU,IAAI,GAAG5C,MAAM,CAACe,MAAM,CAAC,CAAC4B,SAAS,EAAExB,UAAU,EAAEK,UAAU,EAAEO,aAAa,CAAC,CAAC;EAC9E,OAAO;IAACG,MAAM;IAAEW,UAAU,EAAEF,SAAS,CAAClF,MAAM;IAAEmF;EAAI,CAAC;AACrD;AAKA,eAAeW,iBAAiBA,CAC9BtC,MAAoB,EACpBuC,MAAuB,EACvB/D,MAAc,EACdpC,IAA2B,EAK1B;EACD,MAAM6D,IAAI,GAAGsC,MAAM,CAACC,UAAU,CAACxC,MAAM,CAAC7D,IAAI,CAACsG,IAAI,CAAC,CAAC,CAAC;EAClD,MAAMpD,UAAU,GAAG,CAACjD,IAAI,CAACiD,UAAU,IAAI,CAAC,IAAIb,MAAM;EAGlD,IAAIkE,OAAe;EAEnB,IAAIC,uBAAuB,GAAG,CAAC;EAE/B,IAAIC,qBAAqB,GAAG,CAAC;EAC7B;IACE,MAAMC,MAAM,GAAGzG,IAAI,CAACuC,aAAa,GAC7B,MAAMkD,gBAAgB,CAAC7B,MAAM,EAAEC,IAAI,EAAEsC,MAAM,CAAC9E,QAAQ,CAAC,GACrD,MAAMsC,cAAc,CAACC,MAAM,EAAEC,IAAI,CAAC;IAEtCyC,OAAO,GAAGG,MAAM,CAAClB,IAAI;IACrBgB,uBAAuB,IAAIE,MAAM,CAAC5B,MAAM,CAACO,sBAAsB,GAAGqB,MAAM,CAACjB,UAAU;IACnFgB,qBAAqB,IAAIC,MAAM,CAAC5B,MAAM,CAACQ,oBAAoB,GAAGoB,MAAM,CAACjB,UAAU;EACjF;EAMA,MAAMrC,QAAQ,GAAG,IAAIjF,cAAc,CAAC;IAClCwI,cAAc,EAAE9C,MAAM,CAAC7D,IAAI;IAC3BiF,UAAU,EAAEnB,IAAI,CAACoB,KAAK;IACtB0B,gBAAgB,EAAE1D,UAAU;IAC5B2D,SAAS,EAAE,EAAE;IACbL,uBAAuB;IACvBC,qBAAqB;IACrBjD,IAAI,EAAExE,IAAI,CAAC6E,MAAM,CAACW,aAAa,CAAE;IACjCsC,KAAK,EAAE1I,gBAAgB,CAACyF,MAAM,CAACgB,WAAW;EAC5C,CAAC,CAAC;EAGFzB,QAAQ,CAACyD,SAAS,CAAC1D,IAAI,CAAC3E,QAAQ,CAACoB,sBAAsB,CAAC,CAAC;EACzDwD,QAAQ,CAACyD,SAAS,CAAC1D,IAAI,CAAC3E,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAE,CAAC;EAGnD,MAAMsD,cAAc,GAAG7D,UAAU,GAAGqD,OAAO,CAAClG,MAAM;EAClD,MAAMgD,IAAI,GAAGT,MAAM,CAACe,MAAM,CAAC,CAAC4C,OAAO,EAAElH,eAAe,CAAC+D,QAAQ,CAAC,CAAC,CAAC;EAChE,OAAO;IAACC,IAAI;IAAED,QAAQ;IAAE2D;EAAc,CAAC;AACzC;AAKA,eAAe9D,cAAcA,CAC3BlD,MAAqB,EACrB+D,IAAqB,EACrB7D,IAA2B,EAI1B;EACD,MAAMmD,QAAQ,GAAG,IAAItE,QAAQ,CAAC;IAC5BiH,QAAQ,EAAEjC,IAAI,CAACxC,QAAQ;IACvB0F,OAAO,EAAE,EAAE;IACXC,eAAe,EAAE;EACnB,CAAC,CAAC;EAEF,IAAI5D,IAAI,GAAGT,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAC1B,KAAK,MAAMkD,KAAK,IAAInH,MAAM,CAACoH,SAAS,EAAE;IACpC,IAAID,KAAK,CAACE,QAAQ,EAAE;MAClB;IACF;IAEA,MAAMC,UAAU,GAAG,MAAMlB,iBAAiB,CAACe,KAAK,EAAEpD,IAAI,EAAET,IAAI,CAAChD,MAAM,EAAEJ,IAAI,CAAC;IAE1E,MAAMqH,MAAM,GAAG,IAAIpJ,WAAW,CAAC;MAC7BqJ,WAAW,EAAEF,UAAU,CAACN,cAAc;MACtCS,SAAS,EAAEH,UAAU,CAACjE;IACxB,CAAC,CAAC;IAEFA,QAAQ,CAAC4D,OAAO,CAAC7D,IAAI,CAACmE,MAAM,CAAC;IAC7BlE,QAAQ,CAAC6D,eAAe,GAAG,IAAI3H,KAAK,CAACmI,MAAM,CAACrE,QAAQ,CAAC6D,eAAe,CAAC,GAAGI,UAAU,CAAChE,IAAI,CAAChD,MAAM,CAAC;IAE/FgD,IAAI,GAAGT,MAAM,CAACe,MAAM,CAAC,CAACN,IAAI,EAAEgE,UAAU,CAAChE,IAAI,
CAAC,CAAC;EAC/C;EAEA,OAAO;IAACA,IAAI;IAAED;EAAQ,CAAC;AACzB;AAKA,SAASE,YAAYA,CACnBvD,MAAqB,EACrBuB,QAAgB,EAChBgB,SAAqB,EACrBxB,YAAoC,EAC5B;EACR,MAAMsC,QAAQ,GAAG,IAAI1E,YAAY,CAAC;IAChCgJ,OAAO,EAAElI,eAAe;IACxBmI,UAAU,EAAE,UAAU;IACtB5B,QAAQ,EAAEzE,QAAQ;IAClBsG,UAAU,EAAEtF,SAAS;IACrBvC,MAAM,EAAE,EAAE;IACV8H,kBAAkB,EAAE;EACtB,CAAC,CAAC;EAEF,KAAK,MAAMnG,GAAG,IAAIZ,YAAY,EAAE;IAAA,IAAAgH,qBAAA,EAAAC,sBAAA,EAAAC,sBAAA;IAC9B,MAAMC,EAAE,GAAG,IAAItJ,QAAQ,CAAC;MACtB+C,GAAG;MACHC,KAAK,EAAEb,YAAY,CAACY,GAAG;IACzB,CAAC,CAAC;IACF,CAAAoG,qBAAA,GAAA1E,QAAQ,CAACyE,kBAAkB,cAAAC,qBAAA,wBAAAC,sBAAA,GAA3B,CAAAC,sBAAA,GAAAF,qBAAA,EAA6B3E,IAAI,cAAA4E,sBAAA,uBAAjCA,sBAAA,CAAAG,IAAA,CAAAF,sBAAA,EAAoCC,EAAE,CAAC;EACzC;EAEA;IACE,MAAME,UAAU,GAAG,IAAIpJ,aAAa,CAAC;MACnCqJ,IAAI,EAAE,MAAM;MACZC,YAAY,EAAEC,MAAM,CAACC,IAAI,CAACxI,MAAM,CAACyI,MAAM,CAAC,CAACnI;IAC3C,CAAC,CAAC;IACF+C,QAAQ,CAACrD,MAAM,CAACoD,IAAI,CAACgF,UAAU,CAAC;EAClC;EAEA,KAAK,MAAMjB,KAAK,IAAInH,MAAM,CAACoH,SAAS,EAAE;IACpC,MAAMsB,IAAI,GAAGhK,mBAAmB,CAACyI,KAAK,CAACwB,cAAc,CAAC;IACtD,MAAMC,UAAU,GAAG,IAAI5J,aAAa,CAAC;MACnCqJ,IAAI,EAAElB,KAAK,CAACkB,IAAI;MAChBQ,eAAe,EAAEH;IACnB,CAAC,CAAC;IAEF,IAAIvB,KAAK,CAACE,QAAQ,EAAE;MAClBuB,UAAU,CAACN,YAAY,GAAGnB,KAAK,CAAC2B,UAAU;IAC5C,CAAC,MAAM;MACLF,UAAU,CAACnF,IAAI,GAAGxE,IAAI,CAACkI,KAAK,CAAC1C,aAAa,CAAU;IACtD;IAEA,IAAI0C,KAAK,CAAC4B,YAAY,EAAE;MACtBH,UAAU,CAACI,cAAc,GAAG1K,aAAa,CAAC6I,KAAK,CAAC4B,YAAY,CAAkB;IAChF;IAEAH,UAAU,CAACK,WAAW,GAAG9B,KAAK,CAACzC,UAAU;IAEzCrB,QAAQ,CAACrD,MAAM,CAACoD,IAAI,CAACwF,UAAU,CAAC;EAClC;EAEA,MAAMM,eAAe,GAAG5J,eAAe,CAAC+D,QAAQ,CAAC;EACjD,MAAM8F,aAAa,GAAGtG,MAAM,CAACoB,KAAK,CAACiF,eAAe,CAAC5I,MAAM,GAAG,CAAC,CAAC;EAC9D4I,eAAe,CAACE,IAAI,CAACD,aAAa,CAAC;EACnCA,aAAa,CAACE,aAAa,CAACH,eAAe,CAAC5I,MAAM,EAAE4I,eAAe,CAAC5I,MAAM,CAAC;EAC3E6I,aAAa,CAAC9G,KAAK,CAAC7C,aAAa,EAAE0J,eAAe,CAAC5I,MAAM,GAAG,CAAC,CAAC;EAC9D,OAAO6I,aAAa;AACtB"}
+
{"version":3,"file":"parquet-encoder.js","names":["PARQUET_CODECS","Compression","Shred","ColumnChunk","ColumnMetaData","CompressionCodec","ConvertedType","DataPageHeader","DataPageHeaderV2","Encoding","FieldRepetitionType","FileMetaData","KeyValue","PageHeader","PageType","RowGroup","SchemaElement","Type","osopen","oswrite","osclose","getBitWidth","serializeThrift","Int64","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetEncoder","openFile","schema","path","opts","outputStream","openStream","arguments","length","undefined","envelopeWriter","ParquetEnvelopeWriter","constructor","_defineProperty","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","err","close","appendRow","row","Error","shredRecord","rowCount","callback","writeFooter","setMetadata","key","value","String","setRowGroupSize","cnt","setPageSize","writeFn","bind","closeFn","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","writeSection","buf","Buffer","from","writeRowGroup","records","rgroup","encodeRowGroup","baseOffset","push","metadata","body","encodeFooter","encodeValues","type","encoding","values","concat","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","compressedBuf","deflate","compression","header","DATA_PAGE","data_page_header","num_values","count","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","pageBuf","total_uncompressed_size","total_compressed_size","result","path_in_schema","data_page_offset","encodings","codec","metadataOffset","columns","total_byte_size","field","fieldList","isNested","cchunkData","cchunk","file_offset","meta_data","Number","version","created_by","row_groups","key_value_metadata","_metadata$key_value_m","_metadata$key_value_m2","_metadata$key_value_m3","kv","call","schemaRoot","name","num_children","Object","keys","fields","relt","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"sources":["../../../../src/parquetjs/encoder/parquet-encoder.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {stream} from '@loaders.gl/loader-utils';\nimport {ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport * as Compression from '../compression';\nimport {\n ParquetRowGroup,\n ParquetCodec,\n ParquetColumnChunk,\n ParquetField,\n PrimitiveType,\n ParquetRow\n} from '../schema/declare';\nimport {ParquetSchema} from '../schema/schema';\nimport * as Shred from '../schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {osopen, oswrite, osclose} from '../utils/file-utils';\nimport {getBitWidth, serializeThrift} from '../utils/read-utils';\nimport Int64 from 
'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetEncoderOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. The ParquetEncoder will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetEncoder<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetEncoderOptions\n ): Promise<ParquetEncoder<T>> {\n const outputStream = await osopen(path, opts);\n return ParquetEncoder.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: stream.Writable,\n opts: ParquetEncoderOptions = {}\n ): Promise<ParquetEncoder<T>> {\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n return new ParquetEncoder(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetRowGroup;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetEncoderOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n this.writeHeader();\n }\n\n async writeHeader(): Promise<void> {\n // TODO - better not mess with promises in the constructor\n try {\n await this.envelopeWriter.writeHeader();\n } catch (err) {\n await this.envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T extends ParquetRow>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. 
You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: stream.Writable,\n opts: ParquetEncoderOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = oswrite.bind(undefined, outputStream);\n const closeFn = osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetEncoderOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. 
The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetRowGroup): Promise<void> {\n const rgroup = await encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return await this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\nexport class ParquetTransformer<T> extends stream.Transform {\n public writer: ParquetEncoder<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetEncoderOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetEncoder(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n */\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nasync function encodeDataPage(\n column: ParquetField,\n data: ParquetColumnChunk\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? 
(compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nasync function encodeDataPageV2(\n column: ParquetField,\n data: ParquetColumnChunk,\n rowCount: number\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nasync function encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetRowGroup,\n offset: number,\n opts: ParquetEncoderOptions\n): Promise<{\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n}> {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n const result = opts.useDataPageV2\n ? 
await encodeDataPageV2(column, data, buffer.rowCount)\n : await encodeDataPage(column, data);\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nasync function encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetRowGroup,\n opts: ParquetEncoderOptions\n): Promise<{\n body: Buffer;\n metadata: RowGroup;\n}> {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = await encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] 
as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"mappings":";AAGA,SAA6BA,cAAc,QAAO,WAAW;AAC7D,OAAO,KAAKC,WAAW,MAAM,gBAAgB;AAU7C,OAAO,KAAKC,KAAK,MAAM,iBAAiB;AACxC,SACEC,WAAW,EACXC,cAAc,EACdC,gBAAgB,EAChBC,aAAa,EACbC,cAAc,EACdC,gBAAgB,EAChBC,QAAQ,EACRC,mBAAmB,EACnBC,YAAY,EACZC,QAAQ,EACRC,UAAU,EACVC,QAAQ,EACRC,QAAQ,EACRC,aAAa,EACbC,IAAI,QACC,mBAAmB;AAC1B,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,qBAAqB;AAC5D,SAAQC,WAAW,EAAEC,eAAe,QAAO,qBAAqB;AAChE,OAAOC,KAAK,MAAM,YAAY;AAK9B,MAAMC,aAAa,GAAG,MAAM;AAK5B,MAAMC,eAAe,GAAG,CAAC;AAKzB,MAAMC,yBAAyB,GAAG,IAAI;AACtC,MAAMC,8BAA8B,GAAG,IAAI;AAK3C,MAAMC,kBAAkB,GAAG,OAAO;AAClC,MAAMC,sBAAsB,GAAG,KAAK;AAuBpC,OAAO,MAAMC,cAAc,CAAI;EAK7B,aAAaC,QAAQA,CACnBC,MAAqB,EACrBC,IAAY,EACZC,IAA4B,EACA;IAC5B,MAAMC,YAAY,GAAG,MAAMjB,MAAM,CAACe,IAAI,EAAEC,IAAI,CAAC;IAC7C,OAAOJ,cAAc,CAACM,UAAU,CAACJ,MAAM,EAAEG,YAAY,EAAED,IAAI,CAAC;EAC9D;EAMA,aAAaE,UAAUA,CACrBJ,MAAqB,EACrBG,YAA6B,EAED;IAAA,IAD5BD,IAA2B,GAAAG,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAEhC,MAAMG,cAAc,GAAG,MAAMC,qBAAqB,CAACL,UAAU,CAACJ,MAAM,EAAEG,YAAY,EAAED,IAAI,CAAC;IACzF,OAAO,IAAIJ,cAAc,CAACE,MAAM,EAAEQ,cAAc,EAAEN,IAAI,CAAC;EACzD;EAYAQ,WAAWA,CACTV,MAAqB,EACrBQ,cAAqC,EACrCN,IAA2B,EAC3B;IAAAS,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IACA,IAAI,CAACX,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACQ,cAAc,GAAGA,cAAc;IAEpC,IAAI,CAACI,SAAS,GAAG,CAAC,CAAC;IACnB,IAAI,CAACC,YAAY,GAAGX,IAAI,CAACW,YAAY,IAAIlB,8BAA8B;IACvE,IAAI,CAACmB,MAAM,GAAG,KAAK;IACnB,IAAI,CAACC,YAAY,GAAG,CAAC,CAAC;IAGtB,IAAI,CAACC,WAAW,CAAC,CAAC;EACpB;EAEA,MAAMA,WAAWA,CAAA,EAAkB;IAEjC,IAAI;MACF,MAAM,IAAI,CAACR,cAAc,CAACQ,WAAW,CAAC,CAAC;IACzC,CAAC,CAAC,OAAOC,GAAG,EAAE;MACZ,MAAM,IAAI,CAACT,cAAc,CAACU,KAAK,CAAC,CAAC;MACjC,MAAMD,GAAG;IACX;EACF;EAMA,MAAME,SAASA,CAAuBC,GAAM,EAAiB;IAC3D,IAAI,IAAI,CAACN,MAAM,EAAE;MACf,MAAM,IAAIO,KAAK,CAAC,mBAAmB,CAAC;IACtC;IACAnD,KAAK,CAACoD,WAAW,CAAC,IAAI,CAACtB,MAAM,EAAEoB,GAAG,EAAE,IAAI,CAACR,SAAS,CAAC;IACnD,IAAI,IAAI,CAACA,SAAS,CAACW,QAAQ,IAAI,IAAI,CAACV,YAAY,EAAE;MAEhD,IAAI,CAACD,SAAS,GAAG,CAAC,CAAC;IACrB;EACF;EAQA,MAAMM,KAAKA,CAACM,QAAqB,EAAiB;IAChD,IAAI,IAAI,CAACV,MAAM,EAAE;MACf,MAAM,IAAIO,KAAK,CAAC,mBAAmB,CAAC;IACtC;IAEA,IAAI,CAACP,MAAM,GAAG,IAAI;IAElB,IAAI,IAAI,CAACF,SAAS,CAACW,QAAQ,GAAG,CAAC,IAAI,IAAI,CAACX,SAAS,CAACW,QAAQ,IAAI,IAAI,CAACV,YAAY,EAAE;MAE/E,IAAI,CAACD,SAAS,GAAG,CAAC,CAAC;IACrB;IAEA,MAAM,IAAI,CAACJ,cAAc,CAACiB,WAAW,CAAC,IAAI,CAACV,YAAY,CAAC;IACxD,MAAM,IAAI,CAACP,cAAc,CAACU,KAAK,CAAC,CAAC;IAGjC,IAAIM,QAAQ,EAAE;MACZA,QAAQ,CAAC,CAAC;IACZ;EACF;EAKAE,WAAWA,CAACC,GAAW,EAAEC,KAAa,EAAQ;IAE5C,IAAI,CAACb,YAAY,CAACc,MAAM,CAACF,GAAG,CAAC,CAAC,GAAGE,MAAM,CAACD,KAAK,CAAC;EAChD;EAQAE,eAAeA,CAACC,GAAW,EAAQ;IACjC,IAAI,CAAClB,YAAY,GAAGkB,GAAG;EACzB;EAMAC,WAAWA,CAACD,GAAW,EAAQ;IAC7B,IAAI,CAACvB,cAAc,CAACwB,WAAW,CAACD,GAAG,CAAC;EACtC;AACF;AAQA,OAAO,MAAMtB,qBAAqB,CAAC;EAIjC,aAAaL,UAAUA,CACrBJ,MAAqB,EACrBG,YAA6B,EAC7BD,IAA2B,EACK;IAChC,MAAM+B,OAAO,GAAG9C,OAAO,CAAC+C,IAAI,CAAC3B,SAAS,EAAEJ,YAAY,CAAC;IACrD,MAAMgC,OAAO,GAAG/C,OAAO,CAAC8C,IAAI,CAAC3B,SAAS,EAAEJ,YAAY,CAAC;IACrD,OAAO,IAAIM,qBAAqB,CAACT,MAAM,EAAEiC,O
AAO,EAAEE,OAAO,EAAE,CAAC,EAAEjC,IAAI,CAAC;EACrE;EAWAQ,WAAWA,CACTV,MAAqB,EACrBiC,OAAuC,EACvCE,OAA4B,EAC5BC,UAAkB,EAClBlC,IAA2B,EAC3B;IAAAS,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IAAAA,eAAA;IACA,IAAI,CAACX,MAAM,GAAGA,MAAM;IACpB,IAAI,CAACqC,KAAK,GAAGJ,OAAO;IACpB,IAAI,CAACf,KAAK,GAAGiB,OAAO;IACpB,IAAI,CAACG,MAAM,GAAGF,UAAU;IACxB,IAAI,CAACb,QAAQ,GAAG,CAAC;IACjB,IAAI,CAACgB,SAAS,GAAG,EAAE;IACnB,IAAI,CAACC,QAAQ,GAAGtC,IAAI,CAACsC,QAAQ,IAAI9C,yBAAyB;IAC1D,IAAI,CAAC+C,aAAa,GAAG,eAAe,IAAIvC,IAAI,GAAGwC,OAAO,CAACxC,IAAI,CAACuC,aAAa,CAAC,GAAG,KAAK;EACpF;EAEAE,YAAYA,CAACC,GAAW,EAAiB;IACvC,IAAI,CAACN,MAAM,IAAIM,GAAG,CAACtC,MAAM;IACzB,OAAO,IAAI,CAAC+B,KAAK,CAACO,GAAG,CAAC;EACxB;EAKA5B,WAAWA,CAAA,EAAkB;IAC3B,OAAO,IAAI,CAAC2B,YAAY,CAACE,MAAM,CAACC,IAAI,CAACtD,aAAa,CAAC,CAAC;EACtD;EAMA,MAAMuD,aAAaA,CAACC,OAAwB,EAAiB;IAC3D,MAAMC,MAAM,GAAG,MAAMC,cAAc,CAAC,IAAI,CAAClD,MAAM,EAAEgD,OAAO,EAAE;MACxDG,UAAU,EAAE,IAAI,CAACb,MAAM;MACvBE,QAAQ,EAAE,IAAI,CAACA,QAAQ;MACvBC,aAAa,EAAE,IAAI,CAACA;IACtB,CAAC,CAAC;IAEF,IAAI,CAAClB,QAAQ,IAAIyB,OAAO,CAACzB,QAAQ;IACjC,IAAI,CAACgB,SAAS,CAACa,IAAI,CAACH,MAAM,CAACI,QAAQ,CAAC;IACpC,OAAO,MAAM,IAAI,CAACV,YAAY,CAACM,MAAM,CAACK,IAAI,CAAC;EAC7C;EAKA7B,WAAWA,CAACV,YAAoC,EAAiB;IAC/D,IAAI,CAACA,YAAY,EAAE;MAEjBA,YAAY,GAAG,CAAC,CAAC;IACnB;IAEA,OAAO,IAAI,CAAC4B,YAAY,CACtBY,YAAY,CAAC,IAAI,CAACvD,MAAM,EAAE,IAAI,CAACuB,QAAQ,EAAE,IAAI,CAACgB,SAAS,EAAExB,YAAY,CACvE,CAAC;EACH;EAMAiB,WAAWA,CAACD,GAAW,EAAQ;IAC7B,IAAI,CAACS,QAAQ,GAAGT,GAAG;EACrB;AACF;AA0CA,SAASyB,YAAYA,CACnBC,IAAmB,EACnBC,QAAsB,EACtBC,MAAa,EACbzD,IAAyB,EACzB;EACA,IAAI,EAAEwD,QAAQ,IAAI1F,cAAc,CAAC,EAAE;IACjC,MAAM,IAAIqD,KAAK,sBAAAuC,MAAA,CAAsBF,QAAQ,CAAE,CAAC;EAClD;EACA,OAAO1F,cAAc,CAAC0F,QAAQ,CAAC,CAACF,YAAY,CAACC,IAAI,EAAEE,MAAM,EAAEzD,IAAI,CAAC;AAClE;AAKA,eAAe2D,cAAcA,CAC3BC,MAAoB,EACpBC,IAAwB,EAKvB;EAED,IAAIC,UAAU,GAAGnB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACI,SAAS,GAAG,CAAC,EAAE;IACxBF,UAAU,GAAGR,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACI,OAAO,EAAE;MAClFC,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACI,SAAS;IAExC,CAAC,CAAC;EACJ;EAEA,IAAIG,UAAU,GAAGxB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IACxBD,UAAU,GAAGb,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACQ,OAAO,EAAE;MAClFH,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACQ,SAAS;IAExC,CAAC,CAAC;EACJ;EAGA,MAAME,SAAS,GAAGhB,YAAY,CAACM,MAAM,CAACW,aAAa,EAAGX,MAAM,CAACJ,QAAQ,EAAGK,IAAI,CAACJ,MAAM,EAAE;IACnFe,UAAU,EAAEZ,MAAM,CAACY,UAAU;IAC7BN,QAAQ,EAAEN,MAAM,CAACY;EACnB,CAAC,CAAC;EAEF,MAAMC,OAAO,GAAG9B,MAAM,CAACe,MAAM,CAAC,CAACI,UAAU,EAAEK,UAAU,EAAEG,SAAS,CAAC,CAAC;EAGlE,MAAMI,aAAa,GAAG,MAAM3G,WAAW,CAAC4G,OAAO,CAACf,MAAM,CAACgB,WAAW,EAAGH,OAAO,CAAC;EAG7E,MAAMI,MAAM,GAAG,IAAIlG,UAAU,CAAC;IAC5B4E,IAAI,EAAE3E,QAAQ,CAACkG,SAAS;IACxBC,gBAAgB,EAAE,IAAI1G,cAAc,CAAC;MACnC2G,UAAU,EAAEnB,IAAI,CAACoB,KAAK;MACtBzB,QAAQ,EAAEjF,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAS;MAC3C0B,yBAAyB,EAAE3G,QAAQ,CAACoB,sBAAsB,CAAC;MAC3DwF,yBAAyB,EAAE5G,QAAQ,CAACoB,sBAAsB;IAC5D,CAAC,CAAC;IACFyF,sBAAsB,EAAEX,OAAO,CAACrE,MAAM;IACtCiF,oBAAoB,EAAEX,aAAa,CAACtE;EACtC,CAAC,CAAC;EAGF,MAAMkF,SAAS,GAAGlG,eAAe,CAACyF,MAAM,CAAC;EACzC,MAAMU,IAAI,GAAG5C,MAAM,CAACe,MAAM,CAAC,CAAC4B,SAAS,EAAEZ,aAAa,CAAC,CAAC;EAEtD,OAAO;IAACG,MAAM;IAAEW,UAAU,EAAEF,SAAS,CAAClF,MAAM;IAAEmF;EAAI,CAAC;AACrD;AAKA,eAAeE,gBAAgBA,CAC7B7B,MAAoB,EACpBC,IAAwB,EACxBxC,QAAgB,EAKf;EAED,MAAMiD,SAAS,GAAGhB,YAAY,CAACM,MAAM,CAACW,aAAa,EAAGX,MAAM,CAACJ,QAAQ,EAAGK,IAAI,CAACJ,MAAM,EAAE;IACnFe,UAAU,EAAEZ,MAAM,CAACY,UAAU;IAC7BN,QAAQ,EAAEN,MAAM,CAACY;EACnB,CAAC,CAAC;EAGF,MAAME,aAAa,GAAG,MAAM3G,WAAW,CAAC4G,OAAO,CAACf,MAAM,CAACgB
,WAAW,EAAGN,SAAS,CAAC;EAG/E,IAAIR,UAAU,GAAGnB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACI,SAAS,GAAG,CAAC,EAAE;IACxBF,UAAU,GAAGR,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACI,OAAO,EAAE;MAClFC,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACI,SAAS,CAAC;MACvC0B,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ;EAEA,IAAIvB,UAAU,GAAGxB,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAChC,IAAIH,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IACxBD,UAAU,GAAGb,YAAY,CAAC5D,kBAAkB,EAAEC,sBAAsB,EAAEkE,IAAI,CAACQ,OAAO,EAAE;MAClFH,QAAQ,EAAE/E,WAAW,CAACyE,MAAM,CAACQ,SAAS,CAAC;MACvCsB,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ;EAGA,MAAMb,MAAM,GAAG,IAAIlG,UAAU,CAAC;IAC5B4E,IAAI,EAAE3E,QAAQ,CAAC+G,YAAY;IAC3BC,mBAAmB,EAAE,IAAItH,gBAAgB,CAAC;MACxC0G,UAAU,EAAEnB,IAAI,CAACoB,KAAK;MACtBY,SAAS,EAAEhC,IAAI,CAACoB,KAAK,GAAGpB,IAAI,CAACJ,MAAM,CAACrD,MAAM;MAC1C0F,QAAQ,EAAEzE,QAAQ;MAClBmC,QAAQ,EAAEjF,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAS;MAC3CuC,6BAA6B,EAAE5B,UAAU,CAAC/D,MAAM;MAChD4F,6BAA6B,EAAElC,UAAU,CAAC1D,MAAM;MAChD6F,aAAa,EAAErC,MAAM,CAACgB,WAAW,KAAK;IACxC,CAAC,CAAC;IACFQ,sBAAsB,EAAEtB,UAAU,CAAC1D,MAAM,GAAG+D,UAAU,CAAC/D,MAAM,GAAGkE,SAAS,CAAClE,MAAM;IAChFiF,oBAAoB,EAAEvB,UAAU,CAAC1D,MAAM,GAAG+D,UAAU,CAAC/D,MAAM,GAAGsE,aAAa,CAACtE;EAC9E,CAAC,CAAC;EAGF,MAAMkF,SAAS,GAAGlG,eAAe,CAACyF,MAAM,CAAC;EACzC,MAAMU,IAAI,GAAG5C,MAAM,CAACe,MAAM,CAAC,CAAC4B,SAAS,EAAExB,UAAU,EAAEK,UAAU,EAAEO,aAAa,CAAC,CAAC;EAC9E,OAAO;IAACG,MAAM;IAAEW,UAAU,EAAEF,SAAS,CAAClF,MAAM;IAAEmF;EAAI,CAAC;AACrD;AAKA,eAAeW,iBAAiBA,CAC9BtC,MAAoB,EACpBuC,MAAuB,EACvB/D,MAAc,EACdpC,IAA2B,EAK1B;EACD,MAAM6D,IAAI,GAAGsC,MAAM,CAACC,UAAU,CAACxC,MAAM,CAAC7D,IAAI,CAACsG,IAAI,CAAC,CAAC,CAAC;EAClD,MAAMpD,UAAU,GAAG,CAACjD,IAAI,CAACiD,UAAU,IAAI,CAAC,IAAIb,MAAM;EAGlD,IAAIkE,OAAe;EAEnB,IAAIC,uBAAuB,GAAG,CAAC;EAE/B,IAAIC,qBAAqB,GAAG,CAAC;EAC7B;IACE,MAAMC,MAAM,GAAGzG,IAAI,CAACuC,aAAa,GAC7B,MAAMkD,gBAAgB,CAAC7B,MAAM,EAAEC,IAAI,EAAEsC,MAAM,CAAC9E,QAAQ,CAAC,GACrD,MAAMsC,cAAc,CAACC,MAAM,EAAEC,IAAI,CAAC;IAEtCyC,OAAO,GAAGG,MAAM,CAAClB,IAAI;IACrBgB,uBAAuB,IAAIE,MAAM,CAAC5B,MAAM,CAACO,sBAAsB,GAAGqB,MAAM,CAACjB,UAAU;IACnFgB,qBAAqB,IAAIC,MAAM,CAAC5B,MAAM,CAACQ,oBAAoB,GAAGoB,MAAM,CAACjB,UAAU;EACjF;EAMA,MAAMrC,QAAQ,GAAG,IAAIjF,cAAc,CAAC;IAClCwI,cAAc,EAAE9C,MAAM,CAAC7D,IAAI;IAC3BiF,UAAU,EAAEnB,IAAI,CAACoB,KAAK;IACtB0B,gBAAgB,EAAE1D,UAAU;IAC5B2D,SAAS,EAAE,EAAE;IACbL,uBAAuB;IACvBC,qBAAqB;IACrBjD,IAAI,EAAExE,IAAI,CAAC6E,MAAM,CAACW,aAAa,CAAE;IACjCsC,KAAK,EAAE1I,gBAAgB,CAACyF,MAAM,CAACgB,WAAW;EAC5C,CAAC,CAAC;EAGFzB,QAAQ,CAACyD,SAAS,CAAC1D,IAAI,CAAC3E,QAAQ,CAACoB,sBAAsB,CAAC,CAAC;EACzDwD,QAAQ,CAACyD,SAAS,CAAC1D,IAAI,CAAC3E,QAAQ,CAACqF,MAAM,CAACJ,QAAQ,CAAE,CAAC;EAGnD,MAAMsD,cAAc,GAAG7D,UAAU,GAAGqD,OAAO,CAAClG,MAAM;EAClD,MAAMgD,IAAI,GAAGT,MAAM,CAACe,MAAM,CAAC,CAAC4C,OAAO,EAAElH,eAAe,CAAC+D,QAAQ,CAAC,CAAC,CAAC;EAChE,OAAO;IAACC,IAAI;IAAED,QAAQ;IAAE2D;EAAc,CAAC;AACzC;AAKA,eAAe9D,cAAcA,CAC3BlD,MAAqB,EACrB+D,IAAqB,EACrB7D,IAA2B,EAI1B;EACD,MAAMmD,QAAQ,GAAG,IAAItE,QAAQ,CAAC;IAC5BiH,QAAQ,EAAEjC,IAAI,CAACxC,QAAQ;IACvB0F,OAAO,EAAE,EAAE;IACXC,eAAe,EAAE;EACnB,CAAC,CAAC;EAEF,IAAI5D,IAAI,GAAGT,MAAM,CAACoB,KAAK,CAAC,CAAC,CAAC;EAC1B,KAAK,MAAMkD,KAAK,IAAInH,MAAM,CAACoH,SAAS,EAAE;IACpC,IAAID,KAAK,CAACE,QAAQ,EAAE;MAClB;IACF;IAEA,MAAMC,UAAU,GAAG,MAAMlB,iBAAiB,CAACe,KAAK,EAAEpD,IAAI,EAAET,IAAI,CAAChD,MAAM,EAAEJ,IAAI,CAAC;IAE1E,MAAMqH,MAAM,GAAG,IAAIpJ,WAAW,CAAC;MAC7BqJ,WAAW,EAAEF,UAAU,CAACN,cAAc;MACtCS,SAAS,EAAEH,UAAU,CAACjE;IACxB,CAAC,CAAC;IAEFA,QAAQ,CAAC4D,OAAO,CAAC7D,IAAI,CAACmE,MAAM,CAAC;IAC7BlE,QAAQ,CAAC6D,eAAe,GAAG,IAAI3H,KAAK,CAACmI,MAAM,CAACrE,QAAQ,CAAC6D,eAAe,CAAC,GAAGI,UAAU,CAAChE,IAAI,CAAChD,MAAM,CAAC;IAE/FgD,IAAI,GAAGT,MAAM,CAACe,MAAM,CAAC,CAACN,IAAI,EAAEgE,UAAU,CAAChE,IAA
I,CAAC,CAAC;EAC/C;EAEA,OAAO;IAACA,IAAI;IAAED;EAAQ,CAAC;AACzB;AAKA,SAASE,YAAYA,CACnBvD,MAAqB,EACrBuB,QAAgB,EAChBgB,SAAqB,EACrBxB,YAAoC,EAC5B;EACR,MAAMsC,QAAQ,GAAG,IAAI1E,YAAY,CAAC;IAChCgJ,OAAO,EAAElI,eAAe;IACxBmI,UAAU,EAAE,UAAU;IACtB5B,QAAQ,EAAEzE,QAAQ;IAClBsG,UAAU,EAAEtF,SAAS;IACrBvC,MAAM,EAAE,EAAE;IACV8H,kBAAkB,EAAE;EACtB,CAAC,CAAC;EAEF,KAAK,MAAMnG,GAAG,IAAIZ,YAAY,EAAE;IAAA,IAAAgH,qBAAA,EAAAC,sBAAA,EAAAC,sBAAA;IAC9B,MAAMC,EAAE,GAAG,IAAItJ,QAAQ,CAAC;MACtB+C,GAAG;MACHC,KAAK,EAAEb,YAAY,CAACY,GAAG;IACzB,CAAC,CAAC;IACF,CAAAoG,qBAAA,GAAA1E,QAAQ,CAACyE,kBAAkB,cAAAC,qBAAA,wBAAAC,sBAAA,GAA3B,CAAAC,sBAAA,GAAAF,qBAAA,EAA6B3E,IAAI,cAAA4E,sBAAA,uBAAjCA,sBAAA,CAAAG,IAAA,CAAAF,sBAAA,EAAoCC,EAAE,CAAC;EACzC;EAEA;IACE,MAAME,UAAU,GAAG,IAAIpJ,aAAa,CAAC;MACnCqJ,IAAI,EAAE,MAAM;MACZC,YAAY,EAAEC,MAAM,CAACC,IAAI,CAACxI,MAAM,CAACyI,MAAM,CAAC,CAACnI;IAC3C,CAAC,CAAC;IACF+C,QAAQ,CAACrD,MAAM,CAACoD,IAAI,CAACgF,UAAU,CAAC;EAClC;EAEA,KAAK,MAAMjB,KAAK,IAAInH,MAAM,CAACoH,SAAS,EAAE;IACpC,MAAMsB,IAAI,GAAGhK,mBAAmB,CAACyI,KAAK,CAACwB,cAAc,CAAC;IACtD,MAAMC,UAAU,GAAG,IAAI5J,aAAa,CAAC;MACnCqJ,IAAI,EAAElB,KAAK,CAACkB,IAAI;MAChBQ,eAAe,EAAEH;IACnB,CAAC,CAAC;IAEF,IAAIvB,KAAK,CAACE,QAAQ,EAAE;MAClBuB,UAAU,CAACN,YAAY,GAAGnB,KAAK,CAAC2B,UAAU;IAC5C,CAAC,MAAM;MACLF,UAAU,CAACnF,IAAI,GAAGxE,IAAI,CAACkI,KAAK,CAAC1C,aAAa,CAAU;IACtD;IAEA,IAAI0C,KAAK,CAAC4B,YAAY,EAAE;MACtBH,UAAU,CAACI,cAAc,GAAG1K,aAAa,CAAC6I,KAAK,CAAC4B,YAAY,CAAkB;IAChF;IAEAH,UAAU,CAACK,WAAW,GAAG9B,KAAK,CAACzC,UAAU;IAEzCrB,QAAQ,CAACrD,MAAM,CAACoD,IAAI,CAACwF,UAAU,CAAC;EAClC;EAEA,MAAMM,eAAe,GAAG5J,eAAe,CAAC+D,QAAQ,CAAC;EACjD,MAAM8F,aAAa,GAAGtG,MAAM,CAACoB,KAAK,CAACiF,eAAe,CAAC5I,MAAM,GAAG,CAAC,CAAC;EAE9D4I,eAAe,CAACE,IAAI,CAACD,aAAa,CAAC;EACnCA,aAAa,CAACE,aAAa,CAACH,eAAe,CAAC5I,MAAM,EAAE4I,eAAe,CAAC5I,MAAM,CAAC;EAC3E6I,aAAa,CAAC9G,KAAK,CAAC7C,aAAa,EAAE0J,eAAe,CAAC5I,MAAM,GAAG,CAAC,CAAC;EAC9D,OAAO6I,aAAa;AACtB"}
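Note: the sourcesContent embedded in the new parquet-encoder.js.map above carries the full ParquetEncoder source. For orientation, a minimal usage sketch of that API follows; the schema contents, the output stream, and the way the class is obtained are assumptions (ParquetEncoder is an internal parquetjs module, not a documented loaders.gl entry point).

// Sketch only: ParquetEncoder is the internal class shown in the embedded
// source above; how it is imported is left open here, so it is passed in.
async function writeTwoRows(
  ParquetEncoder: any,
  schema: any, // a ParquetSchema describing, for example, {id, name}
  out: NodeJS.WritableStream
): Promise<void> {
  const writer = await ParquetEncoder.openStream(schema, out, {useDataPageV2: false});
  writer.setMetadata('generator', 'example'); // stored as key/value file metadata
  await writer.appendRow({id: 1, name: 'alpha'}); // rows are buffered until rowGroupSize is reached
  await writer.appendRow({id: 2, name: 'beta'});
  await writer.close(); // must be called once, after all rows are appended
}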
@@ -1 +1 @@
-
{"version":3,"file":"index.js","names":[],"sources":["../../../../src/parquetjs/parquet-thrift/index.ts"],"sourcesContent":["/* tslint:disable */\n/* eslint-disable */\n/*\n * Autogenerated by @creditkarma/thrift-typescript v3.7.2\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n */\nexport * from './Type';\nexport * from './ConvertedType';\nexport * from './FieldRepetitionType';\nexport * from './Encoding';\nexport * from './CompressionCodec';\nexport * from './PageType';\nexport * from './BoundaryOrder';\nexport * from './Statistics';\nexport * from './StringType';\nexport * from './UUIDType';\nexport * from './MapType';\nexport * from './ListType';\nexport * from './EnumType';\nexport * from './DateType';\nexport * from './NullType';\nexport * from './DecimalType';\nexport * from './MilliSeconds';\nexport * from './MicroSeconds';\nexport * from './TimestampType';\nexport * from './TimeType';\nexport * from './IntType';\nexport * from './JsonType';\nexport * from './BsonType';\nexport * from './SchemaElement';\nexport * from './DataPageHeader';\nexport * from './IndexPageHeader';\nexport * from './DictionaryPageHeader';\nexport * from './DataPageHeaderV2';\nexport * from './PageHeader';\nexport * from './KeyValue';\nexport * from './SortingColumn';\nexport * from './PageEncodingStats';\nexport * from './ColumnMetaData';\nexport * from './ColumnChunk';\nexport * from './RowGroup';\nexport * from './TypeDefinedOrder';\nexport * from './PageLocation';\nexport * from './OffsetIndex';\nexport * from './ColumnIndex';\nexport * from './FileMetaData';\nexport * from './TimeUnit';\nexport * from './LogicalType';\nexport * from './ColumnOrder';\n"],"mappings":"AAMA,cAAc,QAAQ;AACtB,cAAc,iBAAiB;AAC/B,cAAc,uBAAuB;AACrC,cAAc,YAAY;AAC1B,cAAc,oBAAoB;AAClC,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,cAAc;AAC5B,cAAc,cAAc;AAC5B,cAAc,YAAY;AAC1B,cAAc,WAAW;AACzB,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,eAAe;AAC7B,cAAc,gBAAgB;AAC9B,cAAc,gBAAgB;AAC9B,cAAc,iBAAiB;AAC/B,cAAc,YAAY;AAC1B,cAAc,WAAW;AACzB,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,kBAAkB;AAChC,cAAc,mBAAmB;AACjC,cAAc,wBAAwB;AACtC,cAAc,oBAAoB;AAClC,cAAc,cAAc;AAC5B,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,qBAAqB;AACnC,cAAc,kBAAkB;AAChC,cAAc,eAAe;AAC7B,cAAc,YAAY;AAC1B,cAAc,oBAAoB;AAClC,cAAc,gBAAgB;AAC9B,cAAc,eAAe;AAC7B,cAAc,eAAe;AAC7B,cAAc,gBAAgB;AAC9B,cAAc,YAAY;AAC1B,cAAc,eAAe;AAC7B,cAAc,eAAe"}
+
{"version":3,"file":"index.js","names":["Buffer","TBufferedTransport","TCompactProtocol","TFramedTransport"],"sources":["../../../../src/parquetjs/parquet-thrift/index.ts"],"sourcesContent":["/* tslint:disable */\n/* eslint-disable */\n/*\n * Autogenerated by @creditkarma/thrift-typescript v3.7.2\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n */\nexport {Buffer} from '../../buffer-polyfill/install-buffer-polyfill';\n\nexport {TBufferedTransport, TCompactProtocol, TFramedTransport} from 'thrift';\n\nexport * from './Type';\nexport * from './ConvertedType';\nexport * from './FieldRepetitionType';\nexport * from './Encoding';\nexport * from './CompressionCodec';\nexport * from './PageType';\nexport * from './BoundaryOrder';\nexport * from './Statistics';\nexport * from './StringType';\nexport * from './UUIDType';\nexport * from './MapType';\nexport * from './ListType';\nexport * from './EnumType';\nexport * from './DateType';\nexport * from './NullType';\nexport * from './DecimalType';\nexport * from './MilliSeconds';\nexport * from './MicroSeconds';\nexport * from './TimestampType';\nexport * from './TimeType';\nexport * from './IntType';\nexport * from './JsonType';\nexport * from './BsonType';\nexport * from './SchemaElement';\nexport * from './DataPageHeader';\nexport * from './IndexPageHeader';\nexport * from './DictionaryPageHeader';\nexport * from './DataPageHeaderV2';\nexport * from './PageHeader';\nexport * from './KeyValue';\nexport * from './SortingColumn';\nexport * from './PageEncodingStats';\nexport * from './ColumnMetaData';\nexport * from './ColumnChunk';\nexport * from './RowGroup';\nexport * from './TypeDefinedOrder';\nexport * from './PageLocation';\nexport * from './OffsetIndex';\nexport * from './ColumnIndex';\nexport * from './FileMetaData';\nexport * from './TimeUnit';\nexport * from './LogicalType';\nexport * from './ColumnOrder';\n"],"mappings":"AAMA,SAAQA,MAAM,QAAO,+CAA+C;AAEpE,SAAQC,kBAAkB,EAAEC,gBAAgB,EAAEC,gBAAgB,QAAO,QAAQ;AAE7E,cAAc,QAAQ;AACtB,cAAc,iBAAiB;AAC/B,cAAc,uBAAuB;AACrC,cAAc,YAAY;AAC1B,cAAc,oBAAoB;AAClC,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,cAAc;AAC5B,cAAc,cAAc;AAC5B,cAAc,YAAY;AAC1B,cAAc,WAAW;AACzB,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,eAAe;AAC7B,cAAc,gBAAgB;AAC9B,cAAc,gBAAgB;AAC9B,cAAc,iBAAiB;AAC/B,cAAc,YAAY;AAC1B,cAAc,WAAW;AACzB,cAAc,YAAY;AAC1B,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,kBAAkB;AAChC,cAAc,mBAAmB;AACjC,cAAc,wBAAwB;AACtC,cAAc,oBAAoB;AAClC,cAAc,cAAc;AAC5B,cAAc,YAAY;AAC1B,cAAc,iBAAiB;AAC/B,cAAc,qBAAqB;AACnC,cAAc,kBAAkB;AAChC,cAAc,eAAe;AAC7B,cAAc,YAAY;AAC1B,cAAc,oBAAoB;AAClC,cAAc,gBAAgB;AAC9B,cAAc,eAAe;AAC7B,cAAc,eAAe;AAC7B,cAAc,gBAAgB;AAC9B,cAAc,YAAY;AAC1B,cAAc,eAAe;AAC7B,cAAc,eAAe"}
@@ -3,7 +3,7 @@ import { ConvertedType, Encoding, FieldRepetitionType, PageType, Type } from '..
 import { decompress } from '../compression';
 import { PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING } from '../../constants';
 import { decodePageHeader, getThriftEnum, getBitWidth } from '../utils/read-utils';
-export async function decodeDataPages(buffer,
+export async function decodeDataPages(buffer, context) {
 const cursor = {
 buffer,
 offset: 0,
@@ -16,9 +16,9 @@ export async function decodeDataPages(buffer, options) {
 pageHeaders: [],
 count: 0
 };
-let dictionary =
-while (cursor.offset < cursor.size && (!
-const page = await decodePage(cursor,
+let dictionary = context.dictionary || [];
+while (cursor.offset < cursor.size && (!context.numValues || data.dlevels.length < Number(context.numValues))) {
+const page = await decodePage(cursor, context);
 if (page.dictionary) {
 dictionary = page.dictionary;
 continue;
@@ -39,7 +39,7 @@ export async function decodeDataPages(buffer, options) {
 }
 return data;
 }
-export async function decodePage(cursor,
+export async function decodePage(cursor, context) {
 let page;
 const {
 pageHeader,
@@ -49,14 +49,14 @@ export async function decodePage(cursor, options) {
 const pageType = getThriftEnum(PageType, pageHeader.type);
 switch (pageType) {
 case 'DATA_PAGE':
-page = await decodeDataPage(cursor, pageHeader,
+page = await decodeDataPage(cursor, pageHeader, context);
 break;
 case 'DATA_PAGE_V2':
-page = await decodeDataPageV2(cursor, pageHeader,
+page = await decodeDataPageV2(cursor, pageHeader, context);
 break;
 case 'DICTIONARY_PAGE':
 page = {
-dictionary: await decodeDictionaryPage(cursor, pageHeader,
+dictionary: await decodeDictionaryPage(cursor, pageHeader, context),
 pageHeader
 };
 break;
@@ -128,13 +128,13 @@ function decodeValues(type, encoding, cursor, count, opts) {
 }
 return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
 }
-async function decodeDataPage(cursor, header,
+async function decodeDataPage(cursor, header, context) {
 var _header$data_page_hea, _header$data_page_hea2, _header$data_page_hea3, _header$data_page_hea4;
 const cursorEnd = cursor.offset + header.compressed_page_size;
 const valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
 let dataCursor = cursor;
-if (
-const valuesBuf = await decompress(
+if (context.compression !== 'UNCOMPRESSED') {
+const valuesBuf = await decompress(context.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
 dataCursor = {
 buffer: valuesBuf,
 offset: 0,
@@ -144,9 +144,9 @@ async function decodeDataPage(cursor, header, options) {
 }
 const rLevelEncoding = getThriftEnum(Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
 let rLevels = new Array(valueCount);
-if (
+if (context.column.rLevelMax > 0) {
 rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount, {
-bitWidth: getBitWidth(
+bitWidth: getBitWidth(context.column.rLevelMax),
 disableEnvelope: false
 });
 } else {
@@ -154,9 +154,9 @@ async function decodeDataPage(cursor, header, options) {
 }
 const dLevelEncoding = getThriftEnum(Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
 let dLevels = new Array(valueCount);
-if (
+if (context.column.dLevelMax > 0) {
 dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount, {
-bitWidth: getBitWidth(
+bitWidth: getBitWidth(context.column.dLevelMax),
 disableEnvelope: false
 });
 } else {
@@ -164,16 +164,16 @@ async function decodeDataPage(cursor, header, options) {
 }
 let valueCountNonNull = 0;
 for (const dlvl of dLevels) {
-if (dlvl ===
+if (dlvl === context.column.dLevelMax) {
 valueCountNonNull++;
 }
 }
 const valueEncoding = getThriftEnum(Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
 const decodeOptions = {
-typeLength:
-bitWidth:
+typeLength: context.column.typeLength,
+bitWidth: context.column.typeLength
 };
-const values = decodeValues(
+const values = decodeValues(context.column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, decodeOptions);
 return {
 dlevels: dLevels,
 rlevels: rLevels,
@@ -182,25 +182,25 @@ async function decodeDataPage(cursor, header, options) {
 pageHeader: header
 };
 }
-async function decodeDataPageV2(cursor, header,
+async function decodeDataPageV2(cursor, header, context) {
 var _header$data_page_hea5, _header$data_page_hea6, _header$data_page_hea7, _header$data_page_hea8;
 const cursorEnd = cursor.offset + header.compressed_page_size;
 const valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
 const valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
 const valueEncoding = getThriftEnum(Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
 let rLevels = new Array(valueCount);
-if (
+if (context.column.rLevelMax > 0) {
 rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
-bitWidth: getBitWidth(
+bitWidth: getBitWidth(context.column.rLevelMax),
 disableEnvelope: true
 });
 } else {
 rLevels.fill(0);
 }
 let dLevels = new Array(valueCount);
-if (
+if (context.column.dLevelMax > 0) {
 dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
-bitWidth: getBitWidth(
+bitWidth: getBitWidth(context.column.dLevelMax),
 disableEnvelope: true
 });
 } else {
@@ -208,7 +208,7 @@ async function decodeDataPageV2(cursor, header, opts) {
 }
 let valuesBufCursor = cursor;
 if ((_header$data_page_hea8 = header.data_page_header_v2) !== null && _header$data_page_hea8 !== void 0 && _header$data_page_hea8.is_compressed) {
-const valuesBuf = await decompress(
+const valuesBuf = await decompress(context.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
 valuesBufCursor = {
 buffer: valuesBuf,
 offset: 0,
@@ -217,10 +217,10 @@ async function decodeDataPageV2(cursor, header, opts) {
 cursor.offset = cursorEnd;
 }
 const decodeOptions = {
-typeLength:
-bitWidth:
+typeLength: context.column.typeLength,
+bitWidth: context.column.typeLength
 };
-const values = decodeValues(
+const values = decodeValues(context.column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, decodeOptions);
 return {
 dlevels: dLevels,
 rlevels: rLevels,
@@ -229,7 +229,7 @@ async function decodeDataPageV2(cursor, header, opts) {
 pageHeader: header
 };
 }
-async function decodeDictionaryPage(cursor, pageHeader,
+async function decodeDictionaryPage(cursor, pageHeader, context) {
 var _pageHeader$dictionar;
 const cursorEnd = cursor.offset + pageHeader.compressed_page_size;
 let dictCursor = {
@@ -238,8 +238,8 @@ async function decodeDictionaryPage(cursor, pageHeader, options) {
 size: cursorEnd - cursor.offset
 };
 cursor.offset = cursorEnd;
-if (
-const valuesBuf = await decompress(
+if (context.compression !== 'UNCOMPRESSED') {
+const valuesBuf = await decompress(context.compression, dictCursor.buffer.slice(dictCursor.offset, cursorEnd), pageHeader.uncompressed_page_size);
 dictCursor = {
 buffer: valuesBuf,
 offset: 0,
@@ -248,6 +248,22 @@ async function decodeDictionaryPage(cursor, pageHeader, options) {
 cursor.offset = cursorEnd;
 }
 const numValues = (pageHeader === null || pageHeader === void 0 ? void 0 : (_pageHeader$dictionar = pageHeader.dictionary_page_header) === null || _pageHeader$dictionar === void 0 ? void 0 : _pageHeader$dictionar.num_values) || 0;
-
+const decodedDictionaryValues = decodeValues(context.column.primitiveType, context.column.encoding, dictCursor, numValues, context);
+let values;
+if (context !== null && context !== void 0 && context.preserveBinary) {
+values = decodedDictionaryValues.map(d => preserveBinary(d));
+} else {
+values = decodedDictionaryValues.map(d => d.toString());
+}
+return values;
+}
+function preserveBinary(d) {
+if (ArrayBuffer.isView(d)) {
+return d;
+}
+if (Buffer.isBuffer(d)) {
+return d.buffer.slice(d.byteOffset, d.byteLength);
+}
+return d.toString();
 }
 //# sourceMappingURL=decoders.js.map
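Taken together, the decoders.js hunks above replace the old loosely threaded options argument with a single context object passed through decodeDataPages, decodePage and the page decoders, and add a preserveBinary path so dictionary values can be kept as bytes instead of being stringified. A rough sketch of the context shape implied by the hunks follows; the field names come from the diff, but the type itself is an assumption and is not exported by the package.

// Assumed shape only; the package's real type (ParquetOptions or a successor) may differ.
type DecoderContext = {
  column: {
    primitiveType: string; // e.g. 'INT32' or 'BYTE_ARRAY' (illustrative values)
    encoding: string; // dictionary page value encoding, e.g. 'PLAIN' (illustrative)
    typeLength?: number; // used for both typeLength and bitWidth in decodeOptions
    rLevelMax: number; // > 0 enables repetition-level decoding
    dLevelMax: number; // > 0 enables definition-level decoding
  };
  compression: string; // 'UNCOMPRESSED' skips the decompress() call
  dictionary?: unknown[]; // optional seed dictionary for decodeDataPages()
  numValues?: number; // upper bound checked against data.dlevels.length
  preserveBinary?: boolean; // new: keep dictionary entries as TypedArray/ArrayBuffer instead of .toString()
};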
@@ -1 +1 @@
-
{"version":3,"file":"decoders.js","names":["PARQUET_CODECS","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","decompress","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","decodePageHeader","getThriftEnum","getBitWidth","decodeDataPages","buffer","options","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","page","decodePage","map","value","index","push","undefined","pageHeader","pageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","concat","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","repetition_type","optional","repeated","num_children","res","name","fields","logicalType","converted_type","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","header","_header$data_page_hea","_header$data_page_hea2","_header$data_page_hea3","_header$data_page_hea4","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","valuesBuf","slice","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","_header$data_page_hea5","_header$data_page_hea6","_header$data_page_hea7","_header$data_page_hea8","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","_pageHeader$dictionar","dictCursor","dictionary_page_header","d","toString"],"sources":["../../../../src/parquetjs/parser/decoders.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetColumnChunk,\n ParquetOptions,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n options: ParquetOptions\n): Promise<ParquetColumnChunk> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetColumnChunk = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = options.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!options.numValues || data.dlevels.length < Number(options.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, options);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n 
data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param options\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n let page;\n const {pageHeader, length} = decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, options);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, options);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, options),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! 
> 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (options.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (options.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === options.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) 
as ParquetCodec;\n const decodeOptions = {\n typeLength: options.column.typeLength,\n bitWidth: options.column.typeLength\n };\n\n const values = decodeValues(\n options.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n opts: any\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (opts.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (opts.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n opts.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: opts.column.typeLength,\n bitWidth: opts.column.typeLength\n };\n\n const values = decodeValues(\n opts.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param options\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n options: ParquetOptions\n): Promise<string[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n return decodeValues(\n options.column.primitiveType!,\n options.column.encoding!,\n dictCursor,\n numValues,\n options as ParquetCodecOptions\n ).map((d) => 
d.toString());\n}\n"],"mappings":"AAUA,SAA2CA,cAAc,QAAO,WAAW;AAC3E,SACEC,aAAa,EACbC,QAAQ,EACRC,mBAAmB,EAEnBC,QAAQ,EAERC,IAAI,QACC,mBAAmB;AAC1B,SAAQC,UAAU,QAAO,gBAAgB;AACzC,SAAQC,kBAAkB,EAAEC,sBAAsB,QAAO,iBAAiB;AAC1E,SAAQC,gBAAgB,EAAEC,aAAa,EAAEC,WAAW,QAAO,qBAAqB;AAShF,OAAO,eAAeC,eAAeA,CACnCC,MAAc,EACdC,OAAuB,EACM;EAC7B,MAAMC,MAAoB,GAAG;IAC3BF,MAAM;IACNG,MAAM,EAAE,CAAC;IACTC,IAAI,EAAEJ,MAAM,CAACK;EACf,CAAC;EAED,MAAMC,IAAwB,GAAG;IAC/BC,OAAO,EAAE,EAAE;IACXC,OAAO,EAAE,EAAE;IACXC,MAAM,EAAE,EAAE;IACVC,WAAW,EAAE,EAAE;IACfC,KAAK,EAAE;EACT,CAAC;EAED,IAAIC,UAAU,GAAGX,OAAO,CAACW,UAAU,IAAI,EAAE;EAEzC,OAEEV,MAAM,CAACC,MAAM,GAAGD,MAAM,CAACE,IAAI,KAC1B,CAACH,OAAO,CAACY,SAAS,IAAIP,IAAI,CAACE,OAAO,CAACH,MAAM,GAAGS,MAAM,CAACb,OAAO,CAACY,SAAS,CAAC,CAAC,EACvE;IAEA,MAAME,IAAI,GAAG,MAAMC,UAAU,CAACd,MAAM,EAAED,OAAO,CAAC;IAE9C,IAAIc,IAAI,CAACH,UAAU,EAAE;MACnBA,UAAU,GAAGG,IAAI,CAACH,UAAU;MAE5B;IACF;IAEA,IAAIA,UAAU,CAACP,MAAM,EAAE;MAErBU,IAAI,CAACN,MAAM,GAAGM,IAAI,CAACN,MAAM,CAACQ,GAAG,CAAEC,KAAK,IAAKN,UAAU,CAACM,KAAK,CAAC,CAAC;IAC7D;IAEA,KAAK,IAAIC,KAAK,GAAG,CAAC,EAAEA,KAAK,GAAGJ,IAAI,CAACR,OAAO,CAACF,MAAM,EAAEc,KAAK,EAAE,EAAE;MACxDb,IAAI,CAACC,OAAO,CAACa,IAAI,CAACL,IAAI,CAACR,OAAO,CAACY,KAAK,CAAC,CAAC;MACtCb,IAAI,CAACE,OAAO,CAACY,IAAI,CAACL,IAAI,CAACP,OAAO,CAACW,KAAK,CAAC,CAAC;MACtC,MAAMD,KAAK,GAAGH,IAAI,CAACN,MAAM,CAACU,KAAK,CAAC;MAEhC,IAAID,KAAK,KAAKG,SAAS,EAAE;QACvBf,IAAI,CAACG,MAAM,CAACW,IAAI,CAACF,KAAK,CAAC;MACzB;IACF;IAEAZ,IAAI,CAACK,KAAK,IAAII,IAAI,CAACJ,KAAK;IACxBL,IAAI,CAACI,WAAW,CAACU,IAAI,CAACL,IAAI,CAACO,UAAU,CAAC;EACxC;EAEA,OAAOhB,IAAI;AACb;AAOA,OAAO,eAAeU,UAAUA,CAC9Bd,MAAoB,EACpBD,OAAuB,EACG;EAC1B,IAAIc,IAAI;EACR,MAAM;IAACO,UAAU;IAAEjB;EAAM,CAAC,GAAGT,gBAAgB,CAACM,MAAM,CAACF,MAAM,EAAEE,MAAM,CAACC,MAAM,CAAC;EAC3ED,MAAM,CAACC,MAAM,IAAIE,MAAM;EAEvB,MAAMkB,QAAQ,GAAG1B,aAAa,CAACN,QAAQ,EAAE+B,UAAU,CAACE,IAAI,CAAC;EAEzD,QAAQD,QAAQ;IACd,KAAK,WAAW;MACdR,IAAI,GAAG,MAAMU,cAAc,CAACvB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;MACxD;IACF,KAAK,cAAc;MACjBc,IAAI,GAAG,MAAMW,gBAAgB,CAACxB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;MAC1D;IACF,KAAK,iBAAiB;MACpBc,IAAI,GAAG;QACLH,UAAU,EAAE,MAAMe,oBAAoB,CAACzB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;QACnEqB;MACF,CAAC;MACD;IACF;MACE,MAAM,IAAIM,KAAK,uBAAAC,MAAA,CAAuBN,QAAQ,CAAE,CAAC;EACrD;EAEA,OAAOR,IAAI;AACb;AAYA,OAAO,SAASe,YAAYA,CAC1BC,cAA+B,EAC/B5B,MAAc,EACd6B,GAAW,EAKX;EACA,MAAMC,MAAwB,GAAG,CAAC,CAAC;EACnC,IAAIC,IAAI,GAAG/B,MAAM;EACjB,KAAK,IAAIgC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,GAAG,EAAEG,CAAC,EAAE,EAAE;IAC5B,MAAMC,aAAa,GAAGL,cAAc,CAACG,IAAI,CAAC;IAE1C,MAAMG,cAAc,GAClBH,IAAI,GAAG,CAAC,GAAGrC,aAAa,CAACP,mBAAmB,EAAE8C,aAAa,CAACE,eAAgB,CAAC,GAAG,MAAM;IAExF,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,QAAQ,GAAG,KAAK;IACpB,QAAQH,cAAc;MACpB,KAAK,UAAU;QACb;MACF,KAAK,UAAU;QACbE,QAAQ,GAAG,IAAI;QACf;MACF,KAAK,UAAU;QACbC,QAAQ,GAAG,IAAI;QACf;MACF;QACE,MAAM,IAAIZ,KAAK,CAAC,kCAAkC,CAAC;IACvD;IAEA,IAAIQ,aAAa,CAACK,YAAY,GAAI,CAAC,EAAE;MACnC,MAAMC,GAAG,GAAGZ,YAAY,CAACC,cAAc,EAAEG,IAAI,GAAG,CAAC,EAAEE,aAAa,CAACK,YAAa,CAAC;MAC/EP,IAAI,GAAGQ,GAAG,CAACR,IAAI;MACfD,MAAM,CAACG,aAAa,CAACO,IAAI,CAAC,GAAG;QAE3BJ,QAAQ;QACRC,QAAQ;QACRI,MAAM,EAAEF,GAAG,CAACT;MACd,CAAC;IACH,CAAC,MAAM;MACL,MAAMT,IAAI,GAAG3B,aAAa,CAACL,IAAI,EAAE4C,aAAa,CAACZ,IAAK,CAAC;MACrD,IAAIqB,WAAW,GAAGrB,IAAI;MAEtB,IAAIY,aAAa,CAACU,cAAc,EAAE;QAChCD,WAAW,GAAGhD,aAAa,CAACT,aAAa,EAAEgD,aAAa,CAACU,cAAc,CAAC;MAC1E;MAEA,QAAQD,WAAW;QACjB,KAAK,SAAS;UACZA,WAAW,MAAAhB,MAAA,CAAMgB,WAAW,OAAAhB,MAAA,CAAIL,IAAI,CAAiB;UACrD;QACF;MACF;MAEAS,MAAM,CAACG,aAAa,CAACO,IAAI,CAAC,GAAG;QAC3BnB,IAAI,EAAEqB,WAA0B;QAChCE,UAAU,EAAEX,aAAa,CAACY,WAAW;QACrCC,SAAS,EAAEb,aAAa,CAACc,SAAS;QAClCC,KAAK,
EAAEf,aAAa,CAACe,KAAK;QAC1BZ,QAAQ;QACRC;MACF,CAAC;MACDN,IAAI,EAAE;IACR;EACF;EACA,OAAO;IAACD,MAAM;IAAE9B,MAAM;IAAE+B;EAAI,CAAC;AAC/B;AAKA,SAASkB,YAAYA,CACnB5B,IAAmB,EACnB6B,QAAsB,EACtBnD,MAAoB,EACpBS,KAAa,EACb2C,IAAyB,EAClB;EACP,IAAI,EAAED,QAAQ,IAAIlE,cAAc,CAAC,EAAE;IACjC,MAAM,IAAIyC,KAAK,sBAAAC,MAAA,CAAsBwB,QAAQ,CAAE,CAAC;EAClD;EACA,OAAOlE,cAAc,CAACkE,QAAQ,CAAC,CAACD,YAAY,CAAC5B,IAAI,EAAEtB,MAAM,EAAES,KAAK,EAAE2C,IAAI,CAAC;AACzE;AAQA,eAAe7B,cAAcA,CAC3BvB,MAAoB,EACpBqD,MAAkB,EAClBtD,OAAuB,EACG;EAAA,IAAAuD,qBAAA,EAAAC,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA;EAC1B,MAAMC,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGoD,MAAM,CAACM,oBAAoB;EAC7D,MAAMC,UAAU,IAAAN,qBAAA,GAAGD,MAAM,CAACQ,gBAAgB,cAAAP,qBAAA,uBAAvBA,qBAAA,CAAyBQ,UAAU;EAGtD,IAAIC,UAAU,GAAG/D,MAAM;EAEvB,IAAID,OAAO,CAACiE,WAAW,KAAK,cAAc,EAAE;IAC1C,MAAMC,SAAS,GAAG,MAAM1E,UAAU,CAChCQ,OAAO,CAACiE,WAAW,EACnBhE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC,EAC7CL,MAAM,CAACc,sBACT,CAAC;IACDJ,UAAU,GAAG;MACXjE,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IACDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAGA,MAAMU,cAAc,GAAGzE,aAAa,CAClCR,QAAQ,GAAAoE,sBAAA,GACRF,MAAM,CAACQ,gBAAgB,cAAAN,sBAAA,uBAAvBA,sBAAA,CAAyBc,yBAC3B,CAAiB;EAEjB,IAAIC,OAAO,GAAG,IAAIC,KAAK,CAACX,UAAU,CAAC;EAEnC,IAAI7D,OAAO,CAACyE,MAAM,CAACC,SAAS,GAAG,CAAC,EAAE;IAChCH,OAAO,GAAGpB,YAAY,CAAC1D,kBAAkB,EAAE4E,cAAc,EAAEL,UAAU,EAAEH,UAAU,EAAG;MAClFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACC,SAAS,CAAC;MAC/CE,eAAe,EAAE;IAEnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLL,OAAO,CAACM,IAAI,CAAC,CAAC,CAAC;EACjB;EAGA,MAAMC,cAAc,GAAGlF,aAAa,CAClCR,QAAQ,GAAAqE,sBAAA,GACRH,MAAM,CAACQ,gBAAgB,cAAAL,sBAAA,uBAAvBA,sBAAA,CAAyBsB,yBAC3B,CAAiB;EAEjB,IAAIC,OAAO,GAAG,IAAIR,KAAK,CAACX,UAAU,CAAC;EACnC,IAAI7D,OAAO,CAACyE,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IAChCD,OAAO,GAAG7B,YAAY,CAAC1D,kBAAkB,EAAEqF,cAAc,EAAEd,UAAU,EAAEH,UAAU,EAAG;MAClFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACQ,SAAS,CAAC;MAC/CL,eAAe,EAAE;IAEnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLI,OAAO,CAACH,IAAI,CAAC,CAAC,CAAC;EACjB;EACA,IAAIK,iBAAiB,GAAG,CAAC;EACzB,KAAK,MAAMC,IAAI,IAAIH,OAAO,EAAE;IAC1B,IAAIG,IAAI,KAAKnF,OAAO,CAACyE,MAAM,CAACQ,SAAS,EAAE;MACrCC,iBAAiB,EAAE;IACrB;EACF;EAGA,MAAME,aAAa,GAAGxF,aAAa,CAACR,QAAQ,GAAAsE,sBAAA,GAAEJ,MAAM,CAACQ,gBAAgB,cAAAJ,sBAAA,uBAAvBA,sBAAA,CAAyBN,QAAS,CAAiB;EACjG,MAAMiC,aAAa,GAAG;IACpBvC,UAAU,EAAE9C,OAAO,CAACyE,MAAM,CAAC3B,UAAU;IACrC6B,QAAQ,EAAE3E,OAAO,CAACyE,MAAM,CAAC3B;EAC3B,CAAC;EAED,MAAMtC,MAAM,GAAG2C,YAAY,CACzBnD,OAAO,CAACyE,MAAM,CAACa,aAAa,EAC5BF,aAAa,EACbpB,UAAU,EACVkB,iBAAiB,EACjBG,aACF,CAAC;EAED,OAAO;IACL9E,OAAO,EAAEyE,OAAO;IAChB1E,OAAO,EAAEiE,OAAO;IAChB/D,MAAM;IACNE,KAAK,EAAEmD,UAAW;IAClBxC,UAAU,EAAEiC;EACd,CAAC;AACH;AASA,eAAe7B,gBAAgBA,CAC7BxB,MAAoB,EACpBqD,MAAkB,EAClBD,IAAS,EACiB;EAAA,IAAAkC,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA;EAC1B,MAAM/B,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGoD,MAAM,CAACM,oBAAoB;EAE7D,MAAMC,UAAU,IAAA0B,sBAAA,GAAGjC,MAAM,CAACqC,mBAAmB,cAAAJ,sBAAA,uBAA1BA,sBAAA,CAA4BxB,UAAU;EAEzD,MAAMmB,iBAAiB,GAAGrB,UAAU,KAAA2B,sBAAA,GAAGlC,MAAM,CAACqC,mBAAmB,cAAAH,sBAAA,uBAA1BA,sBAAA,CAA4BI,SAAS;EAC5E,MAAMR,aAAa,GAAGxF,aAAa,CACjCR,QAAQ,GAAAqG,sBAAA,GACRnC,MAAM,CAACqC,mBAAmB,cAAAF,sBAAA,uBAA1BA,sBAAA,CAA4BrC,QAC9B,CAAiB;EAIjB,IAAImB,OAAO,GAAG,IAAIC,KAAK,CAACX,UAAU,CAAC;EACnC,IAAIR,IAAI,CAACoB,MAAM,CAACC,SAAS,GAAG,CAAC,EAAE;IAC7BH,OAAO,GAAGpB,YAAY,CAAC1D,kBAAkB,EAAEC,sBAAsB,EAAEO,MAAM,EAAE4D,UAAU,EAAG;MACtFc,QAAQ,EAAE9E,WAAW,CAACwD,IAAI,CAACoB,MAAM,CAACC,SAAS,CAAC;MAC5CE,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLL,OAAO,CAACM,IAAI,CAAC,CAAC,CAAC;EACjB;EAIA,IAAIG,OAAO,GAAG,IAAIR,KAAK,CAACX,UAAU,
CAAC;EACnC,IAAIR,IAAI,CAACoB,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IAC7BD,OAAO,GAAG7B,YAAY,CAAC1D,kBAAkB,EAAEC,sBAAsB,EAAEO,MAAM,EAAE4D,UAAU,EAAG;MACtFc,QAAQ,EAAE9E,WAAW,CAACwD,IAAI,CAACoB,MAAM,CAACQ,SAAS,CAAC;MAC5CL,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLI,OAAO,CAACH,IAAI,CAAC,CAAC,CAAC;EACjB;EAGA,IAAIgB,eAAe,GAAG5F,MAAM;EAE5B,KAAAyF,sBAAA,GAAIpC,MAAM,CAACqC,mBAAmB,cAAAD,sBAAA,eAA1BA,sBAAA,CAA4BI,aAAa,EAAE;IAC7C,MAAM5B,SAAS,GAAG,MAAM1E,UAAU,CAChC6D,IAAI,CAACY,WAAW,EAChBhE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC,EAC7CL,MAAM,CAACc,sBACT,CAAC;IAEDyB,eAAe,GAAG;MAChB9F,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IAEDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAEA,MAAM0B,aAAa,GAAG;IACpBvC,UAAU,EAAEO,IAAI,CAACoB,MAAM,CAAC3B,UAAU;IAClC6B,QAAQ,EAAEtB,IAAI,CAACoB,MAAM,CAAC3B;EACxB,CAAC;EAED,MAAMtC,MAAM,GAAG2C,YAAY,CACzBE,IAAI,CAACoB,MAAM,CAACa,aAAa,EACzBF,aAAa,EACbS,eAAe,EACfX,iBAAiB,EACjBG,aACF,CAAC;EAED,OAAO;IACL9E,OAAO,EAAEyE,OAAO;IAChB1E,OAAO,EAAEiE,OAAO;IAChB/D,MAAM;IACNE,KAAK,EAAEmD,UAAW;IAClBxC,UAAU,EAAEiC;EACd,CAAC;AACH;AAQA,eAAe5B,oBAAoBA,CACjCzB,MAAoB,EACpBoB,UAAsB,EACtBrB,OAAuB,EACJ;EAAA,IAAA+F,qBAAA;EACnB,MAAMpC,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGmB,UAAU,CAACuC,oBAAoB;EAEjE,IAAIoC,UAAU,GAAG;IACf9F,MAAM,EAAE,CAAC;IACTH,MAAM,EAAEE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC;IACrDxD,IAAI,EAAEwD,SAAS,GAAG1D,MAAM,CAACC;EAC3B,CAAC;EAEDD,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAEzB,IAAI3D,OAAO,CAACiE,WAAW,KAAK,cAAc,EAAE;IAC1C,MAAMC,SAAS,GAAG,MAAM1E,UAAU,CAChCQ,OAAO,CAACiE,WAAW,EACnB+B,UAAU,CAACjG,MAAM,CAACoE,KAAK,CAAC6B,UAAU,CAAC9F,MAAM,EAAEyD,SAAS,CAAC,EACrDtC,UAAU,CAAC+C,sBACb,CAAC;IAED4B,UAAU,GAAG;MACXjG,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IAEDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAEA,MAAM/C,SAAS,GAAG,CAAAS,UAAU,aAAVA,UAAU,wBAAA0E,qBAAA,GAAV1E,UAAU,CAAE4E,sBAAsB,cAAAF,qBAAA,uBAAlCA,qBAAA,CAAoChC,UAAU,KAAI,CAAC;EAErE,OAAOZ,YAAY,CACjBnD,OAAO,CAACyE,MAAM,CAACa,aAAa,EAC5BtF,OAAO,CAACyE,MAAM,CAACrB,QAAQ,EACvB4C,UAAU,EACVpF,SAAS,EACTZ,OACF,CAAC,CAACgB,GAAG,CAAEkF,CAAC,IAAKA,CAAC,CAACC,QAAQ,CAAC,CAAC,CAAC;AAC5B"}
{"version":3,"file":"decoders.js","names":["PARQUET_CODECS","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","decompress","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","decodePageHeader","getThriftEnum","getBitWidth","decodeDataPages","buffer","context","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","page","decodePage","map","value","index","push","undefined","pageHeader","pageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","concat","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","repetition_type","optional","repeated","num_children","res","name","fields","logicalType","converted_type","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","header","_header$data_page_hea","_header$data_page_hea2","_header$data_page_hea3","_header$data_page_hea4","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","valuesBuf","slice","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","_header$data_page_hea5","_header$data_page_hea6","_header$data_page_hea7","_header$data_page_hea8","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","_pageHeader$dictionar","dictCursor","dictionary_page_header","decodedDictionaryValues","preserveBinary","d","toString","ArrayBuffer","isView","Buffer","isBuffer","byteOffset","byteLength"],"sources":["../../../../src/parquetjs/parser/decoders.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetColumnChunk,\n ParquetReaderContext,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n context: ParquetReaderContext\n): Promise<ParquetColumnChunk> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetColumnChunk = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = context.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!context.numValues || data.dlevels.length < Number(context.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, context);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n 
page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param context\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n context: ParquetReaderContext\n): Promise<ParquetPageData> {\n let page;\n\n const {pageHeader, length} = decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, context);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, context);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, context),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! 
> 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n context: ParquetReaderContext\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (context.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n context.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (context.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(context.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (context.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(context.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === context.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) 
as ParquetCodec;\n const decodeOptions: ParquetCodecOptions = {\n typeLength: context.column.typeLength,\n bitWidth: context.column.typeLength\n };\n\n const values = decodeValues(\n context.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n context: ParquetReaderContext\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (context.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(context.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (context.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(context.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n context.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: context.column.typeLength,\n bitWidth: context.column.typeLength\n };\n\n const values = decodeValues(\n context.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param context\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n context: ParquetReaderContext\n): Promise<(string | ArrayBuffer)[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (context.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n context.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n const decodedDictionaryValues = decodeValues(\n 
context.column.primitiveType!,\n context.column.encoding!,\n dictCursor,\n numValues,\n // TODO - this looks wrong?\n context as ParquetCodecOptions\n );\n\n // Makes it look a little easier\n let values: any[];\n if (context?.preserveBinary) {\n values = decodedDictionaryValues.map((d) => preserveBinary(d));\n } else {\n values = decodedDictionaryValues.map((d) => d.toString());\n }\n return values;\n}\n\nfunction preserveBinary(d: any): ArrayBuffer | ArrayBufferView | string {\n if (ArrayBuffer.isView(d)) {\n return d;\n }\n // Convert to ArrayBuffer\n if (Buffer.isBuffer(d)) {\n return d.buffer.slice(d.byteOffset, d.byteLength);\n }\n return d.toString();\n}\n"],"mappings":"AAUA,SAA2CA,cAAc,QAAO,WAAW;AAC3E,SACEC,aAAa,EACbC,QAAQ,EACRC,mBAAmB,EAEnBC,QAAQ,EAERC,IAAI,QACC,mBAAmB;AAC1B,SAAQC,UAAU,QAAO,gBAAgB;AACzC,SAAQC,kBAAkB,EAAEC,sBAAsB,QAAO,iBAAiB;AAC1E,SAAQC,gBAAgB,EAAEC,aAAa,EAAEC,WAAW,QAAO,qBAAqB;AAShF,OAAO,eAAeC,eAAeA,CACnCC,MAAc,EACdC,OAA6B,EACA;EAC7B,MAAMC,MAAoB,GAAG;IAC3BF,MAAM;IACNG,MAAM,EAAE,CAAC;IACTC,IAAI,EAAEJ,MAAM,CAACK;EACf,CAAC;EAED,MAAMC,IAAwB,GAAG;IAC/BC,OAAO,EAAE,EAAE;IACXC,OAAO,EAAE,EAAE;IACXC,MAAM,EAAE,EAAE;IACVC,WAAW,EAAE,EAAE;IACfC,KAAK,EAAE;EACT,CAAC;EAED,IAAIC,UAAU,GAAGX,OAAO,CAACW,UAAU,IAAI,EAAE;EAEzC,OAEEV,MAAM,CAACC,MAAM,GAAGD,MAAM,CAACE,IAAI,KAC1B,CAACH,OAAO,CAACY,SAAS,IAAIP,IAAI,CAACE,OAAO,CAACH,MAAM,GAAGS,MAAM,CAACb,OAAO,CAACY,SAAS,CAAC,CAAC,EACvE;IAEA,MAAME,IAAI,GAAG,MAAMC,UAAU,CAACd,MAAM,EAAED,OAAO,CAAC;IAE9C,IAAIc,IAAI,CAACH,UAAU,EAAE;MACnBA,UAAU,GAAGG,IAAI,CAACH,UAAU;MAE5B;IACF;IAEA,IAAIA,UAAU,CAACP,MAAM,EAAE;MAErBU,IAAI,CAACN,MAAM,GAAGM,IAAI,CAACN,MAAM,CAACQ,GAAG,CAAEC,KAAK,IAAKN,UAAU,CAACM,KAAK,CAAC,CAAC;IAC7D;IAEA,KAAK,IAAIC,KAAK,GAAG,CAAC,EAAEA,KAAK,GAAGJ,IAAI,CAACR,OAAO,CAACF,MAAM,EAAEc,KAAK,EAAE,EAAE;MACxDb,IAAI,CAACC,OAAO,CAACa,IAAI,CAACL,IAAI,CAACR,OAAO,CAACY,KAAK,CAAC,CAAC;MACtCb,IAAI,CAACE,OAAO,CAACY,IAAI,CAACL,IAAI,CAACP,OAAO,CAACW,KAAK,CAAC,CAAC;MACtC,MAAMD,KAAK,GAAGH,IAAI,CAACN,MAAM,CAACU,KAAK,CAAC;MAEhC,IAAID,KAAK,KAAKG,SAAS,EAAE;QACvBf,IAAI,CAACG,MAAM,CAACW,IAAI,CAACF,KAAK,CAAC;MACzB;IACF;IAEAZ,IAAI,CAACK,KAAK,IAAII,IAAI,CAACJ,KAAK;IACxBL,IAAI,CAACI,WAAW,CAACU,IAAI,CAACL,IAAI,CAACO,UAAU,CAAC;EACxC;EAEA,OAAOhB,IAAI;AACb;AAOA,OAAO,eAAeU,UAAUA,CAC9Bd,MAAoB,EACpBD,OAA6B,EACH;EAC1B,IAAIc,IAAI;EAER,MAAM;IAACO,UAAU;IAAEjB;EAAM,CAAC,GAAGT,gBAAgB,CAACM,MAAM,CAACF,MAAM,EAAEE,MAAM,CAACC,MAAM,CAAC;EAC3ED,MAAM,CAACC,MAAM,IAAIE,MAAM;EAEvB,MAAMkB,QAAQ,GAAG1B,aAAa,CAACN,QAAQ,EAAE+B,UAAU,CAACE,IAAI,CAAC;EAEzD,QAAQD,QAAQ;IACd,KAAK,WAAW;MACdR,IAAI,GAAG,MAAMU,cAAc,CAACvB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;MACxD;IACF,KAAK,cAAc;MACjBc,IAAI,GAAG,MAAMW,gBAAgB,CAACxB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;MAC1D;IACF,KAAK,iBAAiB;MACpBc,IAAI,GAAG;QACLH,UAAU,EAAE,MAAMe,oBAAoB,CAACzB,MAAM,EAAEoB,UAAU,EAAErB,OAAO,CAAC;QACnEqB;MACF,CAAC;MACD;IACF;MACE,MAAM,IAAIM,KAAK,uBAAAC,MAAA,CAAuBN,QAAQ,CAAE,CAAC;EACrD;EAEA,OAAOR,IAAI;AACb;AAYA,OAAO,SAASe,YAAYA,CAC1BC,cAA+B,EAC/B5B,MAAc,EACd6B,GAAW,EAKX;EACA,MAAMC,MAAwB,GAAG,CAAC,CAAC;EACnC,IAAIC,IAAI,GAAG/B,MAAM;EACjB,KAAK,IAAIgC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,GAAG,EAAEG,CAAC,EAAE,EAAE;IAC5B,MAAMC,aAAa,GAAGL,cAAc,CAACG,IAAI,CAAC;IAE1C,MAAMG,cAAc,GAClBH,IAAI,GAAG,CAAC,GAAGrC,aAAa,CAACP,mBAAmB,EAAE8C,aAAa,CAACE,eAAgB,CAAC,GAAG,MAAM;IAExF,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,QAAQ,GAAG,KAAK;IACpB,QAAQH,cAAc;MACpB,KAAK,UAAU;QACb;MACF,KAAK,UAAU;QACbE,QAAQ,GAAG,IAAI;QACf;MACF,KAAK,UAAU;QACbC,QAAQ,GAAG,IAAI;QACf;MACF;QACE,MAAM,IAAIZ,KAAK,CAAC,kCAAkC,CAAC;IACvD;IAEA,IAAIQ,aAAa,CAACK,YAAY,GAAI,CAAC,EAAE;MACnC,MAAMC,GAAG,GAAGZ,YAAY,CAACC,cAAc,EAAEG,IAA
I,GAAG,CAAC,EAAEE,aAAa,CAACK,YAAa,CAAC;MAC/EP,IAAI,GAAGQ,GAAG,CAACR,IAAI;MACfD,MAAM,CAACG,aAAa,CAACO,IAAI,CAAC,GAAG;QAE3BJ,QAAQ;QACRC,QAAQ;QACRI,MAAM,EAAEF,GAAG,CAACT;MACd,CAAC;IACH,CAAC,MAAM;MACL,MAAMT,IAAI,GAAG3B,aAAa,CAACL,IAAI,EAAE4C,aAAa,CAACZ,IAAK,CAAC;MACrD,IAAIqB,WAAW,GAAGrB,IAAI;MAEtB,IAAIY,aAAa,CAACU,cAAc,EAAE;QAChCD,WAAW,GAAGhD,aAAa,CAACT,aAAa,EAAEgD,aAAa,CAACU,cAAc,CAAC;MAC1E;MAEA,QAAQD,WAAW;QACjB,KAAK,SAAS;UACZA,WAAW,MAAAhB,MAAA,CAAMgB,WAAW,OAAAhB,MAAA,CAAIL,IAAI,CAAiB;UACrD;QACF;MACF;MAEAS,MAAM,CAACG,aAAa,CAACO,IAAI,CAAC,GAAG;QAC3BnB,IAAI,EAAEqB,WAA0B;QAChCE,UAAU,EAAEX,aAAa,CAACY,WAAW;QACrCC,SAAS,EAAEb,aAAa,CAACc,SAAS;QAClCC,KAAK,EAAEf,aAAa,CAACe,KAAK;QAC1BZ,QAAQ;QACRC;MACF,CAAC;MACDN,IAAI,EAAE;IACR;EACF;EACA,OAAO;IAACD,MAAM;IAAE9B,MAAM;IAAE+B;EAAI,CAAC;AAC/B;AAKA,SAASkB,YAAYA,CACnB5B,IAAmB,EACnB6B,QAAsB,EACtBnD,MAAoB,EACpBS,KAAa,EACb2C,IAAyB,EAClB;EACP,IAAI,EAAED,QAAQ,IAAIlE,cAAc,CAAC,EAAE;IACjC,MAAM,IAAIyC,KAAK,sBAAAC,MAAA,CAAsBwB,QAAQ,CAAE,CAAC;EAClD;EACA,OAAOlE,cAAc,CAACkE,QAAQ,CAAC,CAACD,YAAY,CAAC5B,IAAI,EAAEtB,MAAM,EAAES,KAAK,EAAE2C,IAAI,CAAC;AACzE;AAQA,eAAe7B,cAAcA,CAC3BvB,MAAoB,EACpBqD,MAAkB,EAClBtD,OAA6B,EACH;EAAA,IAAAuD,qBAAA,EAAAC,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA;EAC1B,MAAMC,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGoD,MAAM,CAACM,oBAAoB;EAC7D,MAAMC,UAAU,IAAAN,qBAAA,GAAGD,MAAM,CAACQ,gBAAgB,cAAAP,qBAAA,uBAAvBA,qBAAA,CAAyBQ,UAAU;EAGtD,IAAIC,UAAU,GAAG/D,MAAM;EAEvB,IAAID,OAAO,CAACiE,WAAW,KAAK,cAAc,EAAE;IAC1C,MAAMC,SAAS,GAAG,MAAM1E,UAAU,CAChCQ,OAAO,CAACiE,WAAW,EACnBhE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC,EAC7CL,MAAM,CAACc,sBACT,CAAC;IACDJ,UAAU,GAAG;MACXjE,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IACDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAGA,MAAMU,cAAc,GAAGzE,aAAa,CAClCR,QAAQ,GAAAoE,sBAAA,GACRF,MAAM,CAACQ,gBAAgB,cAAAN,sBAAA,uBAAvBA,sBAAA,CAAyBc,yBAC3B,CAAiB;EAEjB,IAAIC,OAAO,GAAG,IAAIC,KAAK,CAACX,UAAU,CAAC;EAEnC,IAAI7D,OAAO,CAACyE,MAAM,CAACC,SAAS,GAAG,CAAC,EAAE;IAChCH,OAAO,GAAGpB,YAAY,CAAC1D,kBAAkB,EAAE4E,cAAc,EAAEL,UAAU,EAAEH,UAAU,EAAG;MAClFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACC,SAAS,CAAC;MAC/CE,eAAe,EAAE;IAEnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLL,OAAO,CAACM,IAAI,CAAC,CAAC,CAAC;EACjB;EAGA,MAAMC,cAAc,GAAGlF,aAAa,CAClCR,QAAQ,GAAAqE,sBAAA,GACRH,MAAM,CAACQ,gBAAgB,cAAAL,sBAAA,uBAAvBA,sBAAA,CAAyBsB,yBAC3B,CAAiB;EAEjB,IAAIC,OAAO,GAAG,IAAIR,KAAK,CAACX,UAAU,CAAC;EACnC,IAAI7D,OAAO,CAACyE,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IAChCD,OAAO,GAAG7B,YAAY,CAAC1D,kBAAkB,EAAEqF,cAAc,EAAEd,UAAU,EAAEH,UAAU,EAAG;MAClFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACQ,SAAS,CAAC;MAC/CL,eAAe,EAAE;IAEnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLI,OAAO,CAACH,IAAI,CAAC,CAAC,CAAC;EACjB;EACA,IAAIK,iBAAiB,GAAG,CAAC;EACzB,KAAK,MAAMC,IAAI,IAAIH,OAAO,EAAE;IAC1B,IAAIG,IAAI,KAAKnF,OAAO,CAACyE,MAAM,CAACQ,SAAS,EAAE;MACrCC,iBAAiB,EAAE;IACrB;EACF;EAGA,MAAME,aAAa,GAAGxF,aAAa,CAACR,QAAQ,GAAAsE,sBAAA,GAAEJ,MAAM,CAACQ,gBAAgB,cAAAJ,sBAAA,uBAAvBA,sBAAA,CAAyBN,QAAS,CAAiB;EACjG,MAAMiC,aAAkC,GAAG;IACzCvC,UAAU,EAAE9C,OAAO,CAACyE,MAAM,CAAC3B,UAAU;IACrC6B,QAAQ,EAAE3E,OAAO,CAACyE,MAAM,CAAC3B;EAC3B,CAAC;EAED,MAAMtC,MAAM,GAAG2C,YAAY,CACzBnD,OAAO,CAACyE,MAAM,CAACa,aAAa,EAC5BF,aAAa,EACbpB,UAAU,EACVkB,iBAAiB,EACjBG,aACF,CAAC;EAED,OAAO;IACL9E,OAAO,EAAEyE,OAAO;IAChB1E,OAAO,EAAEiE,OAAO;IAChB/D,MAAM;IACNE,KAAK,EAAEmD,UAAW;IAClBxC,UAAU,EAAEiC;EACd,CAAC;AACH;AASA,eAAe7B,gBAAgBA,CAC7BxB,MAAoB,EACpBqD,MAAkB,EAClBtD,OAA6B,EACH;EAAA,IAAAuF,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA,EAAAC,sBAAA;EAC1B,MAAM/B,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGoD,MAAM,CAACM,oBAAoB;EAE7D,MAAMC,UAAU,IAAA0B,sBAAA,GAAGjC,MAAM,CAACqC,mBAAmB,cA
AAJ,sBAAA,uBAA1BA,sBAAA,CAA4BxB,UAAU;EAEzD,MAAMmB,iBAAiB,GAAGrB,UAAU,KAAA2B,sBAAA,GAAGlC,MAAM,CAACqC,mBAAmB,cAAAH,sBAAA,uBAA1BA,sBAAA,CAA4BI,SAAS;EAC5E,MAAMR,aAAa,GAAGxF,aAAa,CACjCR,QAAQ,GAAAqG,sBAAA,GACRnC,MAAM,CAACqC,mBAAmB,cAAAF,sBAAA,uBAA1BA,sBAAA,CAA4BrC,QAC9B,CAAiB;EAIjB,IAAImB,OAAO,GAAG,IAAIC,KAAK,CAACX,UAAU,CAAC;EACnC,IAAI7D,OAAO,CAACyE,MAAM,CAACC,SAAS,GAAG,CAAC,EAAE;IAChCH,OAAO,GAAGpB,YAAY,CAAC1D,kBAAkB,EAAEC,sBAAsB,EAAEO,MAAM,EAAE4D,UAAU,EAAG;MACtFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACC,SAAS,CAAC;MAC/CE,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLL,OAAO,CAACM,IAAI,CAAC,CAAC,CAAC;EACjB;EAIA,IAAIG,OAAO,GAAG,IAAIR,KAAK,CAACX,UAAU,CAAC;EACnC,IAAI7D,OAAO,CAACyE,MAAM,CAACQ,SAAS,GAAG,CAAC,EAAE;IAChCD,OAAO,GAAG7B,YAAY,CAAC1D,kBAAkB,EAAEC,sBAAsB,EAAEO,MAAM,EAAE4D,UAAU,EAAG;MACtFc,QAAQ,EAAE9E,WAAW,CAACG,OAAO,CAACyE,MAAM,CAACQ,SAAS,CAAC;MAC/CL,eAAe,EAAE;IACnB,CAAC,CAAC;EACJ,CAAC,MAAM;IACLI,OAAO,CAACH,IAAI,CAAC,CAAC,CAAC;EACjB;EAGA,IAAIgB,eAAe,GAAG5F,MAAM;EAE5B,KAAAyF,sBAAA,GAAIpC,MAAM,CAACqC,mBAAmB,cAAAD,sBAAA,eAA1BA,sBAAA,CAA4BI,aAAa,EAAE;IAC7C,MAAM5B,SAAS,GAAG,MAAM1E,UAAU,CAChCQ,OAAO,CAACiE,WAAW,EACnBhE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC,EAC7CL,MAAM,CAACc,sBACT,CAAC;IAEDyB,eAAe,GAAG;MAChB9F,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IAEDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAEA,MAAM0B,aAAa,GAAG;IACpBvC,UAAU,EAAE9C,OAAO,CAACyE,MAAM,CAAC3B,UAAU;IACrC6B,QAAQ,EAAE3E,OAAO,CAACyE,MAAM,CAAC3B;EAC3B,CAAC;EAED,MAAMtC,MAAM,GAAG2C,YAAY,CACzBnD,OAAO,CAACyE,MAAM,CAACa,aAAa,EAC5BF,aAAa,EACbS,eAAe,EACfX,iBAAiB,EACjBG,aACF,CAAC;EAED,OAAO;IACL9E,OAAO,EAAEyE,OAAO;IAChB1E,OAAO,EAAEiE,OAAO;IAChB/D,MAAM;IACNE,KAAK,EAAEmD,UAAW;IAClBxC,UAAU,EAAEiC;EACd,CAAC;AACH;AAQA,eAAe5B,oBAAoBA,CACjCzB,MAAoB,EACpBoB,UAAsB,EACtBrB,OAA6B,EACM;EAAA,IAAA+F,qBAAA;EACnC,MAAMpC,SAAS,GAAG1D,MAAM,CAACC,MAAM,GAAGmB,UAAU,CAACuC,oBAAoB;EAEjE,IAAIoC,UAAU,GAAG;IACf9F,MAAM,EAAE,CAAC;IACTH,MAAM,EAAEE,MAAM,CAACF,MAAM,CAACoE,KAAK,CAAClE,MAAM,CAACC,MAAM,EAAEyD,SAAS,CAAC;IACrDxD,IAAI,EAAEwD,SAAS,GAAG1D,MAAM,CAACC;EAC3B,CAAC;EAEDD,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAEzB,IAAI3D,OAAO,CAACiE,WAAW,KAAK,cAAc,EAAE;IAC1C,MAAMC,SAAS,GAAG,MAAM1E,UAAU,CAChCQ,OAAO,CAACiE,WAAW,EACnB+B,UAAU,CAACjG,MAAM,CAACoE,KAAK,CAAC6B,UAAU,CAAC9F,MAAM,EAAEyD,SAAS,CAAC,EACrDtC,UAAU,CAAC+C,sBACb,CAAC;IAED4B,UAAU,GAAG;MACXjG,MAAM,EAAEmE,SAAS;MACjBhE,MAAM,EAAE,CAAC;MACTC,IAAI,EAAE+D,SAAS,CAAC9D;IAClB,CAAC;IAEDH,MAAM,CAACC,MAAM,GAAGyD,SAAS;EAC3B;EAEA,MAAM/C,SAAS,GAAG,CAAAS,UAAU,aAAVA,UAAU,wBAAA0E,qBAAA,GAAV1E,UAAU,CAAE4E,sBAAsB,cAAAF,qBAAA,uBAAlCA,qBAAA,CAAoChC,UAAU,KAAI,CAAC;EAErE,MAAMmC,uBAAuB,GAAG/C,YAAY,CAC1CnD,OAAO,CAACyE,MAAM,CAACa,aAAa,EAC5BtF,OAAO,CAACyE,MAAM,CAACrB,QAAQ,EACvB4C,UAAU,EACVpF,SAAS,EAETZ,OACF,CAAC;EAGD,IAAIQ,MAAa;EACjB,IAAIR,OAAO,aAAPA,OAAO,eAAPA,OAAO,CAAEmG,cAAc,EAAE;IAC3B3F,MAAM,GAAG0F,uBAAuB,CAAClF,GAAG,CAAEoF,CAAC,IAAKD,cAAc,CAACC,CAAC,CAAC,CAAC;EAChE,CAAC,MAAM;IACL5F,MAAM,GAAG0F,uBAAuB,CAAClF,GAAG,CAAEoF,CAAC,IAAKA,CAAC,CAACC,QAAQ,CAAC,CAAC,CAAC;EAC3D;EACA,OAAO7F,MAAM;AACf;AAEA,SAAS2F,cAAcA,CAACC,CAAM,EAA0C;EACtE,IAAIE,WAAW,CAACC,MAAM,CAACH,CAAC,CAAC,EAAE;IACzB,OAAOA,CAAC;EACV;EAEA,IAAII,MAAM,CAACC,QAAQ,CAACL,CAAC,CAAC,EAAE;IACtB,OAAOA,CAAC,CAACrG,MAAM,CAACoE,KAAK,CAACiC,CAAC,CAACM,UAAU,EAAEN,CAAC,CAACO,UAAU,CAAC;EACnD;EACA,OAAOP,CAAC,CAACC,QAAQ,CAAC,CAAC;AACrB"}
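Note on the updated decoders.js.map above: the embedded source now shows `decodeDictionaryPage` taking a `ParquetReaderContext`, and when `context.preserveBinary` is set it returns dictionary values as binary (ArrayBuffer / typed-array views) instead of calling `toString()` on them. The sketch below illustrates that idea only; it is an assumption-laden stand-in, not the package's exact `preserveBinary` helper.

```typescript
// Illustrative sketch (not the package's exact helper): keep binary dictionary
// values as binary instead of stringifying them, mirroring the new
// `preserveBinary` branch in decodeDictionaryPage.
function preserveBinaryValue(value: unknown): ArrayBuffer | ArrayBufferView | string {
  if (ArrayBuffer.isView(value)) {
    // Typed arrays (including Node.js Buffer views) are already binary; pass through.
    return value;
  }
  if (value instanceof ArrayBuffer) {
    return value;
  }
  // Fall back to the previous behavior for non-binary values.
  return String(value);
}

// Example: map decoded dictionary values according to a preserveBinary flag.
const decoded: unknown[] = [new Uint8Array([0x01, 0x02]), 'city_name'];
const preserveBinary = true; // corresponds to the new reader prop
const values = decoded.map((d) => (preserveBinary ? preserveBinaryValue(d) : String(d)));
console.log(values); // [ Uint8Array(2) [ 1, 2 ], 'city_name' ]
```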
@@ -6,9 +6,6 @@ import { PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED } from '../../constants';
 import { CompressionCodec, Type } from '../parquet-thrift';
 import { decodeFileMetadata, getThriftEnum, fieldIndexOf } from '../utils/read-utils';
 import { decodeDataPages, decodePage } from './decoders';
-const DEFAULT_PROPS = {
-  defaultDictionarySize: 1e6
-};
 export class ParquetReader {
   constructor(file, props) {
     _defineProperty(this, "props", void 0);
@@ -16,7 +13,7 @@ export class ParquetReader {
     _defineProperty(this, "metadata", null);
     this.file = file;
     this.props = {
-      ...
+      ...ParquetReader.defaultProps,
       ...props
     };
   }
@@ -120,7 +117,7 @@ export class ParquetReader {
     return buffer;
   }
   async readColumnChunk(schema, colChunk) {
-    var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8,
+    var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _context$dictionary;
     if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
       throw new Error('external references are not supported');
     }
@@ -136,29 +133,30 @@ export class ParquetReader {
       var _colChunk$meta_data6;
       pagesSize = Math.min(this.file.size - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
     }
-    const
+    const context = {
       type,
       rLevelMax: field.rLevelMax,
       dLevelMax: field.dLevelMax,
       compression,
       column: field,
       numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
-      dictionary: []
+      dictionary: [],
+      preserveBinary: this.props.preserveBinary
     };
     let dictionary;
     const dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
     if (dictionaryPageOffset) {
       const dictionaryOffset = Number(dictionaryPageOffset);
-      dictionary = await this.getDictionary(dictionaryOffset,
+      dictionary = await this.getDictionary(dictionaryOffset, context, pagesOffset);
     }
-    dictionary = (
+    dictionary = (_context$dictionary = context.dictionary) !== null && _context$dictionary !== void 0 && _context$dictionary.length ? context.dictionary : dictionary;
     const pagesBuf = await this.file.read(pagesOffset, pagesSize);
     return await decodeDataPages(pagesBuf, {
-      ...
+      ...context,
       dictionary
     });
   }
-  async getDictionary(dictionaryPageOffset,
+  async getDictionary(dictionaryPageOffset, context, pagesOffset) {
     if (dictionaryPageOffset === 0) {
       return [];
     }
@@ -169,8 +167,12 @@ export class ParquetReader {
       offset: 0,
       size: pagesBuf.length
     };
-    const decodedPage = await decodePage(cursor,
+    const decodedPage = await decodePage(cursor, context);
     return decodedPage.dictionary;
   }
 }
+_defineProperty(ParquetReader, "defaultProps", {
+  defaultDictionarySize: 1e6,
+  preserveBinary: false
+});
 //# sourceMappingURL=parquet-reader.js.map
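The parquet-reader.js hunks above replace the module-level `DEFAULT_PROPS` with a static `ParquetReader.defaultProps` (which now also carries `preserveBinary: false`) and thread a `context` object, including `preserveBinary`, from `readColumnChunk` through `getDictionary` into `decodeDataPages`/`decodePage`. A minimal sketch of the resulting props-merge behavior follows, using a stand-in class rather than the real `ParquetReader`.

```typescript
// Stand-in sketch only: mirrors the `{...ParquetReader.defaultProps, ...props}`
// merge shown in the diff; this is not the actual ParquetReader class.
class ReaderPropsDemo {
  static defaultProps = {
    defaultDictionarySize: 1e6,
    preserveBinary: false
  };

  props: typeof ReaderPropsDemo.defaultProps;

  constructor(props: Partial<typeof ReaderPropsDemo.defaultProps> = {}) {
    // Instance props start from the static defaults and are overridden per reader.
    this.props = {...ReaderPropsDemo.defaultProps, ...props};
  }
}

const reader = new ReaderPropsDemo({preserveBinary: true});
console.log(reader.props); // { defaultDictionarySize: 1000000, preserveBinary: true }
```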