@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4
This diff compares the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
- package/dist/dist.min.js +7 -18
- package/dist/dist.min.js.map +1 -1
- package/dist/es5/bundle.js +2 -4
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +17 -0
- package/dist/es5/constants.js.map +1 -0
- package/dist/es5/index.js +53 -21
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +82 -0
- package/dist/es5/lib/convert-schema.js.map +1 -0
- package/dist/es5/lib/parse-parquet.js +173 -0
- package/dist/es5/lib/parse-parquet.js.map +1 -0
- package/dist/es5/lib/read-array-buffer.js +53 -0
- package/dist/es5/lib/read-array-buffer.js.map +1 -0
- package/dist/es5/parquet-loader.js +6 -79
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/es5/parquetjs/codecs/index.js +10 -0
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +2 -2
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +138 -104
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +495 -0
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +2 -0
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +2 -1
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +79 -4
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +108 -0
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/esm/bundle.js +2 -4
- package/dist/esm/bundle.js.map +1 -1
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/index.js +14 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/convert-schema.js +71 -0
- package/dist/esm/lib/convert-schema.js.map +1 -0
- package/dist/esm/lib/parse-parquet.js +28 -0
- package/dist/esm/lib/parse-parquet.js.map +1 -0
- package/dist/esm/lib/read-array-buffer.js +9 -0
- package/dist/esm/lib/read-array-buffer.js.map +1 -0
- package/dist/esm/parquet-loader.js +4 -24
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/esm/parquetjs/codecs/index.js +9 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js +2 -2
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +54 -105
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
- package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js +300 -0
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +2 -0
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +2 -1
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js +78 -4
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +79 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/parquet-worker.js +7 -18
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +10 -10
- package/src/bundle.ts +2 -3
- package/src/constants.ts +17 -0
- package/src/index.ts +30 -4
- package/src/lib/convert-schema.ts +95 -0
- package/src/lib/parse-parquet.ts +27 -0
- package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
- package/src/parquet-loader.ts +4 -24
- package/src/parquetjs/codecs/dictionary.ts +11 -0
- package/src/parquetjs/codecs/index.ts +13 -0
- package/src/parquetjs/codecs/rle.ts +4 -2
- package/src/parquetjs/compression.ts +89 -50
- package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
- package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
- package/src/parquetjs/parser/decoders.ts +448 -0
- package/src/parquetjs/parser/parquet-cursor.ts +94 -0
- package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
- package/src/parquetjs/parser/parquet-reader.ts +179 -0
- package/src/parquetjs/schema/declare.ts +48 -2
- package/src/parquetjs/schema/schema.ts +2 -0
- package/src/parquetjs/schema/shred.ts +3 -1
- package/src/parquetjs/schema/types.ts +82 -5
- package/src/parquetjs/utils/buffer-utils.ts +18 -0
- package/src/parquetjs/utils/file-utils.ts +96 -0
- package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
- package/dist/dist.es5.min.js +0 -51
- package/dist/dist.es5.min.js.map +0 -1
- package/dist/es5/parquetjs/compression.ts.disabled +0 -105
- package/dist/es5/parquetjs/reader.js +0 -1078
- package/dist/es5/parquetjs/reader.js.map +0 -1
- package/dist/es5/parquetjs/util.js.map +0 -1
- package/dist/es5/parquetjs/writer.js.map +0 -1
- package/dist/esm/libs/read-array-buffer.ts +0 -31
- package/dist/esm/parquetjs/compression.ts.disabled +0 -105
- package/dist/esm/parquetjs/reader.js +0 -524
- package/dist/esm/parquetjs/reader.js.map +0 -1
- package/dist/esm/parquetjs/util.js.map +0 -1
- package/dist/esm/parquetjs/writer.js.map +0 -1
- package/src/libs/read-array-buffer.ts +0 -31
- package/src/parquetjs/compression.ts.disabled +0 -105
- package/src/parquetjs/reader.ts +0 -707
@@ -1 +1 @@
-{"version":3,"sources":["../../../src/parquetjs/compression.ts"],"names":["
+{"version":3,"sources":["../../../src/parquetjs/compression.ts"],"names":["modules","brotli","decompress","brotliDecompress","compress","Error","lz4js","lzo","ZstdCodec","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","NoCompression","GZIP","GZipCompression","SNAPPY","SnappyCompression","BROTLI","BrotliCompression","LZ4","LZ4Compression","LZ4_RAW","LZO","LZOCompression","ZSTD","ZstdCompression","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","inputArrayBuffer","compressedArrayBuffer","size","inflate"],"mappings":"…","sourcesContent":["…"],"file":"compression.js"}

The regenerated single-line map for compression.js embeds the rewritten src/parquetjs/compression.ts in its sourcesContent. In the new module, codecs are provided by @loaders.gl/compression (NoCompression, GZipCompression, SnappyCompression, BrotliCompression, LZ4Compression, LZOCompression, ZstdCompression) and registered in a PARQUET_COMPRESSION_METHODS table keyed by ParquetCompression. Brotli decompression goes through brotli/decompress while brotli compression throws; the heavy lz4js, lzo and zstd-codec libraries are injected through a modules option; and the module exports async preloadCompressions(), deflate(method, value) and decompress(method, value, size) helpers plus a synchronous inflate(method, value, size) that dispatches to the selected codec.
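A minimal usage sketch of those helpers, assuming they are imported from the package-internal parquetjs/compression module (the relative import path below is illustrative, not a documented public export):

  import {preloadCompressions, deflate, decompress} from './parquetjs/compression';

  // Round-trips a buffer through one of the registered codecs. preloadCompressions() loads the
  // heavyweight codec modules (brotli, lz4js, lzo, zstd-codec) up front; deflate() and decompress()
  // are async because the underlying @loaders.gl/compression classes work asynchronously.
  async function roundtrip(data: Buffer): Promise<Buffer> {
    await preloadCompressions();
    const compressed = await deflate('GZIP', data);
    return decompress('GZIP', compressed, data.length);
  }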
package/dist/es5/parquetjs/encoder/writer.js (moved from parquetjs/writer.js):

@@ -29,15 +29,17 @@ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/de
 
 var _stream = require("stream");
 
-var _codecs = require("
+var _codecs = require("../codecs");
 
-var Compression = _interopRequireWildcard(require("
+var Compression = _interopRequireWildcard(require("../compression"));
 
-var Shred = _interopRequireWildcard(require("
+var Shred = _interopRequireWildcard(require("../schema/shred"));
 
-var _parquetThrift = require("
+var _parquetThrift = require("../parquet-thrift");
 
-var
+var _fileUtils = require("../utils/file-utils");
+
+var _readUtils = require("../utils/read-utils");
 
 var _nodeInt = _interopRequireDefault(require("node-int64"));
 
@@ -77,25 +79,58 @@ var ParquetWriter = function () {
  The ParquetWriter constructor keeps the rowGroupSize/closed/userMetadata initialization but no longer
  wraps a synchronous envelopeWriter.writeHeader() in try/catch; it now calls this.writeHeader().
  writeHeader is added as a new async prototype method (compiled through _asyncToGenerator/_regenerator
  as _callee/_context) that awaits this.envelopeWriter.writeHeader() and, if that throws, awaits
  this.envelopeWriter.close() before rethrowing. The appendRow method that follows is recompiled as
  _callee2/_context2.
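Read back from the regenerator states above, the change corresponds roughly to the following source-level shape (a sketch only: EnvelopeWriter is a stand-in interface for ParquetEnvelopeWriter, and the real class is ParquetWriter in src/parquetjs/encoder/writer.ts):

  interface EnvelopeWriter {
    writeHeader(): Promise<void>;
    close(): Promise<void>;
  }

  class WriterSketch {
    constructor(private envelopeWriter: EnvelopeWriter) {
      // The constructor no longer writes the header synchronously inside try/catch;
      // it delegates to the async method (fire-and-forget at construction time).
      this.writeHeader();
    }

    async writeHeader(): Promise<void> {
      try {
        await this.envelopeWriter.writeHeader();
      } catch (err) {
        await this.envelopeWriter.close(); // shut the underlying writer down before rethrowing
        throw err;
      }
    }
  }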
@@ -110,10 +145,10 @@ var ParquetWriter = function () {
  appendRow: the generator closure is renumbered to _callee2/_context2 now that writeHeader occupies the first compiled slot.
@@ -125,13 +160,13 @@
  close(callback): recompiled as _callee3/_context3.
@@ -144,11 +179,11 @@
  close: the awaited calls to this.envelopeWriter.writeFooter(this.userMetadata) and this.envelopeWriter.close() are re-pointed at _context3.
@@ -158,10 +193,10 @@
  close: closing _callee3 bookkeeping (_context3.stop()).
@@ -188,25 +223,25 @@
  static openFile(schema, path, opts): recompiled as _callee4/_context4; the output stream is now opened through (0, _fileUtils.osopen)(path, opts) before delegating to ParquetWriter.openStream.
@@ -218,29 +253,29 @@
  static openStream(schema, outputStream, opts): recompiled as _callee5/_context5; it still awaits ParquetEnvelopeWriter.openStream and returns a new ParquetWriter.
@@ -290,27 +325,35 @@ var ParquetEnvelopeWriter = function () {
  writeRowGroup(records): encodeRowGroup() is now asynchronous, so its result is awaited before this.rowCount and this.rowGroups are updated, and the subsequent this.writeSection(rgroup.body) is awaited and returned (_callee6/_context6).
@@ -336,22 +379,22 @@
  static ParquetEnvelopeWriter.openStream: writeFn and closeFn are now bound from _fileUtils.oswrite and _fileUtils.osclose (_callee7/_context7).
@@ -383,19 +426,19 @@ var ParquetTransformer = function (_Transform) {
  the push-to-stream write proxy is recompiled as _callee8/_context8.
@@ -404,16 +447,16 @@
  the no-op close function handed to ParquetEnvelopeWriter is recompiled as _callee9/_context9.
@@ -431,20 +474,20 @@
  _flush(callback): recompiled as _callee10/_context10 around the awaited this.writer.close(callback).
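Taken together these hunks keep the writer's public surface (openFile, openStream, appendRow, close) asynchronous end to end. A usage sketch, assuming ParquetSchema and ParquetWriter are importable from the package root (the import path and the schema field definitions below are illustrative, not confirmed by this diff):

  import {ParquetSchema, ParquetWriter} from '@loaders.gl/parquet';

  // Writes two rows to a local file; every step is awaited because the writer API is async throughout.
  async function writeExample(path: string): Promise<void> {
    const schema = new ParquetSchema({
      name: {type: 'UTF8'},
      count: {type: 'INT64'}
    });
    const writer = await ParquetWriter.openFile(schema, path, {}); // opens the stream via file-utils.osopen
    await writer.appendRow({name: 'alpha', count: 1});             // rows buffer until rowGroupSize is reached
    await writer.appendRow({name: 'beta', count: 2});
    await writer.close();                                          // flushes the row group and writes the footer
  }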
@@ -467,174 +510,300 @@ function encodeValues(type, encoding, values, opts) {
  encodeDataPage, encodeDataPageV2, encodeColumnChunk and encodeRowGroup are all converted from synchronous
  functions into async functions (compiled to the _callee11 through _callee14 regenerator bodies), because
  Compression.deflate() now returns a Promise:
  - encodeDataPage(column, data): encodes repetition and definition levels (bit widths via
    _readUtils.getBitWidth) and the values, concatenates them into dataBuf, awaits
    Compression.deflate(column.compression, dataBuf), builds a DATA_PAGE PageHeader/DataPageHeader carrying
    the uncompressed and compressed sizes, serializes it through _readUtils.serializeThrift, and returns
    {header, headerSize, page}.
  - encodeDataPageV2(column, data, rowCount): encodes the values, awaits Compression.deflate on them, encodes
    the levels with disableEnvelope, and builds a DATA_PAGE_V2 header (num_values, num_nulls, num_rows,
    is_compressed) before returning {header, headerSize, page}.
  - encodeColumnChunk(column, buffer, offset, opts): looks up buffer.columnData[column.path.join()], awaits
    encodeDataPageV2 or encodeDataPage depending on opts.useDataPageV2, accumulates
    total_uncompressed_size/total_compressed_size, and returns {body, metadata, metadataOffset} with a
    ColumnMetaData serialized via _readUtils.serializeThrift.
  - encodeRowGroup(schema, data, opts): iterates schema.fieldList, skips nested fields, awaits
    encodeColumnChunk for each leaf column, accumulates total_byte_size through node-int64, and returns the
    assembled RowGroup metadata and body buffer.
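The compiled _callee11 above corresponds roughly to the following source-level shape (a sketch: encodeValues, getBitWidth, serializeThrift, Compression and the parquet-thrift classes are the identifiers already in scope in this module, and the any types stand in for the real column and page-data types):

  // Sketch of what the compiled encodeDataPage does at the source level.
  async function encodeDataPageSketch(column: any, data: any) {
    // Encode repetition and definition levels only when the column can actually have them
    let rLevelsBuf = Buffer.alloc(0);
    if (column.rLevelMax > 0) {
      rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
        bitWidth: getBitWidth(column.rLevelMax)
      });
    }
    let dLevelsBuf = Buffer.alloc(0);
    if (column.dLevelMax > 0) {
      dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
        bitWidth: getBitWidth(column.dLevelMax)
      });
    }
    // Encode the values, then compress levels + values together; deflate() is awaited now
    const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
      typeLength: column.typeLength,
      bitWidth: column.typeLength
    });
    const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
    const compressedBuf = await Compression.deflate(column.compression, dataBuf);
    // The thrift page header records both the uncompressed and compressed sizes
    const header = new PageHeader({
      type: PageType.DATA_PAGE,
      data_page_header: new DataPageHeader({
        num_values: data.count,
        encoding: Encoding[column.encoding],
        definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING],
        repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING]
      }),
      uncompressed_page_size: dataBuf.length,
      compressed_page_size: compressedBuf.length
    });
    const headerBuf = serializeThrift(header);
    return {header, headerSize: headerBuf.length, page: Buffer.concat([headerBuf, compressedBuf])};
  }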
@@ -665,12 +834,12 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
 metadata.schema.push(schemaRoot);
 }
 
-var
-
+var _iterator = _createForOfIteratorHelper(schema.fieldList),
+    _step;
 
 try {
-for (
-var field =
+for (_iterator.s(); !(_step = _iterator.n()).done;) {
+var field = _step.value;
 var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
 var schemaElem = new _parquetThrift.SchemaElement({
 name: field.name,
@@ -691,12 +860,12 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
 metadata.schema.push(schemaElem);
 }
 } catch (err) {
-
+_iterator.e(err);
 } finally {
-
+_iterator.f();
 }
 
-var metadataEncoded =
+var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
 var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
 metadataEncoded.copy(footerEncoded);
 footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
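The footer assembled here follows the standard Parquet layout: the thrift-serialized FileMetaData, then its 4-byte little-endian length, then the 4-byte "PAR1" magic, which is why footerEncoded is allocated with metadataEncoded.length + 8. A sketch of that layout (the magic write itself falls outside the lines shown in this hunk, so treat it as an assumption about the surrounding code):

  // Sketch of the Parquet footer layout implied by the +8 allocation above.
  // The 'PAR1' write is assumed from the Parquet file format, not visible in this hunk.
  function encodeFooterTailSketch(metadataEncoded: Buffer): Buffer {
    const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
    metadataEncoded.copy(footerEncoded);                                          // thrift FileMetaData
    footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);  // 4-byte length, little-endian
    footerEncoded.write('PAR1', metadataEncoded.length + 4);                      // trailing magic bytes
    return footerEncoded;
  }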