@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
package/dist/es5/parquetjs/compression.js.map
@@ -1 +1 @@
- [minified source map omitted; its embedded sourcesContent shows the old compression.ts: synchronous deflate/inflate helpers built directly on zlib and snappyjs, with lazily loaded lzo, brotli and lz4js]
+ [minified source map omitted; its embedded sourcesContent shows the new compression.ts: PARQUET_COMPRESSION_METHODS rebuilt on @loaders.gl/compression codec classes (including new ZSTD and LZ4_RAW entries), an async preloadCompressions() helper, and async deflate()/decompress() functions that convert between Buffer and ArrayBuffer around each codec]
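The practical effect of the compression.ts rewrite visible in the source map above: deflate and decompress are now async and must be awaited, and the heavyweight codec libraries can be preloaded once up front. A minimal usage sketch, assuming the exports shown in the embedded sourcesContent; the import path and sample data are illustrative, not the package's public API:

// Minimal sketch, assuming the exports shown in the sourcesContent above.
import {preloadCompressions, deflate, decompress} from './parquetjs/compression';

async function gzipRoundtrip(): Promise<void> {
  // Optionally preload the large codec libraries (brotli, lzo, lz4js, zstd-codec) once.
  await preloadCompressions();

  const original = Buffer.from('hello parquet');
  // Both calls are now async: each Buffer is converted to an ArrayBuffer,
  // run through an @loaders.gl/compression codec, and converted back.
  const compressed = await deflate('GZIP', original);
  const restored = await decompress('GZIP', compressed, original.length);
  console.assert(restored.equals(original));
}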
package/dist/es5/parquetjs/{writer.js → encoder/writer.js}
@@ -29,15 +29,17 @@ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/de
 
  var _stream = require("stream");
 
- var _codecs = require("./codecs");
+ var _codecs = require("../codecs");
 
- var Compression = _interopRequireWildcard(require("./compression"));
+ var Compression = _interopRequireWildcard(require("../compression"));
 
- var Shred = _interopRequireWildcard(require("./schema/shred"));
+ var Shred = _interopRequireWildcard(require("../schema/shred"));
 
- var _parquetThrift = require("./parquet-thrift");
+ var _parquetThrift = require("../parquet-thrift");
 
- var Util = _interopRequireWildcard(require("./util"));
+ var _fileUtils = require("../utils/file-utils");
+
+ var _readUtils = require("../utils/read-utils");
 
  var _nodeInt = _interopRequireDefault(require("node-int64"));
 
@@ -77,25 +79,58 @@ var ParquetWriter = function () {
  this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
  this.closed = false;
  this.userMetadata = {};
-
- try {
- envelopeWriter.writeHeader();
- } catch (err) {
- envelopeWriter.close();
- throw err;
- }
+ this.writeHeader();
  }
 
  (0, _createClass2.default)(ParquetWriter, [{
- key: "appendRow",
+ key: "writeHeader",
  value: function () {
- var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(row) {
+ var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
  return _regenerator.default.wrap(function _callee$(_context) {
  while (1) {
  switch (_context.prev = _context.next) {
+ case 0:
+ _context.prev = 0;
+ _context.next = 3;
+ return this.envelopeWriter.writeHeader();
+
+ case 3:
+ _context.next = 10;
+ break;
+
+ case 5:
+ _context.prev = 5;
+ _context.t0 = _context["catch"](0);
+ _context.next = 9;
+ return this.envelopeWriter.close();
+
+ case 9:
+ throw _context.t0;
+
+ case 10:
+ case "end":
+ return _context.stop();
+ }
+ }
+ }, _callee, this, [[0, 5]]);
+ }));
+
+ function writeHeader() {
+ return _writeHeader.apply(this, arguments);
+ }
+
+ return writeHeader;
+ }()
+ }, {
+ key: "appendRow",
+ value: function () {
+ var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
+ return _regenerator.default.wrap(function _callee2$(_context2) {
+ while (1) {
+ switch (_context2.prev = _context2.next) {
  case 0:
  if (!this.closed) {
- _context.next = 2;
+ _context2.next = 2;
  break;
  }
 
@@ -110,10 +145,10 @@ var ParquetWriter = function () {
 
  case 4:
  case "end":
- return _context.stop();
+ return _context2.stop();
  }
  }
- }, _callee, this);
+ }, _callee2, this);
  }));
 
  function appendRow(_x) {
@@ -125,13 +160,13 @@ var ParquetWriter = function () {
  }, {
  key: "close",
  value: function () {
- var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(callback) {
- return _regenerator.default.wrap(function _callee2$(_context2) {
+ var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
+ return _regenerator.default.wrap(function _callee3$(_context3) {
  while (1) {
- switch (_context2.prev = _context2.next) {
+ switch (_context3.prev = _context3.next) {
  case 0:
  if (!this.closed) {
- _context2.next = 2;
+ _context3.next = 2;
  break;
  }
 
@@ -144,11 +179,11 @@ var ParquetWriter = function () {
  this.rowBuffer = {};
  }
 
- _context2.next = 6;
+ _context3.next = 6;
  return this.envelopeWriter.writeFooter(this.userMetadata);
 
  case 6:
- _context2.next = 8;
+ _context3.next = 8;
  return this.envelopeWriter.close();
 
  case 8:
@@ -158,10 +193,10 @@ var ParquetWriter = function () {
 
  case 9:
  case "end":
- return _context2.stop();
+ return _context3.stop();
  }
  }
- }, _callee2, this);
+ }, _callee3, this);
  }));
 
  function close(_x2) {
@@ -188,25 +223,25 @@ var ParquetWriter = function () {
  }], [{
  key: "openFile",
  value: function () {
- var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(schema, path, opts) {
+ var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, path, opts) {
  var outputStream;
- return _regenerator.default.wrap(function _callee3$(_context3) {
+ return _regenerator.default.wrap(function _callee4$(_context4) {
  while (1) {
- switch (_context3.prev = _context3.next) {
+ switch (_context4.prev = _context4.next) {
  case 0:
- _context3.next = 2;
- return Util.osopen(path, opts);
+ _context4.next = 2;
+ return (0, _fileUtils.osopen)(path, opts);
 
  case 2:
- outputStream = _context3.sent;
- return _context3.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
+ outputStream = _context4.sent;
+ return _context4.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
 
  case 4:
  case "end":
- return _context3.stop();
+ return _context4.stop();
  }
  }
- }, _callee3);
+ }, _callee4);
  }));
 
  function openFile(_x3, _x4, _x5) {
@@ -218,29 +253,29 @@ var ParquetWriter = function () {
  }, {
  key: "openStream",
  value: function () {
- var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, outputStream, opts) {
+ var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream, opts) {
  var envelopeWriter;
- return _regenerator.default.wrap(function _callee4$(_context4) {
+ return _regenerator.default.wrap(function _callee5$(_context5) {
  while (1) {
- switch (_context4.prev = _context4.next) {
+ switch (_context5.prev = _context5.next) {
  case 0:
  if (!opts) {
  opts = {};
  }
 
- _context4.next = 3;
+ _context5.next = 3;
  return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
 
  case 3:
- envelopeWriter = _context4.sent;
- return _context4.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
+ envelopeWriter = _context5.sent;
+ return _context5.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
 
  case 5:
  case "end":
- return _context4.stop();
+ return _context5.stop();
  }
  }
- }, _callee4);
+ }, _callee5);
  }));
 
  function openStream(_x6, _x7, _x8) {
@@ -290,27 +325,35 @@ var ParquetEnvelopeWriter = function () {
  }, {
  key: "writeRowGroup",
  value: function () {
- var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(records) {
+ var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(records) {
  var rgroup;
- return _regenerator.default.wrap(function _callee5$(_context5) {
+ return _regenerator.default.wrap(function _callee6$(_context6) {
  while (1) {
- switch (_context5.prev = _context5.next) {
+ switch (_context6.prev = _context6.next) {
  case 0:
- rgroup = encodeRowGroup(this.schema, records, {
+ _context6.next = 2;
+ return encodeRowGroup(this.schema, records, {
  baseOffset: this.offset,
  pageSize: this.pageSize,
  useDataPageV2: this.useDataPageV2
  });
+
+ case 2:
+ rgroup = _context6.sent;
  this.rowCount += records.rowCount;
  this.rowGroups.push(rgroup.metadata);
- return _context5.abrupt("return", this.writeSection(rgroup.body));
+ _context6.next = 7;
+ return this.writeSection(rgroup.body);
 
- case 4:
+ case 7:
+ return _context6.abrupt("return", _context6.sent);
+
+ case 8:
  case "end":
- return _context5.stop();
+ return _context6.stop();
  }
  }
- }, _callee5, this);
+ }, _callee6, this);
  }));
 
  function writeRowGroup(_x9) {
@@ -336,22 +379,22 @@ var ParquetEnvelopeWriter = function () {
  }], [{
  key: "openStream",
  value: function () {
- var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(schema, outputStream, opts) {
+ var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(schema, outputStream, opts) {
  var writeFn, closeFn;
- return _regenerator.default.wrap(function _callee6$(_context6) {
+ return _regenerator.default.wrap(function _callee7$(_context7) {
  while (1) {
- switch (_context6.prev = _context6.next) {
+ switch (_context7.prev = _context7.next) {
  case 0:
- writeFn = Util.oswrite.bind(undefined, outputStream);
- closeFn = Util.osclose.bind(undefined, outputStream);
- return _context6.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
+ writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
+ closeFn = _fileUtils.osclose.bind(undefined, outputStream);
+ return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
 
  case 3:
  case "end":
- return _context6.stop();
+ return _context7.stop();
  }
  }
- }, _callee6);
+ }, _callee7);
  }));
 
  function openStream(_x10, _x11, _x12) {
@@ -383,19 +426,19 @@ var ParquetTransformer = function (_Transform) {
 
  var writeProxy = function (t) {
  return function () {
- var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(b) {
- return _regenerator.default.wrap(function _callee7$(_context7) {
+ var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(b) {
+ return _regenerator.default.wrap(function _callee8$(_context8) {
  while (1) {
- switch (_context7.prev = _context7.next) {
+ switch (_context8.prev = _context8.next) {
  case 0:
  t.push(b);
 
  case 1:
  case "end":
- return _context7.stop();
+ return _context8.stop();
  }
  }
- }, _callee7);
+ }, _callee8);
  }));
 
  return function (_x13) {
@@ -404,16 +447,16 @@ var ParquetTransformer = function (_Transform) {
  }();
  }((0, _assertThisInitialized2.default)(_this));
 
- _this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8() {
- return _regenerator.default.wrap(function _callee8$(_context8) {
+ _this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
+ return _regenerator.default.wrap(function _callee9$(_context9) {
  while (1) {
- switch (_context8.prev = _context8.next) {
+ switch (_context9.prev = _context9.next) {
  case 0:
  case "end":
- return _context8.stop();
+ return _context9.stop();
  }
  }
- }, _callee8);
+ }, _callee9);
  })), 0, opts), opts);
  return _this;
  }
@@ -431,20 +474,20 @@ var ParquetTransformer = function (_Transform) {
  }, {
  key: "_flush",
  value: function () {
- var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(callback) {
- return _regenerator.default.wrap(function _callee9$(_context9) {
+ var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(callback) {
+ return _regenerator.default.wrap(function _callee10$(_context10) {
  while (1) {
- switch (_context9.prev = _context9.next) {
+ switch (_context10.prev = _context10.next) {
  case 0:
- _context9.next = 2;
+ _context10.next = 2;
  return this.writer.close(callback);
 
  case 2:
  case "end":
- return _context9.stop();
+ return _context10.stop();
  }
  }
- }, _callee9, this);
+ }, _callee10, this);
  }));
 
  function _flush(_x14) {
@@ -467,174 +510,300 @@ function encodeValues(type, encoding, values, opts) {
  return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
  }
 
- function encodeDataPage(column, data) {
- var rLevelsBuf = Buffer.alloc(0);
+ function encodeDataPage(_x15, _x16) {
+ return _encodeDataPage.apply(this, arguments);
+ }
 
- if (column.rLevelMax > 0) {
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
- bitWidth: Util.getBitWidth(column.rLevelMax)
- });
- }
+ function _encodeDataPage() {
+ _encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(column, data) {
+ var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
+ return _regenerator.default.wrap(function _callee11$(_context11) {
+ while (1) {
+ switch (_context11.prev = _context11.next) {
+ case 0:
+ rLevelsBuf = Buffer.alloc(0);
+
+ if (column.rLevelMax > 0) {
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
+ });
+ }
 
- var dLevelsBuf = Buffer.alloc(0);
+ dLevelsBuf = Buffer.alloc(0);
 
- if (column.dLevelMax > 0) {
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
- bitWidth: Util.getBitWidth(column.dLevelMax)
- });
- }
+ if (column.dLevelMax > 0) {
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
+ });
+ }
 
- var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
- typeLength: column.typeLength,
- bitWidth: column.typeLength
- });
- var dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
- var compressedBuf = Compression.deflate(column.compression, dataBuf);
- var header = new _parquetThrift.PageHeader({
- type: _parquetThrift.PageType.DATA_PAGE,
- data_page_header: new _parquetThrift.DataPageHeader({
- num_values: data.count,
- encoding: _parquetThrift.Encoding[column.encoding],
- definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
- repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
- }),
- uncompressed_page_size: dataBuf.length,
- compressed_page_size: compressedBuf.length
- });
- var headerBuf = Util.serializeThrift(header);
- var page = Buffer.concat([headerBuf, compressedBuf]);
- return {
- header: header,
- headerSize: headerBuf.length,
- page: page
- };
+ valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+ typeLength: column.typeLength,
+ bitWidth: column.typeLength
+ });
+ dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
+ _context11.next = 8;
+ return Compression.deflate(column.compression, dataBuf);
+
+ case 8:
+ compressedBuf = _context11.sent;
+ header = new _parquetThrift.PageHeader({
+ type: _parquetThrift.PageType.DATA_PAGE,
+ data_page_header: new _parquetThrift.DataPageHeader({
+ num_values: data.count,
+ encoding: _parquetThrift.Encoding[column.encoding],
+ definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
+ repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
+ }),
+ uncompressed_page_size: dataBuf.length,
+ compressed_page_size: compressedBuf.length
+ });
+ headerBuf = (0, _readUtils.serializeThrift)(header);
+ page = Buffer.concat([headerBuf, compressedBuf]);
+ return _context11.abrupt("return", {
+ header: header,
+ headerSize: headerBuf.length,
+ page: page
+ });
+
+ case 13:
+ case "end":
+ return _context11.stop();
+ }
+ }
+ }, _callee11);
+ }));
+ return _encodeDataPage.apply(this, arguments);
  }
 
- function encodeDataPageV2(column, data, rowCount) {
- var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
- typeLength: column.typeLength,
- bitWidth: column.typeLength
- });
- var compressedBuf = Compression.deflate(column.compression, valuesBuf);
- var rLevelsBuf = Buffer.alloc(0);
+ function encodeDataPageV2(_x17, _x18, _x19) {
+ return _encodeDataPageV.apply(this, arguments);
+ }
 
- if (column.rLevelMax > 0) {
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
- bitWidth: Util.getBitWidth(column.rLevelMax),
- disableEnvelope: true
- });
- }
+ function _encodeDataPageV() {
+ _encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(column, data, rowCount) {
+ var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
+ return _regenerator.default.wrap(function _callee12$(_context12) {
+ while (1) {
+ switch (_context12.prev = _context12.next) {
+ case 0:
+ valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+ typeLength: column.typeLength,
+ bitWidth: column.typeLength
+ });
+ _context12.next = 3;
+ return Compression.deflate(column.compression, valuesBuf);
+
+ case 3:
+ compressedBuf = _context12.sent;
+ rLevelsBuf = Buffer.alloc(0);
+
+ if (column.rLevelMax > 0) {
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
+ disableEnvelope: true
+ });
+ }
 
- var dLevelsBuf = Buffer.alloc(0);
+ dLevelsBuf = Buffer.alloc(0);
 
- if (column.dLevelMax > 0) {
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
- bitWidth: Util.getBitWidth(column.dLevelMax),
- disableEnvelope: true
- });
- }
+ if (column.dLevelMax > 0) {
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
+ disableEnvelope: true
+ });
+ }
 
- var header = new _parquetThrift.PageHeader({
- type: _parquetThrift.PageType.DATA_PAGE_V2,
- data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
- num_values: data.count,
- num_nulls: data.count - data.values.length,
- num_rows: rowCount,
- encoding: _parquetThrift.Encoding[column.encoding],
- definition_levels_byte_length: dLevelsBuf.length,
- repetition_levels_byte_length: rLevelsBuf.length,
- is_compressed: column.compression !== 'UNCOMPRESSED'
- }),
- uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
- compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
- });
- var headerBuf = Util.serializeThrift(header);
- var page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
- return {
- header: header,
- headerSize: headerBuf.length,
- page: page
- };
+ header = new _parquetThrift.PageHeader({
+ type: _parquetThrift.PageType.DATA_PAGE_V2,
+ data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
+ num_values: data.count,
+ num_nulls: data.count - data.values.length,
+ num_rows: rowCount,
+ encoding: _parquetThrift.Encoding[column.encoding],
+ definition_levels_byte_length: dLevelsBuf.length,
+ repetition_levels_byte_length: rLevelsBuf.length,
+ is_compressed: column.compression !== 'UNCOMPRESSED'
+ }),
+ uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
+ compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
+ });
+ headerBuf = (0, _readUtils.serializeThrift)(header);
+ page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
+ return _context12.abrupt("return", {
+ header: header,
+ headerSize: headerBuf.length,
+ page: page
+ });
+
+ case 12:
+ case "end":
+ return _context12.stop();
+ }
+ }
+ }, _callee12);
+ }));
+ return _encodeDataPageV.apply(this, arguments);
  }
 
- function encodeColumnChunk(column, buffer, offset, opts) {
- var data = buffer.columnData[column.path.join()];
- var baseOffset = (opts.baseOffset || 0) + offset;
- var pageBuf;
- var total_uncompressed_size = 0;
- var total_compressed_size = 0;
- {
- var result;
+ function encodeColumnChunk(_x20, _x21, _x22, _x23) {
+ return _encodeColumnChunk.apply(this, arguments);
+ }
 
- if (opts.useDataPageV2) {
- result = encodeDataPageV2(column, data, buffer.rowCount);
- } else {
- result = encodeDataPage(column, data);
- }
+ function _encodeColumnChunk() {
+ _encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(column, buffer, offset, opts) {
+ var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
+ return _regenerator.default.wrap(function _callee13$(_context13) {
+ while (1) {
+ switch (_context13.prev = _context13.next) {
+ case 0:
+ data = buffer.columnData[column.path.join()];
+ baseOffset = (opts.baseOffset || 0) + offset;
+ total_uncompressed_size = 0;
+ total_compressed_size = 0;
+
+ if (!opts.useDataPageV2) {
+ _context13.next = 10;
+ break;
+ }
 
- pageBuf = result.page;
- total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
- total_compressed_size += result.header.compressed_page_size + result.headerSize;
- }
- var metadata = new _parquetThrift.ColumnMetaData({
- path_in_schema: column.path,
- num_values: data.count,
- data_page_offset: baseOffset,
- encodings: [],
- total_uncompressed_size: total_uncompressed_size,
- total_compressed_size: total_compressed_size,
- type: _parquetThrift.Type[column.primitiveType],
- codec: _parquetThrift.CompressionCodec[column.compression]
- });
- metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
- metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
- var metadataOffset = baseOffset + pageBuf.length;
- var body = Buffer.concat([pageBuf, Util.serializeThrift(metadata)]);
- return {
- body: body,
- metadata: metadata,
- metadataOffset: metadataOffset
- };
+ _context13.next = 7;
+ return encodeDataPageV2(column, data, buffer.rowCount);
+
+ case 7:
+ _context13.t0 = _context13.sent;
+ _context13.next = 13;
+ break;
+
+ case 10:
+ _context13.next = 12;
+ return encodeDataPage(column, data);
+
+ case 12:
+ _context13.t0 = _context13.sent;
+
+ case 13:
+ result = _context13.t0;
+ pageBuf = result.page;
+ total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
+ total_compressed_size += result.header.compressed_page_size + result.headerSize;
+ metadata = new _parquetThrift.ColumnMetaData({
+ path_in_schema: column.path,
+ num_values: data.count,
+ data_page_offset: baseOffset,
+ encodings: [],
+ total_uncompressed_size: total_uncompressed_size,
+ total_compressed_size: total_compressed_size,
+ type: _parquetThrift.Type[column.primitiveType],
+ codec: _parquetThrift.CompressionCodec[column.compression]
+ });
+ metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
+ metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
+ metadataOffset = baseOffset + pageBuf.length;
+ body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
+ return _context13.abrupt("return", {
+ body: body,
+ metadata: metadata,
+ metadataOffset: metadataOffset
+ });
+
+ case 23:
+ case "end":
+ return _context13.stop();
+ }
+ }
+ }, _callee13);
+ }));
+ return _encodeColumnChunk.apply(this, arguments);
  }
 
- function encodeRowGroup(schema, data, opts) {
- var metadata = new _parquetThrift.RowGroup({
- num_rows: data.rowCount,
- columns: [],
- total_byte_size: 0
- });
- var body = Buffer.alloc(0);
+ function encodeRowGroup(_x24, _x25, _x26) {
+ return _encodeRowGroup.apply(this, arguments);
+ }
 
- var _iterator = _createForOfIteratorHelper(schema.fieldList),
- _step;
+ function _encodeRowGroup() {
+ _encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14(schema, data, opts) {
+ var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
+
+ return _regenerator.default.wrap(function _callee14$(_context14) {
+ while (1) {
+ switch (_context14.prev = _context14.next) {
+ case 0:
+ metadata = new _parquetThrift.RowGroup({
+ num_rows: data.rowCount,
+ columns: [],
+ total_byte_size: 0
+ });
+ body = Buffer.alloc(0);
+ _iterator2 = _createForOfIteratorHelper(schema.fieldList);
+ _context14.prev = 3;
+
+ _iterator2.s();
+
+ case 5:
+ if ((_step2 = _iterator2.n()).done) {
+ _context14.next = 18;
+ break;
+ }
 
- try {
- for (_iterator.s(); !(_step = _iterator.n()).done;) {
- var field = _step.value;
+ field = _step2.value;
 
- if (field.isNested) {
- continue;
- }
+ if (!field.isNested) {
+ _context14.next = 9;
+ break;
+ }
 
- var cchunkData = encodeColumnChunk(field, data, body.length, opts);
- var cchunk = new _parquetThrift.ColumnChunk({
- file_offset: cchunkData.metadataOffset,
- meta_data: cchunkData.metadata
- });
- metadata.columns.push(cchunk);
- metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
- body = Buffer.concat([body, cchunkData.body]);
- }
- } catch (err) {
- _iterator.e(err);
- } finally {
- _iterator.f();
- }
+ return _context14.abrupt("continue", 16);
+
+ case 9:
+ _context14.next = 11;
+ return encodeColumnChunk(field, data, body.length, opts);
+
+ case 11:
+ cchunkData = _context14.sent;
+ cchunk = new _parquetThrift.ColumnChunk({
+ file_offset: cchunkData.metadataOffset,
+ meta_data: cchunkData.metadata
+ });
+ metadata.columns.push(cchunk);
+ metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
+ body = Buffer.concat([body, cchunkData.body]);
 
- return {
- body: body,
- metadata: metadata
- };
+ case 16:
+ _context14.next = 5;
+ break;
+
+ case 18:
+ _context14.next = 23;
+ break;
+
+ case 20:
+ _context14.prev = 20;
+ _context14.t0 = _context14["catch"](3);
+
+ _iterator2.e(_context14.t0);
+
+ case 23:
+ _context14.prev = 23;
+
+ _iterator2.f();
+
+ return _context14.finish(23);
+
+ case 26:
+ return _context14.abrupt("return", {
+ body: body,
+ metadata: metadata
+ });
+
+ case 27:
+ case "end":
+ return _context14.stop();
+ }
+ }
+ }, _callee14, null, [[3, 20, 23, 26]]);
+ }));
+ return _encodeRowGroup.apply(this, arguments);
  }
 
  function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
@@ -665,12 +834,12 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
  metadata.schema.push(schemaRoot);
  }
 
- var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
- _step2;
+ var _iterator = _createForOfIteratorHelper(schema.fieldList),
+ _step;
 
  try {
- for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
- var field = _step2.value;
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
+ var field = _step.value;
  var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
  var schemaElem = new _parquetThrift.SchemaElement({
  name: field.name,
@@ -691,12 +860,12 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
  metadata.schema.push(schemaElem);
  }
  } catch (err) {
- _iterator2.e(err);
+ _iterator.e(err);
  } finally {
- _iterator2.f();
+ _iterator.f();
  }
 
- var metadataEncoded = Util.serializeThrift(metadata);
+ var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
  var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
  metadataEncoded.copy(footerEncoded);
  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
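All of the regenerator churn in these writer.js hunks encodes one source-level change: because Compression.deflate now returns a promise, encodeDataPage, encodeDataPageV2, encodeColumnChunk, encodeRowGroup and ParquetEnvelopeWriter.writeRowGroup all became async and await their callees (hence every _calleeN/_contextN pair being renumbered). A minimal sketch of that propagation pattern, using hypothetical stand-ins rather than the real internals of src/parquetjs/encoder/writer.ts:

// Hypothetical stand-ins illustrating the sync-to-async propagation.
async function deflate(data: Buffer): Promise<Buffer> {
  return data; // placeholder codec; the real one dispatches to @loaders.gl/compression
}

// Was synchronous; now awaits the async codec.
async function encodeDataPage(page: Buffer): Promise<Buffer> {
  return await deflate(page);
}

// Every caller up the chain becomes async in turn.
async function encodeRowGroup(pages: Buffer[]): Promise<Buffer> {
  const chunks: Buffer[] = [];
  for (const page of pages) {
    chunks.push(await encodeDataPage(page)); // the await ripples upward
  }
  return Buffer.concat(chunks);
}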