@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../src/parquetjs/reader.ts"],"names":["PARQUET_CODECS","Compression","ParquetSchema","Shred","CompressionCodec","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","Util","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","Symbol","asyncIterator","ParquetCursor","constructor","metadata","envelopeReader","schema","columnList","rowGroup","rowGroupIndex","next","length","row_groups","rowBuffer","readRowGroup","materializeRecords","shift","rewind","done","value","return","throw","ParquetReader","openFile","filePath","ParquetEnvelopeReader","readHeader","readFooter","err","close","openBuffer","buffer","openArrayBuffer","arrayBuffer","readFn","start","Buffer","from","closeFn","size","byteLength","version","Error","root","decodeSchema","num_children","getCursor","map","x","Array","isArray","getRowCount","Number","num_rows","getSchema","getMetadata","md","kv","key_value_metadata","key","fileStat","fstat","fileDescriptor","fopen","fread","bind","undefined","fclose","position","Promise","resolve","slice","read","fileSize","buf","toString","rowCount","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","fieldIndexOf","join","readColumnChunk","file_path","field","findField","type","getThriftEnum","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","pagesBuf","decodeDataPages","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","decodeFileMetadata","decodeValues","encoding","cursor","count","opts","column","offset","data","rlevels","dlevels","values","pageHeader","decodePageHeader","pageType","pageData","decodeDataPage","decodeDataPageV2","prototype","push","apply","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","valuesBuf","inflate","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","rLevelMax","bitW
idth","getBitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","typeLength","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","schemaElements","len","i","schemaElement","repetitionType","repetition_type","optional","repeated","res","name","fields","logicalType","converted_type","type_length"],"mappings":";;;;AACA,SAA2CA,cAA3C,QAAgE,UAAhE;AACA,OAAO,KAAKC,WAAZ,MAA6B,eAA7B;AAYA,SAAQC,aAAR,QAA4B,iBAA5B;AACA,OAAO,KAAKC,KAAZ,MAAuB,gBAAvB;AAEA,SAEEC,gBAFF,EAGEC,aAHF,EAIEC,QAJF,EAKEC,mBALF,EAQEC,QARF,EAWEC,IAXF,QAYO,kBAZP;AAaA,OAAO,KAAKC,IAAZ,MAAsB,QAAtB;AAMA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;wBAkEGC,MAAM,CAACC,a;AA7DV,OAAO,MAAMC,aAAN,CAAmD;AAcxDC,EAAAA,WAAW,CACTC,QADS,EAETC,cAFS,EAGTC,MAHS,EAITC,UAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMS,QAAJC,IAAI,GAAwB;AAChC,QAAI,KAAKF,QAAL,CAAcG,MAAd,KAAyB,CAA7B,EAAgC;AAC9B,UAAI,KAAKF,aAAL,IAAsB,KAAKL,QAAL,CAAcQ,UAAd,CAAyBD,MAAnD,EAA2D;AAEzD,eAAO,IAAP;AACD;;AACD,YAAME,SAAS,GAAG,MAAM,KAAKR,cAAL,CAAoBS,YAApB,CACtB,KAAKR,MADiB,EAEtB,KAAKF,QAAL,CAAcQ,UAAd,CAAyB,KAAKH,aAA9B,CAFsB,EAGtB,KAAKF,UAHiB,CAAxB;AAKA,WAAKC,QAAL,GAAgBpB,KAAK,CAAC2B,kBAAN,CAAyB,KAAKT,MAA9B,EAAsCO,SAAtC,CAAhB;AACA,WAAKJ,aAAL;AACD;;AACD,WAAO,KAAKD,QAAL,CAAcQ,KAAd,EAAP;AACD;;AAKDC,EAAAA,MAAM,GAAS;AACb,SAAKT,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMD,4BAA2C;AACzC,QAAIS,IAAI,GAAG,KAAX;AACA,WAAO;AACLR,MAAAA,IAAI,EAAE,YAAY;AAChB,YAAIQ,IAAJ,EAAU;AACR,iBAAO;AAACA,YAAAA,IAAD;AAAOC,YAAAA,KAAK,EAAE;AAAd,WAAP;AACD;;AACD,cAAMA,KAAK,GAAG,MAAM,KAAKT,IAAL,EAApB;;AACA,YAAIS,KAAK,KAAK,IAAd,EAAoB;AAClB,iBAAO;AAACD,YAAAA,IAAI,EAAE,IAAP;AAAaC,YAAAA;AAAb,WAAP;AACD;;AACD,eAAO;AAACD,UAAAA,IAAI,EAAE,KAAP;AAAcC,UAAAA;AAAd,
SAAP;AACD,OAVI;AAWLC,MAAAA,MAAM,EAAE,YAAY;AAClBF,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAD;AAAOC,UAAAA,KAAK,EAAE;AAAd,SAAP;AACD,OAdI;AAeLE,MAAAA,KAAK,EAAE,YAAY;AACjBH,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAI,EAAE,IAAP;AAAaC,UAAAA,KAAK,EAAE;AAApB,SAAP;AACD;AAlBI,KAAP;AAoBD;;AAnFuD;yBAwOvDnB,MAAM,CAACC,a;AA3IV,OAAO,MAAMqB,aAAN,CAAmD;AAKnC,eAARC,QAAQ,CAAIC,QAAJ,EAAiD;AACpE,UAAMnB,cAAc,GAAG,MAAMoB,qBAAqB,CAACF,QAAtB,CAA+BC,QAA/B,CAA7B;;AACA,QAAI;AACF,YAAMnB,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAEsB,eAAVE,UAAU,CAAIC,MAAJ,EAA+C;AACpE,UAAM1B,cAAc,GAAG,MAAMoB,qBAAqB,CAACK,UAAtB,CAAiCC,MAAjC,CAA7B;;AACA,QAAI;AACF,YAAM1B,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAK2B,eAAfI,eAAe,CAAIC,WAAJ,EAAyD;AACnF,UAAMC,MAAM,GAAG,OAAOC,KAAP,EAAsBxB,MAAtB,KAAyCyB,MAAM,CAACC,IAAP,CAAYJ,WAAZ,EAAyBE,KAAzB,EAAgCxB,MAAhC,CAAxD;;AACA,UAAM2B,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGN,WAAW,CAACO,UAAzB;AACA,UAAMnC,cAAc,GAAG,IAAIoB,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMlC,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAkBlB,QAAlB,EAA4BC,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAYDzB,EAAAA,WAAW,CAACC,QAAD,EAAyBC,cAAzB,EAAgE;AAAA;;AAAA;;AAAA;;AACzE,QAAID,QAAQ,CAACqC,OAAT,KAAqB5C,eAAzB,EAA0C;AACxC,YAAM,IAAI6C,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAED,SAAKtC,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,UAAMsC,IAAI,GAAG,KAAKvC,QAAL,CAAcE,MAAd,CAAqB,CAArB,CAAb;AACA,UAAM;AAACA,MAAAA;AAAD,QAAWsC,YAAY,CAAC,KAAKxC,QAAL,CAAcE,MAAf,EAAuB,CAAvB,EAA0BqC,IAAI,CAACE,YAA/B,CAA7B;AACA,SAAKvC,MAAL,GAAc,IAA
InB,aAAJ,CAAkBmB,MAAlB,CAAd;AACD;;AAMU,QAALuB,KAAK,GAAkB;AAC3B,UAAM,KAAKxB,cAAL,CAAoBwB,KAApB,EAAN;AAGD;;AAeDiB,EAAAA,SAAS,CAACvC,UAAD,EAAgE;AACvE,QAAI,CAACA,UAAL,EAAiB;AAEfA,MAAAA,UAAU,GAAG,EAAb;AACD;;AAGDA,IAAAA,UAAU,GAAGA,UAAU,CAACwC,GAAX,CAAgBC,CAAD,IAAQC,KAAK,CAACC,OAAN,CAAcF,CAAd,IAAmBA,CAAnB,GAAuB,CAACA,CAAD,CAA9C,CAAb;AAEA,WAAO,IAAI9C,aAAJ,CACL,KAAKE,QADA,EAEL,KAAKC,cAFA,EAGL,KAAKC,MAHA,EAILC,UAJK,CAAP;AAMD;;AAMD4C,EAAAA,WAAW,GAAW;AACpB,WAAOC,MAAM,CAAC,KAAKhD,QAAL,CAAciD,QAAf,CAAb;AACD;;AAKDC,EAAAA,SAAS,GAAkB;AACzB,WAAO,KAAKhD,MAAZ;AACD;;AAKDiD,EAAAA,WAAW,GAA2B;AACpC,UAAMC,EAA0B,GAAG,EAAnC;;AACA,SAAK,MAAMC,EAAX,IAAiB,KAAKrD,QAAL,CAAcsD,kBAA/B,EAAoD;AAClDF,MAAAA,EAAE,CAACC,EAAE,CAACE,GAAJ,CAAF,GAAaF,EAAE,CAACtC,KAAhB;AACD;;AACD,WAAOqC,EAAP;AACD;;AAMD,6BAA2C;AACzC,WAAO,KAAKV,SAAL,GAAiB9C,MAAM,CAACC,aAAxB,GAAP;AACD;;AA7IuD;AAsJ1D,OAAO,MAAMwB,qBAAN,CAA4B;AASZ,eAARF,QAAQ,CAACC,QAAD,EAAmD;AACtE,UAAMoC,QAAQ,GAAG,MAAMjE,IAAI,CAACkE,KAAL,CAAWrC,QAAX,CAAvB;AACA,UAAMsC,cAAc,GAAG,MAAMnE,IAAI,CAACoE,KAAL,CAAWvC,QAAX,CAA7B;AAEA,UAAMU,MAAM,GAAGvC,IAAI,CAACqE,KAAL,CAAWC,IAAX,CAAgBC,SAAhB,EAA2BJ,cAA3B,CAAf;AACA,UAAMxB,OAAO,GAAG3C,IAAI,CAACwE,MAAL,CAAYF,IAAZ,CAAiBC,SAAjB,EAA4BJ,cAA5B,CAAhB;AAEA,WAAO,IAAIrC,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CsB,QAAQ,CAACrB,IAApD,CAAP;AACD;;AAEsB,eAAVT,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMG,MAAM,GAAG,CAACkC,QAAD,EAAmBzD,MAAnB,KACb0D,OAAO,CAACC,OAAR,CAAgBvC,MAAM,CAACwC,KAAP,CAAaH,QAAb,EAAuBA,QAAQ,GAAGzD,MAAlC,CAAhB,CADF;;AAEA,UAAM2B,OAAO,GAAG,MAAM+B,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAI7C,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CP,MAAM,CAACpB,MAAlD,CAAP;AACD;;AAEDR,EAAAA,WAAW,CACTqE,IADS,EAET3C,KAFS,EAGT4C,QAHS,EAIT;AAAA;;AAAA;;AAAA;;AACA,SAAKD,IAAL,GAAYA,IAAZ;AACA,SAAK3C,KAAL,GAAaA,KAAb;AACA,SAAK4C,QAAL,GAAgBA,QAAhB;AACD;;AAEe,QAAV/C,UAAU,GAAkB;AAChC,UAAMgD,GAAG,GAAG,MAAM,KAAKF,IAAL,CAAU,CAAV,EAAa5E,aAAa,CAACe,MAA3B,CAAlB;;AAEA,QAAI+D,GAAG,CAACC,QAAJ,OAAmB/E,aAAvB,EAAsC;AACpC,YAAM,IAAI8C,KAAJ,CAAU,wBAAV,CAAN;AACD;AACF;;AAEiB,QAAZ5B,YAAY,CAChBR,MADgB,E
AEhBE,QAFgB,EAGhBD,UAHgB,EAIQ;AACxB,UAAMwB,MAAqB,GAAG;AAC5B6C,MAAAA,QAAQ,EAAExB,MAAM,CAAC5C,QAAQ,CAAC6C,QAAV,CADY;AAE5BwB,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBtE,QAAQ,CAACuE,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAI5E,UAAU,CAACI,MAAX,GAAoB,CAApB,IAAyBhB,IAAI,CAACyF,YAAL,CAAkB7E,UAAlB,EAA8B2E,MAA9B,IAAyC,CAAtE,EAAyE;AACvE;AACD;;AACDnD,MAAAA,MAAM,CAAC8C,UAAP,CAAkBK,MAAM,CAAEG,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBhF,MAArB,EAA6BwE,QAA7B,CAA1C;AACD;;AACD,WAAO/C,MAAP;AACD;;AAEoB,QAAfuD,eAAe,CAAChF,MAAD,EAAwBwE,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACS,SAAT,KAAuBrB,SAAvB,IAAoCY,QAAQ,CAACS,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAI7C,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAM8C,KAAK,GAAGlF,MAAM,CAACmF,SAAP,wBAAiBX,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMO,IAAmB,GAAG/F,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,0BAAyBoF,QAAQ,CAACG,SAAlC,yDAAyB,qBAAoBS,IAA7C,CAA5B;AACA,QAAIA,IAAI,KAAKF,KAAK,CAACI,aAAnB,EAAkC,MAAM,IAAIlD,KAAJ,CAAW,mCAAkCgD,IAAK,EAAlD,CAAN;AAElC,UAAMG,WAA+B,GAAGlG,IAAI,CAACgG,aAAL,CACtCtG,gBADsC,0BAEtCyF,QAAQ,CAACG,SAF6B,yDAEtC,qBAAoBa,KAFkB,CAAxC;AAKA,UAAMC,WAAW,GAAG3C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBe,gBAArB,CAA1B;AACA,UAAMC,SAAS,GAAG7C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBiB,qBAArB,CAAxB;AACA,UAAMC,QAAQ,GAAG,MAAM,KAAK3B,IAAL,CAAUuB,WAAV,EAAuBE,SAAvB,CAAvB;AAEA,WAAOG,eAAe,CAACD,QAAD,EAAWX,KAAX,EAAkBK,WAAlB,CAAtB;AACD;;AAEe,QAAVlE,UAAU,GAA0B;AACxC,UAAM0E,UAAU,GAAGzG,aAAa,CAACe,MAAd,GAAuB,CAA1C;AACA,UAAM2F,UAAU,GAAG,MAAM,KAAK9B,IAAL,CAAU,KAAKC,QAAL,GAAgB4B,UAA1B,EAAsCA,UAAtC,CAAzB;;AAEA,QAAIC,UAAU,CAAC/B,KAAX,CAAiB,CAAjB,EAAoBI,QAApB,OAAmC/E,aAAvC,EAAsD;AACpD,YAAM,IAAI8C,KAAJ,CAAU,0BAAV,CAAN;AACD;;AAED,UAAM6D,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAKhC,QAAL,GAAgB8B,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAG7G,aAAa,CAACe,MAAnC,EAA2C;AACzC,YAAM,IAAI+B,KAAJ,CAAU,uBAAV,CAAN;AACD;;AAED,UAAMgE,WAAW,GAAG,MAAM,KAAKlC,IAAL,CAAUiC,cAAV,EAA0BF
,YAA1B,CAA1B;AAGA,UAAM;AAACnG,MAAAA;AAAD,QAAaT,IAAI,CAACgH,kBAAL,CAAwBD,WAAxB,CAAnB;AACA,WAAOtG,QAAP;AACD;;AAxGgC;;AA8GnC,SAASwG,YAAT,CACElB,IADF,EAEEmB,QAFF,EAGEC,MAHF,EAIEC,KAJF,EAKEC,IALF,EAMS;AACP,MAAI,EAAEH,QAAQ,IAAI5H,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyD,KAAJ,CAAW,qBAAoBmE,QAAS,EAAxC,CAAN;AACD;;AACD,SAAO5H,cAAc,CAAC4H,QAAD,CAAd,CAAyBD,YAAzB,CAAsClB,IAAtC,EAA4CoB,MAA5C,EAAoDC,KAApD,EAA2DC,IAA3D,CAAP;AACD;;AAED,SAASZ,eAAT,CACErE,MADF,EAEEkF,MAFF,EAGEpB,WAHF,EAIe;AACb,QAAMiB,MAAoB,GAAG;AAC3B/E,IAAAA,MAD2B;AAE3BmF,IAAAA,MAAM,EAAE,CAFmB;AAG3B3E,IAAAA,IAAI,EAAER,MAAM,CAACpB;AAHc,GAA7B;AAMA,QAAMwG,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBP,IAAAA,KAAK,EAAE;AAJiB,GAA1B;;AAQA,SAAOD,MAAM,CAACI,MAAP,GAAgBJ,MAAM,CAACvE,IAA9B,EAAoC;AAIlC,UAAM;AAACgF,MAAAA,UAAD;AAAa5G,MAAAA;AAAb,QAAuBhB,IAAI,CAAC6H,gBAAL,CAAsBV,MAAM,CAAC/E,MAA7B,CAA7B;AACA+E,IAAAA,MAAM,CAACI,MAAP,IAAiBvG,MAAjB;AAEA,UAAM8G,QAAQ,GAAG9H,IAAI,CAACgG,aAAL,CAAmBlG,QAAnB,EAA6B8H,UAAU,CAAC7B,IAAxC,CAAjB;AAEA,QAAIgC,QAA4B,GAAG,IAAnC;;AACA,YAAQD,QAAR;AACE,WAAK,WAAL;AACEC,QAAAA,QAAQ,GAAGC,cAAc,CAACb,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAAzB;AACA;;AACF,WAAK,cAAL;AACE6B,QAAAA,QAAQ,GAAGE,gBAAgB,CAACd,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAA3B;AACA;;AACF;AACE,cAAM,IAAInD,KAAJ,CAAW,sBAAqB+E,QAAS,EAAzC,CAAN;AARJ;;AAWAxE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACC,OAAhC,EAAyCM,QAAQ,CAACN,OAAlD;AACAnE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACE,OAAhC,EAAyCK,QAAQ,CAACL,OAAlD;AACApE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACG,MAAhC,EAAwCI,QAAQ,CAACJ,MAAjD;AACAH,IAAAA,IAAI,CAACJ,KAAL,IAAcW,QAAQ,CAACX,KAAvB;AACD;;AAED,SAAOI,IAAP;AACD;;AAED,SAASQ,cAAT,CACEb,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAmBA,MAAIC,UAAU,GAAGxB,MAAjB;;AACA,MAAIjB,WAAW,KAAK,cAApB,EAAoC;A
AClC,UAAM0C,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAKAH,IAAAA,UAAU,GAAG;AACXvG,MAAAA,MAAM,EAAEwG,SADG;AAEXrB,MAAAA,MAAM,EAAE,CAFG;AAGX3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHL,KAAb;AAKAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAGD,QAAMS,cAAc,GAAG/I,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBO,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqB4I,cAArB,EAAqCJ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CADwE;AAElFG,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAGvJ,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBe,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBoJ,cAArB,EAAqCZ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CADwE;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAKtC,MAAM,CAACoC,SAApB,EAA+B;AAC7BC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAACI,gBAFa,2DAEpB,uBAAyBvB,QAFL,CAAtB;AAIA,QAAMS,MAAM,GAAGV,YAAY,CAACK,MAAM,CAACrB,aAAR,EAAwB4D,aAAxB,EAAuClB,UAAvC,EAAmDgB,iBAAnD,EAAsE;AAC/FG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UAD4E;AAE/FX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAF8E,GAAtE,CAA3B;AASA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASP,gBAAT,CACEd,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AA
EA,QAAMC,UAAU,6BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BrB,UAA/C;AAEA,QAAMiB,iBAAiB,GAAGnB,UAAU,8BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMH,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAAC0B,mBAFa,2DAEpB,uBAA4B7C,QAFR,CAAtB;AAOA,MAAI+B,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CAD4E;AAEtFG,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CAD4E;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIW,eAAe,GAAG9C,MAAtB;;AAEA,gCAAIkB,MAAM,CAAC0B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMtB,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAMAmB,IAAAA,eAAe,GAAG;AAChB7H,MAAAA,MAAM,EAAEwG,SADQ;AAEhBrB,MAAAA,MAAM,EAAE,CAFQ;AAGhB3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHA,KAAlB;AAMAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAED,QAAMX,MAAM,GAAGV,YAAY,CACzBK,MAAM,CAACrB,aADkB,EAEzB4D,aAFyB,EAGzBI,eAHyB,EAIzBN,iBAJyB,EAKzB;AACEG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UADrB;AAEEX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAFnB,GALyB,CAA3B;AAWA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASvF,YAAT,CACEkH,cADF,EAEE5C,MAFF,EAGE6C,GAHF,EAQE;AACA,QAAMzJ,MAAwB,GAAG,EAAjC;AACA,MAAII,IAAI,GAAGwG,MAAX;;AACA,OAAK,IAAI8C,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGD,GAApB,EAAyBC,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGH,cAAc,CAACpJ,IAAD,CAApC;AAEA,UAAMwJ,cAAc,GAClBxJ
,IAAI,GAAG,CAAP,GAAWf,IAAI,CAACgG,aAAL,CAAmBnG,mBAAnB,EAAwCyK,aAAa,CAACE,eAAtD,CAAX,GAAqF,MADvF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAI3H,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIuH,aAAa,CAACpH,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMyH,GAAG,GAAG1H,YAAY,CAACkH,cAAD,EAAiBpJ,IAAI,GAAG,CAAxB,EAA2BuJ,aAAa,CAACpH,YAAzC,CAAxB;AACAnC,MAAAA,IAAI,GAAG4J,GAAG,CAAC5J,IAAX;AACAJ,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAE3BH,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BG,QAAAA,MAAM,EAAEF,GAAG,CAAChK;AAJe,OAA7B;AAMD,KATD,MASO;AACL,UAAImK,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,EAAyBuK,aAAa,CAACvE,IAAvC,CAAlB;;AAEA,UAAIuE,aAAa,CAACS,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBrG,aAAnB,EAAkC2K,aAAa,CAACS,cAAhD,CAAd;AACD;;AAEDpK,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAC3B7E,QAAAA,IAAI,EAAE+E,WADqB;AAE3BhB,QAAAA,UAAU,EAAEQ,aAAa,CAACU,WAFC;AAG3BP,QAAAA,QAH2B;AAI3BC,QAAAA;AAJ2B,OAA7B;AAMA3J,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACJ,IAAAA,MAAD;AAAS4G,IAAAA,MAAT;AAAiBxG,IAAAA;AAAjB,GAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from './codecs';\nimport * as Compression from './compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetCompression,\n ParquetData,\n ParquetField,\n ParquetRecord,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from './schema/declare';\nimport {ParquetSchema} from './schema/schema';\nimport * as Shred from './schema/shred';\n// tslint:disable-next-line:max-line-length\nimport {\n ColumnChunk,\n CompressionCodec,\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from './parquet-thrift';\nimport * as Util from './util';\n// import Fs = 
require('fs');\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Internal type used for repetition/definition levels\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. 
Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n\n/**\n * A parquet reader allows retrieving the rows from a parquet file in order.\n * The basic usage is to create a reader and then retrieve a cursor/iterator\n * which allows you to consume row after row until all rows have been read. 
It is\n * important that you call close() after you are finished reading the file to\n * avoid leaking file descriptors.\n */\nexport class ParquetReader<T> implements AsyncIterable<T> {\n /**\n * Open the parquet file pointed to by the specified path and return a new\n * parquet reader\n */\n static async openFile<T>(filePath: string): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);\n const closeFn = async () => {};\n const size = arrayBuffer.byteLength;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is not recommended to call this constructor directly except for advanced\n * and internal use cases. 
Consider using one of the open{File,Buffer} methods\n * instead\n */\n constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {\n if (metadata.version !== PARQUET_VERSION) {\n throw new Error('invalid parquet version');\n }\n\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n const root = this.metadata.schema[0];\n const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);\n this.schema = new ParquetSchema(schema);\n }\n\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n async close(): Promise<void> {\n await this.envelopeReader.close();\n // this.envelopeReader = null;\n // this.metadata = null;\n }\n\n /**\n * Return a cursor to the file. You may open more than one cursor and use\n * them concurrently. All cursors become invalid once close() is called on\n * the reader object.\n *\n * The required_columns parameter controls which columns are actually read\n * from disk. An empty array or no value implies all columns. A list of column\n * names means that only those columns should be loaded from disk.\n */\n getCursor(): ParquetCursor<T>;\n // @ts-ignore\n getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;\n getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;\n getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {\n if (!columnList) {\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = [];\n }\n\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));\n\n return new ParquetCursor<T>(\n this.metadata,\n this.envelopeReader,\n this.schema,\n columnList as string[][]\n );\n }\n\n /**\n * Return the number of rows in this file. 
Note that the number of rows is\n * not neccessarily equal to the number of rows in each column.\n */\n getRowCount(): number {\n return Number(this.metadata.num_rows);\n }\n\n /**\n * Returns the ParquetSchema for this file\n */\n getSchema(): ParquetSchema {\n return this.schema;\n }\n\n /**\n * Returns the user (key/value) metadata for this file\n */\n getMetadata(): Record<string, string> {\n const md: Record<string, string> = {};\n for (const kv of this.metadata.key_value_metadata!) {\n md[kv.key] = kv.value!;\n }\n return md;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.getCursor()[Symbol.asyncIterator]();\n }\n}\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. 
You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n\n static async openFile(filePath: string): Promise<ParquetEnvelopeReader> {\n const fileStat = await Util.fstat(filePath);\n const fileDescriptor = await Util.fopen(filePath);\n\n const readFn = Util.fread.bind(undefined, fileDescriptor);\n const closeFn = Util.fclose.bind(undefined, fileDescriptor);\n\n return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);\n }\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n }\n\n async readHeader(): Promise<void> {\n const buf = await this.read(0, PARQUET_MAGIC.length);\n\n if (buf.toString() !== PARQUET_MAGIC) {\n throw new Error('not valid parquet file');\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && Util.fieldIndexOf(columnList, colKey!) 
< 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = Util.getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n if (type !== field.primitiveType) throw new Error(`chunk type not matching schema: ${type}`);\n\n const compression: ParquetCompression = Util.getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n const pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n\n return decodeDataPages(pagesBuf, field, compression);\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n if (trailerBuf.slice(4).toString() !== PARQUET_MAGIC) {\n throw new Error('not a valid parquet file');\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error('invalid metadata size');\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = Util.decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n 
count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\nfunction decodeDataPages(\n buffer: Buffer,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n count: 0\n };\n\n // @ts-ignore size can be undefined\n while (cursor.offset < cursor.size) {\n // const pageHeader = new parquet_thrift.PageHeader();\n // cursor.offset += parquet_util.decodeThrift(pageHeader, cursor.buffer);\n\n const {pageHeader, length} = Util.decodePageHeader(cursor.buffer);\n cursor.offset += length;\n\n const pageType = Util.getThriftEnum(PageType, pageHeader.type);\n\n let pageData: ParquetData | null = null;\n switch (pageType) {\n case 'DATA_PAGE':\n pageData = decodeDataPage(cursor, pageHeader, column, compression);\n break;\n case 'DATA_PAGE_V2':\n pageData = decodeDataPageV2(cursor, pageHeader, column, compression);\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n Array.prototype.push.apply(data.rlevels, pageData.rlevels);\n Array.prototype.push.apply(data.dlevels, pageData.dlevels);\n Array.prototype.push.apply(data.values, pageData.values);\n data.count += pageData.count;\n }\n\n return data;\n}\n\nfunction decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n // const info = {\n // path: opts.column.path.join('.'),\n // valueEncoding,\n // dLevelEncoding,\n // rLevelEncoding,\n // cursorOffset: cursor.offset,\n // cursorEnd,\n // cusrorSize: cursor.size,\n // header,\n // opts,\n // buffer: 
cursor.buffer.toJSON(),\n // values: null as any[],\n // valBuf: null as any\n // };\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n /* uncompress page */\n let dataCursor = cursor;\n if (compression !== 'UNCOMPRESSED') {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.encoding!\n ) as ParquetCodec;\n const values = decodeValues(column.primitiveType!, valueEncoding, dataCursor, valueCountNonNull, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // info.valBuf = 
uncursor.buffer.toJSON();\n // info.values = values;\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const values = decodeValues(\n column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n }\n 
);\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? Util.getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n let logicalType = Util.getThriftEnum(Type, schemaElement.type!);\n\n if (schemaElement.converted_type) {\n logicalType = Util.getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n"],"file":"reader.js"}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../src/parquetjs/util.ts"],"names":["fs","TBufferedTransport","TCompactProtocol","TFramedTransport","FileMetaData","PageHeader","UFramedTransport","serializeThrift","obj","output","transport","undefined","buf","push","protocol","write","flush","Buffer","concat","decodeThrift","offset","readPos","read","decodeFileMetadata","metadata","length","decodePageHeader","pageHeader","getBitWidth","val","Math","ceil","log2","getThriftEnum","klass","value","k","Error","fopen","filePath","Promise","resolve","reject","open","err","fd","fstat","stat","fread","position","buffer","alloc","bytesRead","fclose","close","oswrite","os","osclose","osopen","path","opts","outputStream","createWriteStream","once","fieldIndexOf","arr","elem","j","m","i","load","name","module","global","require"],"mappings":";AACA,OAAOA,EAAP,MAAe,IAAf;AACA,SAAQC,kBAAR,EAA4BC,gBAA5B,EAA8CC,gBAA9C,QAAqE,QAArE;AACA,SAAQC,YAAR,EAAsBC,UAAtB,QAAuC,kBAAvC;;AAYA,MAAMC,gBAAN,SAA+BH,gBAA/B,CAAgD;AAAA;AAAA;;AAAA,qCACrB,CADqB;AAAA;;AAAA;;AAOhD,OAAO,SAASI,eAAT,CAAyBC,GAAzB,EAA2C;AAChD,QAAMC,MAAgB,GAAG,EAAzB;AAEA,QAAMC,SAAS,GAAG,IAAIT,kBAAJ,CAAuBU,SAAvB,EAAmCC,GAAD,IAAS;AAC3DH,IAAAA,MAAM,CAACI,IAAP,CAAYD,GAAZ;AACD,GAFiB,CAAlB;AAIA,QAAME,QAAQ,GAAG,IAAIZ,gBAAJ,CAAqBQ,SAArB,CAAjB;AACAF,EAAAA,GAAG,CAACO,KAAJ,CAAUD,QAAV;AACAJ,EAAAA,SAAS,CAACM,KAAV;AAEA,SAAOC,MAAM,CAACC,MAAP,CAAcT,MAAd,CAAP;AACD;AAED,OAAO,SAASU,YAAT,CAAsBX,GAAtB,EAAgCI,GAAhC,EAA6CQ,MAA7C,EAA8D;AACnE,MAAI,CAACA,MAAL,EAAa;AAEXA,IAAAA,MAAM,GAAG,CAAT;AACD;;AAED,QAAMV,SAAS,GAAG,IAAIJ,gBAAJ,CAAqBM,GAArB,CAAlB;AACAF,EAAAA,SAAS,CAACW,OAAV,GAAoBD,MAApB;AACA,QAAMN,QAAQ,GAAG,IAAIZ,gBAAJ,CAAqBQ,SAArB,CAAjB;AACAF,EAAAA,GAAG,CAACc,IAAJ,CAASR,QAAT;AACA,SAAOJ,SAAS,CAACW,OAAV,GAAoBD,MAA3B;AACD;AAED,OAAO,SAASG,kBAAT,CAA4BX,GAA5B,EAAyCQ,MAAzC,EAA0D;AAC/D,MAAI,CAACA,MAAL,EAAa;AAEXA,IAAAA,MAAM,GAAG,CAAT;AACD;;AAED,QAAMV,SAAS,GAAG,IAAIJ,gBAAJ,CAAqBM,GAArB,CAAlB;AACAF,EAAAA,SAAS,CAACW,OAAV,GAAoBD,MAApB;AACA,QAAMN,QAAQ,GAAG,IAAIZ,gBAAJ,CAAqBQ,SAArB,CAAjB
;AACA,QAAMc,QAAQ,GAAGpB,YAAY,CAACkB,IAAb,CAAkBR,QAAlB,CAAjB;AACA,SAAO;AAACW,IAAAA,MAAM,EAAEf,SAAS,CAACW,OAAV,GAAoBD,MAA7B;AAAqCI,IAAAA;AAArC,GAAP;AACD;AAED,OAAO,SAASE,gBAAT,CAA0Bd,GAA1B,EAAuCQ,MAAvC,EAAwD;AAC7D,MAAI,CAACA,MAAL,EAAa;AAEXA,IAAAA,MAAM,GAAG,CAAT;AACD;;AAED,QAAMV,SAAS,GAAG,IAAIJ,gBAAJ,CAAqBM,GAArB,CAAlB;AACAF,EAAAA,SAAS,CAACW,OAAV,GAAoBD,MAApB;AACA,QAAMN,QAAQ,GAAG,IAAIZ,gBAAJ,CAAqBQ,SAArB,CAAjB;AACA,QAAMiB,UAAU,GAAGtB,UAAU,CAACiB,IAAX,CAAgBR,QAAhB,CAAnB;AACA,SAAO;AAACW,IAAAA,MAAM,EAAEf,SAAS,CAACW,OAAV,GAAoBD,MAA7B;AAAqCO,IAAAA;AAArC,GAAP;AACD;AAKD,OAAO,SAASC,WAAT,CAAqBC,GAArB,EAA0C;AAC/C,MAAIA,GAAG,KAAK,CAAZ,EAAe;AACb,WAAO,CAAP;AAED;;AACD,SAAOC,IAAI,CAACC,IAAL,CAAUD,IAAI,CAACE,IAAL,CAAUH,GAAG,GAAG,CAAhB,CAAV,CAAP;AACD;AAKD,OAAO,SAASI,aAAT,CAAuBC,KAAvB,EAAmCC,KAAnC,EAAmE;AACxE,OAAK,MAAMC,CAAX,IAAgBF,KAAhB,EAAuB;AACrB,QAAIA,KAAK,CAACE,CAAD,CAAL,KAAaD,KAAjB,EAAwB;AACtB,aAAOC,CAAP;AACD;AACF;;AACD,QAAM,IAAIC,KAAJ,CAAU,oBAAV,CAAN;AACD;AAED,OAAO,SAASC,KAAT,CAAeC,QAAf,EAAkD;AACvD,SAAO,IAAIC,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtC1C,IAAAA,EAAE,CAAC2C,IAAH,CAAQJ,QAAR,EAAkB,GAAlB,EAAuB,CAACK,GAAD,EAAMC,EAAN,KAAa;AAClC,UAAID,GAAJ,EAAS;AACPF,QAAAA,MAAM,CAACE,GAAD,CAAN;AACD,OAFD,MAEO;AACLH,QAAAA,OAAO,CAACI,EAAD,CAAP;AACD;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASC,KAAT,CAAeP,QAAf,EAAoD;AACzD,SAAO,IAAIC,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtC1C,IAAAA,EAAE,CAAC+C,IAAH,CAAQR,QAAR,EAAkB,CAACK,GAAD,EAAMG,IAAN,KAAe;AAC/B,UAAIH,GAAJ,EAAS;AACPF,QAAAA,MAAM,CAACE,GAAD,CAAN;AACD,OAFD,MAEO;AACLH,QAAAA,OAAO,CAACM,IAAD,CAAP;AACD;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASC,KAAT,CAAeH,EAAf,EAA2BI,QAA3B,EAA6CxB,MAA7C,EAA8E;AACnF,QAAMyB,MAAM,GAAGjC,MAAM,CAACkC,KAAP,CAAa1B,MAAb,CAAf;AACA,SAAO,IAAIe,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtC1C,IAAAA,EAAE,CAACsB,IAAH,CAAQuB,EAAR,EAAYK,MAAZ,EAAoB,CAApB,EAAuBzB,MAAvB,EAA+BwB,QAA/B,EAAyC,CAACL,GAAD,EAAMQ,SAAN,EAAiBxC,GAAjB,KAAyB;AAChE,UAAIgC,GAAG,IAAIQ,SAAS,KAAK3B,MAAzB,EAAiC;AAC/BiB,QAAAA,MAAM,CAACE,GAAG,IAAIP,KAAK,CAAC,aAAD,CA
Ab,CAAN;AACD,OAFD,MAEO;AACLI,QAAAA,OAAO,CAAC7B,GAAD,CAAP;AACD;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASyC,MAAT,CAAgBR,EAAhB,EAA2C;AAChD,SAAO,IAAIL,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtC1C,IAAAA,EAAE,CAACsD,KAAH,CAAST,EAAT,EAAcD,GAAD,IAAS;AACpB,UAAIA,GAAJ,EAAS;AACPF,QAAAA,MAAM,CAACE,GAAD,CAAN;AACD,OAFD,MAEO;AACLH,QAAAA,OAAO;AACR;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASc,OAAT,CAAiBC,EAAjB,EAA+B5C,GAA/B,EAA2D;AAChE,SAAO,IAAI4B,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtCc,IAAAA,EAAE,CAACzC,KAAH,CAASH,GAAT,EAAegC,GAAD,IAAS;AACrB,UAAIA,GAAJ,EAAS;AACPF,QAAAA,MAAM,CAACE,GAAD,CAAN;AACD,OAFD,MAEO;AACLH,QAAAA,OAAO;AACR;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASgB,OAAT,CAAiBD,EAAjB,EAA8C;AACnD,SAAO,IAAIhB,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACrCc,IAAAA,EAAD,CAAYF,KAAZ,CAAmBV,GAAD,IAAc;AAC9B,UAAIA,GAAJ,EAAS;AACPF,QAAAA,MAAM,CAACE,GAAD,CAAN;AACD,OAFD,MAEO;AACLH,QAAAA,OAAO;AACR;AACF,KAND;AAOD,GARM,CAAP;AASD;AAED,OAAO,SAASiB,MAAT,CAAgBC,IAAhB,EAA8BC,IAA9B,EAAkF;AACvF,SAAO,IAAIpB,OAAJ,CAAY,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACtC,UAAMmB,YAAY,GAAG7D,EAAE,CAAC8D,iBAAH,CAAqBH,IAArB,EAA2BC,IAA3B,CAArB;AACAC,IAAAA,YAAY,CAACE,IAAb,CAAkB,MAAlB,EAA2BlB,EAAD,IAAQJ,OAAO,CAACoB,YAAD,CAAzC;AACAA,IAAAA,YAAY,CAACE,IAAb,CAAkB,OAAlB,EAA4BnB,GAAD,IAASF,MAAM,CAACE,GAAD,CAA1C;AACD,GAJM,CAAP;AAKD;AAKD,OAAO,SAASoB,YAAT,CAAsBC,GAAtB,EAAuCC,IAAvC,EAA+D;AACpE,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGF,GAAG,CAACxC,MAAxB,EAAgC0C,CAAC,EAAjC,EAAqC;AACnC,QAAIF,GAAG,CAACE,CAAD,CAAH,CAAO1C,MAAP,GAAgByC,IAAI,CAACzC,MAAzB,EAAiC;AAC/B;AACD;;AACD,QAAI2C,CAAC,GAAG,IAAR;;AACA,SAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,IAAI,CAACzC,MAAzB,EAAiC4C,CAAC,EAAlC,EAAsC;AACpC,UAAIJ,GAAG,CAACE,CAAD,CAAH,CAAOE,CAAP,MAAcH,IAAI,CAACG,CAAD,CAAlB,IAAyBJ,GAAG,CAACE,CAAD,CAAH,CAAOE,CAAP,MAAc,GAAvC,IAA8CJ,GAAG,CAACE,CAAD,CAAH,CAAOE,CAAP,MAAc,GAAhE,EAAqE;AACnE;AACD;;AACD,UAAIA,CAAC,IAAIJ,GAAG,CAACE,CAAD,CAAH,CAAO1C,MAAZ,IAAsBwC,GAAG,CAACE,CAAD,CAAH,CAAOF,GAAG,CAACE,CAAD,CAAH,CAAO1C,MAAP,GAAgB,CAAvB,MAA8B,GAAxD,EAA6D;AAC3D;AACD
;;AACD2C,MAAAA,CAAC,GAAG,KAAJ;AACA;AACD;;AACD,QAAIA,CAAJ,EAAO,OAAOD,CAAP;AACR;;AACD,SAAO,CAAC,CAAR;AACD;AAED,OAAO,SAASG,IAAT,CAAcC,IAAd,EAAiC;AACtC,SAAO,CAACC,MAAM,IAAKC,MAAZ,EAA4BC,OAA5B,CAAoCH,IAApC,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport fs from 'fs';\nimport {TBufferedTransport, TCompactProtocol, TFramedTransport} from 'thrift';\nimport {FileMetaData, PageHeader} from './parquet-thrift';\nimport {Writable} from 'stream';\n\nexport interface WriteStreamOptions {\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\nclass UFramedTransport extends TFramedTransport {\n public readPos: number = 0;\n}\n\n/**\n * Helper function that serializes a thrift object into a buffer\n */\nexport function serializeThrift(obj: any): Buffer {\n const output: Buffer[] = [];\n\n const transport = new TBufferedTransport(undefined, (buf) => {\n output.push(buf as Buffer);\n });\n\n const protocol = new TCompactProtocol(transport);\n obj.write(protocol);\n transport.flush();\n\n return Buffer.concat(output);\n}\n\nexport function decodeThrift(obj: any, buf: Buffer, offset?: number) {\n if (!offset) {\n // tslint:disable-next-line:no-parameter-reassignment\n offset = 0;\n }\n\n const transport = new UFramedTransport(buf);\n transport.readPos = offset;\n const protocol = new TCompactProtocol(transport);\n obj.read(protocol);\n return transport.readPos - offset;\n}\n\nexport function decodeFileMetadata(buf: Buffer, offset?: number) {\n if (!offset) {\n // tslint:disable-next-line:no-parameter-reassignment\n offset = 0;\n }\n\n const transport = new UFramedTransport(buf);\n transport.readPos = offset;\n const protocol = new TCompactProtocol(transport);\n const metadata = FileMetaData.read(protocol);\n return {length: transport.readPos - offset, metadata};\n}\n\nexport function decodePageHeader(buf: Buffer, offset?: number) 
{\n if (!offset) {\n // tslint:disable-next-line:no-parameter-reassignment\n offset = 0;\n }\n\n const transport = new UFramedTransport(buf);\n transport.readPos = offset;\n const protocol = new TCompactProtocol(transport);\n const pageHeader = PageHeader.read(protocol);\n return {length: transport.readPos - offset, pageHeader};\n}\n\n/**\n * Get the number of bits required to store a given value\n */\nexport function getBitWidth(val: number): number {\n if (val === 0) {\n return 0;\n // tslint:disable-next-line:no-else-after-return\n }\n return Math.ceil(Math.log2(val + 1));\n}\n\n/**\n * FIXME not ideal that this is linear\n */\nexport function getThriftEnum(klass: any, value: number | string): string {\n for (const k in klass) {\n if (klass[k] === value) {\n return k;\n }\n }\n throw new Error('Invalid ENUM value');\n}\n\nexport function fopen(filePath: string): Promise<number> {\n return new Promise((resolve, reject) => {\n fs.open(filePath, 'r', (err, fd) => {\n if (err) {\n reject(err);\n } else {\n resolve(fd);\n }\n });\n });\n}\n\nexport function fstat(filePath: string): Promise<fs.Stats> {\n return new Promise((resolve, reject) => {\n fs.stat(filePath, (err, stat) => {\n if (err) {\n reject(err);\n } else {\n resolve(stat);\n }\n });\n });\n}\n\nexport function fread(fd: number, position: number, length: number): Promise<Buffer> {\n const buffer = Buffer.alloc(length);\n return new Promise((resolve, reject) => {\n fs.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => {\n if (err || bytesRead !== length) {\n reject(err || Error('read failed'));\n } else {\n resolve(buf);\n }\n });\n });\n}\n\nexport function fclose(fd: number): Promise<void> {\n return new Promise((resolve, reject) => {\n fs.close(fd, (err) => {\n if (err) {\n reject(err);\n } else {\n resolve();\n }\n });\n });\n}\n\nexport function oswrite(os: Writable, buf: Buffer): Promise<void> {\n return new Promise((resolve, reject) => {\n os.write(buf, (err) => {\n if (err) {\n 
reject(err);\n } else {\n resolve();\n }\n });\n });\n}\n\nexport function osclose(os: Writable): Promise<void> {\n return new Promise((resolve, reject) => {\n (os as any).close((err: any) => {\n if (err) {\n reject(err);\n } else {\n resolve();\n }\n });\n });\n}\n\nexport function osopen(path: string, opts?: WriteStreamOptions): Promise<fs.WriteStream> {\n return new Promise((resolve, reject) => {\n const outputStream = fs.createWriteStream(path, opts as any);\n outputStream.once('open', (fd) => resolve(outputStream));\n outputStream.once('error', (err) => reject(err));\n });\n}\n\n// Supports MQTT path wildcards\n// + all immediate children\n// # all descendents\nexport function fieldIndexOf(arr: string[][], elem: string[]): number {\n for (let j = 0; j < arr.length; j++) {\n if (arr[j].length > elem.length) {\n continue; // eslint-disable-line no-continue\n }\n let m = true;\n for (let i = 0; i < elem.length; i++) {\n if (arr[j][i] === elem[i] || arr[j][i] === '+' || arr[j][i] === '#') {\n continue; // eslint-disable-line no-continue\n }\n if (i >= arr[j].length && arr[j][arr[j].length - 1] === '#') {\n continue; // eslint-disable-line no-continue\n }\n m = false;\n break;\n }\n if (m) return j;\n }\n return -1;\n}\n\nexport function load(name: string): any {\n return (module || (global as any)).require(name);\n}\n"],"file":"util.js"}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../src/parquetjs/writer.ts"],"names":["Transform","PARQUET_CODECS","Compression","Shred","ColumnChunk","ColumnMetaData","CompressionCodec","ConvertedType","DataPageHeader","DataPageHeaderV2","Encoding","FieldRepetitionType","FileMetaData","KeyValue","PageHeader","PageType","RowGroup","SchemaElement","Type","Util","Int64","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetWriter","openFile","schema","path","opts","outputStream","osopen","openStream","envelopeWriter","ParquetEnvelopeWriter","constructor","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","err","close","appendRow","row","Error","shredRecord","rowCount","callback","writeFooter","setMetadata","key","value","String","setRowGroupSize","cnt","setPageSize","writeFn","oswrite","bind","undefined","closeFn","osclose","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","writeSection","buf","length","Buffer","from","writeRowGroup","records","rgroup","encodeRowGroup","baseOffset","push","metadata","body","encodeFooter","ParquetTransformer","objectMode","writeProxy","t","b","writer","_transform","encoding","then","Promise","resolve","_flush","encodeValues","type","values","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","getBitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","concat","compressedBuf","deflate","compression","header","DATA_PAGE","data_page_header","num_values","count","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","serializeThrift","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","pageBuf",
"total_uncompressed_size","total_compressed_size","result","path_in_schema","data_page_offset","encodings","codec","metadataOffset","columns","total_byte_size","field","fieldList","isNested","cchunkData","cchunk","file_offset","meta_data","Number","version","created_by","row_groups","key_value_metadata","kv","schemaRoot","name","num_children","Object","keys","fields","relt","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"mappings":";AAEA,SAAQA,SAAR,QAAkC,QAAlC;AACA,SAA6BC,cAA7B,QAAkD,UAAlD;AACA,OAAO,KAAKC,WAAZ,MAA6B,eAA7B;AASA,OAAO,KAAKC,KAAZ,MAAuB,gBAAvB;AACA,SACEC,WADF,EAEEC,cAFF,EAGEC,gBAHF,EAIEC,aAJF,EAKEC,cALF,EAMEC,gBANF,EAOEC,QAPF,EAQEC,mBARF,EASEC,YATF,EAUEC,QAVF,EAWEC,UAXF,EAYEC,QAZF,EAaEC,QAbF,EAcEC,aAdF,EAeEC,IAfF,QAgBO,kBAhBP;AAiBA,OAAO,KAAKC,IAAZ,MAAsB,QAAtB;AACA,OAAOC,KAAP,MAAkB,YAAlB;AAKA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,yBAAyB,GAAG,IAAlC;AACA,MAAMC,8BAA8B,GAAG,IAAvC;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;AAuBA,OAAO,MAAMC,aAAN,CAAuB;AAKP,eAARC,QAAQ,CACnBC,MADmB,EAEnBC,IAFmB,EAGnBC,IAHmB,EAIQ;AAC3B,UAAMC,YAAY,GAAG,MAAMb,IAAI,CAACc,MAAL,CAAYH,IAAZ,EAAkBC,IAAlB,CAA3B;AACA,WAAOJ,aAAa,CAACO,UAAd,CAAyBL,MAAzB,EAAiCG,YAAjC,EAA+CD,IAA/C,CAAP;AACD;;AAMsB,eAAVG,UAAU,CACrBL,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIM;AAC3B,QAAI,CAACA,IAAL,EAAW;AAETA,MAAAA,IAAI,GAAG,EAAP;AACD;;AAED,UAAMI,cAAc,GAAG,MAAMC,qBAAqB,CAACF,UAAtB,CAAiCL,MAAjC,EAAyCG,YAAzC,EAAuDD,IAAvD,CAA7B;AAEA,WAAO,IAAIJ,aAAJ,CAAkBE,MAAlB,EAA0BM,cAA1B,EAA0CJ,IAA1C,CAAP;AACD;;AAYDM,EAAAA,WAAW,CACTR,MADS,EAETM,cAFS,EAGTJ,IAHS,EAIT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKM,cAAL,GAAsBA,cAAtB;AAEA,SAAKG,SAAL,GAAiB,EAAjB;AACA,SAAKC,YAAL,GAAoBR,IAAI,CAACQ,YAAL,IAAqBf,8BAAzC;AACA,SAAKgB,MAAL,GAAc,KAAd;AACA,SAAKC,YAAL,GAAoB,EAApB;;AAGA,QAAI;AAEFN,MAAAA,cAAc,CAACO,WAAf;AACD,KAHD,CAGE,OAAOC,GAAP,EAAY;AAEZR,MAAAA,cAAc,CAACS,KAAf
;AACA,YAAMD,GAAN;AACD;AACF;;AAMc,QAATE,SAAS,CAAIC,GAAJ,EAA2B;AACxC,QAAI,KAAKN,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AACD5C,IAAAA,KAAK,CAAC6C,WAAN,CAAkB,KAAKnB,MAAvB,EAA+BiB,GAA/B,EAAoC,KAAKR,SAAzC;;AACA,QAAI,KAAKA,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAApC,EAAkD;AAEhD,WAAKD,SAAL,GAAiB,EAAjB;AACD;AACF;;AAQU,QAALM,KAAK,CAACM,QAAD,EAAuC;AAChD,QAAI,KAAKV,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AAED,SAAKP,MAAL,GAAc,IAAd;;AAEA,QAAI,KAAKF,SAAL,CAAeW,QAAf,GAA0B,CAA1B,IAA+B,KAAKX,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAAnE,EAAiF;AAE/E,WAAKD,SAAL,GAAiB,EAAjB;AACD;;AAED,UAAM,KAAKH,cAAL,CAAoBgB,WAApB,CAAgC,KAAKV,YAArC,CAAN;AACA,UAAM,KAAKN,cAAL,CAAoBS,KAApB,EAAN;;AAGA,QAAIM,QAAJ,EAAc;AACZA,MAAAA,QAAQ;AACT;AACF;;AAKDE,EAAAA,WAAW,CAACC,GAAD,EAAcC,KAAd,EAAmC;AAE5C,SAAKb,YAAL,CAAkBc,MAAM,CAACF,GAAD,CAAxB,IAAiCE,MAAM,CAACD,KAAD,CAAvC;AACD;;AAQDE,EAAAA,eAAe,CAACC,GAAD,EAAoB;AACjC,SAAKlB,YAAL,GAAoBkB,GAApB;AACD;;AAMDC,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKtB,cAAL,CAAoBuB,WAApB,CAAgCD,GAAhC;AACD;;AArI2B;AA8I9B,OAAO,MAAMrB,qBAAN,CAA4B;AAIV,eAAVF,UAAU,CACrBL,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIW;AAChC,UAAM4B,OAAO,GAAGxC,IAAI,CAACyC,OAAL,CAAaC,IAAb,CAAkBC,SAAlB,EAA6B9B,YAA7B,CAAhB;AACA,UAAM+B,OAAO,GAAG5C,IAAI,CAAC6C,OAAL,CAAaH,IAAb,CAAkBC,SAAlB,EAA6B9B,YAA7B,CAAhB;AACA,WAAO,IAAII,qBAAJ,CAA0BP,MAA1B,EAAkC8B,OAAlC,EAA2CI,OAA3C,EAAoD,CAApD,EAAuDhC,IAAvD,CAAP;AACD;;AAWDM,EAAAA,WAAW,CACTR,MADS,EAET8B,OAFS,EAGTI,OAHS,EAITE,UAJS,EAKTlC,IALS,EAMT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKqC,KAAL,GAAaP,OAAb;AACA,SAAKf,KAAL,GAAamB,OAAb;AACA,SAAKI,MAAL,GAAcF,UAAd;AACA,SAAKhB,QAAL,GAAgB,CAAhB;AACA,SAAKmB,SAAL,GAAiB,EAAjB;AACA,SAAKC,QAAL,GAAgBtC,IAAI,CAACsC,QAAL,IAAiB9C,yBAAjC;AACA,SAAK+C,aAAL,GAAqB,mBAAmBvC,IAAnB,GAA0BwC,OAAO,CAACxC,IAAI,CAACuC,aAAN,CAAjC,GAAwD,KAA7E;AACD;;AAEDE,EAAAA,YAAY,CAACC,GAAD,EAA6B;AACvC,SAAKN,MAAL,IAAeM,GAAG,CAACC,MAAnB;AACA,WAAO,KAAKR,KAAL,CAAWO,GAAX,CAAP;AACD;;AAKD/B,EAAAA,WAAW,GAAkB;AAC3B,WAAO,KAAK8B,YAAL,CAAkBG,MAAM,CAAC
C,IAAP,CAAYvD,aAAZ,CAAlB,CAAP;AACD;;AAMkB,QAAbwD,aAAa,CAACC,OAAD,EAAwC;AACzD,UAAMC,MAAM,GAAGC,cAAc,CAAC,KAAKnD,MAAN,EAAciD,OAAd,EAAuB;AAClDG,MAAAA,UAAU,EAAE,KAAKd,MADiC;AAElDE,MAAAA,QAAQ,EAAE,KAAKA,QAFmC;AAGlDC,MAAAA,aAAa,EAAE,KAAKA;AAH8B,KAAvB,CAA7B;AAMA,SAAKrB,QAAL,IAAiB6B,OAAO,CAAC7B,QAAzB;AACA,SAAKmB,SAAL,CAAec,IAAf,CAAoBH,MAAM,CAACI,QAA3B;AACA,WAAO,KAAKX,YAAL,CAAkBO,MAAM,CAACK,IAAzB,CAAP;AACD;;AAKDjC,EAAAA,WAAW,CAACV,YAAD,EAAsD;AAC/D,QAAI,CAACA,YAAL,EAAmB;AAEjBA,MAAAA,YAAY,GAAG,EAAf;AACD;;AAED,WAAO,KAAK+B,YAAL,CACLa,YAAY,CAAC,KAAKxD,MAAN,EAAc,KAAKoB,QAAnB,EAA6B,KAAKmB,SAAlC,EAA6C3B,YAA7C,CADP,CAAP;AAGD;;AAMDiB,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKY,QAAL,GAAgBZ,GAAhB;AACD;;AAxFgC;AA8FnC,OAAO,MAAM6B,kBAAN,SAAoCtF,SAApC,CAA8C;AAGnDqC,EAAAA,WAAW,CAACR,MAAD,EAAwBE,IAA0B,GAAG,EAArD,EAAyD;AAClE,UAAM;AAACwD,MAAAA,UAAU,EAAE;AAAb,KAAN;;AADkE;;AAGlE,UAAMC,UAAU,GAAI,UAAUC,CAAV,EAAsC;AACxD,aAAO,gBAAgBC,CAAhB,EAAuC;AAC5CD,QAAAA,CAAC,CAACP,IAAF,CAAOQ,CAAP;AACD,OAFD;AAGD,KAJkB,CAIhB,IAJgB,CAAnB;;AAMA,SAAKC,MAAL,GAAc,IAAIhE,aAAJ,CACZE,MADY,EAEZ,IAAIO,qBAAJ,CAA0BP,MAA1B,EAAkC2D,UAAlC,EAA8C,YAAY,CAAE,CAA5D,EAA8D,CAA9D,EAAiEzD,IAAjE,CAFY,EAGZA,IAHY,CAAd;AAKD;;AAGD6D,EAAAA,UAAU,CAAC9C,GAAD,EAAW+C,QAAX,EAA6B3C,QAA7B,EAA2E;AACnF,QAAIJ,GAAJ,EAAS;AACP,aAAO,KAAK6C,MAAL,CAAY9C,SAAZ,CAAsBC,GAAtB,EAA2BgD,IAA3B,CAAgC5C,QAAhC,CAAP;AACD;;AACDA,IAAAA,QAAQ;AACR,WAAO6C,OAAO,CAACC,OAAR,EAAP;AACD;;AAGW,QAANC,MAAM,CAAC/C,QAAD,EAAgC;AAC1C,UAAM,KAAKyC,MAAL,CAAY/C,KAAZ,CAAkBM,QAAlB,CAAN;AACD;;AA/BkD;;AAqCrD,SAASgD,YAAT,CACEC,IADF,EAEEN,QAFF,EAGEO,MAHF,EAIErE,IAJF,EAKE;AACA,MAAI,EAAE8D,QAAQ,IAAI5F,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAI8C,KAAJ,CAAW,qBAAoB8C,QAAS,EAAxC,CAAN;AACD;;AACD,SAAO5F,cAAc,CAAC4F,QAAD,CAAd,CAAyBK,YAAzB,CAAsCC,IAAtC,EAA4CC,MAA5C,EAAoDrE,IAApD,CAAP;AACD;;AAKD,SAASsE,cAAT,CACEC,MADF,EAEEC,IAFF,EAOE;AAEA,MAAIC,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACzE,kBAAD,EAAqBC,sBAArB,EAA6C6E,IAAI,CAACI,OAAlD,EAA2D;AAClF
C,MAAAA,QAAQ,EAAEzF,IAAI,CAAC0F,WAAL,CAAiBP,MAAM,CAACI,SAAxB;AADwE,KAA3D,CAAzB;AAID;;AAED,MAAII,UAAU,GAAGnC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACS,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGZ,YAAY,CAACzE,kBAAD,EAAqBC,sBAArB,EAA6C6E,IAAI,CAACS,OAAlD,EAA2D;AAClFJ,MAAAA,QAAQ,EAAEzF,IAAI,CAAC0F,WAAL,CAAiBP,MAAM,CAACS,SAAxB;AADwE,KAA3D,CAAzB;AAID;;AAGD,QAAME,SAAS,GAAGf,YAAY,CAACI,MAAM,CAACY,aAAR,EAAwBZ,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFe,IAAAA,UAAU,EAAEb,MAAM,CAACa,UADgE;AAEnFP,IAAAA,QAAQ,EAAEN,MAAM,CAACa;AAFkE,GAAvD,CAA9B;AAKA,QAAMC,OAAO,GAAGzC,MAAM,CAAC0C,MAAP,CAAc,CAACb,UAAD,EAAaM,UAAb,EAAyBG,SAAzB,CAAd,CAAhB;AAGA,QAAMK,aAAa,GAAGpH,WAAW,CAACqH,OAAZ,CAAoBjB,MAAM,CAACkB,WAA3B,EAAyCJ,OAAzC,CAAtB;AAGA,QAAMK,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BqF,IAAAA,IAAI,EAAEpF,QAAQ,CAAC2G,SADa;AAE5BC,IAAAA,gBAAgB,EAAE,IAAInH,cAAJ,CAAmB;AACnCoH,MAAAA,UAAU,EAAErB,IAAI,CAACsB,KADkB;AAEnChC,MAAAA,QAAQ,EAAEnF,QAAQ,CAAC4F,MAAM,CAACT,QAAR,CAFiB;AAGnCiC,MAAAA,yBAAyB,EAAEpH,QAAQ,CAACgB,sBAAD,CAHA;AAInCqG,MAAAA,yBAAyB,EAAErH,QAAQ,CAACgB,sBAAD;AAJA,KAAnB,CAFU;AAQ5BsG,IAAAA,sBAAsB,EAAEZ,OAAO,CAAC1C,MARJ;AAS5BuD,IAAAA,oBAAoB,EAAEX,aAAa,CAAC5C;AATR,GAAf,CAAf;AAaA,QAAMwD,SAAS,GAAG/G,IAAI,CAACgH,eAAL,CAAqBV,MAArB,CAAlB;AACA,QAAMW,IAAI,GAAGzD,MAAM,CAAC0C,MAAP,CAAc,CAACa,SAAD,EAAYZ,aAAZ,CAAd,CAAb;AAEA,SAAO;AAACG,IAAAA,MAAD;AAASY,IAAAA,UAAU,EAAEH,SAAS,CAACxD,MAA/B;AAAuC0D,IAAAA;AAAvC,GAAP;AACD;;AAKD,SAASE,gBAAT,CACEhC,MADF,EAEEC,IAFF,EAGEtD,QAHF,EAQE;AAEA,QAAMgE,SAAS,GAAGf,YAAY,CAACI,MAAM,CAACY,aAAR,EAAwBZ,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFe,IAAAA,UAAU,EAAEb,MAAM,CAACa,UADgE;AAEnFP,IAAAA,QAAQ,EAAEN,MAAM,CAACa;AAFkE,GAAvD,CAA9B;AAMA,QAAMG,aAAa,GAAGpH,WAAW,CAACqH,OAAZ,CAAoBjB,MAAM,CAACkB,WAA3B,EAAyCP,SAAzC,CAAtB;AAGA,MAAIT,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACzE,kBAAD,EAAqBC,sBAArB,EAA6C6E,IAAI,CAACI,OAAlD,EAA2D;AAClFC,MAAAA,QAAQ,EAAEzF,IAAI,CAAC0F,WAAL,CAAiBP,MAAM,CAACI
,SAAxB,CADwE;AAElF6B,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAED,MAAIzB,UAAU,GAAGnC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACS,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGZ,YAAY,CAACzE,kBAAD,EAAqBC,sBAArB,EAA6C6E,IAAI,CAACS,OAAlD,EAA2D;AAClFJ,MAAAA,QAAQ,EAAEzF,IAAI,CAAC0F,WAAL,CAAiBP,MAAM,CAACS,SAAxB,CADwE;AAElFwB,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAGD,QAAMd,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BqF,IAAAA,IAAI,EAAEpF,QAAQ,CAACyH,YADa;AAE5BC,IAAAA,mBAAmB,EAAE,IAAIhI,gBAAJ,CAAqB;AACxCmH,MAAAA,UAAU,EAAErB,IAAI,CAACsB,KADuB;AAExCa,MAAAA,SAAS,EAAEnC,IAAI,CAACsB,KAAL,GAAatB,IAAI,CAACH,MAAL,CAAY1B,MAFI;AAGxCiE,MAAAA,QAAQ,EAAE1F,QAH8B;AAIxC4C,MAAAA,QAAQ,EAAEnF,QAAQ,CAAC4F,MAAM,CAACT,QAAR,CAJsB;AAKxC+C,MAAAA,6BAA6B,EAAE9B,UAAU,CAACpC,MALF;AAMxCmE,MAAAA,6BAA6B,EAAErC,UAAU,CAAC9B,MANF;AAOxCoE,MAAAA,aAAa,EAAExC,MAAM,CAACkB,WAAP,KAAuB;AAPE,KAArB,CAFO;AAW5BQ,IAAAA,sBAAsB,EAAExB,UAAU,CAAC9B,MAAX,GAAoBoC,UAAU,CAACpC,MAA/B,GAAwCuC,SAAS,CAACvC,MAX9C;AAY5BuD,IAAAA,oBAAoB,EAAEzB,UAAU,CAAC9B,MAAX,GAAoBoC,UAAU,CAACpC,MAA/B,GAAwC4C,aAAa,CAAC5C;AAZhD,GAAf,CAAf;AAgBA,QAAMwD,SAAS,GAAG/G,IAAI,CAACgH,eAAL,CAAqBV,MAArB,CAAlB;AACA,QAAMW,IAAI,GAAGzD,MAAM,CAAC0C,MAAP,CAAc,CAACa,SAAD,EAAY1B,UAAZ,EAAwBM,UAAxB,EAAoCQ,aAApC,CAAd,CAAb;AACA,SAAO;AAACG,IAAAA,MAAD;AAASY,IAAAA,UAAU,EAAEH,SAAS,CAACxD,MAA/B;AAAuC0D,IAAAA;AAAvC,GAAP;AACD;;AAKD,SAASW,iBAAT,CACEzC,MADF,EAEE0C,MAFF,EAGE7E,MAHF,EAIEpC,IAJF,EASE;AACA,QAAMwE,IAAI,GAAGyC,MAAM,CAACC,UAAP,CAAkB3C,MAAM,CAACxE,IAAP,CAAYoH,IAAZ,EAAlB,CAAb;AACA,QAAMjE,UAAU,GAAG,CAAClD,IAAI,CAACkD,UAAL,IAAmB,CAApB,IAAyBd,MAA5C;AAGA,MAAIgF,OAAJ;AAEA,MAAIC,uBAAuB,GAAG,CAA9B;AAEA,MAAIC,qBAAqB,GAAG,CAA5B;AACA;AACE,QAAIC,MAAJ;;AACA,QAAIvH,IAAI,CAACuC,aAAT,EAAwB;AACtBgF,MAAAA,MAAM,GAAGhB,gBAAgB,CAAChC,MAAD,EAASC,IAAT,EAAeyC,MAAM,CAAC/F,QAAtB,CAAzB;AACD,KAFD,MAEO;AACLqG,MAAAA,MAAM,GAAGjD,cAAc,CAACC,MAAD,EAASC,IAAT,CAAvB;AACD;;AAED4C,IAAAA,OAAO,GAAGG,MAAM,CAAClB,IAAjB;AACAgB,IAAAA,uBAAuB,IAAIE,MAAM,CAAC7B,MAAP,CAAcO,sBAAd,GAAuCsB,MAAM,CAACjB,UAAzE;AACAgB,IAAAA,qBAAqB
,IAAIC,MAAM,CAAC7B,MAAP,CAAcQ,oBAAd,GAAqCqB,MAAM,CAACjB,UAArE;AACD;AAMD,QAAMlD,QAAQ,GAAG,IAAI9E,cAAJ,CAAmB;AAClCkJ,IAAAA,cAAc,EAAEjD,MAAM,CAACxE,IADW;AAElC8F,IAAAA,UAAU,EAAErB,IAAI,CAACsB,KAFiB;AAGlC2B,IAAAA,gBAAgB,EAAEvE,UAHgB;AAIlCwE,IAAAA,SAAS,EAAE,EAJuB;AAKlCL,IAAAA,uBALkC;AAMlCC,IAAAA,qBANkC;AAOlClD,IAAAA,IAAI,EAAEjF,IAAI,CAACoF,MAAM,CAACY,aAAR,CAPwB;AAQlCwC,IAAAA,KAAK,EAAEpJ,gBAAgB,CAACgG,MAAM,CAACkB,WAAR;AARW,GAAnB,CAAjB;AAYArC,EAAAA,QAAQ,CAACsE,SAAT,CAAmBvE,IAAnB,CAAwBxE,QAAQ,CAACgB,sBAAD,CAAhC;AACAyD,EAAAA,QAAQ,CAACsE,SAAT,CAAmBvE,IAAnB,CAAwBxE,QAAQ,CAAC4F,MAAM,CAACT,QAAR,CAAhC;AAGA,QAAM8D,cAAc,GAAG1E,UAAU,GAAGkE,OAAO,CAACzE,MAA5C;AACA,QAAMU,IAAI,GAAGT,MAAM,CAAC0C,MAAP,CAAc,CAAC8B,OAAD,EAAUhI,IAAI,CAACgH,eAAL,CAAqBhD,QAArB,CAAV,CAAd,CAAb;AACA,SAAO;AAACC,IAAAA,IAAD;AAAOD,IAAAA,QAAP;AAAiBwE,IAAAA;AAAjB,GAAP;AACD;;AAKD,SAAS3E,cAAT,CACEnD,MADF,EAEE0E,IAFF,EAGExE,IAHF,EAOE;AACA,QAAMoD,QAAQ,GAAG,IAAInE,QAAJ,CAAa;AAC5B2H,IAAAA,QAAQ,EAAEpC,IAAI,CAACtD,QADa;AAE5B2G,IAAAA,OAAO,EAAE,EAFmB;AAG5BC,IAAAA,eAAe,EAAE;AAHW,GAAb,CAAjB;AAMA,MAAIzE,IAAI,GAAGT,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAX;;AACA,OAAK,MAAMqD,KAAX,IAAoBjI,MAAM,CAACkI,SAA3B,EAAsC;AACpC,QAAID,KAAK,CAACE,QAAV,EAAoB;AAClB;AACD;;AAED,UAAMC,UAAU,GAAGlB,iBAAiB,CAACe,KAAD,EAAQvD,IAAR,EAAcnB,IAAI,CAACV,MAAnB,EAA2B3C,IAA3B,CAApC;AAEA,UAAMmI,MAAM,GAAG,IAAI9J,WAAJ,CAAgB;AAC7B+J,MAAAA,WAAW,EAAEF,UAAU,CAACN,cADK;AAE7BS,MAAAA,SAAS,EAAEH,UAAU,CAAC9E;AAFO,KAAhB,CAAf;AAKAA,IAAAA,QAAQ,CAACyE,OAAT,CAAiB1E,IAAjB,CAAsBgF,MAAtB;AACA/E,IAAAA,QAAQ,CAAC0E,eAAT,GAA2B,IAAIzI,KAAJ,CAAUiJ,MAAM,CAAClF,QAAQ,CAAC0E,eAAV,CAAN,GAAmCI,UAAU,CAAC7E,IAAX,CAAgBV,MAA7D,CAA3B;AAEAU,IAAAA,IAAI,GAAGT,MAAM,CAAC0C,MAAP,CAAc,CAACjC,IAAD,EAAO6E,UAAU,CAAC7E,IAAlB,CAAd,CAAP;AACD;;AAED,SAAO;AAACA,IAAAA,IAAD;AAAOD,IAAAA;AAAP,GAAP;AACD;;AAKD,SAASE,YAAT,CACExD,MADF,EAEEoB,QAFF,EAGEmB,SAHF,EAIE3B,YAJF,EAKU;AACR,QAAM0C,QAAQ,GAAG,IAAIvE,YAAJ,CAAiB;AAChC0J,IAAAA,OAAO,EAAEhJ,eADuB;AAEhCiJ,IAAAA,UAAU,EAAE,UAFoB;AAGhC5B,IAAAA,QAAQ,EAAE1F,QAHsB;AAIhCuH,IAAAA,UAAU,EAAEpG,SA
JoB;AAKhCvC,IAAAA,MAAM,EAAE,EALwB;AAMhC4I,IAAAA,kBAAkB,EAAE;AANY,GAAjB,CAAjB;;AASA,OAAK,MAAMpH,GAAX,IAAkBZ,YAAlB,EAAgC;AAAA;;AAC9B,UAAMiI,EAAE,GAAG,IAAI7J,QAAJ,CAAa;AACtBwC,MAAAA,GADsB;AAEtBC,MAAAA,KAAK,EAAEb,YAAY,CAACY,GAAD;AAFG,KAAb,CAAX;AAIA,6BAAA8B,QAAQ,CAACsF,kBAAT,0GAA6BvF,IAA7B,8GAAoCwF,EAApC;AACD;;AAED;AACE,UAAMC,UAAU,GAAG,IAAI1J,aAAJ,CAAkB;AACnC2J,MAAAA,IAAI,EAAE,MAD6B;AAEnCC,MAAAA,YAAY,EAAEC,MAAM,CAACC,IAAP,CAAYlJ,MAAM,CAACmJ,MAAnB,EAA2BtG;AAFN,KAAlB,CAAnB;AAIAS,IAAAA,QAAQ,CAACtD,MAAT,CAAgBqD,IAAhB,CAAqByF,UAArB;AACD;;AAED,OAAK,MAAMb,KAAX,IAAoBjI,MAAM,CAACkI,SAA3B,EAAsC;AACpC,UAAMkB,IAAI,GAAGtK,mBAAmB,CAACmJ,KAAK,CAACoB,cAAP,CAAhC;AACA,UAAMC,UAAU,GAAG,IAAIlK,aAAJ,CAAkB;AACnC2J,MAAAA,IAAI,EAAEd,KAAK,CAACc,IADuB;AAEnCQ,MAAAA,eAAe,EAAEH;AAFkB,KAAlB,CAAnB;;AAKA,QAAInB,KAAK,CAACE,QAAV,EAAoB;AAClBmB,MAAAA,UAAU,CAACN,YAAX,GAA0Bf,KAAK,CAACuB,UAAhC;AACD,KAFD,MAEO;AACLF,MAAAA,UAAU,CAAChF,IAAX,GAAkBjF,IAAI,CAAC4I,KAAK,CAAC5C,aAAP,CAAtB;AACD;;AAED,QAAI4C,KAAK,CAACwB,YAAV,EAAwB;AACtBH,MAAAA,UAAU,CAACI,cAAX,GAA4BhL,aAAa,CAACuJ,KAAK,CAACwB,YAAP,CAAzC;AACD;;AAEDH,IAAAA,UAAU,CAACK,WAAX,GAAyB1B,KAAK,CAAC3C,UAA/B;AAEAhC,IAAAA,QAAQ,CAACtD,MAAT,CAAgBqD,IAAhB,CAAqBiG,UAArB;AACD;;AAED,QAAMM,eAAe,GAAGtK,IAAI,CAACgH,eAAL,CAAqBhD,QAArB,CAAxB;AACA,QAAMuG,aAAa,GAAG/G,MAAM,CAAC8B,KAAP,CAAagF,eAAe,CAAC/G,MAAhB,GAAyB,CAAtC,CAAtB;AACA+G,EAAAA,eAAe,CAACE,IAAhB,CAAqBD,aAArB;AACAA,EAAAA,aAAa,CAACE,aAAd,CAA4BH,eAAe,CAAC/G,MAA5C,EAAoD+G,eAAe,CAAC/G,MAApE;AACAgH,EAAAA,aAAa,CAACxH,KAAd,CAAoB7C,aAApB,EAAmCoK,eAAe,CAAC/G,MAAhB,GAAyB,CAA5D;AACA,SAAOgH,aAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {Transform, Writable} from 'stream';\nimport {ParquetCodecOptions, PARQUET_CODECS} from './codecs';\nimport * as Compression from './compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetData,\n ParquetField,\n PrimitiveType\n} from './schema/declare';\nimport 
{ParquetSchema} from './schema/schema';\nimport * as Shred from './schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from './parquet-thrift';\nimport * as Util from './util';\nimport Int64 from 'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetWriterOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. 
The ParquetWriter will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetWriter<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n const outputStream = await Util.osopen(path, opts);\n return ParquetWriter.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: Writable,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n if (!opts) {\n // tslint:disable-next-line:no-parameter-reassignment\n opts = {};\n }\n\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n\n return new ParquetWriter(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetBuffer;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // TODO - better not mess with promises in the constructor\n try {\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n envelopeWriter.writeHeader();\n } catch (err) {\n // eslint-disable-next-line 
@typescript-eslint/no-floating-promises\n envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. 
The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: Writable,\n opts: ParquetWriterOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = Util.oswrite.bind(undefined, outputStream);\n const closeFn = Util.osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. 
The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetBuffer): Promise<void> {\n const rgroup = encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\n */\nexport class ParquetTransformer<T> extends Transform {\n public writer: ParquetWriter<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetWriterOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetWriter(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction 
encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nfunction encodeDataPage(\n column: ParquetField,\n data: ParquetData\n): {\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n} {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: Util.getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: Util.getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] 
as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = Util.serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nfunction encodeDataPageV2(\n column: ParquetField,\n data: ParquetData,\n rowCount: number\n): {\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n} {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] 
as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = Util.serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nfunction encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetBuffer,\n offset: number,\n opts: ParquetWriterOptions\n): {\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n} {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n let result: any;\n if (opts.useDataPageV2) {\n result = encodeDataPageV2(column, data, buffer.rowCount);\n } else {\n result = encodeDataPage(column, data);\n }\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? 
(opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, Util.serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nfunction encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetBuffer,\n opts: ParquetWriterOptions\n): {\n body: Buffer;\n metadata: RowGroup;\n} {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n 
schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = Util.serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"file":"writer.js"}
@@ -1,31 +0,0 @@
1
- // Random-Access read
2
-
3
- export async function readArrayBuffer(
4
- file: Blob | ArrayBuffer | any,
5
- start: number,
6
- length: number
7
- ): Promise<ArrayBuffer> {
8
- if (file instanceof Blob) {
9
- const slice = file.slice(start, start + length);
10
- return await slice.arrayBuffer();
11
- }
12
- return await file.read(start, start + length);
13
- }
14
-
15
- /**
16
- * Read a slice of a Blob or File, without loading the entire file into memory
17
- * The trick when reading File objects is to read successive "slices" of the File
18
- * Per spec https://w3c.github.io/FileAPI/, slicing a File only updates the start and end fields
19
- * Actually reading from file happens in `readAsArrayBuffer`
20
- * @param blob to read
21
- export async function readBlob(blob: Blob): Promise<ArrayBuffer> {
22
- return await new Promise((resolve, reject) => {
23
- const fileReader = new FileReader();
24
- fileReader.onload = (event: ProgressEvent<FileReader>) =>
25
- resolve(event?.target?.result as ArrayBuffer);
26
- // TODO - reject with a proper Error
27
- fileReader.onerror = (error: ProgressEvent<FileReader>) => reject(error);
28
- fileReader.readAsArrayBuffer(blob);
29
- });
30
- }
31
- */
@@ -1,105 +0,0 @@
1
- // Forked from https://github.com/ironSource/parquetjs under MIT license
2
-
3
- import zlib from 'zlib';
4
- import snappy from 'snappyjs';
5
- // import brotli from 'brotli';
6
- import brotliDecompress from 'brotli/decompress';
7
-
8
- export const PARQUET_COMPRESSION_METHODS = {
9
- UNCOMPRESSED: {
10
- deflate: deflateIdentity,
11
- inflate: inflateIdentity
12
- },
13
- GZIP: {
14
- deflate: deflateGzip,
15
- inflate: inflateGzip
16
- },
17
- SNAPPY: {
18
- deflate: deflateSnappy,
19
- inflate: inflateSnappy
20
- },
21
- LZO: {
22
- deflate: deflateLzo,
23
- inflate: inflateLzo
24
- },
25
- BROTLI: {
26
- deflate: deflateBrotli,
27
- inflate: inflateBrotli
28
- }
29
- };
30
-
31
- /**
32
- * Deflate a value using compression method `method`
33
- */
34
- export function deflate(method, value) {
35
- if (!(method in PARQUET_COMPRESSION_METHODS)) {
36
- throw new Error(`parquet: invalid compression method ${method}`);
37
- }
38
-
39
- return PARQUET_COMPRESSION_METHODS[method].deflate(value);
40
- }
41
-
42
- /**
43
- * Inflate a value using compression method `method`
44
- */
45
- export function inflate(method, value) {
46
- if (!(method in PARQUET_COMPRESSION_METHODS)) {
47
- throw new Error(`parquet: invalid compression method ${method}`);
48
- }
49
-
50
- return PARQUET_COMPRESSION_METHODS[method].inflate(value);
51
- }
52
-
53
- /**
54
- * Lazyily load lzo, avoids potential failing require
55
- * unless there was an attempt to access it
56
- */
57
- function loadLZO() {
58
- return require('lzo');
59
- }
60
-
61
- function deflateIdentity(value) {
62
- return value;
63
- }
64
-
65
- function deflateGzip(value) {
66
- return zlib.gzipSync(value);
67
- }
68
-
69
- function deflateSnappy(value) {
70
- return snappy.compress(value);
71
- }
72
-
73
- function deflateLzo(value) {
74
- return loadLZO().compress(value);
75
- }
76
-
77
- function deflateBrotli(value) {
78
- throw new Error('brotli compression not supported');
79
- // TODO - works under Node.js
80
- // return new Buffer(brotli.compress(value, {
81
- // mode: 0,
82
- // quality: 8,
83
- // lgwin: 22
84
- // }));
85
- }
86
-
87
- function inflateIdentity(value) {
88
- return value;
89
- }
90
-
91
- function inflateGzip(value) {
92
- return zlib.gunzipSync(value);
93
- }
94
-
95
- function inflateSnappy(value) {
96
- return snappy.uncompress(value);
97
- }
98
-
99
- function inflateLzo(value) {
100
- return loadLZO().decompress(value);
101
- }
102
-
103
- function inflateBrotli(value) {
104
- return new Buffer(brotliDecompress(value));
105
- }
@@ -1,707 +0,0 @@
1
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
2
- import {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from './codecs';
3
- import * as Compression from './compression';
4
- import {
5
- ParquetBuffer,
6
- ParquetCodec,
7
- ParquetCompression,
8
- ParquetData,
9
- ParquetField,
10
- ParquetRecord,
11
- ParquetType,
12
- PrimitiveType,
13
- SchemaDefinition
14
- } from './schema/declare';
15
- import {ParquetSchema} from './schema/schema';
16
- import * as Shred from './schema/shred';
17
- // tslint:disable-next-line:max-line-length
18
- import {
19
- ColumnChunk,
20
- CompressionCodec,
21
- ConvertedType,
22
- Encoding,
23
- FieldRepetitionType,
24
- FileMetaData,
25
- PageHeader,
26
- PageType,
27
- RowGroup,
28
- SchemaElement,
29
- Type
30
- } from './parquet-thrift';
31
- import * as Util from './util';
32
- // import Fs = require('fs');
33
-
34
- /**
35
- * Parquet File Magic String
36
- */
37
- const PARQUET_MAGIC = 'PAR1';
38
-
39
- /**
40
- * Parquet File Format Version
41
- */
42
- const PARQUET_VERSION = 1;
43
-
44
- /**
45
- * Internal type used for repetition/definition levels
46
- */
47
- const PARQUET_RDLVL_TYPE = 'INT32';
48
- const PARQUET_RDLVL_ENCODING = 'RLE';
49
-
50
- /**
51
- * A parquet cursor is used to retrieve rows from a parquet file in order
52
- */
53
- export class ParquetCursor<T> implements AsyncIterable<T> {
54
- public metadata: FileMetaData;
55
- public envelopeReader: ParquetEnvelopeReader;
56
- public schema: ParquetSchema;
57
- public columnList: string[][];
58
- public rowGroup: ParquetRecord[];
59
- public rowGroupIndex: number;
60
-
61
- /**
62
- * Create a new parquet reader from the file metadata and an envelope reader.
63
- * It is usually not recommended to call this constructor directly except for
64
- * advanced and internal use cases. Consider using getCursor() on the
65
- * ParquetReader instead
66
- */
67
- constructor(
68
- metadata: FileMetaData,
69
- envelopeReader: ParquetEnvelopeReader,
70
- schema: ParquetSchema,
71
- columnList: string[][]
72
- ) {
73
- this.metadata = metadata;
74
- this.envelopeReader = envelopeReader;
75
- this.schema = schema;
76
- this.columnList = columnList;
77
- this.rowGroup = [];
78
- this.rowGroupIndex = 0;
79
- }
80
-
81
- /**
82
- * Retrieve the next row from the cursor. Returns a row or NULL if the end
83
- * of the file was reached
84
- */
85
- async next<T = any>(): Promise<T> {
86
- if (this.rowGroup.length === 0) {
87
- if (this.rowGroupIndex >= this.metadata.row_groups.length) {
88
- // @ts-ignore
89
- return null;
90
- }
91
- const rowBuffer = await this.envelopeReader.readRowGroup(
92
- this.schema,
93
- this.metadata.row_groups[this.rowGroupIndex],
94
- this.columnList
95
- );
96
- this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);
97
- this.rowGroupIndex++;
98
- }
99
- return this.rowGroup.shift() as any;
100
- }
101
-
102
- /**
103
- * Rewind the cursor the the beginning of the file
104
- */
105
- rewind(): void {
106
- this.rowGroup = [];
107
- this.rowGroupIndex = 0;
108
- }
109
-
110
- /**
111
- * Implement AsyncIterable
112
- */
113
- // tslint:disable-next-line:function-name
114
- [Symbol.asyncIterator](): AsyncIterator<T> {
115
- let done = false;
116
- return {
117
- next: async () => {
118
- if (done) {
119
- return {done, value: null};
120
- }
121
- const value = await this.next();
122
- if (value === null) {
123
- return {done: true, value};
124
- }
125
- return {done: false, value};
126
- },
127
- return: async () => {
128
- done = true;
129
- return {done, value: null};
130
- },
131
- throw: async () => {
132
- done = true;
133
- return {done: true, value: null};
134
- }
135
- };
136
- }
137
- }
138
-
139
- /**
140
- * A parquet reader allows retrieving the rows from a parquet file in order.
141
- * The basic usage is to create a reader and then retrieve a cursor/iterator
142
- * which allows you to consume row after row until all rows have been read. It is
143
- * important that you call close() after you are finished reading the file to
144
- * avoid leaking file descriptors.
145
- */
146
- export class ParquetReader<T> implements AsyncIterable<T> {
147
- /**
148
- * Open the parquet file pointed to by the specified path and return a new
149
- * parquet reader
150
- */
151
- static async openFile<T>(filePath: string): Promise<ParquetReader<T>> {
152
- const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);
153
- try {
154
- await envelopeReader.readHeader();
155
- const metadata = await envelopeReader.readFooter();
156
- return new ParquetReader<T>(metadata, envelopeReader);
157
- } catch (err) {
158
- await envelopeReader.close();
159
- throw err;
160
- }
161
- }
162
-
163
- static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {
164
- const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);
165
- try {
166
- await envelopeReader.readHeader();
167
- const metadata = await envelopeReader.readFooter();
168
- return new ParquetReader<T>(metadata, envelopeReader);
169
- } catch (err) {
170
- await envelopeReader.close();
171
- throw err;
172
- }
173
- }
174
-
175
- /**
176
- * return a new parquet reader initialized with a read function
177
- */
178
- static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {
179
- const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);
180
- const closeFn = async () => {};
181
- const size = arrayBuffer.byteLength;
182
- const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
183
- try {
184
- await envelopeReader.readHeader();
185
- const metadata = await envelopeReader.readFooter();
186
- return new ParquetReader(metadata, envelopeReader);
187
- } catch (err) {
188
- await envelopeReader.close();
189
- throw err;
190
- }
191
- }
192
-
193
- public metadata: FileMetaData;
194
- public envelopeReader: ParquetEnvelopeReader;
195
- public schema: ParquetSchema;
196
-
197
- /**
198
- * Create a new parquet reader from the file metadata and an envelope reader.
199
- * It is not recommended to call this constructor directly except for advanced
200
- * and internal use cases. Consider using one of the open{File,Buffer} methods
201
- * instead
202
- */
203
- constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {
204
- if (metadata.version !== PARQUET_VERSION) {
205
- throw new Error('invalid parquet version');
206
- }
207
-
208
- this.metadata = metadata;
209
- this.envelopeReader = envelopeReader;
210
- const root = this.metadata.schema[0];
211
- const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);
212
- this.schema = new ParquetSchema(schema);
213
- }
214
-
215
- /**
216
- * Close this parquet reader. You MUST call this method once you're finished
217
- * reading rows
218
- */
219
- async close(): Promise<void> {
220
- await this.envelopeReader.close();
221
- // this.envelopeReader = null;
222
- // this.metadata = null;
223
- }
224
-
225
- /**
226
- * Return a cursor to the file. You may open more than one cursor and use
227
- * them concurrently. All cursors become invalid once close() is called on
228
- * the reader object.
229
- *
230
- * The required_columns parameter controls which columns are actually read
231
- * from disk. An empty array or no value implies all columns. A list of column
232
- * names means that only those columns should be loaded from disk.
233
- */
234
- getCursor(): ParquetCursor<T>;
235
- // @ts-ignore
236
- getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;
237
- getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;
238
- getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {
239
- if (!columnList) {
240
- // tslint:disable-next-line:no-parameter-reassignment
241
- columnList = [];
242
- }
243
-
244
- // tslint:disable-next-line:no-parameter-reassignment
245
- columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));
246
-
247
- return new ParquetCursor<T>(
248
- this.metadata,
249
- this.envelopeReader,
250
- this.schema,
251
- columnList as string[][]
252
- );
253
- }
254
-
255
- /**
256
- * Return the number of rows in this file. Note that the number of rows is
257
- * not neccessarily equal to the number of rows in each column.
258
- */
259
- getRowCount(): number {
260
- return Number(this.metadata.num_rows);
261
- }
262
-
263
- /**
264
- * Returns the ParquetSchema for this file
265
- */
266
- getSchema(): ParquetSchema {
267
- return this.schema;
268
- }
269
-
270
- /**
271
- * Returns the user (key/value) metadata for this file
272
- */
273
- getMetadata(): Record<string, string> {
274
- const md: Record<string, string> = {};
275
- for (const kv of this.metadata.key_value_metadata!) {
276
- md[kv.key] = kv.value!;
277
- }
278
- return md;
279
- }
280
-
281
- /**
282
- * Implement AsyncIterable
283
- */
284
- // tslint:disable-next-line:function-name
285
- [Symbol.asyncIterator](): AsyncIterator<T> {
286
- return this.getCursor()[Symbol.asyncIterator]();
287
- }
288
- }
289
-
290
- /**
291
- * The parquet envelope reader allows direct, unbuffered access to the individual
292
- * sections of the parquet file, namely the header, footer and the row groups.
293
- * This class is intended for advanced/internal users; if you just want to retrieve
294
- * rows from a parquet file use the ParquetReader instead
295
- */
296
- export class ParquetEnvelopeReader {
297
- public read: (position: number, length: number) => Promise<Buffer>;
298
- /**
299
- * Close this parquet reader. You MUST call this method once you're finished
300
- * reading rows
301
- */
302
- public close: () => Promise<void>;
303
- public fileSize: number;
304
-
305
- static async openFile(filePath: string): Promise<ParquetEnvelopeReader> {
306
- const fileStat = await Util.fstat(filePath);
307
- const fileDescriptor = await Util.fopen(filePath);
308
-
309
- const readFn = Util.fread.bind(undefined, fileDescriptor);
310
- const closeFn = Util.fclose.bind(undefined, fileDescriptor);
311
-
312
- return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);
313
- }
314
-
315
- static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {
316
- const readFn = (position: number, length: number) =>
317
- Promise.resolve(buffer.slice(position, position + length));
318
- const closeFn = () => Promise.resolve();
319
- return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);
320
- }
321
-
322
- constructor(
323
- read: (position: number, length: number) => Promise<Buffer>,
324
- close: () => Promise<void>,
325
- fileSize: number
326
- ) {
327
- this.read = read;
328
- this.close = close;
329
- this.fileSize = fileSize;
330
- }
331
-
332
- async readHeader(): Promise<void> {
333
- const buf = await this.read(0, PARQUET_MAGIC.length);
334
-
335
- if (buf.toString() !== PARQUET_MAGIC) {
336
- throw new Error('not valid parquet file');
337
- }
338
- }
339
-
340
- async readRowGroup(
341
- schema: ParquetSchema,
342
- rowGroup: RowGroup,
343
- columnList: string[][]
344
- ): Promise<ParquetBuffer> {
345
- const buffer: ParquetBuffer = {
346
- rowCount: Number(rowGroup.num_rows),
347
- columnData: {}
348
- };
349
- for (const colChunk of rowGroup.columns) {
350
- const colMetadata = colChunk.meta_data;
351
- const colKey = colMetadata?.path_in_schema;
352
- if (columnList.length > 0 && Util.fieldIndexOf(columnList, colKey!) < 0) {
353
- continue; // eslint-disable-line no-continue
354
- }
355
- buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);
356
- }
357
- return buffer;
358
- }
359
-
360
- async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {
361
- if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
362
- throw new Error('external references are not supported');
363
- }
364
-
365
- const field = schema.findField(colChunk.meta_data?.path_in_schema!);
366
- const type: PrimitiveType = Util.getThriftEnum(Type, colChunk.meta_data?.type!) as any;
367
- if (type !== field.primitiveType) throw new Error(`chunk type not matching schema: ${type}`);
368
-
369
- const compression: ParquetCompression = Util.getThriftEnum(
370
- CompressionCodec,
371
- colChunk.meta_data?.codec!
372
- ) as any;
373
-
374
- const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);
375
- const pagesSize = Number(colChunk.meta_data?.total_compressed_size!);
376
- const pagesBuf = await this.read(pagesOffset, pagesSize);
377
-
378
- return decodeDataPages(pagesBuf, field, compression);
379
- }
380
-
381
- async readFooter(): Promise<FileMetaData> {
382
- const trailerLen = PARQUET_MAGIC.length + 4;
383
- const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);
384
-
385
- if (trailerBuf.slice(4).toString() !== PARQUET_MAGIC) {
386
- throw new Error('not a valid parquet file');
387
- }
388
-
389
- const metadataSize = trailerBuf.readUInt32LE(0);
390
- const metadataOffset = this.fileSize - metadataSize - trailerLen;
391
- if (metadataOffset < PARQUET_MAGIC.length) {
392
- throw new Error('invalid metadata size');
393
- }
394
-
395
- const metadataBuf = await this.read(metadataOffset, metadataSize);
396
- // let metadata = new parquet_thrift.FileMetaData();
397
- // parquet_util.decodeThrift(metadata, metadataBuf);
398
- const {metadata} = Util.decodeFileMetadata(metadataBuf);
399
- return metadata;
400
- }
401
- }
402
-
403
- /**
404
- * Decode a consecutive array of data using one of the parquet encodings
405
- */
406
- function decodeValues(
407
- type: PrimitiveType,
408
- encoding: ParquetCodec,
409
- cursor: CursorBuffer,
410
- count: number,
411
- opts: ParquetCodecOptions
412
- ): any[] {
413
- if (!(encoding in PARQUET_CODECS)) {
414
- throw new Error(`invalid encoding: ${encoding}`);
415
- }
416
- return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
417
- }
418
-
419
- function decodeDataPages(
420
- buffer: Buffer,
421
- column: ParquetField,
422
- compression: ParquetCompression
423
- ): ParquetData {
424
- const cursor: CursorBuffer = {
425
- buffer,
426
- offset: 0,
427
- size: buffer.length
428
- };
429
-
430
- const data: ParquetData = {
431
- rlevels: [],
432
- dlevels: [],
433
- values: [],
434
- count: 0
435
- };
436
-
437
- // @ts-ignore size can be undefined
438
- while (cursor.offset < cursor.size) {
439
- // const pageHeader = new parquet_thrift.PageHeader();
440
- // cursor.offset += parquet_util.decodeThrift(pageHeader, cursor.buffer);
441
-
442
- const {pageHeader, length} = Util.decodePageHeader(cursor.buffer);
443
- cursor.offset += length;
444
-
445
- const pageType = Util.getThriftEnum(PageType, pageHeader.type);
446
-
447
- let pageData: ParquetData | null = null;
448
- switch (pageType) {
449
- case 'DATA_PAGE':
450
- pageData = decodeDataPage(cursor, pageHeader, column, compression);
451
- break;
452
- case 'DATA_PAGE_V2':
453
- pageData = decodeDataPageV2(cursor, pageHeader, column, compression);
454
- break;
455
- default:
456
- throw new Error(`invalid page type: ${pageType}`);
457
- }
458
-
459
- Array.prototype.push.apply(data.rlevels, pageData.rlevels);
460
- Array.prototype.push.apply(data.dlevels, pageData.dlevels);
461
- Array.prototype.push.apply(data.values, pageData.values);
462
- data.count += pageData.count;
463
- }
464
-
465
- return data;
466
- }
467
-
468
/**
 * Decode one DATA_PAGE (v1): repetition levels, then definition levels, then
 * values, all read sequentially from the (possibly inflated) page payload.
 * Mutates `cursor.offset` to point past this page.
 */
function decodeDataPage(
  cursor: CursorBuffer,
  header: PageHeader,
  column: ParquetField,
  compression: ParquetCompression
): ParquetData {
  // Offset just past this page's (compressed) payload in the shared buffer.
  const cursorEnd = cursor.offset + header.compressed_page_size;
  // Number of encoded values in this page, including nulls.
  const valueCount = header.data_page_header?.num_values;

  /* uncompress page */
  // Compressed pages are inflated into a private cursor and the shared cursor
  // is advanced to cursorEnd here. For UNCOMPRESSED pages the shared cursor
  // itself is consumed: the decodeValues calls below advance its offset.
  let dataCursor = cursor;
  if (compression !== 'UNCOMPRESSED') {
    const valuesBuf = Compression.inflate(
      compression,
      cursor.buffer.slice(cursor.offset, cursorEnd),
      header.uncompressed_page_size
    );
    dataCursor = {
      buffer: valuesBuf,
      offset: 0,
      size: valuesBuf.length
    };
    cursor.offset = cursorEnd;
  }

  /* read repetition levels */
  const rLevelEncoding = Util.getThriftEnum(
    Encoding,
    header.data_page_header?.repetition_level_encoding!
  ) as ParquetCodec;
  // tslint:disable-next-line:prefer-array-literal
  let rLevels = new Array(valueCount);
  if (column.rLevelMax > 0) {
    rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {
      bitWidth: Util.getBitWidth(column.rLevelMax),
      disableEnvelope: false
    });
  } else {
    // Non-repeated column: r-levels are omitted from the page; all zero.
    rLevels.fill(0);
  }

  /* read definition levels */
  const dLevelEncoding = Util.getThriftEnum(
    Encoding,
    header.data_page_header?.definition_level_encoding!
  ) as ParquetCodec;
  // tslint:disable-next-line:prefer-array-literal
  let dLevels = new Array(valueCount);
  if (column.dLevelMax > 0) {
    dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {
      bitWidth: Util.getBitWidth(column.dLevelMax),
      disableEnvelope: false
    });
  } else {
    // Required column: d-levels are omitted from the page; all zero.
    dLevels.fill(0);
  }
  // A value is physically stored only where its d-level reaches dLevelMax;
  // count those to know how many values to decode below.
  let valueCountNonNull = 0;
  for (const dlvl of dLevels) {
    if (dlvl === column.dLevelMax) {
      valueCountNonNull++;
    }
  }

  /* read values */
  const valueEncoding = Util.getThriftEnum(
    Encoding,
    header.data_page_header?.encoding!
  ) as ParquetCodec;
  // NOTE(review): bitWidth is fed from column.typeLength — presumably only
  // meaningful for fixed-length types; confirm against the codec implementations.
  const values = decodeValues(column.primitiveType!, valueEncoding, dataCursor, valueCountNonNull, {
    typeLength: column.typeLength,
    bitWidth: column.typeLength
  });

  return {
    dlevels: dLevels,
    rlevels: rLevels,
    values,
    count: valueCount!
  };
}
570
-
571
/**
 * Decode one DATA_PAGE_V2. Unlike v1, the r/d-levels are stored unenveloped
 * ahead of the values, and only the value section may be compressed.
 * Mutates `cursor.offset` to point past this page.
 */
function decodeDataPageV2(
  cursor: CursorBuffer,
  header: PageHeader,
  column: ParquetField,
  compression: ParquetCompression
): ParquetData {
  // Offset just past this page's payload in the shared buffer.
  const cursorEnd = cursor.offset + header.compressed_page_size;

  // Total values including nulls; V2 headers carry the null count explicitly,
  // so no d-level scan is needed to find the non-null count.
  const valueCount = header.data_page_header_v2?.num_values;
  // @ts-ignore
  const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;
  const valueEncoding = Util.getThriftEnum(
    Encoding,
    header.data_page_header_v2?.encoding!
  ) as ParquetCodec;

  /* read repetition levels */
  // Levels use the fixed RLE level encoding with no length envelope.
  // tslint:disable-next-line:prefer-array-literal
  let rLevels = new Array(valueCount);
  if (column.rLevelMax > 0) {
    rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {
      bitWidth: Util.getBitWidth(column.rLevelMax),
      disableEnvelope: true
    });
  } else {
    // Non-repeated column: r-levels are omitted; all zero.
    rLevels.fill(0);
  }

  /* read definition levels */
  // tslint:disable-next-line:prefer-array-literal
  let dLevels = new Array(valueCount);
  if (column.dLevelMax > 0) {
    dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {
      bitWidth: Util.getBitWidth(column.dLevelMax),
      disableEnvelope: true
    });
  } else {
    // Required column: d-levels are omitted; all zero.
    dLevels.fill(0);
  }

  /* read values */
  // Only the value section is compressed in V2; the levels above were read raw
  // from the shared cursor, which now points at the value bytes.
  let valuesBufCursor = cursor;

  if (header.data_page_header_v2?.is_compressed) {
    const valuesBuf = Compression.inflate(
      compression,
      cursor.buffer.slice(cursor.offset, cursorEnd),
      header.uncompressed_page_size
    );

    valuesBufCursor = {
      buffer: valuesBuf,
      offset: 0,
      size: valuesBuf.length
    };

    cursor.offset = cursorEnd;
  }

  const values = decodeValues(
    column.primitiveType!,
    valueEncoding,
    valuesBufCursor,
    valueCountNonNull,
    {
      typeLength: column.typeLength,
      bitWidth: column.typeLength
    }
  );

  return {
    dlevels: dLevels,
    rlevels: rLevels,
    values,
    count: valueCount!
  };
}
648
-
649
- function decodeSchema(
650
- schemaElements: SchemaElement[],
651
- offset: number,
652
- len: number
653
- ): {
654
- offset: number;
655
- next: number;
656
- schema: SchemaDefinition;
657
- } {
658
- const schema: SchemaDefinition = {};
659
- let next = offset;
660
- for (let i = 0; i < len; i++) {
661
- const schemaElement = schemaElements[next];
662
-
663
- const repetitionType =
664
- next > 0 ? Util.getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';
665
-
666
- let optional = false;
667
- let repeated = false;
668
- switch (repetitionType) {
669
- case 'REQUIRED':
670
- break;
671
- case 'OPTIONAL':
672
- optional = true;
673
- break;
674
- case 'REPEATED':
675
- repeated = true;
676
- break;
677
- default:
678
- throw new Error('parquet: unknown repetition type');
679
- }
680
-
681
- if (schemaElement.num_children! > 0) {
682
- const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);
683
- next = res.next;
684
- schema[schemaElement.name] = {
685
- // type: undefined,
686
- optional,
687
- repeated,
688
- fields: res.schema
689
- };
690
- } else {
691
- let logicalType = Util.getThriftEnum(Type, schemaElement.type!);
692
-
693
- if (schemaElement.converted_type) {
694
- logicalType = Util.getThriftEnum(ConvertedType, schemaElement.converted_type);
695
- }
696
-
697
- schema[schemaElement.name] = {
698
- type: logicalType as ParquetType,
699
- typeLength: schemaElement.type_length,
700
- optional,
701
- repeated
702
- };
703
- next++;
704
- }
705
- }
706
- return {schema, offset, next};
707
- }