@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../../src/parquetjs/parser/decoders.ts"],"names":["decodeDataPages","buffer","options","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","decodePage","page","map","value","index","push","undefined","pageHeader","pageType","PageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","FieldRepetitionType","repetition_type","optional","repeated","num_children","res","name","fields","Type","logicalType","converted_type","ConvertedType","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","PARQUET_CODECS","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","slice","uncompressed_page_size","valuesBuf","rLevelEncoding","Encoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","PARQUET_RDLVL_TYPE","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","data_page_header_v2","num_nulls","PARQUET_RDLVL_ENCODING","valuesBufCursor","is_compressed","dictCursor","dictionary_page_header","d","toString"],"mappings":";;;;;;;;;;;;;;;AAUA;;AACA;;AASA;;AACA;;AACA;;;;;;;;SASsBA,e;;;;;+EAAf,iBACLC,MADK,EAELC,OAFK;AAAA;AAAA;AAAA;AAAA;AAAA;AAICC,YAAAA,MAJD,GAIwB;AAC3BF,cAAAA,MAAM,EAANA,MAD2B;AAE3BG,cAAAA,MAAM,EAAE,CAFmB;AAG3BC,cAAAA,IAAI,EAAEJ,MAAM,CAACK;AAHc,aAJxB;AAUCC,YAAAA,IAVD,GAUqB;AACxBC,cAAAA,OAAO,EAAE,EADe;AAExBC,cAAAA,OAAO,EAAE,EAFe;AAGxBC,cAAAA,MAAM,EAAE,EAHgB;AAIxBC,cAAAA,WAAW,EAAE,EAJW;AAKxBC,cAAAA,KAAK,EAAE;AALiB,aAVrB;AAkBDC,YAAAA,UAlBC,GAkBYX,OAAO,CAACW,UAAR,IAAsB,EAlBlC;;AAAA;AAAA,kBAsBHV,MAAM,CAACC,MAAP,GAAgBD,MAAM,CAACE,IAAvB,KACC,CAACH,OAAO,CAACY,SAAT,IAAsBP,IAAI,CAACE,OAAL,CAAaH,MAAb,GAAsBS,MAAM,CAACb,OAAO,CAACY,SAAT,CADnD,CAtBG;AAAA;AAAA;AAAA;;AAAA;AAAA,mBA0BgBE,UAAU,CAACb,MAAD,EAASD,OAAT,CA1B1B;;AAAA;AA0BGe,YAAAA,IA1BH;;AAAA,iBA4BCA,IAAI,CAACJ,UA5BN;AAAA;AAAA;AAAA;;AA6BDA,YAAAA,UAAU,GAAGI,IAAI,CAACJ,UAAlB;AA7BC;;AAAA;AAkCH,gBAAIA,UAAU,CAACP,MAAf,EAAuB;AAErBW,cAAAA,IAAI,CAACP,MAAL,GAAcO,IAAI,CAACP,MAAL,CAAYQ,GAAZ,CAAgB,UAACC,KAAD;AAAA,uBAAWN,UAAU,CAACM,KAAD,CAArB;AAAA,eAAhB,CAAd;AACD;;AAED,iBAASC,KAAT,GAAiB,CAAjB,EAAoBA,KAAK,GAAGH,IAAI,CAACT,OAAL,CAAaF,MAAzC,EAAiDc,KAAK,EAAtD,EAA0D;AACxDb,cAAAA,IAAI,CAACC,OAAL,CAAaa,IAAb,CAAkBJ,IAAI,CAACT,OAAL,CAAaY,KAAb,CAAlB;AACAb,cAAAA,IAAI,CAACE,OAAL,CAAaY,IAAb,CAAkBJ,IAAI,CAACR,OAAL,CAAaW,KAAb,CAAlB;AACMD,cAAAA,KAHkD,GAG1CF,IAAI,CAACP,MAAL,CAAYU,KAAZ,CAH0C;;AAKxD,kBAAID,KAAK,KAAKG,SAAd,EAAyB;AACvBf,gBAAAA,IAAI,CAACG,MAAL,CAAYW,IAAZ,CAAiBF,KAAjB;AACD;AACF;;AAEDZ,YAAAA,IAAI,CAACK,KAAL,IAAcK,IAAI,CAACL,KAAnB;AACAL,YAAAA,IAAI,CAACI,WAAL,CAAiBU,IAAjB,CAAsBJ,IAAI,CAACM,UAA3B;AAlDG;AAAA;;AAAA;AAAA,6CAqDEhB,IArDF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SA6DeS,U;;;;;0EAAf,kBACLb,MADK,EAELD,OAFK;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAK8B,iCAAiBC,MAAM,CAACF,MAAxB,EAAgCE,MAAM,CAACC,MAAvC,CAL9B;;AAAA;AAAA;AAKEmB,YAAAA,UALF,yBAKEA,UALF;AAKcjB,YAAAA,MALd,yBAKcA,MALd;AAMLH,YAAAA,MAAM,CAACC,MAAP,IAAiBE,MAAjB;AAEMkB,YAAAA,QARD,GAQY,8BAAcC,uBAAd,EAAwBF,UAAU,CAACG,IAAnC,CARZ;AAAA,2BAUGF,QAVH;AAAA,8CAWE,WAXF,yBAcE,cAdF,yBAiBE,iBAjBF;AAAA;;AAAA;AAAA;AAAA,mBAYYG,cAAc,CAACxB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAZ1B;;AAAA;AAYDe,YAAAA,IAZC;AAAA;;AAAA;AAAA;AAAA,mBAeYW,gBAAgB,CAACzB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAf5B;;AAAA;AAeDe,YAAAA,IAfC;AAAA;;AAAA;AAAA;AAAA,mBAmBmBY,oBAAoB,CAAC1B,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAnBvC;;AAAA;AAAA;AAAA,2BAoBCqB,UApBD;AAkBDN,YAAAA,IAlBC;AAmBCJ,cAAAA,UAnBD;AAoBCU,cAAAA,UApBD;AAAA;AAAA;;AAAA;AAAA,kBAwBK,IAAIO,KAAJ,8BAAgCN,QAAhC,EAxBL;;AAAA;AAAA,8CA2BEP,IA3BF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;AAwCA,SAASc,YAAT,CACLC,cADK,EAEL5B,MAFK,EAGL6B,GAHK,EAQL;AACA,MAAMC,MAAwB,GAAG,EAAjC;AACA,MAAIC,IAAI,GAAG/B,MAAX;;AACA,OAAK,IAAIgC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,GAApB,EAAyBG,CAAC,EAA1B,EAA8B;AAC5B,QAAMC,aAAa,GAAGL,cAAc,CAACG,IAAD,CAApC;AAEA,QAAMG,cAAc,GAClBH,IAAI,GAAG,CAAP,GAAW,8BAAcI,kCAAd,EAAmCF,aAAa,CAACG,eAAjD,CAAX,GAAgF,MADlF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQJ,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEG,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAIZ,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIO,aAAa,CAACM,YAAd,GAA8B,CAAlC,EAAqC;AACnC,UAAMC,GAAG,GAAGb,YAAY,CAACC,cAAD,EAAiBG,IAAI,GAAG,CAAxB,EAA2BE,aAAa,CAACM,YAAzC,CAAxB;AACAR,MAAAA,IAAI,GAAGS,GAAG,CAACT,IAAX;AACAD,MAAAA,MAAM,CAACG,aAAa,CAACQ,IAAf,CAAN,GAA6B;AAE3BJ,QAAAA,QAAQ,EAARA,QAF2B;AAG3BC,QAAAA,QAAQ,EAARA,QAH2B;AAI3BI,QAAAA,MAAM,EAAEF,GAAG,CAACV;AAJe,OAA7B;AAMD,KATD,MASO;AACL,UAAMR,IAAI,GAAG,8BAAcqB,mBAAd,EAAoBV,aAAa,CAACX,IAAlC,CAAb;AACA,UAAIsB,WAAW,GAAGtB,IAAlB;;AAEA,UAAIW,aAAa,CAACY,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG,8BAAcE,4BAAd,EAA6Bb,aAAa,CAACY,cAA3C,CAAd;AACD;;AAED,cAAQD,WAAR;AACE,aAAK,SAAL;AACEA,UAAAA,WAAW,aAAMA,WAAN,cAAqBtB,IAArB,CAAX;AACA;;AACF;AAJF;;AAOAQ,MAAAA,MAAM,CAACG,aAAa,CAACQ,IAAf,CAAN,GAA6B;AAC3BnB,QAAAA,IAAI,EAAEsB,WADqB;AAE3BG,QAAAA,UAAU,EAAEd,aAAa,CAACe,WAFC;AAG3BC,QAAAA,SAAS,EAAEhB,aAAa,CAACiB,SAHE;AAI3BC,QAAAA,KAAK,EAAElB,aAAa,CAACkB,KAJM;AAK3Bd,QAAAA,QAAQ,EAARA,QAL2B;AAM3BC,QAAAA,QAAQ,EAARA;AAN2B,OAA7B;AAQAP,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACD,IAAAA,MAAM,EAANA,MAAD;AAAS9B,IAAAA,MAAM,EAANA,MAAT;AAAiB+B,IAAAA,IAAI,EAAJA;AAAjB,GAAP;AACD;;AAKD,SAASqB,YAAT,CACE9B,IADF,EAEE+B,QAFF,EAGEtD,MAHF,EAIES,KAJF,EAKE8C,IALF,EAMS;AACP,MAAI,EAAED,QAAQ,IAAIE,sBAAd,CAAJ,EAAmC;AACjC,UAAM,IAAI7B,KAAJ,6BAA+B2B,QAA/B,EAAN;AACD;;AACD,SAAOE,uBAAeF,QAAf,EAAyBD,YAAzB,CAAsC9B,IAAtC,EAA4CvB,MAA5C,EAAoDS,KAApD,EAA2D8C,IAA3D,CAAP;AACD;;SAQc/B,c;;;;;8EAAf,kBACExB,MADF,EAEEyD,MAFF,EAGE1D,OAHF;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAKQ2D,YAAAA,SALR,GAKoB1D,MAAM,CAACC,MAAP,GAAgBwD,MAAM,CAACE,oBAL3C;AAMQC,YAAAA,UANR,4BAMqBH,MAAM,CAACI,gBAN5B,0DAMqB,sBAAyBC,UAN9C;AASMC,YAAAA,UATN,GASmB/D,MATnB;;AAAA,kBAWMD,OAAO,CAACiE,WAAR,KAAwB,cAX9B;AAAA;AAAA;AAAA;;AAAA;AAAA,mBAY4B,6BACtBjE,OAAO,CAACiE,WADc,EAEtBhE,MAAM,CAACF,MAAP,CAAcmE,KAAd,CAAoBjE,MAAM,CAACC,MAA3B,EAAmCyD,SAAnC,CAFsB,EAGtBD,MAAM,CAACS,sBAHe,CAZ5B;;AAAA;AAYUC,YAAAA,SAZV;AAiBIJ,YAAAA,UAAU,GAAG;AACXjE,cAAAA,MAAM,EAAEqE,SADG;AAEXlE,cAAAA,MAAM,EAAE,CAFG;AAGXC,cAAAA,IAAI,EAAEiE,SAAS,CAAChE;AAHL,aAAb;AAKAH,YAAAA,MAAM,CAACC,MAAP,GAAgByD,SAAhB;;AAtBJ;AA0BQU,YAAAA,cA1BR,GA0ByB,8BACrBC,uBADqB,4BAErBZ,MAAM,CAACI,gBAFc,2DAErB,uBAAyBS,yBAFJ,CA1BzB;AA+BMC,YAAAA,OA/BN,GA+BgB,IAAIC,KAAJ,CAAUZ,UAAV,CA/BhB;;AAiCE,gBAAI7D,OAAO,CAAC0E,MAAR,CAAeC,SAAf,GAA2B,CAA/B,EAAkC;AAChCH,cAAAA,OAAO,GAAGlB,YAAY,CAACsB,6BAAD,EAAqBP,cAArB,EAAqCL,UAArC,EAAiDH,UAAjD,EAA8D;AAClFgB,gBAAAA,QAAQ,EAAE,4BAAY7E,OAAO,CAAC0E,MAAR,CAAeC,SAA3B,CADwE;AAElFG,gBAAAA,eAAe,EAAE;AAFiE,eAA9D,CAAtB;AAKD,aAND,MAMO;AACLN,cAAAA,OAAO,CAACO,IAAR,CAAa,CAAb;AACD;;AAGKC,YAAAA,cA5CR,GA4CyB,8BACrBV,uBADqB,4BAErBZ,MAAM,CAACI,gBAFc,2DAErB,uBAAyBmB,yBAFJ,CA5CzB;AAiDMC,YAAAA,OAjDN,GAiDgB,IAAIT,KAAJ,CAAUZ,UAAV,CAjDhB;;AAkDE,gBAAI7D,OAAO,CAAC0E,MAAR,CAAeS,SAAf,GAA2B,CAA/B,EAAkC;AAChCD,cAAAA,OAAO,GAAG5B,YAAY,CAACsB,6BAAD,EAAqBI,cAArB,EAAqChB,UAArC,EAAiDH,UAAjD,EAA8D;AAClFgB,gBAAAA,QAAQ,EAAE,4BAAY7E,OAAO,CAAC0E,MAAR,CAAeS,SAA3B,CADwE;AAElFL,gBAAAA,eAAe,EAAE;AAFiE,eAA9D,CAAtB;AAKD,aAND,MAMO;AACLI,cAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACGK,YAAAA,iBA3DN,GA2D0B,CA3D1B;AAAA,mDA4DqBF,OA5DrB;;AAAA;AA4DE,kEAA4B;AAAjBG,gBAAAA,IAAiB;;AAC1B,oBAAIA,IAAI,KAAKrF,OAAO,CAAC0E,MAAR,CAAeS,SAA5B,EAAuC;AACrCC,kBAAAA,iBAAiB;AAClB;AACF;AAhEH;AAAA;AAAA;AAAA;AAAA;;AAmEQE,YAAAA,aAnER,GAmEwB,8BAAchB,uBAAd,4BAAwBZ,MAAM,CAACI,gBAA/B,2DAAwB,uBAAyBP,QAAjD,CAnExB;AAoEQgC,YAAAA,aApER,GAoEwB;AACpBtC,cAAAA,UAAU,EAAEjD,OAAO,CAAC0E,MAAR,CAAezB,UADP;AAEpB4B,cAAAA,QAAQ,EAAE7E,OAAO,CAAC0E,MAAR,CAAezB;AAFL,aApExB;AAyEQzC,YAAAA,MAzER,GAyEiB8C,YAAY,CACzBtD,OAAO,CAAC0E,MAAR,CAAec,aADU,EAEzBF,aAFyB,EAGzBtB,UAHyB,EAIzBoB,iBAJyB,EAKzBG,aALyB,CAzE7B;AAAA,8CAiFS;AACLhF,cAAAA,OAAO,EAAE2E,OADJ;AAEL5E,cAAAA,OAAO,EAAEkE,OAFJ;AAGLhE,cAAAA,MAAM,EAANA,MAHK;AAILE,cAAAA,KAAK,EAAEmD,UAJF;AAKLxC,cAAAA,UAAU,EAAEqC;AALP,aAjFT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SAiGehC,gB;;;;;+EAAf,kBACEzB,MADF,EAEEyD,MAFF,EAGEF,IAHF;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAKQG,YAAAA,SALR,GAKoB1D,MAAM,CAACC,MAAP,GAAgBwD,MAAM,CAACE,oBAL3C;AAOQC,YAAAA,UAPR,6BAOqBH,MAAM,CAAC+B,mBAP5B,2DAOqB,uBAA4B1B,UAPjD;AASQqB,YAAAA,iBATR,GAS4BvB,UAAU,8BAAGH,MAAM,CAAC+B,mBAAV,2DAAG,uBAA4BC,SAA/B,CATtC;AAUQJ,YAAAA,aAVR,GAUwB,8BACpBhB,uBADoB,4BAEpBZ,MAAM,CAAC+B,mBAFa,2DAEpB,uBAA4BlC,QAFR,CAVxB;AAiBMiB,YAAAA,OAjBN,GAiBgB,IAAIC,KAAJ,CAAUZ,UAAV,CAjBhB;;AAkBE,gBAAIL,IAAI,CAACkB,MAAL,CAAYC,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BH,cAAAA,OAAO,GAAGlB,YAAY,CAACsB,6BAAD,EAAqBe,iCAArB,EAA6C1F,MAA7C,EAAqD4D,UAArD,EAAkE;AACtFgB,gBAAAA,QAAQ,EAAE,4BAAYrB,IAAI,CAACkB,MAAL,CAAYC,SAAxB,CAD4E;AAEtFG,gBAAAA,eAAe,EAAE;AAFqE,eAAlE,CAAtB;AAID,aALD,MAKO;AACLN,cAAAA,OAAO,CAACO,IAAR,CAAa,CAAb;AACD;;AAIGG,YAAAA,OA7BN,GA6BgB,IAAIT,KAAJ,CAAUZ,UAAV,CA7BhB;;AA8BE,gBAAIL,IAAI,CAACkB,MAAL,CAAYS,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BD,cAAAA,OAAO,GAAG5B,YAAY,CAACsB,6BAAD,EAAqBe,iCAArB,EAA6C1F,MAA7C,EAAqD4D,UAArD,EAAkE;AACtFgB,gBAAAA,QAAQ,EAAE,4BAAYrB,IAAI,CAACkB,MAAL,CAAYS,SAAxB,CAD4E;AAEtFL,gBAAAA,eAAe,EAAE;AAFqE,eAAlE,CAAtB;AAID,aALD,MAKO;AACLI,cAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGGa,YAAAA,eAxCN,GAwCwB3F,MAxCxB;;AAAA,4CA0CMyD,MAAM,CAAC+B,mBA1Cb,mDA0CM,uBAA4BI,aA1ClC;AAAA;AAAA;AAAA;;AAAA;AAAA,mBA2C4B,6BACtBrC,IAAI,CAACS,WADiB,EAEtBhE,MAAM,CAACF,MAAP,CAAcmE,KAAd,CAAoBjE,MAAM,CAACC,MAA3B,EAAmCyD,SAAnC,CAFsB,EAGtBD,MAAM,CAACS,sBAHe,CA3C5B;;AAAA;AA2CUC,YAAAA,SA3CV;AAiDIwB,YAAAA,eAAe,GAAG;AAChB7F,cAAAA,MAAM,EAAEqE,SADQ;AAEhBlE,cAAAA,MAAM,EAAE,CAFQ;AAGhBC,cAAAA,IAAI,EAAEiE,SAAS,CAAChE;AAHA,aAAlB;AAMAH,YAAAA,MAAM,CAACC,MAAP,GAAgByD,SAAhB;;AAvDJ;AA0DQ4B,YAAAA,aA1DR,GA0DwB;AACpBtC,cAAAA,UAAU,EAAEO,IAAI,CAACkB,MAAL,CAAYzB,UADJ;AAEpB4B,cAAAA,QAAQ,EAAErB,IAAI,CAACkB,MAAL,CAAYzB;AAFF,aA1DxB;AA+DQzC,YAAAA,MA/DR,GA+DiB8C,YAAY,CACzBE,IAAI,CAACkB,MAAL,CAAYc,aADa,EAEzBF,aAFyB,EAGzBM,eAHyB,EAIzBR,iBAJyB,EAKzBG,aALyB,CA/D7B;AAAA,8CAuES;AACLhF,cAAAA,OAAO,EAAE2E,OADJ;AAEL5E,cAAAA,OAAO,EAAEkE,OAFJ;AAGLhE,cAAAA,MAAM,EAANA,MAHK;AAILE,cAAAA,KAAK,EAAEmD,UAJF;AAKLxC,cAAAA,UAAU,EAAEqC;AALP,aAvET;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SAsFe/B,oB;;;;;oFAAf,kBACE1B,MADF,EAEEoB,UAFF,EAGErB,OAHF;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAKQ2D,YAAAA,SALR,GAKoB1D,MAAM,CAACC,MAAP,GAAgBmB,UAAU,CAACuC,oBAL/C;AAOMkC,YAAAA,UAPN,GAOmB;AACf5F,cAAAA,MAAM,EAAE,CADO;AAEfH,cAAAA,MAAM,EAAEE,MAAM,CAACF,MAAP,CAAcmE,KAAd,CAAoBjE,MAAM,CAACC,MAA3B,EAAmCyD,SAAnC,CAFO;AAGfxD,cAAAA,IAAI,EAAEwD,SAAS,GAAG1D,MAAM,CAACC;AAHV,aAPnB;AAaED,YAAAA,MAAM,CAACC,MAAP,GAAgByD,SAAhB;;AAbF,kBAeM3D,OAAO,CAACiE,WAAR,KAAwB,cAf9B;AAAA;AAAA;AAAA;;AAAA;AAAA,mBAgB4B,6BACtBjE,OAAO,CAACiE,WADc,EAEtB6B,UAAU,CAAC/F,MAAX,CAAkBmE,KAAlB,CAAwB4B,UAAU,CAAC5F,MAAnC,EAA2CyD,SAA3C,CAFsB,EAGtBtC,UAAU,CAAC8C,sBAHW,CAhB5B;;AAAA;AAgBUC,YAAAA,SAhBV;AAsBI0B,YAAAA,UAAU,GAAG;AACX/F,cAAAA,MAAM,EAAEqE,SADG;AAEXlE,cAAAA,MAAM,EAAE,CAFG;AAGXC,cAAAA,IAAI,EAAEiE,SAAS,CAAChE;AAHL,aAAb;AAMAH,YAAAA,MAAM,CAACC,MAAP,GAAgByD,SAAhB;;AA5BJ;AA+BQ/C,YAAAA,SA/BR,GA+BoB,CAAAS,UAAU,SAAV,IAAAA,UAAU,WAAV,qCAAAA,UAAU,CAAE0E,sBAAZ,gFAAoChC,UAApC,KAAkD,CA/BtE;AAAA,8CAiCST,YAAY,CACjBtD,OAAO,CAAC0E,MAAR,CAAec,aADE,EAEjBxF,OAAO,CAAC0E,MAAR,CAAenB,QAFE,EAGjBuC,UAHiB,EAIjBlF,SAJiB,EAKjBZ,OALiB,CAAZ,CAMLgB,GANK,CAMD,UAACgF,CAAD;AAAA,qBAAOA,CAAC,CAACC,QAAF,EAAP;AAAA,aANC,CAjCT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetData,\n ParquetOptions,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n options: ParquetOptions\n): Promise<ParquetData> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = options.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!options.numValues || data.dlevels.length < Number(options.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, options);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param options\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n let page;\n const {pageHeader, length} = await decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, options);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, options);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, options),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (options.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (options.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === options.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) as ParquetCodec;\n const decodeOptions = {\n typeLength: options.column.typeLength,\n bitWidth: options.column.typeLength\n };\n\n const values = decodeValues(\n options.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n opts: any\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (opts.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (opts.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n opts.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: opts.column.typeLength,\n bitWidth: opts.column.typeLength\n };\n\n const values = decodeValues(\n opts.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param options\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n options: ParquetOptions\n): Promise<string[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n return decodeValues(\n options.column.primitiveType!,\n options.column.encoding!,\n dictCursor,\n numValues,\n options as ParquetCodecOptions\n ).map((d) => d.toString());\n}\n"],"file":"decoders.js"}
@@ -0,0 +1,215 @@
1
+ "use strict";
2
+
3
+ var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
+
5
+ Object.defineProperty(exports, "__esModule", {
6
+ value: true
7
+ });
8
+ exports.ParquetCursor = void 0;
9
+
10
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
11
+
12
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
13
+
14
+ var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
15
+
16
+ var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
17
+
18
+ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
19
+
20
+ var _shred = require("../schema/shred");
21
+
22
+ var _Symbol$asyncIterator;
23
+
24
+ _Symbol$asyncIterator = Symbol.asyncIterator;
25
+
26
+ var ParquetCursor = function () {
27
+ function ParquetCursor(metadata, envelopeReader, schema, columnList) {
28
+ (0, _classCallCheck2.default)(this, ParquetCursor);
29
+ (0, _defineProperty2.default)(this, "metadata", void 0);
30
+ (0, _defineProperty2.default)(this, "envelopeReader", void 0);
31
+ (0, _defineProperty2.default)(this, "schema", void 0);
32
+ (0, _defineProperty2.default)(this, "columnList", void 0);
33
+ (0, _defineProperty2.default)(this, "rowGroup", []);
34
+ (0, _defineProperty2.default)(this, "rowGroupIndex", void 0);
35
+ this.metadata = metadata;
36
+ this.envelopeReader = envelopeReader;
37
+ this.schema = schema;
38
+ this.columnList = columnList;
39
+ this.rowGroupIndex = 0;
40
+ }
41
+
42
+ (0, _createClass2.default)(ParquetCursor, [{
43
+ key: "next",
44
+ value: function () {
45
+ var _next = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
46
+ var rowBuffer;
47
+ return _regenerator.default.wrap(function _callee$(_context) {
48
+ while (1) {
49
+ switch (_context.prev = _context.next) {
50
+ case 0:
51
+ if (!(this.rowGroup.length === 0)) {
52
+ _context.next = 8;
53
+ break;
54
+ }
55
+
56
+ if (!(this.rowGroupIndex >= this.metadata.row_groups.length)) {
57
+ _context.next = 3;
58
+ break;
59
+ }
60
+
61
+ return _context.abrupt("return", null);
62
+
63
+ case 3:
64
+ _context.next = 5;
65
+ return this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
66
+
67
+ case 5:
68
+ rowBuffer = _context.sent;
69
+ this.rowGroup = (0, _shred.materializeRecords)(this.schema, rowBuffer);
70
+ this.rowGroupIndex++;
71
+
72
+ case 8:
73
+ return _context.abrupt("return", this.rowGroup.shift());
74
+
75
+ case 9:
76
+ case "end":
77
+ return _context.stop();
78
+ }
79
+ }
80
+ }, _callee, this);
81
+ }));
82
+
83
+ function next() {
84
+ return _next.apply(this, arguments);
85
+ }
86
+
87
+ return next;
88
+ }()
89
+ }, {
90
+ key: "rewind",
91
+ value: function rewind() {
92
+ this.rowGroup = [];
93
+ this.rowGroupIndex = 0;
94
+ }
95
+ }, {
96
+ key: _Symbol$asyncIterator,
97
+ value: function value() {
98
+ var _this = this;
99
+
100
+ var done = false;
101
+ return {
102
+ next: function () {
103
+ var _next2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2() {
104
+ var value;
105
+ return _regenerator.default.wrap(function _callee2$(_context2) {
106
+ while (1) {
107
+ switch (_context2.prev = _context2.next) {
108
+ case 0:
109
+ if (!done) {
110
+ _context2.next = 2;
111
+ break;
112
+ }
113
+
114
+ return _context2.abrupt("return", {
115
+ done: done,
116
+ value: null
117
+ });
118
+
119
+ case 2:
120
+ _context2.next = 4;
121
+ return _this.next();
122
+
123
+ case 4:
124
+ value = _context2.sent;
125
+
126
+ if (!(value === null)) {
127
+ _context2.next = 7;
128
+ break;
129
+ }
130
+
131
+ return _context2.abrupt("return", {
132
+ done: true,
133
+ value: value
134
+ });
135
+
136
+ case 7:
137
+ return _context2.abrupt("return", {
138
+ done: false,
139
+ value: value
140
+ });
141
+
142
+ case 8:
143
+ case "end":
144
+ return _context2.stop();
145
+ }
146
+ }
147
+ }, _callee2);
148
+ }));
149
+
150
+ function next() {
151
+ return _next2.apply(this, arguments);
152
+ }
153
+
154
+ return next;
155
+ }(),
156
+ return: function () {
157
+ var _return2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3() {
158
+ return _regenerator.default.wrap(function _callee3$(_context3) {
159
+ while (1) {
160
+ switch (_context3.prev = _context3.next) {
161
+ case 0:
162
+ done = true;
163
+ return _context3.abrupt("return", {
164
+ done: done,
165
+ value: null
166
+ });
167
+
168
+ case 2:
169
+ case "end":
170
+ return _context3.stop();
171
+ }
172
+ }
173
+ }, _callee3);
174
+ }));
175
+
176
+ function _return() {
177
+ return _return2.apply(this, arguments);
178
+ }
179
+
180
+ return _return;
181
+ }(),
182
+ throw: function () {
183
+ var _throw2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4() {
184
+ return _regenerator.default.wrap(function _callee4$(_context4) {
185
+ while (1) {
186
+ switch (_context4.prev = _context4.next) {
187
+ case 0:
188
+ done = true;
189
+ return _context4.abrupt("return", {
190
+ done: true,
191
+ value: null
192
+ });
193
+
194
+ case 2:
195
+ case "end":
196
+ return _context4.stop();
197
+ }
198
+ }
199
+ }, _callee4);
200
+ }));
201
+
202
+ function _throw() {
203
+ return _throw2.apply(this, arguments);
204
+ }
205
+
206
+ return _throw;
207
+ }()
208
+ };
209
+ }
210
+ }]);
211
+ return ParquetCursor;
212
+ }();
213
+
214
+ exports.ParquetCursor = ParquetCursor;
215
+ //# sourceMappingURL=parquet-cursor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../../src/parquetjs/parser/parquet-cursor.ts"],"names":["Symbol","asyncIterator","ParquetCursor","metadata","envelopeReader","schema","columnList","rowGroupIndex","rowGroup","length","row_groups","readRowGroup","rowBuffer","shift","done","next","value","return","throw"],"mappings":";;;;;;;;;;;;;;;;;;;AAKA;;;;wBAiEGA,MAAM,CAACC,a;;IA5DGC,a;AAcX,yBACEC,QADF,EAEEC,cAFF,EAGEC,MAHF,EAIEC,UAJF,EAKE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,oDAdiC,EAcjC;AAAA;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;;;;4EAMD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sBACM,KAAKC,QAAL,CAAcC,MAAd,KAAyB,CAD/B;AAAA;AAAA;AAAA;;AAAA,sBAEQ,KAAKF,aAAL,IAAsB,KAAKJ,QAAL,CAAcO,UAAd,CAAyBD,MAFvD;AAAA;AAAA;AAAA;;AAAA,iDAIa,IAJb;;AAAA;AAAA;AAAA,uBAM4B,KAAKL,cAAL,CAAoBO,YAApB,CACtB,KAAKN,MADiB,EAEtB,KAAKF,QAAL,CAAcO,UAAd,CAAyB,KAAKH,aAA9B,CAFsB,EAGtB,KAAKD,UAHiB,CAN5B;;AAAA;AAMUM,gBAAAA,SANV;AAWI,qBAAKJ,QAAL,GAAgB,+BAAmB,KAAKH,MAAxB,EAAgCO,SAAhC,CAAhB;AACA,qBAAKL,aAAL;;AAZJ;AAAA,iDAcS,KAAKC,QAAL,CAAcK,KAAd,EAdT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;WAoBA,kBAAe;AACb,WAAKL,QAAL,GAAgB,EAAhB;AACA,WAAKD,aAAL,GAAqB,CAArB;AACD;;;WAMD,iBAA2C;AAAA;;AACzC,UAAIO,IAAI,GAAG,KAAX;AACA,aAAO;AACLC,QAAAA,IAAI;AAAA,iFAAE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,yBACAD,IADA;AAAA;AAAA;AAAA;;AAAA,sDAEK;AAACA,sBAAAA,IAAI,EAAJA,IAAD;AAAOE,sBAAAA,KAAK,EAAE;AAAd,qBAFL;;AAAA;AAAA;AAAA,2BAIgB,KAAI,CAACD,IAAL,EAJhB;;AAAA;AAIEC,oBAAAA,KAJF;;AAAA,0BAKAA,KAAK,KAAK,IALV;AAAA;AAAA;AAAA;;AAAA,sDAMK;AAACF,sBAAAA,IAAI,EAAE,IAAP;AAAaE,sBAAAA,KAAK,EAALA;AAAb,qBANL;;AAAA;AAAA,sDAQG;AAACF,sBAAAA,IAAI,EAAE,KAAP;AAAcE,sBAAAA,KAAK,EAALA;AAAd,qBARH;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA,WADC;AAWLC,QAAAA,MAAM;AAAA,mFAAE;AAAA;AAAA;AAAA;AAAA;AACNH,oBAAAA,IAAI,GAAG,IAAP;AADM,sDAEC;AAACA,sBAAAA,IAAI,EAAJA,IAAD;AAAOE,sBAAAA,KAAK,EAAE;AAAd,qBAFD;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA,WAXD;AAeLE,QAAAA,KAAK;AAAA,kFAAE;AAAA;AAAA;AAAA;AAAA;AACLJ,oBAAAA,IAAI,GAAG,IAAP;AADK,sDAEE;AAACA,sBAAAA,IAAI,EAAE,IAAP;AAAaE,sBAAAA,KAAK,EAAE;AAApB,qBAFF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,WAAF;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAfA,OAAP;AAoBD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {FileMetaData} from '../parquet-thrift';\nimport {ParquetEnvelopeReader} from './parquet-envelope-reader';\nimport {ParquetSchema} from '../schema/schema';\nimport {ParquetRecord} from '../schema/declare';\nimport {materializeRecords} from '../schema/shred';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[] = [];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n"],"file":"parquet-cursor.js"}
@@ -0,0 +1,452 @@
1
+ "use strict";
2
+
3
+ var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
+
5
+ Object.defineProperty(exports, "__esModule", {
6
+ value: true
7
+ });
8
+ exports.ParquetEnvelopeReader = void 0;
9
+
10
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
11
+
12
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
13
+
14
+ var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
15
+
16
+ var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
17
+
18
+ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
19
+
20
+ var _constants = require("../../constants");
21
+
22
+ var _parquetThrift = require("../parquet-thrift");
23
+
24
+ var _fileUtils = require("../utils/file-utils");
25
+
26
+ var _readUtils = require("../utils/read-utils");
27
+
28
+ var _decoders = require("./decoders");
29
+
30
+ function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
31
+
32
+ function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
33
+
34
+ function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
35
+
36
+ function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
37
+
38
+ function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
39
+
40
+ var DEFAULT_DICTIONARY_SIZE = 1e6;
41
+
42
+ var ParquetEnvelopeReader = function () {
43
+ function ParquetEnvelopeReader(read, close, fileSize, options) {
44
+ (0, _classCallCheck2.default)(this, ParquetEnvelopeReader);
45
+ (0, _defineProperty2.default)(this, "read", void 0);
46
+ (0, _defineProperty2.default)(this, "close", void 0);
47
+ (0, _defineProperty2.default)(this, "fileSize", void 0);
48
+ (0, _defineProperty2.default)(this, "defaultDictionarySize", void 0);
49
+ this.read = read;
50
+ this.close = close;
51
+ this.fileSize = fileSize;
52
+ this.defaultDictionarySize = (options === null || options === void 0 ? void 0 : options.defaultDictionarySize) || DEFAULT_DICTIONARY_SIZE;
53
+ }
54
+
55
+ (0, _createClass2.default)(ParquetEnvelopeReader, [{
56
+ key: "readHeader",
57
+ value: function () {
58
+ var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
59
+ var buffer, magic;
60
+ return _regenerator.default.wrap(function _callee$(_context) {
61
+ while (1) {
62
+ switch (_context.prev = _context.next) {
63
+ case 0:
64
+ _context.next = 2;
65
+ return this.read(0, _constants.PARQUET_MAGIC.length);
66
+
67
+ case 2:
68
+ buffer = _context.sent;
69
+ magic = buffer.toString();
70
+ _context.t0 = magic;
71
+ _context.next = _context.t0 === _constants.PARQUET_MAGIC ? 7 : _context.t0 === _constants.PARQUET_MAGIC_ENCRYPTED ? 8 : 9;
72
+ break;
73
+
74
+ case 7:
75
+ return _context.abrupt("break", 10);
76
+
77
+ case 8:
78
+ throw new Error('Encrypted parquet file not supported');
79
+
80
+ case 9:
81
+ throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
82
+
83
+ case 10:
84
+ case "end":
85
+ return _context.stop();
86
+ }
87
+ }
88
+ }, _callee, this);
89
+ }));
90
+
91
+ function readHeader() {
92
+ return _readHeader.apply(this, arguments);
93
+ }
94
+
95
+ return readHeader;
96
+ }()
97
+ }, {
98
+ key: "readRowGroup",
99
+ value: function () {
100
+ var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(schema, rowGroup, columnList) {
101
+ var buffer, _iterator, _step, colChunk, colMetadata, colKey;
102
+
103
+ return _regenerator.default.wrap(function _callee2$(_context2) {
104
+ while (1) {
105
+ switch (_context2.prev = _context2.next) {
106
+ case 0:
107
+ buffer = {
108
+ rowCount: Number(rowGroup.num_rows),
109
+ columnData: {}
110
+ };
111
+ _iterator = _createForOfIteratorHelper(rowGroup.columns);
112
+ _context2.prev = 2;
113
+
114
+ _iterator.s();
115
+
116
+ case 4:
117
+ if ((_step = _iterator.n()).done) {
118
+ _context2.next = 15;
119
+ break;
120
+ }
121
+
122
+ colChunk = _step.value;
123
+ colMetadata = colChunk.meta_data;
124
+ colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
125
+
126
+ if (!(columnList.length > 0 && (0, _readUtils.fieldIndexOf)(columnList, colKey) < 0)) {
127
+ _context2.next = 10;
128
+ break;
129
+ }
130
+
131
+ return _context2.abrupt("continue", 13);
132
+
133
+ case 10:
134
+ _context2.next = 12;
135
+ return this.readColumnChunk(schema, colChunk);
136
+
137
+ case 12:
138
+ buffer.columnData[colKey.join()] = _context2.sent;
139
+
140
+ case 13:
141
+ _context2.next = 4;
142
+ break;
143
+
144
+ case 15:
145
+ _context2.next = 20;
146
+ break;
147
+
148
+ case 17:
149
+ _context2.prev = 17;
150
+ _context2.t0 = _context2["catch"](2);
151
+
152
+ _iterator.e(_context2.t0);
153
+
154
+ case 20:
155
+ _context2.prev = 20;
156
+
157
+ _iterator.f();
158
+
159
+ return _context2.finish(20);
160
+
161
+ case 23:
162
+ return _context2.abrupt("return", buffer);
163
+
164
+ case 24:
165
+ case "end":
166
+ return _context2.stop();
167
+ }
168
+ }
169
+ }, _callee2, this, [[2, 17, 20, 23]]);
170
+ }));
171
+
172
+ function readRowGroup(_x, _x2, _x3) {
173
+ return _readRowGroup.apply(this, arguments);
174
+ }
175
+
176
+ return readRowGroup;
177
+ }()
178
+ }, {
179
+ key: "readColumnChunk",
180
+ value: function () {
181
+ var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(schema, colChunk) {
182
+ var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
183
+
184
+ var field, type, compression, pagesOffset, pagesSize, _colChunk$meta_data6, options, dictionary, dictionaryPageOffset, dictionaryOffset, pagesBuf;
185
+
186
+ return _regenerator.default.wrap(function _callee3$(_context3) {
187
+ while (1) {
188
+ switch (_context3.prev = _context3.next) {
189
+ case 0:
190
+ if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
191
+ _context3.next = 2;
192
+ break;
193
+ }
194
+
195
+ throw new Error('external references are not supported');
196
+
197
+ case 2:
198
+ field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
199
+ type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
200
+
201
+ if (!(type !== field.primitiveType)) {
202
+ _context3.next = 6;
203
+ break;
204
+ }
205
+
206
+ throw new Error("chunk type not matching schema: ".concat(type));
207
+
208
+ case 6:
209
+ compression = (0, _readUtils.getThriftEnum)(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
210
+ pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
211
+ pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
212
+
213
+ if (!colChunk.file_path) {
214
+ pagesSize = Math.min(this.fileSize - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
215
+ }
216
+
217
+ options = {
218
+ type: type,
219
+ rLevelMax: field.rLevelMax,
220
+ dLevelMax: field.dLevelMax,
221
+ compression: compression,
222
+ column: field,
223
+ numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
224
+ dictionary: []
225
+ };
226
+ dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
227
+
228
+ if (!dictionaryPageOffset) {
229
+ _context3.next = 17;
230
+ break;
231
+ }
232
+
233
+ dictionaryOffset = Number(dictionaryPageOffset);
234
+ _context3.next = 16;
235
+ return this.getDictionary(dictionaryOffset, options, pagesOffset);
236
+
237
+ case 16:
238
+ dictionary = _context3.sent;
239
+
240
+ case 17:
241
+ dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
242
+ _context3.next = 20;
243
+ return this.read(pagesOffset, pagesSize);
244
+
245
+ case 20:
246
+ pagesBuf = _context3.sent;
247
+ _context3.next = 23;
248
+ return (0, _decoders.decodeDataPages)(pagesBuf, _objectSpread(_objectSpread({}, options), {}, {
249
+ dictionary: dictionary
250
+ }));
251
+
252
+ case 23:
253
+ return _context3.abrupt("return", _context3.sent);
254
+
255
+ case 24:
256
+ case "end":
257
+ return _context3.stop();
258
+ }
259
+ }
260
+ }, _callee3, this);
261
+ }));
262
+
263
+ function readColumnChunk(_x4, _x5) {
264
+ return _readColumnChunk.apply(this, arguments);
265
+ }
266
+
267
+ return readColumnChunk;
268
+ }()
269
+ }, {
270
+ key: "getDictionary",
271
+ value: function () {
272
+ var _getDictionary = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(dictionaryPageOffset, options, pagesOffset) {
273
+ var dictionarySize, pagesBuf, cursor, decodedPage;
274
+ return _regenerator.default.wrap(function _callee4$(_context4) {
275
+ while (1) {
276
+ switch (_context4.prev = _context4.next) {
277
+ case 0:
278
+ if (!(dictionaryPageOffset === 0)) {
279
+ _context4.next = 2;
280
+ break;
281
+ }
282
+
283
+ return _context4.abrupt("return", []);
284
+
285
+ case 2:
286
+ dictionarySize = Math.min(this.fileSize - dictionaryPageOffset, this.defaultDictionarySize);
287
+ _context4.next = 5;
288
+ return this.read(dictionaryPageOffset, dictionarySize);
289
+
290
+ case 5:
291
+ pagesBuf = _context4.sent;
292
+ cursor = {
293
+ buffer: pagesBuf,
294
+ offset: 0,
295
+ size: pagesBuf.length
296
+ };
297
+ _context4.next = 9;
298
+ return (0, _decoders.decodePage)(cursor, options);
299
+
300
+ case 9:
301
+ decodedPage = _context4.sent;
302
+ return _context4.abrupt("return", decodedPage.dictionary);
303
+
304
+ case 11:
305
+ case "end":
306
+ return _context4.stop();
307
+ }
308
+ }
309
+ }, _callee4, this);
310
+ }));
311
+
312
+ function getDictionary(_x6, _x7, _x8) {
313
+ return _getDictionary.apply(this, arguments);
314
+ }
315
+
316
+ return getDictionary;
317
+ }()
318
+ }, {
319
+ key: "readFooter",
320
+ value: function () {
321
+ var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5() {
322
+ var trailerLen, trailerBuf, magic, metadataSize, metadataOffset, metadataBuf, _decodeFileMetadata, metadata;
323
+
324
+ return _regenerator.default.wrap(function _callee5$(_context5) {
325
+ while (1) {
326
+ switch (_context5.prev = _context5.next) {
327
+ case 0:
328
+ trailerLen = _constants.PARQUET_MAGIC.length + 4;
329
+ _context5.next = 3;
330
+ return this.read(this.fileSize - trailerLen, trailerLen);
331
+
332
+ case 3:
333
+ trailerBuf = _context5.sent;
334
+ magic = trailerBuf.slice(4).toString();
335
+
336
+ if (!(magic !== _constants.PARQUET_MAGIC)) {
337
+ _context5.next = 7;
338
+ break;
339
+ }
340
+
341
+ throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
342
+
343
+ case 7:
344
+ metadataSize = trailerBuf.readUInt32LE(0);
345
+ metadataOffset = this.fileSize - metadataSize - trailerLen;
346
+
347
+ if (!(metadataOffset < _constants.PARQUET_MAGIC.length)) {
348
+ _context5.next = 11;
349
+ break;
350
+ }
351
+
352
+ throw new Error("Invalid metadata size ".concat(metadataOffset));
353
+
354
+ case 11:
355
+ _context5.next = 13;
356
+ return this.read(metadataOffset, metadataSize);
357
+
358
+ case 13:
359
+ metadataBuf = _context5.sent;
360
+ _decodeFileMetadata = (0, _readUtils.decodeFileMetadata)(metadataBuf), metadata = _decodeFileMetadata.metadata;
361
+ return _context5.abrupt("return", metadata);
362
+
363
+ case 16:
364
+ case "end":
365
+ return _context5.stop();
366
+ }
367
+ }
368
+ }, _callee5, this);
369
+ }));
370
+
371
+ function readFooter() {
372
+ return _readFooter.apply(this, arguments);
373
+ }
374
+
375
+ return readFooter;
376
+ }()
377
+ }], [{
378
+ key: "openFile",
379
+ value: function () {
380
+ var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(filePath) {
381
+ var fileStat, fileDescriptor, readFn, closeFn;
382
+ return _regenerator.default.wrap(function _callee6$(_context6) {
383
+ while (1) {
384
+ switch (_context6.prev = _context6.next) {
385
+ case 0:
386
+ _context6.next = 2;
387
+ return (0, _fileUtils.fstat)(filePath);
388
+
389
+ case 2:
390
+ fileStat = _context6.sent;
391
+ _context6.next = 5;
392
+ return (0, _fileUtils.fopen)(filePath);
393
+
394
+ case 5:
395
+ fileDescriptor = _context6.sent;
396
+ readFn = _fileUtils.fread.bind(undefined, fileDescriptor);
397
+ closeFn = _fileUtils.fclose.bind(undefined, fileDescriptor);
398
+ return _context6.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, fileStat.size));
399
+
400
+ case 9:
401
+ case "end":
402
+ return _context6.stop();
403
+ }
404
+ }
405
+ }, _callee6);
406
+ }));
407
+
408
+ function openFile(_x9) {
409
+ return _openFile.apply(this, arguments);
410
+ }
411
+
412
+ return openFile;
413
+ }()
414
+ }, {
415
+ key: "openBuffer",
416
+ value: function () {
417
+ var _openBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(buffer) {
418
+ var readFn, closeFn;
419
+ return _regenerator.default.wrap(function _callee7$(_context7) {
420
+ while (1) {
421
+ switch (_context7.prev = _context7.next) {
422
+ case 0:
423
+ readFn = function readFn(position, length) {
424
+ return Promise.resolve(buffer.slice(position, position + length));
425
+ };
426
+
427
+ closeFn = function closeFn() {
428
+ return Promise.resolve();
429
+ };
430
+
431
+ return _context7.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, buffer.length));
432
+
433
+ case 3:
434
+ case "end":
435
+ return _context7.stop();
436
+ }
437
+ }
438
+ }, _callee7);
439
+ }));
440
+
441
+ function openBuffer(_x10) {
442
+ return _openBuffer.apply(this, arguments);
443
+ }
444
+
445
+ return openBuffer;
446
+ }()
447
+ }]);
448
+ return ParquetEnvelopeReader;
449
+ }();
450
+
451
+ exports.ParquetEnvelopeReader = ParquetEnvelopeReader;
452
+ //# sourceMappingURL=parquet-envelope-reader.js.map