@loaders.gl/parquet 3.1.0-alpha.4 → 3.1.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387) hide show
  1. package/dist/bundle.d.ts +2 -0
  2. package/dist/bundle.d.ts.map +1 -0
  3. package/dist/bundle.js +5 -0
  4. package/dist/constants.d.ts +15 -0
  5. package/dist/constants.d.ts.map +1 -0
  6. package/dist/constants.js +18 -0
  7. package/dist/dist.min.js +27 -13
  8. package/dist/dist.min.js.map +7 -1
  9. package/dist/es5/bundle.js +1 -1
  10. package/dist/es5/bundle.js.map +1 -1
  11. package/dist/es5/constants.js +5 -5
  12. package/dist/es5/constants.js.map +1 -1
  13. package/dist/es5/index.js +16 -45
  14. package/dist/es5/index.js.map +1 -1
  15. package/dist/es5/lib/convert-schema.js +13 -13
  16. package/dist/es5/lib/convert-schema.js.map +1 -1
  17. package/dist/es5/lib/parse-parquet.js +19 -154
  18. package/dist/es5/lib/parse-parquet.js.map +1 -1
  19. package/dist/es5/lib/read-array-buffer.js +6 -43
  20. package/dist/es5/lib/read-array-buffer.js.map +1 -1
  21. package/dist/es5/parquet-loader.js +4 -4
  22. package/dist/es5/parquet-loader.js.map +1 -1
  23. package/dist/es5/parquet-writer.js +4 -4
  24. package/dist/es5/parquet-writer.js.map +1 -1
  25. package/dist/es5/parquetjs/codecs/dictionary.js +2 -10
  26. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  27. package/dist/es5/parquetjs/codecs/index.js +4 -6
  28. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  29. package/dist/es5/parquetjs/codecs/plain.js +41 -43
  30. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  31. package/dist/es5/parquetjs/codecs/rle.js +25 -35
  32. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  33. package/dist/es5/parquetjs/compression.js +28 -122
  34. package/dist/es5/parquetjs/compression.js.map +1 -1
  35. package/dist/es5/parquetjs/encoder/writer.js +301 -737
  36. package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
  37. package/dist/es5/parquetjs/file.js +15 -15
  38. package/dist/es5/parquetjs/file.js.map +1 -1
  39. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  40. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +31 -45
  41. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  42. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +141 -152
  43. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  44. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +147 -160
  45. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +248 -259
  47. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +67 -79
  49. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  51. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  52. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +113 -124
  53. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  54. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +158 -169
  55. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/DateType.js +31 -45
  57. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +68 -79
  59. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +83 -94
  61. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  63. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +31 -45
  64. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +170 -182
  67. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +31 -45
  69. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  70. package/dist/es5/parquetjs/parquet-thrift/IntType.js +68 -79
  71. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +31 -45
  73. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +68 -79
  75. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/ListType.js +31 -45
  77. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +319 -343
  79. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/MapType.js +31 -45
  81. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +31 -45
  83. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +31 -45
  85. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/NullType.js +31 -45
  87. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +64 -75
  89. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +83 -94
  91. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +158 -169
  93. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +83 -94
  95. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +113 -124
  98. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +188 -199
  100. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +83 -94
  102. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +124 -135
  104. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  105. package/dist/es5/parquetjs/parquet-thrift/StringType.js +31 -45
  106. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  107. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +68 -79
  108. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  109. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +88 -101
  110. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  111. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +68 -79
  112. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  113. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  114. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -45
  115. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  116. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +31 -45
  117. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  118. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  119. package/dist/es5/parquetjs/parser/decoders.js +218 -397
  120. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  121. package/dist/es5/parquetjs/parser/parquet-cursor.js +62 -180
  122. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
  123. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +124 -408
  124. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  125. package/dist/es5/parquetjs/parser/parquet-reader.js +91 -369
  126. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  127. package/dist/es5/parquetjs/schema/declare.js +9 -11
  128. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  129. package/dist/es5/parquetjs/schema/schema.js +73 -87
  130. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  131. package/dist/es5/parquetjs/schema/shred.js +56 -96
  132. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  133. package/dist/es5/parquetjs/schema/types.js +39 -40
  134. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  135. package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
  136. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
  137. package/dist/es5/parquetjs/utils/file-utils.js +8 -65
  138. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  139. package/dist/es5/parquetjs/utils/read-utils.js +22 -50
  140. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  141. package/dist/esm/index.js +2 -3
  142. package/dist/esm/index.js.map +1 -1
  143. package/dist/esm/parquet-loader.js +1 -1
  144. package/dist/esm/parquet-loader.js.map +1 -1
  145. package/dist/esm/parquet-writer.js +1 -1
  146. package/dist/esm/parquet-writer.js.map +1 -1
  147. package/dist/esm/parquetjs/codecs/plain.js +3 -3
  148. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  149. package/dist/esm/parquetjs/codecs/rle.js +1 -1
  150. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  151. package/dist/esm/parquetjs/compression.js +4 -13
  152. package/dist/esm/parquetjs/compression.js.map +1 -1
  153. package/dist/esm/parquetjs/encoder/writer.js +1 -1
  154. package/dist/esm/parquetjs/encoder/writer.js.map +1 -1
  155. package/dist/esm/parquetjs/parser/decoders.js +4 -4
  156. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  157. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +4 -13
  158. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  159. package/dist/esm/parquetjs/parser/parquet-reader.js +0 -13
  160. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  161. package/dist/esm/parquetjs/schema/schema.js +3 -3
  162. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  163. package/dist/esm/parquetjs/schema/shred.js +2 -2
  164. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  165. package/dist/esm/parquetjs/schema/types.js +20 -20
  166. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  167. package/dist/esm/parquetjs/utils/file-utils.js +0 -45
  168. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  169. package/dist/index.d.ts +28 -0
  170. package/dist/index.d.ts.map +1 -0
  171. package/dist/index.js +30 -0
  172. package/dist/lib/convert-schema.d.ts +8 -0
  173. package/dist/lib/convert-schema.d.ts.map +1 -0
  174. package/dist/lib/convert-schema.js +70 -0
  175. package/dist/lib/parse-parquet.d.ts +4 -0
  176. package/dist/lib/parse-parquet.d.ts.map +1 -0
  177. package/dist/lib/parse-parquet.js +28 -0
  178. package/dist/lib/read-array-buffer.d.ts +19 -0
  179. package/dist/lib/read-array-buffer.d.ts.map +1 -0
  180. package/dist/lib/read-array-buffer.js +29 -0
  181. package/dist/parquet-loader.d.ts +23 -0
  182. package/dist/parquet-loader.d.ts.map +1 -0
  183. package/dist/parquet-loader.js +27 -0
  184. package/dist/parquet-worker.js +27 -13
  185. package/dist/parquet-worker.js.map +7 -1
  186. package/dist/parquet-writer.d.ts +4 -0
  187. package/dist/parquet-writer.d.ts.map +1 -0
  188. package/dist/parquet-writer.js +21 -0
  189. package/dist/parquetjs/codecs/declare.d.ts +17 -0
  190. package/dist/parquetjs/codecs/declare.d.ts.map +1 -0
  191. package/dist/parquetjs/codecs/declare.js +2 -0
  192. package/dist/parquetjs/codecs/dictionary.d.ts +3 -0
  193. package/dist/parquetjs/codecs/dictionary.d.ts.map +1 -0
  194. package/dist/parquetjs/codecs/dictionary.js +14 -0
  195. package/dist/parquetjs/codecs/index.d.ts +5 -0
  196. package/dist/parquetjs/codecs/index.d.ts.map +1 -0
  197. package/dist/parquetjs/codecs/index.js +51 -0
  198. package/dist/parquetjs/codecs/plain.d.ts +6 -0
  199. package/dist/parquetjs/codecs/plain.d.ts.map +1 -0
  200. package/dist/parquetjs/codecs/plain.js +211 -0
  201. package/dist/parquetjs/codecs/rle.d.ts +6 -0
  202. package/dist/parquetjs/codecs/rle.d.ts.map +1 -0
  203. package/dist/parquetjs/codecs/rle.js +145 -0
  204. package/dist/parquetjs/compression.d.ts +23 -0
  205. package/dist/parquetjs/compression.d.ts.map +1 -0
  206. package/dist/parquetjs/compression.js +168 -0
  207. package/dist/parquetjs/encoder/writer.d.ts +123 -0
  208. package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
  209. package/dist/parquetjs/encoder/writer.js +478 -0
  210. package/dist/parquetjs/file.d.ts +10 -0
  211. package/dist/parquetjs/file.d.ts.map +1 -0
  212. package/dist/parquetjs/file.js +99 -0
  213. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts +6 -0
  214. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +1 -0
  215. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +15 -0
  216. package/dist/parquetjs/parquet-thrift/BsonType.d.ts +9 -0
  217. package/dist/parquetjs/parquet-thrift/BsonType.d.ts.map +1 -0
  218. package/dist/parquetjs/parquet-thrift/BsonType.js +58 -0
  219. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts +25 -0
  220. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -0
  221. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +207 -0
  222. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts +22 -0
  223. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -0
  224. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +213 -0
  225. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts +42 -0
  226. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -0
  227. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +398 -0
  228. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts +13 -0
  229. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -0
  230. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +104 -0
  231. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts +11 -0
  232. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +1 -0
  233. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +20 -0
  234. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts +25 -0
  235. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts.map +1 -0
  236. package/dist/parquetjs/parquet-thrift/ConvertedType.js +34 -0
  237. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts +21 -0
  238. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -0
  239. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +166 -0
  240. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts +27 -0
  241. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -0
  242. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +226 -0
  243. package/dist/parquetjs/parquet-thrift/DateType.d.ts +9 -0
  244. package/dist/parquetjs/parquet-thrift/DateType.d.ts.map +1 -0
  245. package/dist/parquetjs/parquet-thrift/DateType.js +58 -0
  246. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts +13 -0
  247. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts.map +1 -0
  248. package/dist/parquetjs/parquet-thrift/DecimalType.js +105 -0
  249. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts +16 -0
  250. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -0
  251. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +122 -0
  252. package/dist/parquetjs/parquet-thrift/Encoding.d.ts +11 -0
  253. package/dist/parquetjs/parquet-thrift/Encoding.d.ts.map +1 -0
  254. package/dist/parquetjs/parquet-thrift/Encoding.js +20 -0
  255. package/dist/parquetjs/parquet-thrift/EnumType.d.ts +9 -0
  256. package/dist/parquetjs/parquet-thrift/EnumType.d.ts.map +1 -0
  257. package/dist/parquetjs/parquet-thrift/EnumType.js +58 -0
  258. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts +6 -0
  259. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +1 -0
  260. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +15 -0
  261. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts +28 -0
  262. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -0
  263. package/dist/parquetjs/parquet-thrift/FileMetaData.js +256 -0
  264. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts +9 -0
  265. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +1 -0
  266. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +58 -0
  267. package/dist/parquetjs/parquet-thrift/IntType.d.ts +13 -0
  268. package/dist/parquetjs/parquet-thrift/IntType.d.ts.map +1 -0
  269. package/dist/parquetjs/parquet-thrift/IntType.js +105 -0
  270. package/dist/parquetjs/parquet-thrift/JsonType.d.ts +9 -0
  271. package/dist/parquetjs/parquet-thrift/JsonType.d.ts.map +1 -0
  272. package/dist/parquetjs/parquet-thrift/JsonType.js +58 -0
  273. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts +13 -0
  274. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts.map +1 -0
  275. package/dist/parquetjs/parquet-thrift/KeyValue.js +102 -0
  276. package/dist/parquetjs/parquet-thrift/ListType.d.ts +9 -0
  277. package/dist/parquetjs/parquet-thrift/ListType.d.ts.map +1 -0
  278. package/dist/parquetjs/parquet-thrift/ListType.js +58 -0
  279. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts +61 -0
  280. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -0
  281. package/dist/parquetjs/parquet-thrift/LogicalType.js +380 -0
  282. package/dist/parquetjs/parquet-thrift/MapType.d.ts +9 -0
  283. package/dist/parquetjs/parquet-thrift/MapType.d.ts.map +1 -0
  284. package/dist/parquetjs/parquet-thrift/MapType.js +58 -0
  285. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts +9 -0
  286. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +1 -0
  287. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +58 -0
  288. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts +9 -0
  289. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +1 -0
  290. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +58 -0
  291. package/dist/parquetjs/parquet-thrift/NullType.d.ts +9 -0
  292. package/dist/parquetjs/parquet-thrift/NullType.d.ts.map +1 -0
  293. package/dist/parquetjs/parquet-thrift/NullType.js +58 -0
  294. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts +12 -0
  295. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -0
  296. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +97 -0
  297. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts +17 -0
  298. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -0
  299. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +127 -0
  300. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts +30 -0
  301. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -0
  302. package/dist/parquetjs/parquet-thrift/PageHeader.js +216 -0
  303. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts +16 -0
  304. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts.map +1 -0
  305. package/dist/parquetjs/parquet-thrift/PageLocation.js +141 -0
  306. package/dist/parquetjs/parquet-thrift/PageType.d.ts +7 -0
  307. package/dist/parquetjs/parquet-thrift/PageType.d.ts.map +1 -0
  308. package/dist/parquetjs/parquet-thrift/PageType.js +16 -0
  309. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts +20 -0
  310. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -0
  311. package/dist/parquetjs/parquet-thrift/RowGroup.js +182 -0
  312. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts +33 -0
  313. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -0
  314. package/dist/parquetjs/parquet-thrift/SchemaElement.js +239 -0
  315. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts +15 -0
  316. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts.map +1 -0
  317. package/dist/parquetjs/parquet-thrift/SortingColumn.js +127 -0
  318. package/dist/parquetjs/parquet-thrift/Statistics.d.ts +23 -0
  319. package/dist/parquetjs/parquet-thrift/Statistics.d.ts.map +1 -0
  320. package/dist/parquetjs/parquet-thrift/Statistics.js +176 -0
  321. package/dist/parquetjs/parquet-thrift/StringType.d.ts +9 -0
  322. package/dist/parquetjs/parquet-thrift/StringType.d.ts.map +1 -0
  323. package/dist/parquetjs/parquet-thrift/StringType.js +58 -0
  324. package/dist/parquetjs/parquet-thrift/TimeType.d.ts +14 -0
  325. package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -0
  326. package/dist/parquetjs/parquet-thrift/TimeType.js +106 -0
  327. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts +17 -0
  328. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -0
  329. package/dist/parquetjs/parquet-thrift/TimeUnit.js +127 -0
  330. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts +14 -0
  331. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -0
  332. package/dist/parquetjs/parquet-thrift/TimestampType.js +106 -0
  333. package/dist/parquetjs/parquet-thrift/Type.d.ts +11 -0
  334. package/dist/parquetjs/parquet-thrift/Type.d.ts.map +1 -0
  335. package/dist/parquetjs/parquet-thrift/Type.js +20 -0
  336. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts +9 -0
  337. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +1 -0
  338. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +58 -0
  339. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts +9 -0
  340. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts.map +1 -0
  341. package/dist/parquetjs/parquet-thrift/UUIDType.js +58 -0
  342. package/dist/parquetjs/parquet-thrift/index.d.ts +44 -0
  343. package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -0
  344. package/dist/parquetjs/parquet-thrift/index.js +61 -0
  345. package/dist/parquetjs/parser/decoders.d.ts +34 -0
  346. package/dist/parquetjs/parser/decoders.d.ts.map +1 -0
  347. package/dist/parquetjs/parser/decoders.js +318 -0
  348. package/dist/parquetjs/parser/parquet-cursor.d.ts +36 -0
  349. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
  350. package/dist/parquetjs/parser/parquet-cursor.js +74 -0
  351. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +40 -0
  352. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
  353. package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
  354. package/dist/parquetjs/parser/parquet-reader.d.ts +68 -0
  355. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -0
  356. package/dist/parquetjs/parser/parquet-reader.js +134 -0
  357. package/dist/parquetjs/schema/declare.d.ts +80 -0
  358. package/dist/parquetjs/schema/declare.d.ts.map +1 -0
  359. package/dist/parquetjs/schema/declare.js +10 -0
  360. package/dist/parquetjs/schema/schema.d.ts +26 -0
  361. package/dist/parquetjs/schema/schema.d.ts.map +1 -0
  362. package/dist/parquetjs/schema/schema.js +162 -0
  363. package/dist/parquetjs/schema/shred.d.ts +48 -0
  364. package/dist/parquetjs/schema/shred.d.ts.map +1 -0
  365. package/dist/parquetjs/schema/shred.js +225 -0
  366. package/dist/parquetjs/schema/types.d.ts +20 -0
  367. package/dist/parquetjs/schema/types.d.ts.map +1 -0
  368. package/dist/parquetjs/schema/types.js +418 -0
  369. package/dist/parquetjs/utils/buffer-utils.d.ts +10 -0
  370. package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
  371. package/dist/parquetjs/utils/buffer-utils.js +22 -0
  372. package/dist/parquetjs/utils/file-utils.d.ts +16 -0
  373. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -0
  374. package/dist/parquetjs/utils/file-utils.js +46 -0
  375. package/dist/parquetjs/utils/read-utils.d.ts +25 -0
  376. package/dist/parquetjs/utils/read-utils.d.ts.map +1 -0
  377. package/dist/parquetjs/utils/read-utils.js +109 -0
  378. package/dist/workers/parquet-worker.d.ts +2 -0
  379. package/dist/workers/parquet-worker.d.ts.map +1 -0
  380. package/dist/workers/parquet-worker.js +5 -0
  381. package/package.json +8 -8
  382. package/src/index.ts +3 -3
  383. package/src/parquetjs/compression.ts +10 -10
  384. package/src/parquetjs/parser/decoders.ts +1 -1
  385. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -11
  386. package/src/parquetjs/parser/parquet-reader.ts +0 -16
  387. package/src/parquetjs/utils/file-utils.ts +0 -49
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/encoder/writer.ts"],"names":["Transform","PARQUET_CODECS","Compression","Shred","ColumnChunk","ColumnMetaData","CompressionCodec","ConvertedType","DataPageHeader","DataPageHeaderV2","Encoding","FieldRepetitionType","FileMetaData","KeyValue","PageHeader","PageType","RowGroup","SchemaElement","Type","osopen","oswrite","osclose","getBitWidth","serializeThrift","Int64","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetWriter","openFile","schema","path","opts","outputStream","openStream","envelopeWriter","ParquetEnvelopeWriter","constructor","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","err","close","appendRow","row","Error","shredRecord","rowCount","callback","writeFooter","setMetadata","key","value","String","setRowGroupSize","cnt","setPageSize","writeFn","bind","undefined","closeFn","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","writeSection","buf","length","Buffer","from","writeRowGroup","records","rgroup","encodeRowGroup","baseOffset","push","metadata","body","encodeFooter","ParquetTransformer","objectMode","writeProxy","t","b","writer","_transform","encoding","then","Promise","resolve","_flush","encodeValues","type","values","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","concat","compressedBuf","deflate","compression","header","DATA_PAGE","data_page_header","num_values","count","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","pageBuf","total_uncompressed_size","total_compressed_size","result","path_in_schema","data_page_offset","encodings","codec","metadataOffset","columns","total_byte_size","field","fieldList","isNested","cchunkData","cchunk","file_offset","meta_data","Number","version","created_by","row_groups","key_value_metadata","kv","schemaRoot","name","num_children","Object","keys","fields","relt","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"mappings":";AAEA,SAAQA,SAAR,QAAkC,QAAlC;AACA,SAA6BC,cAA7B,QAAkD,WAAlD;AACA,OAAO,KAAKC,WAAZ,MAA6B,gBAA7B;AASA,OAAO,KAAKC,KAAZ,MAAuB,iBAAvB;AACA,SACEC,WADF,EAEEC,cAFF,EAGEC,gBAHF,EAIEC,aAJF,EAKEC,cALF,EAMEC,gBANF,EAOEC,QAPF,EAQEC,mBARF,EASEC,YATF,EAUEC,QAVF,EAWEC,UAXF,EAYEC,QAZF,EAaEC,QAbF,EAcEC,aAdF,EAeEC,IAfF,QAgBO,mBAhBP;AAiBA,SAAQC,MAAR,EAAgBC,OAAhB,EAAyBC,OAAzB,QAAuC,qBAAvC;AACA,SAAQC,WAAR,EAAqBC,eAArB,QAA2C,qBAA3C;AACA,OAAOC,KAAP,MAAkB,YAAlB;AAKA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,yBAAyB,GAAG,IAAlC;AACA,MAAMC,8BAA8B,GAAG,IAAvC;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;AAuBA,OAAO,MAAMC,aAAN,CAAuB;AAKP,eAARC,QAAQ,CACnBC,MADmB,EAEnBC,IAFmB,EAGnBC,IAHmB,EAIQ;AAC3B,UAAMC,YAAY,GAAG,MAAMjB,MAAM,CAACe,IAAD,EAAOC,IAAP,CAAjC;AACA,WAAOJ,aAAa,CAACM,UAAd,CAAyBJ,MAAzB,EAAiCG,YAAjC,EAA+CD,IAA/C,CAAP;AACD;;AAMsB,eAAVE,UAAU,CACrBJ,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIM;AAC3B,QAAI,CAACA,IAAL,EAAW;AAETA,MAAAA,IAAI,GAAG,EAAP;AACD;;AAED,UAAMG,cAAc,GAAG,MAAMC,qBAAqB,CAACF,UAAtB,CAAiCJ,MAAjC,EAAyCG,YAAzC,EAAuDD,IAAvD,CAA7B;AAEA,WAAO,IAAIJ,aAAJ,CAAkBE,MAAlB,EAA0BK,cAA1B,EAA0CH,IAA1C,CAAP;AACD;;AAYDK,EAAAA,WAAW,CACTP,MADS,EAETK,cAFS,EAGTH,IAHS,EAIT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKK,cAAL,GAAsBA,cAAtB;AAEA,SAAKG,SAAL,GAAiB,EAAjB;AACA,SAAKC,YAAL,GAAoBP,IAAI,CAACO,YAAL,IAAqBd,8BAAzC;AACA,SAAKe,MAAL,GAAc,KAAd;AACA,SAAKC,YAAL,GAAoB,EAApB;AAGA,SAAKC,WAAL;AACD;;AAEgB,QAAXA,WAAW,GAAkB;AAEjC,QAAI;AACF,YAAM,KAAKP,cAAL,CAAoBO,WAApB,EAAN;AACD,KAFD,CAEE,OAAOC,GAAP,EAAY;AACZ,YAAM,KAAKR,cAAL,CAAoBS,KAApB,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAMc,QAATE,SAAS,CAAIC,GAAJ,EAA2B;AACxC,QAAI,KAAKN,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AACD/C,IAAAA,KAAK,CAACgD,WAAN,CAAkB,KAAKlB,MAAvB,EAA+BgB,GAA/B,EAAoC,KAAKR,SAAzC;;AACA,QAAI,KAAKA,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAApC,EAAkD;AAEhD,WAAKD,SAAL,GAAiB,EAAjB;AACD;AACF;;AAQU,QAALM,KAAK,CAACM,QAAD,EAAuC;AAChD,QAAI,KAAKV,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AAED,SAAKP,MAAL,GAAc,IAAd;;AAEA,QAAI,KAAKF,SAAL,CAAeW,QAAf,GAA0B,CAA1B,IAA+B,KAAKX,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAAnE,EAAiF;AAE/E,WAAKD,SAAL,GAAiB,EAAjB;AACD;;AAED,UAAM,KAAKH,cAAL,CAAoBgB,WAApB,CAAgC,KAAKV,YAArC,CAAN;AACA,UAAM,KAAKN,cAAL,CAAoBS,KAApB,EAAN;;AAGA,QAAIM,QAAJ,EAAc;AACZA,MAAAA,QAAQ;AACT;AACF;;AAKDE,EAAAA,WAAW,CAACC,GAAD,EAAcC,KAAd,EAAmC;AAE5C,SAAKb,YAAL,CAAkBc,MAAM,CAACF,GAAD,CAAxB,IAAiCE,MAAM,CAACD,KAAD,CAAvC;AACD;;AAQDE,EAAAA,eAAe,CAACC,GAAD,EAAoB;AACjC,SAAKlB,YAAL,GAAoBkB,GAApB;AACD;;AAMDC,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKtB,cAAL,CAAoBuB,WAApB,CAAgCD,GAAhC;AACD;;AAxI2B;AAiJ9B,OAAO,MAAMrB,qBAAN,CAA4B;AAIV,eAAVF,UAAU,CACrBJ,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIW;AAChC,UAAM2B,OAAO,GAAG1C,OAAO,CAAC2C,IAAR,CAAaC,SAAb,EAAwB5B,YAAxB,CAAhB;AACA,UAAM6B,OAAO,GAAG5C,OAAO,CAAC0C,IAAR,CAAaC,SAAb,EAAwB5B,YAAxB,CAAhB;AACA,WAAO,IAAIG,qBAAJ,CAA0BN,MAA1B,EAAkC6B,OAAlC,EAA2CG,OAA3C,EAAoD,CAApD,EAAuD9B,IAAvD,CAAP;AACD;;AAWDK,EAAAA,WAAW,CACTP,MADS,EAET6B,OAFS,EAGTG,OAHS,EAITC,UAJS,EAKT/B,IALS,EAMT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKkC,KAAL,GAAaL,OAAb;AACA,SAAKf,KAAL,GAAakB,OAAb;AACA,SAAKG,MAAL,GAAcF,UAAd;AACA,SAAKd,QAAL,GAAgB,CAAhB;AACA,SAAKiB,SAAL,GAAiB,EAAjB;AACA,SAAKC,QAAL,GAAgBnC,IAAI,CAACmC,QAAL,IAAiB3C,yBAAjC;AACA,SAAK4C,aAAL,GAAqB,mBAAmBpC,IAAnB,GAA0BqC,OAAO,CAACrC,IAAI,CAACoC,aAAN,CAAjC,GAAwD,KAA7E;AACD;;AAEDE,EAAAA,YAAY,CAACC,GAAD,EAA6B;AACvC,SAAKN,MAAL,IAAeM,GAAG,CAACC,MAAnB;AACA,WAAO,KAAKR,KAAL,CAAWO,GAAX,CAAP;AACD;;AAKD7B,EAAAA,WAAW,GAAkB;AAC3B,WAAO,KAAK4B,YAAL,CAAkBG,MAAM,CAACC,IAAP,CAAYpD,aAAZ,CAAlB,CAAP;AACD;;AAMkB,QAAbqD,aAAa,CAACC,OAAD,EAAwC;AACzD,UAAMC,MAAM,GAAG,MAAMC,cAAc,CAAC,KAAKhD,MAAN,EAAc8C,OAAd,EAAuB;AACxDG,MAAAA,UAAU,EAAE,KAAKd,MADuC;AAExDE,MAAAA,QAAQ,EAAE,KAAKA,QAFyC;AAGxDC,MAAAA,aAAa,EAAE,KAAKA;AAHoC,KAAvB,CAAnC;AAMA,SAAKnB,QAAL,IAAiB2B,OAAO,CAAC3B,QAAzB;AACA,SAAKiB,SAAL,CAAec,IAAf,CAAoBH,MAAM,CAACI,QAA3B;AACA,WAAO,MAAM,KAAKX,YAAL,CAAkBO,MAAM,CAACK,IAAzB,CAAb;AACD;;AAKD/B,EAAAA,WAAW,CAACV,YAAD,EAAsD;AAC/D,QAAI,CAACA,YAAL,EAAmB;AAEjBA,MAAAA,YAAY,GAAG,EAAf;AACD;;AAED,WAAO,KAAK6B,YAAL,CACLa,YAAY,CAAC,KAAKrD,MAAN,EAAc,KAAKmB,QAAnB,EAA6B,KAAKiB,SAAlC,EAA6CzB,YAA7C,CADP,CAAP;AAGD;;AAMDiB,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKU,QAAL,GAAgBV,GAAhB;AACD;;AAxFgC;AA8FnC,OAAO,MAAM2B,kBAAN,SAAoCvF,SAApC,CAA8C;AAGnDwC,EAAAA,WAAW,CAACP,MAAD,EAAwBE,IAA0B,GAAG,EAArD,EAAyD;AAClE,UAAM;AAACqD,MAAAA,UAAU,EAAE;AAAb,KAAN;;AADkE;;AAGlE,UAAMC,UAAU,GAAI,UAAUC,CAAV,EAAsC;AACxD,aAAO,gBAAgBC,CAAhB,EAAuC;AAC5CD,QAAAA,CAAC,CAACP,IAAF,CAAOQ,CAAP;AACD,OAFD;AAGD,KAJkB,CAIhB,IAJgB,CAAnB;;AAMA,SAAKC,MAAL,GAAc,IAAI7D,aAAJ,CACZE,MADY,EAEZ,IAAIM,qBAAJ,CAA0BN,MAA1B,EAAkCwD,UAAlC,EAA8C,YAAY,CAAE,CAA5D,EAA8D,CAA9D,EAAiEtD,IAAjE,CAFY,EAGZA,IAHY,CAAd;AAKD;;AAGD0D,EAAAA,UAAU,CAAC5C,GAAD,EAAW6C,QAAX,EAA6BzC,QAA7B,EAA2E;AACnF,QAAIJ,GAAJ,EAAS;AACP,aAAO,KAAK2C,MAAL,CAAY5C,SAAZ,CAAsBC,GAAtB,EAA2B8C,IAA3B,CAAgC1C,QAAhC,CAAP;AACD;;AACDA,IAAAA,QAAQ;AACR,WAAO2C,OAAO,CAACC,OAAR,EAAP;AACD;;AAGW,QAANC,MAAM,CAAC7C,QAAD,EAAgC;AAC1C,UAAM,KAAKuC,MAAL,CAAY7C,KAAZ,CAAkBM,QAAlB,CAAN;AACD;;AA/BkD;;AAqCrD,SAAS8C,YAAT,CACEC,IADF,EAEEN,QAFF,EAGEO,MAHF,EAIElE,IAJF,EAKE;AACA,MAAI,EAAE2D,QAAQ,IAAI7F,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIiD,KAAJ,CAAW,qBAAoB4C,QAAS,EAAxC,CAAN;AACD;;AACD,SAAO7F,cAAc,CAAC6F,QAAD,CAAd,CAAyBK,YAAzB,CAAsCC,IAAtC,EAA4CC,MAA5C,EAAoDlE,IAApD,CAAP;AACD;;AAKD,eAAemE,cAAf,CACEC,MADF,EAEEC,IAFF,EAOG;AAED,MAAIC,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACI,OAAlD,EAA2D;AAClFC,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACI,SAAR;AAD6D,KAA3D,CAAzB;AAID;;AAED,MAAIG,UAAU,GAAGlC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGX,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACQ,SAAR;AAD6D,KAA3D,CAAzB;AAID;;AAGD,QAAME,SAAS,GAAGd,YAAY,CAACI,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFc,IAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,IAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,GAAvD,CAA9B;AAKA,QAAMC,OAAO,GAAGxC,MAAM,CAACyC,MAAP,CAAc,CAACZ,UAAD,EAAaK,UAAb,EAAyBG,SAAzB,CAAd,CAAhB;AAGA,QAAMK,aAAa,GAAG,MAAMpH,WAAW,CAACqH,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCJ,OAAzC,CAA5B;AAGA,QAAMK,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BsF,IAAAA,IAAI,EAAErF,QAAQ,CAAC2G,SADa;AAE5BC,IAAAA,gBAAgB,EAAE,IAAInH,cAAJ,CAAmB;AACnCoH,MAAAA,UAAU,EAAEpB,IAAI,CAACqB,KADkB;AAEnC/B,MAAAA,QAAQ,EAAEpF,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAFiB;AAGnCgC,MAAAA,yBAAyB,EAAEpH,QAAQ,CAACoB,sBAAD,CAHA;AAInCiG,MAAAA,yBAAyB,EAAErH,QAAQ,CAACoB,sBAAD;AAJA,KAAnB,CAFU;AAQ5BkG,IAAAA,sBAAsB,EAAEZ,OAAO,CAACzC,MARJ;AAS5BsD,IAAAA,oBAAoB,EAAEX,aAAa,CAAC3C;AATR,GAAf,CAAf;AAaA,QAAMuD,SAAS,GAAG3G,eAAe,CAACkG,MAAD,CAAjC;AACA,QAAMU,IAAI,GAAGvD,MAAM,CAACyC,MAAP,CAAc,CAACa,SAAD,EAAYZ,aAAZ,CAAd,CAAb;AAEA,SAAO;AAACG,IAAAA,MAAD;AAASW,IAAAA,UAAU,EAAEF,SAAS,CAACvD,MAA/B;AAAuCwD,IAAAA;AAAvC,GAAP;AACD;;AAKD,eAAeE,gBAAf,CACE9B,MADF,EAEEC,IAFF,EAGEpD,QAHF,EAQG;AAED,QAAM6D,SAAS,GAAGd,YAAY,CAACI,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFc,IAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,IAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,GAAvD,CAA9B;AAMA,QAAMG,aAAa,GAAG,MAAMpH,WAAW,CAACqH,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCP,SAAzC,CAA5B;AAGA,MAAIR,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACI,OAAlD,EAA2D;AAClFC,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACI,SAAR,CAD6D;AAElF2B,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAED,MAAIxB,UAAU,GAAGlC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGX,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACQ,SAAR,CAD6D;AAElFuB,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAGD,QAAMb,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BsF,IAAAA,IAAI,EAAErF,QAAQ,CAACwH,YADa;AAE5BC,IAAAA,mBAAmB,EAAE,IAAI/H,gBAAJ,CAAqB;AACxCmH,MAAAA,UAAU,EAAEpB,IAAI,CAACqB,KADuB;AAExCY,MAAAA,SAAS,EAAEjC,IAAI,CAACqB,KAAL,GAAarB,IAAI,CAACH,MAAL,CAAY1B,MAFI;AAGxC+D,MAAAA,QAAQ,EAAEtF,QAH8B;AAIxC0C,MAAAA,QAAQ,EAAEpF,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAJsB;AAKxC6C,MAAAA,6BAA6B,EAAE7B,UAAU,CAACnC,MALF;AAMxCiE,MAAAA,6BAA6B,EAAEnC,UAAU,CAAC9B,MANF;AAOxCkE,MAAAA,aAAa,EAAEtC,MAAM,CAACiB,WAAP,KAAuB;AAPE,KAArB,CAFO;AAW5BQ,IAAAA,sBAAsB,EAAEvB,UAAU,CAAC9B,MAAX,GAAoBmC,UAAU,CAACnC,MAA/B,GAAwCsC,SAAS,CAACtC,MAX9C;AAY5BsD,IAAAA,oBAAoB,EAAExB,UAAU,CAAC9B,MAAX,GAAoBmC,UAAU,CAACnC,MAA/B,GAAwC2C,aAAa,CAAC3C;AAZhD,GAAf,CAAf;AAgBA,QAAMuD,SAAS,GAAG3G,eAAe,CAACkG,MAAD,CAAjC;AACA,QAAMU,IAAI,GAAGvD,MAAM,CAACyC,MAAP,CAAc,CAACa,SAAD,EAAYzB,UAAZ,EAAwBK,UAAxB,EAAoCQ,aAApC,CAAd,CAAb;AACA,SAAO;AAACG,IAAAA,MAAD;AAASW,IAAAA,UAAU,EAAEF,SAAS,CAACvD,MAA/B;AAAuCwD,IAAAA;AAAvC,GAAP;AACD;;AAKD,eAAeW,iBAAf,CACEvC,MADF,EAEEwC,MAFF,EAGE3E,MAHF,EAIEjC,IAJF,EASG;AACD,QAAMqE,IAAI,GAAGuC,MAAM,CAACC,UAAP,CAAkBzC,MAAM,CAACrE,IAAP,CAAY+G,IAAZ,EAAlB,CAAb;AACA,QAAM/D,UAAU,GAAG,CAAC/C,IAAI,CAAC+C,UAAL,IAAmB,CAApB,IAAyBd,MAA5C;AAGA,MAAI8E,OAAJ;AAEA,MAAIC,uBAAuB,GAAG,CAA9B;AAEA,MAAIC,qBAAqB,GAAG,CAA5B;AACA;AACE,UAAMC,MAAM,GAAGlH,IAAI,CAACoC,aAAL,GACX,MAAM8D,gBAAgB,CAAC9B,MAAD,EAASC,IAAT,EAAeuC,MAAM,CAAC3F,QAAtB,CADX,GAEX,MAAMkD,cAAc,CAACC,MAAD,EAASC,IAAT,CAFxB;AAIA0C,IAAAA,OAAO,GAAGG,MAAM,CAAClB,IAAjB;AACAgB,IAAAA,uBAAuB,IAAIE,MAAM,CAAC5B,MAAP,CAAcO,sBAAd,GAAuCqB,MAAM,CAACjB,UAAzE;AACAgB,IAAAA,qBAAqB,IAAIC,MAAM,CAAC5B,MAAP,CAAcQ,oBAAd,GAAqCoB,MAAM,CAACjB,UAArE;AACD;AAMD,QAAMhD,QAAQ,GAAG,IAAI/E,cAAJ,CAAmB;AAClCiJ,IAAAA,cAAc,EAAE/C,MAAM,CAACrE,IADW;AAElC0F,IAAAA,UAAU,EAAEpB,IAAI,CAACqB,KAFiB;AAGlC0B,IAAAA,gBAAgB,EAAErE,UAHgB;AAIlCsE,IAAAA,SAAS,EAAE,EAJuB;AAKlCL,IAAAA,uBALkC;AAMlCC,IAAAA,qBANkC;AAOlChD,IAAAA,IAAI,EAAElF,IAAI,CAACqF,MAAM,CAACW,aAAR,CAPwB;AAQlCuC,IAAAA,KAAK,EAAEnJ,gBAAgB,CAACiG,MAAM,CAACiB,WAAR;AARW,GAAnB,CAAjB;AAYApC,EAAAA,QAAQ,CAACoE,SAAT,CAAmBrE,IAAnB,CAAwBzE,QAAQ,CAACoB,sBAAD,CAAhC;AACAsD,EAAAA,QAAQ,CAACoE,SAAT,CAAmBrE,IAAnB,CAAwBzE,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAAhC;AAGA,QAAM4D,cAAc,GAAGxE,UAAU,GAAGgE,OAAO,CAACvE,MAA5C;AACA,QAAMU,IAAI,GAAGT,MAAM,CAACyC,MAAP,CAAc,CAAC6B,OAAD,EAAU3H,eAAe,CAAC6D,QAAD,CAAzB,CAAd,CAAb;AACA,SAAO;AAACC,IAAAA,IAAD;AAAOD,IAAAA,QAAP;AAAiBsE,IAAAA;AAAjB,GAAP;AACD;;AAKD,eAAezE,cAAf,CACEhD,MADF,EAEEuE,IAFF,EAGErE,IAHF,EAOG;AACD,QAAMiD,QAAQ,GAAG,IAAIpE,QAAJ,CAAa;AAC5B0H,IAAAA,QAAQ,EAAElC,IAAI,CAACpD,QADa;AAE5BuG,IAAAA,OAAO,EAAE,EAFmB;AAG5BC,IAAAA,eAAe,EAAE;AAHW,GAAb,CAAjB;AAMA,MAAIvE,IAAI,GAAGT,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAX;;AACA,OAAK,MAAMmD,KAAX,IAAoB5H,MAAM,CAAC6H,SAA3B,EAAsC;AACpC,QAAID,KAAK,CAACE,QAAV,EAAoB;AAClB;AACD;;AAED,UAAMC,UAAU,GAAG,MAAMlB,iBAAiB,CAACe,KAAD,EAAQrD,IAAR,EAAcnB,IAAI,CAACV,MAAnB,EAA2BxC,IAA3B,CAA1C;AAEA,UAAM8H,MAAM,GAAG,IAAI7J,WAAJ,CAAgB;AAC7B8J,MAAAA,WAAW,EAAEF,UAAU,CAACN,cADK;AAE7BS,MAAAA,SAAS,EAAEH,UAAU,CAAC5E;AAFO,KAAhB,CAAf;AAKAA,IAAAA,QAAQ,CAACuE,OAAT,CAAiBxE,IAAjB,CAAsB8E,MAAtB;AACA7E,IAAAA,QAAQ,CAACwE,eAAT,GAA2B,IAAIpI,KAAJ,CAAU4I,MAAM,CAAChF,QAAQ,CAACwE,eAAV,CAAN,GAAmCI,UAAU,CAAC3E,IAAX,CAAgBV,MAA7D,CAA3B;AAEAU,IAAAA,IAAI,GAAGT,MAAM,CAACyC,MAAP,CAAc,CAAChC,IAAD,EAAO2E,UAAU,CAAC3E,IAAlB,CAAd,CAAP;AACD;;AAED,SAAO;AAACA,IAAAA,IAAD;AAAOD,IAAAA;AAAP,GAAP;AACD;;AAKD,SAASE,YAAT,CACErD,MADF,EAEEmB,QAFF,EAGEiB,SAHF,EAIEzB,YAJF,EAKU;AACR,QAAMwC,QAAQ,GAAG,IAAIxE,YAAJ,CAAiB;AAChCyJ,IAAAA,OAAO,EAAE3I,eADuB;AAEhC4I,IAAAA,UAAU,EAAE,UAFoB;AAGhC5B,IAAAA,QAAQ,EAAEtF,QAHsB;AAIhCmH,IAAAA,UAAU,EAAElG,SAJoB;AAKhCpC,IAAAA,MAAM,EAAE,EALwB;AAMhCuI,IAAAA,kBAAkB,EAAE;AANY,GAAjB,CAAjB;;AASA,OAAK,MAAMhH,GAAX,IAAkBZ,YAAlB,EAAgC;AAAA;;AAC9B,UAAM6H,EAAE,GAAG,IAAI5J,QAAJ,CAAa;AACtB2C,MAAAA,GADsB;AAEtBC,MAAAA,KAAK,EAAEb,YAAY,CAACY,GAAD;AAFG,KAAb,CAAX;AAIA,6BAAA4B,QAAQ,CAACoF,kBAAT,0GAA6BrF,IAA7B,8GAAoCsF,EAApC;AACD;;AAED;AACE,UAAMC,UAAU,GAAG,IAAIzJ,aAAJ,CAAkB;AACnC0J,MAAAA,IAAI,EAAE,MAD6B;AAEnCC,MAAAA,YAAY,EAAEC,MAAM,CAACC,IAAP,CAAY7I,MAAM,CAAC8I,MAAnB,EAA2BpG;AAFN,KAAlB,CAAnB;AAIAS,IAAAA,QAAQ,CAACnD,MAAT,CAAgBkD,IAAhB,CAAqBuF,UAArB;AACD;;AAED,OAAK,MAAMb,KAAX,IAAoB5H,MAAM,CAAC6H,SAA3B,EAAsC;AACpC,UAAMkB,IAAI,GAAGrK,mBAAmB,CAACkJ,KAAK,CAACoB,cAAP,CAAhC;AACA,UAAMC,UAAU,GAAG,IAAIjK,aAAJ,CAAkB;AACnC0J,MAAAA,IAAI,EAAEd,KAAK,CAACc,IADuB;AAEnCQ,MAAAA,eAAe,EAAEH;AAFkB,KAAlB,CAAnB;;AAKA,QAAInB,KAAK,CAACE,QAAV,EAAoB;AAClBmB,MAAAA,UAAU,CAACN,YAAX,GAA0Bf,KAAK,CAACuB,UAAhC;AACD,KAFD,MAEO;AACLF,MAAAA,UAAU,CAAC9E,IAAX,GAAkBlF,IAAI,CAAC2I,KAAK,CAAC3C,aAAP,CAAtB;AACD;;AAED,QAAI2C,KAAK,CAACwB,YAAV,EAAwB;AACtBH,MAAAA,UAAU,CAACI,cAAX,GAA4B/K,aAAa,CAACsJ,KAAK,CAACwB,YAAP,CAAzC;AACD;;AAEDH,IAAAA,UAAU,CAACK,WAAX,GAAyB1B,KAAK,CAAC1C,UAA/B;AAEA/B,IAAAA,QAAQ,CAACnD,MAAT,CAAgBkD,IAAhB,CAAqB+F,UAArB;AACD;;AAED,QAAMM,eAAe,GAAGjK,eAAe,CAAC6D,QAAD,CAAvC;AACA,QAAMqG,aAAa,GAAG7G,MAAM,CAAC8B,KAAP,CAAa8E,eAAe,CAAC7G,MAAhB,GAAyB,CAAtC,CAAtB;AACA6G,EAAAA,eAAe,CAACE,IAAhB,CAAqBD,aAArB;AACAA,EAAAA,aAAa,CAACE,aAAd,CAA4BH,eAAe,CAAC7G,MAA5C,EAAoD6G,eAAe,CAAC7G,MAApE;AACA8G,EAAAA,aAAa,CAACtH,KAAd,CAAoB1C,aAApB,EAAmC+J,eAAe,CAAC7G,MAAhB,GAAyB,CAA5D;AACA,SAAO8G,aAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {Transform, Writable} from 'stream';\nimport {ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport * as Compression from '../compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetData,\n ParquetField,\n PrimitiveType\n} from '../schema/declare';\nimport {ParquetSchema} from '../schema/schema';\nimport * as Shred from '../schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {osopen, oswrite, osclose} from '../utils/file-utils';\nimport {getBitWidth, serializeThrift} from '../utils/read-utils';\nimport Int64 from 'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetWriterOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. The ParquetWriter will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetWriter<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n const outputStream = await osopen(path, opts);\n return ParquetWriter.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: Writable,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n if (!opts) {\n // tslint:disable-next-line:no-parameter-reassignment\n opts = {};\n }\n\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n\n return new ParquetWriter(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetBuffer;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n this.writeHeader();\n }\n\n async writeHeader(): Promise<void> {\n // TODO - better not mess with promises in the constructor\n try {\n await this.envelopeWriter.writeHeader();\n } catch (err) {\n await this.envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: Writable,\n opts: ParquetWriterOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = oswrite.bind(undefined, outputStream);\n const closeFn = osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetBuffer): Promise<void> {\n const rgroup = await encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return await this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\n */\nexport class ParquetTransformer<T> extends Transform {\n public writer: ParquetWriter<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetWriterOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetWriter(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nasync function encodeDataPage(\n column: ParquetField,\n data: ParquetData\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nasync function encodeDataPageV2(\n column: ParquetField,\n data: ParquetData,\n rowCount: number\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nasync function encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetBuffer,\n offset: number,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n}> {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n const result = opts.useDataPageV2\n ? await encodeDataPageV2(column, data, buffer.rowCount)\n : await encodeDataPage(column, data);\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nasync function encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetBuffer,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: RowGroup;\n}> {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = await encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"file":"writer.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/encoder/writer.ts"],"names":["Transform","PARQUET_CODECS","Compression","Shred","ColumnChunk","ColumnMetaData","CompressionCodec","ConvertedType","DataPageHeader","DataPageHeaderV2","Encoding","FieldRepetitionType","FileMetaData","KeyValue","PageHeader","PageType","RowGroup","SchemaElement","Type","osopen","oswrite","osclose","getBitWidth","serializeThrift","Int64","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetWriter","openFile","schema","path","opts","outputStream","openStream","envelopeWriter","ParquetEnvelopeWriter","constructor","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","err","close","appendRow","row","Error","shredRecord","rowCount","callback","writeFooter","setMetadata","key","value","String","setRowGroupSize","cnt","setPageSize","writeFn","bind","undefined","closeFn","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","writeSection","buf","length","Buffer","from","writeRowGroup","records","rgroup","encodeRowGroup","baseOffset","push","metadata","body","encodeFooter","ParquetTransformer","objectMode","writeProxy","t","b","writer","_transform","encoding","then","Promise","resolve","_flush","encodeValues","type","values","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","concat","compressedBuf","deflate","compression","header","DATA_PAGE","data_page_header","num_values","count","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","pageBuf","total_uncompressed_size","total_compressed_size","result","path_in_schema","data_page_offset","encodings","codec","metadataOffset","columns","total_byte_size","field","fieldList","isNested","cchunkData","cchunk","file_offset","meta_data","Number","version","created_by","row_groups","key_value_metadata","kv","schemaRoot","name","num_children","Object","keys","fields","relt","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"mappings":";AAEA,SAAQA,SAAR,QAAkC,QAAlC;AACA,SAA6BC,cAA7B,QAAkD,WAAlD;AACA,OAAO,KAAKC,WAAZ,MAA6B,gBAA7B;AASA,OAAO,KAAKC,KAAZ,MAAuB,iBAAvB;AACA,SACEC,WADF,EAEEC,cAFF,EAGEC,gBAHF,EAIEC,aAJF,EAKEC,cALF,EAMEC,gBANF,EAOEC,QAPF,EAQEC,mBARF,EASEC,YATF,EAUEC,QAVF,EAWEC,UAXF,EAYEC,QAZF,EAaEC,QAbF,EAcEC,aAdF,EAeEC,IAfF,QAgBO,mBAhBP;AAiBA,SAAQC,MAAR,EAAgBC,OAAhB,EAAyBC,OAAzB,QAAuC,qBAAvC;AACA,SAAQC,WAAR,EAAqBC,eAArB,QAA2C,qBAA3C;AACA,OAAOC,KAAP,MAAkB,YAAlB;AAKA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,yBAAyB,GAAG,IAAlC;AACA,MAAMC,8BAA8B,GAAG,IAAvC;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;AAuBA,OAAO,MAAMC,aAAN,CAAuB;AAKP,eAARC,QAAQ,CACnBC,MADmB,EAEnBC,IAFmB,EAGnBC,IAHmB,EAIQ;AAC3B,UAAMC,YAAY,GAAG,MAAMjB,MAAM,CAACe,IAAD,EAAOC,IAAP,CAAjC;AACA,WAAOJ,aAAa,CAACM,UAAd,CAAyBJ,MAAzB,EAAiCG,YAAjC,EAA+CD,IAA/C,CAAP;AACD;;AAMsB,eAAVE,UAAU,CACrBJ,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIM;AAC3B,QAAI,CAACA,IAAL,EAAW;AAETA,MAAAA,IAAI,GAAG,EAAP;AACD;;AAED,UAAMG,cAAc,GAAG,MAAMC,qBAAqB,CAACF,UAAtB,CAAiCJ,MAAjC,EAAyCG,YAAzC,EAAuDD,IAAvD,CAA7B;AAEA,WAAO,IAAIJ,aAAJ,CAAkBE,MAAlB,EAA0BK,cAA1B,EAA0CH,IAA1C,CAAP;AACD;;AAYDK,EAAAA,WAAW,CACTP,MADS,EAETK,cAFS,EAGTH,IAHS,EAIT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKK,cAAL,GAAsBA,cAAtB;AAEA,SAAKG,SAAL,GAAiB,EAAjB;AACA,SAAKC,YAAL,GAAoBP,IAAI,CAACO,YAAL,IAAqBd,8BAAzC;AACA,SAAKe,MAAL,GAAc,KAAd;AACA,SAAKC,YAAL,GAAoB,EAApB;AAGA,SAAKC,WAAL;AACD;;AAEgB,QAAXA,WAAW,GAAkB;AAEjC,QAAI;AACF,YAAM,KAAKP,cAAL,CAAoBO,WAApB,EAAN;AACD,KAFD,CAEE,OAAOC,GAAP,EAAY;AACZ,YAAM,KAAKR,cAAL,CAAoBS,KAApB,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAMc,QAATE,SAAS,CAAIC,GAAJ,EAA2B;AACxC,QAAI,KAAKN,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AACD/C,IAAAA,KAAK,CAACgD,WAAN,CAAkB,KAAKlB,MAAvB,EAA+BgB,GAA/B,EAAoC,KAAKR,SAAzC;;AACA,QAAI,KAAKA,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAApC,EAAkD;AAEhD,WAAKD,SAAL,GAAiB,EAAjB;AACD;AACF;;AAQU,QAALM,KAAK,CAACM,QAAD,EAAuC;AAChD,QAAI,KAAKV,MAAT,EAAiB;AACf,YAAM,IAAIO,KAAJ,CAAU,mBAAV,CAAN;AACD;;AAED,SAAKP,MAAL,GAAc,IAAd;;AAEA,QAAI,KAAKF,SAAL,CAAeW,QAAf,GAA0B,CAA1B,IAA+B,KAAKX,SAAL,CAAeW,QAAf,IAA2B,KAAKV,YAAnE,EAAiF;AAE/E,WAAKD,SAAL,GAAiB,EAAjB;AACD;;AAED,UAAM,KAAKH,cAAL,CAAoBgB,WAApB,CAAgC,KAAKV,YAArC,CAAN;AACA,UAAM,KAAKN,cAAL,CAAoBS,KAApB,EAAN;;AAGA,QAAIM,QAAJ,EAAc;AACZA,MAAAA,QAAQ;AACT;AACF;;AAKDE,EAAAA,WAAW,CAACC,GAAD,EAAcC,KAAd,EAAmC;AAE5C,SAAKb,YAAL,CAAkBc,MAAM,CAACF,GAAD,CAAxB,IAAiCE,MAAM,CAACD,KAAD,CAAvC;AACD;;AAQDE,EAAAA,eAAe,CAACC,GAAD,EAAoB;AACjC,SAAKlB,YAAL,GAAoBkB,GAApB;AACD;;AAMDC,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKtB,cAAL,CAAoBuB,WAApB,CAAgCD,GAAhC;AACD;;AAxI2B;AAiJ9B,OAAO,MAAMrB,qBAAN,CAA4B;AAIV,eAAVF,UAAU,CACrBJ,MADqB,EAErBG,YAFqB,EAGrBD,IAHqB,EAIW;AAChC,UAAM2B,OAAO,GAAG1C,OAAO,CAAC2C,IAAR,CAAaC,SAAb,EAAwB5B,YAAxB,CAAhB;AACA,UAAM6B,OAAO,GAAG5C,OAAO,CAAC0C,IAAR,CAAaC,SAAb,EAAwB5B,YAAxB,CAAhB;AACA,WAAO,IAAIG,qBAAJ,CAA0BN,MAA1B,EAAkC6B,OAAlC,EAA2CG,OAA3C,EAAoD,CAApD,EAAuD9B,IAAvD,CAAP;AACD;;AAWDK,EAAAA,WAAW,CACTP,MADS,EAET6B,OAFS,EAGTG,OAHS,EAITC,UAJS,EAKT/B,IALS,EAMT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKkC,KAAL,GAAaL,OAAb;AACA,SAAKf,KAAL,GAAakB,OAAb;AACA,SAAKG,MAAL,GAAcF,UAAd;AACA,SAAKd,QAAL,GAAgB,CAAhB;AACA,SAAKiB,SAAL,GAAiB,EAAjB;AACA,SAAKC,QAAL,GAAgBnC,IAAI,CAACmC,QAAL,IAAiB3C,yBAAjC;AACA,SAAK4C,aAAL,GAAqB,mBAAmBpC,IAAnB,GAA0BqC,OAAO,CAACrC,IAAI,CAACoC,aAAN,CAAjC,GAAwD,KAA7E;AACD;;AAEDE,EAAAA,YAAY,CAACC,GAAD,EAA6B;AACvC,SAAKN,MAAL,IAAeM,GAAG,CAACC,MAAnB;AACA,WAAO,KAAKR,KAAL,CAAWO,GAAX,CAAP;AACD;;AAKD7B,EAAAA,WAAW,GAAkB;AAC3B,WAAO,KAAK4B,YAAL,CAAkBG,MAAM,CAACC,IAAP,CAAYpD,aAAZ,CAAlB,CAAP;AACD;;AAMkB,QAAbqD,aAAa,CAACC,OAAD,EAAwC;AACzD,UAAMC,MAAM,GAAG,MAAMC,cAAc,CAAC,KAAKhD,MAAN,EAAc8C,OAAd,EAAuB;AACxDG,MAAAA,UAAU,EAAE,KAAKd,MADuC;AAExDE,MAAAA,QAAQ,EAAE,KAAKA,QAFyC;AAGxDC,MAAAA,aAAa,EAAE,KAAKA;AAHoC,KAAvB,CAAnC;AAMA,SAAKnB,QAAL,IAAiB2B,OAAO,CAAC3B,QAAzB;AACA,SAAKiB,SAAL,CAAec,IAAf,CAAoBH,MAAM,CAACI,QAA3B;AACA,WAAO,MAAM,KAAKX,YAAL,CAAkBO,MAAM,CAACK,IAAzB,CAAb;AACD;;AAKD/B,EAAAA,WAAW,CAACV,YAAD,EAAsD;AAC/D,QAAI,CAACA,YAAL,EAAmB;AAEjBA,MAAAA,YAAY,GAAG,EAAf;AACD;;AAED,WAAO,KAAK6B,YAAL,CACLa,YAAY,CAAC,KAAKrD,MAAN,EAAc,KAAKmB,QAAnB,EAA6B,KAAKiB,SAAlC,EAA6CzB,YAA7C,CADP,CAAP;AAGD;;AAMDiB,EAAAA,WAAW,CAACD,GAAD,EAAoB;AAC7B,SAAKU,QAAL,GAAgBV,GAAhB;AACD;;AAxFgC;AA8FnC,OAAO,MAAM2B,kBAAN,SAAoCvF,SAApC,CAA8C;AAGnDwC,EAAAA,WAAW,CAACP,MAAD,EAAwBE,IAA0B,GAAG,EAArD,EAAyD;AAClE,UAAM;AAACqD,MAAAA,UAAU,EAAE;AAAb,KAAN;;AADkE;;AAGlE,UAAMC,UAAU,GAAI,UAAUC,CAAV,EAAsC;AACxD,aAAO,gBAAgBC,CAAhB,EAAuC;AAC5CD,QAAAA,CAAC,CAACP,IAAF,CAAOQ,CAAP;AACD,OAFD;AAGD,KAJkB,CAIhB,IAJgB,CAAnB;;AAMA,SAAKC,MAAL,GAAc,IAAI7D,aAAJ,CACZE,MADY,EAEZ,IAAIM,qBAAJ,CAA0BN,MAA1B,EAAkCwD,UAAlC,EAA8C,YAAY,CAAE,CAA5D,EAA8D,CAA9D,EAAiEtD,IAAjE,CAFY,EAGZA,IAHY,CAAd;AAKD;;AAGD0D,EAAAA,UAAU,CAAC5C,GAAD,EAAW6C,QAAX,EAA6BzC,QAA7B,EAA2E;AACnF,QAAIJ,GAAJ,EAAS;AACP,aAAO,KAAK2C,MAAL,CAAY5C,SAAZ,CAAsBC,GAAtB,EAA2B8C,IAA3B,CAAgC1C,QAAhC,CAAP;AACD;;AACDA,IAAAA,QAAQ;AACR,WAAO2C,OAAO,CAACC,OAAR,EAAP;AACD;;AAGW,QAANC,MAAM,CAAC7C,QAAD,EAAgC;AAC1C,UAAM,KAAKuC,MAAL,CAAY7C,KAAZ,CAAkBM,QAAlB,CAAN;AACD;;AA/BkD;;AAqCrD,SAAS8C,YAAT,CACEC,IADF,EAEEN,QAFF,EAGEO,MAHF,EAIElE,IAJF,EAKE;AACA,MAAI,EAAE2D,QAAQ,IAAI7F,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIiD,KAAJ,6BAA+B4C,QAA/B,EAAN;AACD;;AACD,SAAO7F,cAAc,CAAC6F,QAAD,CAAd,CAAyBK,YAAzB,CAAsCC,IAAtC,EAA4CC,MAA5C,EAAoDlE,IAApD,CAAP;AACD;;AAKD,eAAemE,cAAf,CACEC,MADF,EAEEC,IAFF,EAOG;AAED,MAAIC,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACI,OAAlD,EAA2D;AAClFC,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACI,SAAR;AAD6D,KAA3D,CAAzB;AAID;;AAED,MAAIG,UAAU,GAAGlC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGX,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACQ,SAAR;AAD6D,KAA3D,CAAzB;AAID;;AAGD,QAAME,SAAS,GAAGd,YAAY,CAACI,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFc,IAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,IAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,GAAvD,CAA9B;AAKA,QAAMC,OAAO,GAAGxC,MAAM,CAACyC,MAAP,CAAc,CAACZ,UAAD,EAAaK,UAAb,EAAyBG,SAAzB,CAAd,CAAhB;AAGA,QAAMK,aAAa,GAAG,MAAMpH,WAAW,CAACqH,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCJ,OAAzC,CAA5B;AAGA,QAAMK,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BsF,IAAAA,IAAI,EAAErF,QAAQ,CAAC2G,SADa;AAE5BC,IAAAA,gBAAgB,EAAE,IAAInH,cAAJ,CAAmB;AACnCoH,MAAAA,UAAU,EAAEpB,IAAI,CAACqB,KADkB;AAEnC/B,MAAAA,QAAQ,EAAEpF,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAFiB;AAGnCgC,MAAAA,yBAAyB,EAAEpH,QAAQ,CAACoB,sBAAD,CAHA;AAInCiG,MAAAA,yBAAyB,EAAErH,QAAQ,CAACoB,sBAAD;AAJA,KAAnB,CAFU;AAQ5BkG,IAAAA,sBAAsB,EAAEZ,OAAO,CAACzC,MARJ;AAS5BsD,IAAAA,oBAAoB,EAAEX,aAAa,CAAC3C;AATR,GAAf,CAAf;AAaA,QAAMuD,SAAS,GAAG3G,eAAe,CAACkG,MAAD,CAAjC;AACA,QAAMU,IAAI,GAAGvD,MAAM,CAACyC,MAAP,CAAc,CAACa,SAAD,EAAYZ,aAAZ,CAAd,CAAb;AAEA,SAAO;AAACG,IAAAA,MAAD;AAASW,IAAAA,UAAU,EAAEF,SAAS,CAACvD,MAA/B;AAAuCwD,IAAAA;AAAvC,GAAP;AACD;;AAKD,eAAeE,gBAAf,CACE9B,MADF,EAEEC,IAFF,EAGEpD,QAHF,EAQG;AAED,QAAM6D,SAAS,GAAGd,YAAY,CAACI,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACT,QAA/B,EAA0CU,IAAI,CAACH,MAA/C,EAAuD;AACnFc,IAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,IAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,GAAvD,CAA9B;AAMA,QAAMG,aAAa,GAAG,MAAMpH,WAAW,CAACqH,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCP,SAAzC,CAA5B;AAGA,MAAIR,UAAU,GAAG7B,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,IAAAA,UAAU,GAAGN,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACI,OAAlD,EAA2D;AAClFC,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACI,SAAR,CAD6D;AAElF2B,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAED,MAAIxB,UAAU,GAAGlC,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAjB;;AACA,MAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,UAAU,GAAGX,YAAY,CAACtE,kBAAD,EAAqBC,sBAArB,EAA6C0E,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,MAAAA,QAAQ,EAAEvF,WAAW,CAACiF,MAAM,CAACQ,SAAR,CAD6D;AAElFuB,MAAAA,eAAe,EAAE;AAFiE,KAA3D,CAAzB;AAID;;AAGD,QAAMb,MAAM,GAAG,IAAI3G,UAAJ,CAAe;AAC5BsF,IAAAA,IAAI,EAAErF,QAAQ,CAACwH,YADa;AAE5BC,IAAAA,mBAAmB,EAAE,IAAI/H,gBAAJ,CAAqB;AACxCmH,MAAAA,UAAU,EAAEpB,IAAI,CAACqB,KADuB;AAExCY,MAAAA,SAAS,EAAEjC,IAAI,CAACqB,KAAL,GAAarB,IAAI,CAACH,MAAL,CAAY1B,MAFI;AAGxC+D,MAAAA,QAAQ,EAAEtF,QAH8B;AAIxC0C,MAAAA,QAAQ,EAAEpF,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAJsB;AAKxC6C,MAAAA,6BAA6B,EAAE7B,UAAU,CAACnC,MALF;AAMxCiE,MAAAA,6BAA6B,EAAEnC,UAAU,CAAC9B,MANF;AAOxCkE,MAAAA,aAAa,EAAEtC,MAAM,CAACiB,WAAP,KAAuB;AAPE,KAArB,CAFO;AAW5BQ,IAAAA,sBAAsB,EAAEvB,UAAU,CAAC9B,MAAX,GAAoBmC,UAAU,CAACnC,MAA/B,GAAwCsC,SAAS,CAACtC,MAX9C;AAY5BsD,IAAAA,oBAAoB,EAAExB,UAAU,CAAC9B,MAAX,GAAoBmC,UAAU,CAACnC,MAA/B,GAAwC2C,aAAa,CAAC3C;AAZhD,GAAf,CAAf;AAgBA,QAAMuD,SAAS,GAAG3G,eAAe,CAACkG,MAAD,CAAjC;AACA,QAAMU,IAAI,GAAGvD,MAAM,CAACyC,MAAP,CAAc,CAACa,SAAD,EAAYzB,UAAZ,EAAwBK,UAAxB,EAAoCQ,aAApC,CAAd,CAAb;AACA,SAAO;AAACG,IAAAA,MAAD;AAASW,IAAAA,UAAU,EAAEF,SAAS,CAACvD,MAA/B;AAAuCwD,IAAAA;AAAvC,GAAP;AACD;;AAKD,eAAeW,iBAAf,CACEvC,MADF,EAEEwC,MAFF,EAGE3E,MAHF,EAIEjC,IAJF,EASG;AACD,QAAMqE,IAAI,GAAGuC,MAAM,CAACC,UAAP,CAAkBzC,MAAM,CAACrE,IAAP,CAAY+G,IAAZ,EAAlB,CAAb;AACA,QAAM/D,UAAU,GAAG,CAAC/C,IAAI,CAAC+C,UAAL,IAAmB,CAApB,IAAyBd,MAA5C;AAGA,MAAI8E,OAAJ;AAEA,MAAIC,uBAAuB,GAAG,CAA9B;AAEA,MAAIC,qBAAqB,GAAG,CAA5B;AACA;AACE,UAAMC,MAAM,GAAGlH,IAAI,CAACoC,aAAL,GACX,MAAM8D,gBAAgB,CAAC9B,MAAD,EAASC,IAAT,EAAeuC,MAAM,CAAC3F,QAAtB,CADX,GAEX,MAAMkD,cAAc,CAACC,MAAD,EAASC,IAAT,CAFxB;AAIA0C,IAAAA,OAAO,GAAGG,MAAM,CAAClB,IAAjB;AACAgB,IAAAA,uBAAuB,IAAIE,MAAM,CAAC5B,MAAP,CAAcO,sBAAd,GAAuCqB,MAAM,CAACjB,UAAzE;AACAgB,IAAAA,qBAAqB,IAAIC,MAAM,CAAC5B,MAAP,CAAcQ,oBAAd,GAAqCoB,MAAM,CAACjB,UAArE;AACD;AAMD,QAAMhD,QAAQ,GAAG,IAAI/E,cAAJ,CAAmB;AAClCiJ,IAAAA,cAAc,EAAE/C,MAAM,CAACrE,IADW;AAElC0F,IAAAA,UAAU,EAAEpB,IAAI,CAACqB,KAFiB;AAGlC0B,IAAAA,gBAAgB,EAAErE,UAHgB;AAIlCsE,IAAAA,SAAS,EAAE,EAJuB;AAKlCL,IAAAA,uBALkC;AAMlCC,IAAAA,qBANkC;AAOlChD,IAAAA,IAAI,EAAElF,IAAI,CAACqF,MAAM,CAACW,aAAR,CAPwB;AAQlCuC,IAAAA,KAAK,EAAEnJ,gBAAgB,CAACiG,MAAM,CAACiB,WAAR;AARW,GAAnB,CAAjB;AAYApC,EAAAA,QAAQ,CAACoE,SAAT,CAAmBrE,IAAnB,CAAwBzE,QAAQ,CAACoB,sBAAD,CAAhC;AACAsD,EAAAA,QAAQ,CAACoE,SAAT,CAAmBrE,IAAnB,CAAwBzE,QAAQ,CAAC6F,MAAM,CAACT,QAAR,CAAhC;AAGA,QAAM4D,cAAc,GAAGxE,UAAU,GAAGgE,OAAO,CAACvE,MAA5C;AACA,QAAMU,IAAI,GAAGT,MAAM,CAACyC,MAAP,CAAc,CAAC6B,OAAD,EAAU3H,eAAe,CAAC6D,QAAD,CAAzB,CAAd,CAAb;AACA,SAAO;AAACC,IAAAA,IAAD;AAAOD,IAAAA,QAAP;AAAiBsE,IAAAA;AAAjB,GAAP;AACD;;AAKD,eAAezE,cAAf,CACEhD,MADF,EAEEuE,IAFF,EAGErE,IAHF,EAOG;AACD,QAAMiD,QAAQ,GAAG,IAAIpE,QAAJ,CAAa;AAC5B0H,IAAAA,QAAQ,EAAElC,IAAI,CAACpD,QADa;AAE5BuG,IAAAA,OAAO,EAAE,EAFmB;AAG5BC,IAAAA,eAAe,EAAE;AAHW,GAAb,CAAjB;AAMA,MAAIvE,IAAI,GAAGT,MAAM,CAAC8B,KAAP,CAAa,CAAb,CAAX;;AACA,OAAK,MAAMmD,KAAX,IAAoB5H,MAAM,CAAC6H,SAA3B,EAAsC;AACpC,QAAID,KAAK,CAACE,QAAV,EAAoB;AAClB;AACD;;AAED,UAAMC,UAAU,GAAG,MAAMlB,iBAAiB,CAACe,KAAD,EAAQrD,IAAR,EAAcnB,IAAI,CAACV,MAAnB,EAA2BxC,IAA3B,CAA1C;AAEA,UAAM8H,MAAM,GAAG,IAAI7J,WAAJ,CAAgB;AAC7B8J,MAAAA,WAAW,EAAEF,UAAU,CAACN,cADK;AAE7BS,MAAAA,SAAS,EAAEH,UAAU,CAAC5E;AAFO,KAAhB,CAAf;AAKAA,IAAAA,QAAQ,CAACuE,OAAT,CAAiBxE,IAAjB,CAAsB8E,MAAtB;AACA7E,IAAAA,QAAQ,CAACwE,eAAT,GAA2B,IAAIpI,KAAJ,CAAU4I,MAAM,CAAChF,QAAQ,CAACwE,eAAV,CAAN,GAAmCI,UAAU,CAAC3E,IAAX,CAAgBV,MAA7D,CAA3B;AAEAU,IAAAA,IAAI,GAAGT,MAAM,CAACyC,MAAP,CAAc,CAAChC,IAAD,EAAO2E,UAAU,CAAC3E,IAAlB,CAAd,CAAP;AACD;;AAED,SAAO;AAACA,IAAAA,IAAD;AAAOD,IAAAA;AAAP,GAAP;AACD;;AAKD,SAASE,YAAT,CACErD,MADF,EAEEmB,QAFF,EAGEiB,SAHF,EAIEzB,YAJF,EAKU;AACR,QAAMwC,QAAQ,GAAG,IAAIxE,YAAJ,CAAiB;AAChCyJ,IAAAA,OAAO,EAAE3I,eADuB;AAEhC4I,IAAAA,UAAU,EAAE,UAFoB;AAGhC5B,IAAAA,QAAQ,EAAEtF,QAHsB;AAIhCmH,IAAAA,UAAU,EAAElG,SAJoB;AAKhCpC,IAAAA,MAAM,EAAE,EALwB;AAMhCuI,IAAAA,kBAAkB,EAAE;AANY,GAAjB,CAAjB;;AASA,OAAK,MAAMhH,GAAX,IAAkBZ,YAAlB,EAAgC;AAAA;;AAC9B,UAAM6H,EAAE,GAAG,IAAI5J,QAAJ,CAAa;AACtB2C,MAAAA,GADsB;AAEtBC,MAAAA,KAAK,EAAEb,YAAY,CAACY,GAAD;AAFG,KAAb,CAAX;AAIA,6BAAA4B,QAAQ,CAACoF,kBAAT,0GAA6BrF,IAA7B,8GAAoCsF,EAApC;AACD;;AAED;AACE,UAAMC,UAAU,GAAG,IAAIzJ,aAAJ,CAAkB;AACnC0J,MAAAA,IAAI,EAAE,MAD6B;AAEnCC,MAAAA,YAAY,EAAEC,MAAM,CAACC,IAAP,CAAY7I,MAAM,CAAC8I,MAAnB,EAA2BpG;AAFN,KAAlB,CAAnB;AAIAS,IAAAA,QAAQ,CAACnD,MAAT,CAAgBkD,IAAhB,CAAqBuF,UAArB;AACD;;AAED,OAAK,MAAMb,KAAX,IAAoB5H,MAAM,CAAC6H,SAA3B,EAAsC;AACpC,UAAMkB,IAAI,GAAGrK,mBAAmB,CAACkJ,KAAK,CAACoB,cAAP,CAAhC;AACA,UAAMC,UAAU,GAAG,IAAIjK,aAAJ,CAAkB;AACnC0J,MAAAA,IAAI,EAAEd,KAAK,CAACc,IADuB;AAEnCQ,MAAAA,eAAe,EAAEH;AAFkB,KAAlB,CAAnB;;AAKA,QAAInB,KAAK,CAACE,QAAV,EAAoB;AAClBmB,MAAAA,UAAU,CAACN,YAAX,GAA0Bf,KAAK,CAACuB,UAAhC;AACD,KAFD,MAEO;AACLF,MAAAA,UAAU,CAAC9E,IAAX,GAAkBlF,IAAI,CAAC2I,KAAK,CAAC3C,aAAP,CAAtB;AACD;;AAED,QAAI2C,KAAK,CAACwB,YAAV,EAAwB;AACtBH,MAAAA,UAAU,CAACI,cAAX,GAA4B/K,aAAa,CAACsJ,KAAK,CAACwB,YAAP,CAAzC;AACD;;AAEDH,IAAAA,UAAU,CAACK,WAAX,GAAyB1B,KAAK,CAAC1C,UAA/B;AAEA/B,IAAAA,QAAQ,CAACnD,MAAT,CAAgBkD,IAAhB,CAAqB+F,UAArB;AACD;;AAED,QAAMM,eAAe,GAAGjK,eAAe,CAAC6D,QAAD,CAAvC;AACA,QAAMqG,aAAa,GAAG7G,MAAM,CAAC8B,KAAP,CAAa8E,eAAe,CAAC7G,MAAhB,GAAyB,CAAtC,CAAtB;AACA6G,EAAAA,eAAe,CAACE,IAAhB,CAAqBD,aAArB;AACAA,EAAAA,aAAa,CAACE,aAAd,CAA4BH,eAAe,CAAC7G,MAA5C,EAAoD6G,eAAe,CAAC7G,MAApE;AACA8G,EAAAA,aAAa,CAACtH,KAAd,CAAoB1C,aAApB,EAAmC+J,eAAe,CAAC7G,MAAhB,GAAyB,CAA5D;AACA,SAAO8G,aAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {Transform, Writable} from 'stream';\nimport {ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport * as Compression from '../compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetData,\n ParquetField,\n PrimitiveType\n} from '../schema/declare';\nimport {ParquetSchema} from '../schema/schema';\nimport * as Shred from '../schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {osopen, oswrite, osclose} from '../utils/file-utils';\nimport {getBitWidth, serializeThrift} from '../utils/read-utils';\nimport Int64 from 'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetWriterOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. The ParquetWriter will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetWriter<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n const outputStream = await osopen(path, opts);\n return ParquetWriter.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: Writable,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n if (!opts) {\n // tslint:disable-next-line:no-parameter-reassignment\n opts = {};\n }\n\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n\n return new ParquetWriter(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetBuffer;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n this.writeHeader();\n }\n\n async writeHeader(): Promise<void> {\n // TODO - better not mess with promises in the constructor\n try {\n await this.envelopeWriter.writeHeader();\n } catch (err) {\n await this.envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: Writable,\n opts: ParquetWriterOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = oswrite.bind(undefined, outputStream);\n const closeFn = osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetBuffer): Promise<void> {\n const rgroup = await encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return await this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\n */\nexport class ParquetTransformer<T> extends Transform {\n public writer: ParquetWriter<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetWriterOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetWriter(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nasync function encodeDataPage(\n column: ParquetField,\n data: ParquetData\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nasync function encodeDataPageV2(\n column: ParquetField,\n data: ParquetData,\n rowCount: number\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nasync function encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetBuffer,\n offset: number,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n}> {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n const result = opts.useDataPageV2\n ? await encodeDataPageV2(column, data, buffer.rowCount)\n : await encodeDataPage(column, data);\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nasync function encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetBuffer,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: RowGroup;\n}> {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = await encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"file":"writer.js"}
@@ -51,7 +51,7 @@ export async function decodePage(cursor, options) {
51
51
  const {
52
52
  pageHeader,
53
53
  length
54
- } = await decodePageHeader(cursor.buffer, cursor.offset);
54
+ } = decodePageHeader(cursor.buffer, cursor.offset);
55
55
  cursor.offset += length;
56
56
  const pageType = getThriftEnum(PageType, pageHeader.type);
57
57
 
@@ -72,7 +72,7 @@ export async function decodePage(cursor, options) {
72
72
  break;
73
73
 
74
74
  default:
75
- throw new Error(`invalid page type: ${pageType}`);
75
+ throw new Error("invalid page type: ".concat(pageType));
76
76
  }
77
77
 
78
78
  return page;
@@ -121,7 +121,7 @@ export function decodeSchema(schemaElements, offset, len) {
121
121
 
122
122
  switch (logicalType) {
123
123
  case 'DECIMAL':
124
- logicalType = `${logicalType}_${type}`;
124
+ logicalType = "".concat(logicalType, "_").concat(type);
125
125
  break;
126
126
 
127
127
  default:
@@ -148,7 +148,7 @@ export function decodeSchema(schemaElements, offset, len) {
148
148
 
149
149
  function decodeValues(type, encoding, cursor, count, opts) {
150
150
  if (!(encoding in PARQUET_CODECS)) {
151
- throw new Error(`invalid encoding: ${encoding}`);
151
+ throw new Error("invalid encoding: ".concat(encoding));
152
152
  }
153
153
 
154
154
  return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/parser/decoders.ts"],"names":["PARQUET_CODECS","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","decompress","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","decodePageHeader","getThriftEnum","getBitWidth","decodeDataPages","buffer","options","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","page","decodePage","map","value","index","push","undefined","pageHeader","pageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","repetition_type","optional","repeated","num_children","res","name","fields","logicalType","converted_type","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","valuesBuf","slice","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","dictCursor","dictionary_page_header","d","toString"],"mappings":"AAUA,SAA2CA,cAA3C,QAAgE,WAAhE;AACA,SACEC,aADF,EAEEC,QAFF,EAGEC,mBAHF,EAKEC,QALF,EAOEC,IAPF,QAQO,mBARP;AASA,SAAQC,UAAR,QAAyB,gBAAzB;AACA,SAAQC,kBAAR,EAA4BC,sBAA5B,QAAyD,iBAAzD;AACA,SAAQC,gBAAR,EAA0BC,aAA1B,EAAyCC,WAAzC,QAA2D,qBAA3D;AASA,OAAO,eAAeC,eAAf,CACLC,MADK,EAELC,OAFK,EAGiB;AACtB,QAAMC,MAAoB,GAAG;AAC3BF,IAAAA,MAD2B;AAE3BG,IAAAA,MAAM,EAAE,CAFmB;AAG3BC,IAAAA,IAAI,EAAEJ,MAAM,CAACK;AAHc,GAA7B;AAMA,QAAMC,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBC,IAAAA,WAAW,EAAE,EAJW;AAKxBC,IAAAA,KAAK,EAAE;AALiB,GAA1B;AAQA,MAAIC,UAAU,GAAGX,OAAO,CAACW,UAAR,IAAsB,EAAvC;;AAEA,SAEEV,MAAM,CAACC,MAAP,GAAgBD,MAAM,CAACE,IAAvB,KACC,CAACH,OAAO,CAACY,SAAT,IAAsBP,IAAI,CAACE,OAAL,CAAaH,MAAb,GAAsBS,MAAM,CAACb,OAAO,CAACY,SAAT,CADnD,CAFF,EAIE;AAEA,UAAME,IAAI,GAAG,MAAMC,UAAU,CAACd,MAAD,EAASD,OAAT,CAA7B;;AAEA,QAAIc,IAAI,CAACH,UAAT,EAAqB;AACnBA,MAAAA,UAAU,GAAGG,IAAI,CAACH,UAAlB;AAEA;AACD;;AAED,QAAIA,UAAU,CAACP,MAAf,EAAuB;AAErBU,MAAAA,IAAI,CAACN,MAAL,GAAcM,IAAI,CAACN,MAAL,CAAYQ,GAAZ,CAAiBC,KAAD,IAAWN,UAAU,CAACM,KAAD,CAArC,CAAd;AACD;;AAED,SAAK,IAAIC,KAAK,GAAG,CAAjB,EAAoBA,KAAK,GAAGJ,IAAI,CAACR,OAAL,CAAaF,MAAzC,EAAiDc,KAAK,EAAtD,EAA0D;AACxDb,MAAAA,IAAI,CAACC,OAAL,CAAaa,IAAb,CAAkBL,IAAI,CAACR,OAAL,CAAaY,KAAb,CAAlB;AACAb,MAAAA,IAAI,CAACE,OAAL,CAAaY,IAAb,CAAkBL,IAAI,CAACP,OAAL,CAAaW,KAAb,CAAlB;AACA,YAAMD,KAAK,GAAGH,IAAI,CAACN,MAAL,CAAYU,KAAZ,CAAd;;AAEA,UAAID,KAAK,KAAKG,SAAd,EAAyB;AACvBf,QAAAA,IAAI,CAACG,MAAL,CAAYW,IAAZ,CAAiBF,KAAjB;AACD;AACF;;AAEDZ,IAAAA,IAAI,CAACK,KAAL,IAAcI,IAAI,CAACJ,KAAnB;AACAL,IAAAA,IAAI,CAACI,WAAL,CAAiBU,IAAjB,CAAsBL,IAAI,CAACO,UAA3B;AACD;;AAED,SAAOhB,IAAP;AACD;AAOD,OAAO,eAAeU,UAAf,CACLd,MADK,EAELD,OAFK,EAGqB;AAC1B,MAAIc,IAAJ;AACA,QAAM;AAACO,IAAAA,UAAD;AAAajB,IAAAA;AAAb,MAAuB,MAAMT,gBAAgB,CAACM,MAAM,CAACF,MAAR,EAAgBE,MAAM,CAACC,MAAvB,CAAnD;AACAD,EAAAA,MAAM,CAACC,MAAP,IAAiBE,MAAjB;AAEA,QAAMkB,QAAQ,GAAG1B,aAAa,CAACN,QAAD,EAAW+B,UAAU,CAACE,IAAtB,CAA9B;;AAEA,UAAQD,QAAR;AACE,SAAK,WAAL;AACER,MAAAA,IAAI,GAAG,MAAMU,cAAc,CAACvB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA3B;AACA;;AACF,SAAK,cAAL;AACEc,MAAAA,IAAI,GAAG,MAAMW,gBAAgB,CAACxB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA7B;AACA;;AACF,SAAK,iBAAL;AACEc,MAAAA,IAAI,GAAG;AACLH,QAAAA,UAAU,EAAE,MAAMe,oBAAoB,CAACzB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CADjC;AAELqB,QAAAA;AAFK,OAAP;AAIA;;AACF;AACE,YAAM,IAAIM,KAAJ,CAAW,sBAAqBL,QAAS,EAAzC,CAAN;AAdJ;;AAiBA,SAAOR,IAAP;AACD;AAYD,OAAO,SAASc,YAAT,CACLC,cADK,EAEL3B,MAFK,EAGL4B,GAHK,EAQL;AACA,QAAMC,MAAwB,GAAG,EAAjC;AACA,MAAIC,IAAI,GAAG9B,MAAX;;AACA,OAAK,IAAI+B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,GAApB,EAAyBG,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGL,cAAc,CAACG,IAAD,CAApC;AAEA,UAAMG,cAAc,GAClBH,IAAI,GAAG,CAAP,GAAWpC,aAAa,CAACP,mBAAD,EAAsB6C,aAAa,CAACE,eAApC,CAAxB,GAAgF,MADlF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAIX,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIO,aAAa,CAACK,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMC,GAAG,GAAGZ,YAAY,CAACC,cAAD,EAAiBG,IAAI,GAAG,CAAxB,EAA2BE,aAAa,CAACK,YAAzC,CAAxB;AACAP,MAAAA,IAAI,GAAGQ,GAAG,CAACR,IAAX;AACAD,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAE3BJ,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BI,QAAAA,MAAM,EAAEF,GAAG,CAACT;AAJe,OAA7B;AAMD,KATD,MASO;AACL,YAAMR,IAAI,GAAG3B,aAAa,CAACL,IAAD,EAAO2C,aAAa,CAACX,IAArB,CAA1B;AACA,UAAIoB,WAAW,GAAGpB,IAAlB;;AAEA,UAAIW,aAAa,CAACU,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG/C,aAAa,CAACT,aAAD,EAAgB+C,aAAa,CAACU,cAA9B,CAA3B;AACD;;AAED,cAAQD,WAAR;AACE,aAAK,SAAL;AACEA,UAAAA,WAAW,GAAI,GAAEA,WAAY,IAAGpB,IAAK,EAArC;AACA;;AACF;AAJF;;AAOAQ,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAC3BlB,QAAAA,IAAI,EAAEoB,WADqB;AAE3BE,QAAAA,UAAU,EAAEX,aAAa,CAACY,WAFC;AAG3BC,QAAAA,SAAS,EAAEb,aAAa,CAACc,SAHE;AAI3BC,QAAAA,KAAK,EAAEf,aAAa,CAACe,KAJM;AAK3BZ,QAAAA,QAL2B;AAM3BC,QAAAA;AAN2B,OAA7B;AAQAN,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACD,IAAAA,MAAD;AAAS7B,IAAAA,MAAT;AAAiB8B,IAAAA;AAAjB,GAAP;AACD;;AAKD,SAASkB,YAAT,CACE3B,IADF,EAEE4B,QAFF,EAGElD,MAHF,EAIES,KAJF,EAKE0C,IALF,EAMS;AACP,MAAI,EAAED,QAAQ,IAAIjE,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyC,KAAJ,CAAW,qBAAoBwB,QAAS,EAAxC,CAAN;AACD;;AACD,SAAOjE,cAAc,CAACiE,QAAD,CAAd,CAAyBD,YAAzB,CAAsC3B,IAAtC,EAA4CtB,MAA5C,EAAoDS,KAApD,EAA2D0C,IAA3D,CAAP;AACD;;AAQD,eAAe5B,cAAf,CACEvB,MADF,EAEEoD,MAFF,EAGErD,OAHF,EAI4B;AAAA;;AAC1B,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAGA,MAAIC,UAAU,GAAG1D,MAAjB;;AAEA,MAAID,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAKAJ,IAAAA,UAAU,GAAG;AACX5D,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAKAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAGD,QAAMU,cAAc,GAAGpE,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBQ,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AAEA,MAAIxD,OAAO,CAACoE,MAAR,CAAeC,SAAf,GAA2B,CAA/B,EAAkC;AAChCH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBuE,cAArB,EAAqCL,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeC,SAAhB,CAD6D;AAElFE,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAG7E,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBiB,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIxD,OAAO,CAACoE,MAAR,CAAeQ,SAAf,GAA2B,CAA/B,EAAkC;AAChCD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBgF,cAArB,EAAqCd,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeQ,SAAhB,CAD6D;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAK9E,OAAO,CAACoE,MAAR,CAAeQ,SAA5B,EAAuC;AACrCC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAGnF,aAAa,CAACR,QAAD,4BAAWiE,MAAM,CAACI,gBAAlB,2DAAW,uBAAyBN,QAApC,CAAnC;AACA,QAAM6B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAE7C,OAAO,CAACoE,MAAR,CAAevB,UADP;AAEpByB,IAAAA,QAAQ,EAAEtE,OAAO,CAACoE,MAAR,CAAevB;AAFL,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBlD,OAAO,CAACoE,MAAR,CAAea,aADU,EAEzBF,aAFyB,EAGzBpB,UAHyB,EAIzBkB,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AASD,eAAe5B,gBAAf,CACExB,MADF,EAEEoD,MAFF,EAGED,IAHF,EAI4B;AAAA;;AAC1B,QAAME,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AAEA,QAAMC,UAAU,6BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BxB,UAA/C;AAEA,QAAMmB,iBAAiB,GAAGrB,UAAU,8BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMJ,aAAa,GAAGnF,aAAa,CACjCR,QADiC,4BAEjCiE,MAAM,CAAC6B,mBAF0B,2DAEjC,uBAA4B/B,QAFK,CAAnC;AAOA,MAAIe,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYC,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYC,SAAb,CADiE;AAEtFE,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYQ,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYQ,SAAb,CADiE;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIY,eAAe,GAAGnF,MAAtB;;AAEA,gCAAIoD,MAAM,CAAC6B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMxB,SAAS,GAAG,MAAMrE,UAAU,CAChC4D,IAAI,CAACQ,WAD2B,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAMAqB,IAAAA,eAAe,GAAG;AAChBrF,MAAAA,MAAM,EAAE8D,SADQ;AAEhB3D,MAAAA,MAAM,EAAE,CAFQ;AAGhBC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHA,KAAlB;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM0B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAEO,IAAI,CAACgB,MAAL,CAAYvB,UADJ;AAEpByB,IAAAA,QAAQ,EAAElB,IAAI,CAACgB,MAAL,CAAYvB;AAFF,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBE,IAAI,CAACgB,MAAL,CAAYa,aADa,EAEzBF,aAFyB,EAGzBK,eAHyB,EAIzBP,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AAQD,eAAe3B,oBAAf,CACEzB,MADF,EAEEoB,UAFF,EAGErB,OAHF,EAIqB;AAAA;;AACnB,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmB,UAAU,CAACkC,oBAA7C;AAEA,MAAI+B,UAAU,GAAG;AACfpF,IAAAA,MAAM,EAAE,CADO;AAEfH,IAAAA,MAAM,EAAEE,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFO;AAGfnD,IAAAA,IAAI,EAAEmD,SAAS,GAAGrD,MAAM,CAACC;AAHV,GAAjB;AAMAD,EAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;;AAEA,MAAItD,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC0B,UAAU,CAACvF,MAAX,CAAkB+D,KAAlB,CAAwBwB,UAAU,CAACpF,MAAnC,EAA2CoD,SAA3C,CAFgC,EAGhCjC,UAAU,CAAC0C,sBAHqB,CAAlC;AAMAuB,IAAAA,UAAU,GAAG;AACXvF,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM1C,SAAS,GAAG,CAAAS,UAAU,SAAV,IAAAA,UAAU,WAAV,qCAAAA,UAAU,CAAEkE,sBAAZ,gFAAoC7B,UAApC,KAAkD,CAApE;AAEA,SAAOR,YAAY,CACjBlD,OAAO,CAACoE,MAAR,CAAea,aADE,EAEjBjF,OAAO,CAACoE,MAAR,CAAejB,QAFE,EAGjBmC,UAHiB,EAIjB1E,SAJiB,EAKjBZ,OALiB,CAAZ,CAMLgB,GANK,CAMAwE,CAAD,IAAOA,CAAC,CAACC,QAAF,EANN,CAAP;AAOD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetData,\n ParquetOptions,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n options: ParquetOptions\n): Promise<ParquetData> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = options.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!options.numValues || data.dlevels.length < Number(options.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, options);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param options\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n let page;\n const {pageHeader, length} = await decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, options);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, options);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, options),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (options.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (options.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === options.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) as ParquetCodec;\n const decodeOptions = {\n typeLength: options.column.typeLength,\n bitWidth: options.column.typeLength\n };\n\n const values = decodeValues(\n options.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n opts: any\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (opts.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (opts.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n opts.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: opts.column.typeLength,\n bitWidth: opts.column.typeLength\n };\n\n const values = decodeValues(\n opts.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param options\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n options: ParquetOptions\n): Promise<string[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n return decodeValues(\n options.column.primitiveType!,\n options.column.encoding!,\n dictCursor,\n numValues,\n options as ParquetCodecOptions\n ).map((d) => d.toString());\n}\n"],"file":"decoders.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/parser/decoders.ts"],"names":["PARQUET_CODECS","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","decompress","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","decodePageHeader","getThriftEnum","getBitWidth","decodeDataPages","buffer","options","cursor","offset","size","length","data","rlevels","dlevels","values","pageHeaders","count","dictionary","numValues","Number","page","decodePage","map","value","index","push","undefined","pageHeader","pageType","type","decodeDataPage","decodeDataPageV2","decodeDictionaryPage","Error","decodeSchema","schemaElements","len","schema","next","i","schemaElement","repetitionType","repetition_type","optional","repeated","num_children","res","name","fields","logicalType","converted_type","typeLength","type_length","presision","precision","scale","decodeValues","encoding","opts","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","compression","valuesBuf","slice","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","Array","column","rLevelMax","bitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","decodeOptions","primitiveType","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","dictCursor","dictionary_page_header","d","toString"],"mappings":"AAUA,SAA2CA,cAA3C,QAAgE,WAAhE;AACA,SACEC,aADF,EAEEC,QAFF,EAGEC,mBAHF,EAKEC,QALF,EAOEC,IAPF,QAQO,mBARP;AASA,SAAQC,UAAR,QAAyB,gBAAzB;AACA,SAAQC,kBAAR,EAA4BC,sBAA5B,QAAyD,iBAAzD;AACA,SAAQC,gBAAR,EAA0BC,aAA1B,EAAyCC,WAAzC,QAA2D,qBAA3D;AASA,OAAO,eAAeC,eAAf,CACLC,MADK,EAELC,OAFK,EAGiB;AACtB,QAAMC,MAAoB,GAAG;AAC3BF,IAAAA,MAD2B;AAE3BG,IAAAA,MAAM,EAAE,CAFmB;AAG3BC,IAAAA,IAAI,EAAEJ,MAAM,CAACK;AAHc,GAA7B;AAMA,QAAMC,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBC,IAAAA,WAAW,EAAE,EAJW;AAKxBC,IAAAA,KAAK,EAAE;AALiB,GAA1B;AAQA,MAAIC,UAAU,GAAGX,OAAO,CAACW,UAAR,IAAsB,EAAvC;;AAEA,SAEEV,MAAM,CAACC,MAAP,GAAgBD,MAAM,CAACE,IAAvB,KACC,CAACH,OAAO,CAACY,SAAT,IAAsBP,IAAI,CAACE,OAAL,CAAaH,MAAb,GAAsBS,MAAM,CAACb,OAAO,CAACY,SAAT,CADnD,CAFF,EAIE;AAEA,UAAME,IAAI,GAAG,MAAMC,UAAU,CAACd,MAAD,EAASD,OAAT,CAA7B;;AAEA,QAAIc,IAAI,CAACH,UAAT,EAAqB;AACnBA,MAAAA,UAAU,GAAGG,IAAI,CAACH,UAAlB;AAEA;AACD;;AAED,QAAIA,UAAU,CAACP,MAAf,EAAuB;AAErBU,MAAAA,IAAI,CAACN,MAAL,GAAcM,IAAI,CAACN,MAAL,CAAYQ,GAAZ,CAAiBC,KAAD,IAAWN,UAAU,CAACM,KAAD,CAArC,CAAd;AACD;;AAED,SAAK,IAAIC,KAAK,GAAG,CAAjB,EAAoBA,KAAK,GAAGJ,IAAI,CAACR,OAAL,CAAaF,MAAzC,EAAiDc,KAAK,EAAtD,EAA0D;AACxDb,MAAAA,IAAI,CAACC,OAAL,CAAaa,IAAb,CAAkBL,IAAI,CAACR,OAAL,CAAaY,KAAb,CAAlB;AACAb,MAAAA,IAAI,CAACE,OAAL,CAAaY,IAAb,CAAkBL,IAAI,CAACP,OAAL,CAAaW,KAAb,CAAlB;AACA,YAAMD,KAAK,GAAGH,IAAI,CAACN,MAAL,CAAYU,KAAZ,CAAd;;AAEA,UAAID,KAAK,KAAKG,SAAd,EAAyB;AACvBf,QAAAA,IAAI,CAACG,MAAL,CAAYW,IAAZ,CAAiBF,KAAjB;AACD;AACF;;AAEDZ,IAAAA,IAAI,CAACK,KAAL,IAAcI,IAAI,CAACJ,KAAnB;AACAL,IAAAA,IAAI,CAACI,WAAL,CAAiBU,IAAjB,CAAsBL,IAAI,CAACO,UAA3B;AACD;;AAED,SAAOhB,IAAP;AACD;AAOD,OAAO,eAAeU,UAAf,CACLd,MADK,EAELD,OAFK,EAGqB;AAC1B,MAAIc,IAAJ;AACA,QAAM;AAACO,IAAAA,UAAD;AAAajB,IAAAA;AAAb,MAAuBT,gBAAgB,CAACM,MAAM,CAACF,MAAR,EAAgBE,MAAM,CAACC,MAAvB,CAA7C;AACAD,EAAAA,MAAM,CAACC,MAAP,IAAiBE,MAAjB;AAEA,QAAMkB,QAAQ,GAAG1B,aAAa,CAACN,QAAD,EAAW+B,UAAU,CAACE,IAAtB,CAA9B;;AAEA,UAAQD,QAAR;AACE,SAAK,WAAL;AACER,MAAAA,IAAI,GAAG,MAAMU,cAAc,CAACvB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA3B;AACA;;AACF,SAAK,cAAL;AACEc,MAAAA,IAAI,GAAG,MAAMW,gBAAgB,CAACxB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CAA7B;AACA;;AACF,SAAK,iBAAL;AACEc,MAAAA,IAAI,GAAG;AACLH,QAAAA,UAAU,EAAE,MAAMe,oBAAoB,CAACzB,MAAD,EAASoB,UAAT,EAAqBrB,OAArB,CADjC;AAELqB,QAAAA;AAFK,OAAP;AAIA;;AACF;AACE,YAAM,IAAIM,KAAJ,8BAAgCL,QAAhC,EAAN;AAdJ;;AAiBA,SAAOR,IAAP;AACD;AAYD,OAAO,SAASc,YAAT,CACLC,cADK,EAEL3B,MAFK,EAGL4B,GAHK,EAQL;AACA,QAAMC,MAAwB,GAAG,EAAjC;AACA,MAAIC,IAAI,GAAG9B,MAAX;;AACA,OAAK,IAAI+B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,GAApB,EAAyBG,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGL,cAAc,CAACG,IAAD,CAApC;AAEA,UAAMG,cAAc,GAClBH,IAAI,GAAG,CAAP,GAAWpC,aAAa,CAACP,mBAAD,EAAsB6C,aAAa,CAACE,eAApC,CAAxB,GAAgF,MADlF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAIX,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIO,aAAa,CAACK,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMC,GAAG,GAAGZ,YAAY,CAACC,cAAD,EAAiBG,IAAI,GAAG,CAAxB,EAA2BE,aAAa,CAACK,YAAzC,CAAxB;AACAP,MAAAA,IAAI,GAAGQ,GAAG,CAACR,IAAX;AACAD,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAE3BJ,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BI,QAAAA,MAAM,EAAEF,GAAG,CAACT;AAJe,OAA7B;AAMD,KATD,MASO;AACL,YAAMR,IAAI,GAAG3B,aAAa,CAACL,IAAD,EAAO2C,aAAa,CAACX,IAArB,CAA1B;AACA,UAAIoB,WAAW,GAAGpB,IAAlB;;AAEA,UAAIW,aAAa,CAACU,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG/C,aAAa,CAACT,aAAD,EAAgB+C,aAAa,CAACU,cAA9B,CAA3B;AACD;;AAED,cAAQD,WAAR;AACE,aAAK,SAAL;AACEA,UAAAA,WAAW,aAAMA,WAAN,cAAqBpB,IAArB,CAAX;AACA;;AACF;AAJF;;AAOAQ,MAAAA,MAAM,CAACG,aAAa,CAACO,IAAf,CAAN,GAA6B;AAC3BlB,QAAAA,IAAI,EAAEoB,WADqB;AAE3BE,QAAAA,UAAU,EAAEX,aAAa,CAACY,WAFC;AAG3BC,QAAAA,SAAS,EAAEb,aAAa,CAACc,SAHE;AAI3BC,QAAAA,KAAK,EAAEf,aAAa,CAACe,KAJM;AAK3BZ,QAAAA,QAL2B;AAM3BC,QAAAA;AAN2B,OAA7B;AAQAN,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACD,IAAAA,MAAD;AAAS7B,IAAAA,MAAT;AAAiB8B,IAAAA;AAAjB,GAAP;AACD;;AAKD,SAASkB,YAAT,CACE3B,IADF,EAEE4B,QAFF,EAGElD,MAHF,EAIES,KAJF,EAKE0C,IALF,EAMS;AACP,MAAI,EAAED,QAAQ,IAAIjE,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyC,KAAJ,6BAA+BwB,QAA/B,EAAN;AACD;;AACD,SAAOjE,cAAc,CAACiE,QAAD,CAAd,CAAyBD,YAAzB,CAAsC3B,IAAtC,EAA4CtB,MAA5C,EAAoDS,KAApD,EAA2D0C,IAA3D,CAAP;AACD;;AAQD,eAAe5B,cAAf,CACEvB,MADF,EAEEoD,MAFF,EAGErD,OAHF,EAI4B;AAAA;;AAC1B,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAGA,MAAIC,UAAU,GAAG1D,MAAjB;;AAEA,MAAID,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAKAJ,IAAAA,UAAU,GAAG;AACX5D,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAKAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAGD,QAAMU,cAAc,GAAGpE,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBQ,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AAEA,MAAIxD,OAAO,CAACoE,MAAR,CAAeC,SAAf,GAA2B,CAA/B,EAAkC;AAChCH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBuE,cAArB,EAAqCL,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeC,SAAhB,CAD6D;AAElFE,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAG7E,aAAa,CAClCR,QADkC,4BAElCiE,MAAM,CAACI,gBAF2B,2DAElC,uBAAyBiB,yBAFS,CAApC;AAKA,MAAIC,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIxD,OAAO,CAACoE,MAAR,CAAeQ,SAAf,GAA2B,CAA/B,EAAkC;AAChCD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBgF,cAArB,EAAqCd,UAArC,EAAiDH,UAAjD,EAA8D;AAClFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACG,OAAO,CAACoE,MAAR,CAAeQ,SAAhB,CAD6D;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAK9E,OAAO,CAACoE,MAAR,CAAeQ,SAA5B,EAAuC;AACrCC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAGnF,aAAa,CAACR,QAAD,4BAAWiE,MAAM,CAACI,gBAAlB,2DAAW,uBAAyBN,QAApC,CAAnC;AACA,QAAM6B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAE7C,OAAO,CAACoE,MAAR,CAAevB,UADP;AAEpByB,IAAAA,QAAQ,EAAEtE,OAAO,CAACoE,MAAR,CAAevB;AAFL,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBlD,OAAO,CAACoE,MAAR,CAAea,aADU,EAEzBF,aAFyB,EAGzBpB,UAHyB,EAIzBkB,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AASD,eAAe5B,gBAAf,CACExB,MADF,EAEEoD,MAFF,EAGED,IAHF,EAI4B;AAAA;;AAC1B,QAAME,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmD,MAAM,CAACE,oBAAzC;AAEA,QAAMC,UAAU,6BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BxB,UAA/C;AAEA,QAAMmB,iBAAiB,GAAGrB,UAAU,8BAAGH,MAAM,CAAC6B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMJ,aAAa,GAAGnF,aAAa,CACjCR,QADiC,4BAEjCiE,MAAM,CAAC6B,mBAF0B,2DAEjC,uBAA4B/B,QAFK,CAAnC;AAOA,MAAIe,OAAO,GAAG,IAAIC,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYC,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BH,IAAAA,OAAO,GAAGhB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYC,SAAb,CADiE;AAEtFE,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLL,IAAAA,OAAO,CAACM,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAIR,KAAJ,CAAUX,UAAV,CAAd;;AACA,MAAIJ,IAAI,CAACgB,MAAL,CAAYQ,SAAZ,GAAwB,CAA5B,EAA+B;AAC7BD,IAAAA,OAAO,GAAGzB,YAAY,CAACzD,kBAAD,EAAqBC,sBAArB,EAA6CO,MAA7C,EAAqDuD,UAArD,EAAkE;AACtFc,MAAAA,QAAQ,EAAEzE,WAAW,CAACuD,IAAI,CAACgB,MAAL,CAAYQ,SAAb,CADiE;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIY,eAAe,GAAGnF,MAAtB;;AAEA,gCAAIoD,MAAM,CAAC6B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMxB,SAAS,GAAG,MAAMrE,UAAU,CAChC4D,IAAI,CAACQ,WAD2B,EAEhC3D,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFgC,EAGhCD,MAAM,CAACU,sBAHyB,CAAlC;AAMAqB,IAAAA,eAAe,GAAG;AAChBrF,MAAAA,MAAM,EAAE8D,SADQ;AAEhB3D,MAAAA,MAAM,EAAE,CAFQ;AAGhBC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHA,KAAlB;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM0B,aAAa,GAAG;AACpBnC,IAAAA,UAAU,EAAEO,IAAI,CAACgB,MAAL,CAAYvB,UADJ;AAEpByB,IAAAA,QAAQ,EAAElB,IAAI,CAACgB,MAAL,CAAYvB;AAFF,GAAtB;AAKA,QAAMrC,MAAM,GAAG0C,YAAY,CACzBE,IAAI,CAACgB,MAAL,CAAYa,aADa,EAEzBF,aAFyB,EAGzBK,eAHyB,EAIzBP,iBAJyB,EAKzBG,aALyB,CAA3B;AAQA,SAAO;AACLzE,IAAAA,OAAO,EAAEoE,OADJ;AAELrE,IAAAA,OAAO,EAAE4D,OAFJ;AAGL1D,IAAAA,MAHK;AAILE,IAAAA,KAAK,EAAE8C,UAJF;AAKLnC,IAAAA,UAAU,EAAEgC;AALP,GAAP;AAOD;;AAQD,eAAe3B,oBAAf,CACEzB,MADF,EAEEoB,UAFF,EAGErB,OAHF,EAIqB;AAAA;;AACnB,QAAMsD,SAAS,GAAGrD,MAAM,CAACC,MAAP,GAAgBmB,UAAU,CAACkC,oBAA7C;AAEA,MAAI+B,UAAU,GAAG;AACfpF,IAAAA,MAAM,EAAE,CADO;AAEfH,IAAAA,MAAM,EAAEE,MAAM,CAACF,MAAP,CAAc+D,KAAd,CAAoB7D,MAAM,CAACC,MAA3B,EAAmCoD,SAAnC,CAFO;AAGfnD,IAAAA,IAAI,EAAEmD,SAAS,GAAGrD,MAAM,CAACC;AAHV,GAAjB;AAMAD,EAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;;AAEA,MAAItD,OAAO,CAAC4D,WAAR,KAAwB,cAA5B,EAA4C;AAC1C,UAAMC,SAAS,GAAG,MAAMrE,UAAU,CAChCQ,OAAO,CAAC4D,WADwB,EAEhC0B,UAAU,CAACvF,MAAX,CAAkB+D,KAAlB,CAAwBwB,UAAU,CAACpF,MAAnC,EAA2CoD,SAA3C,CAFgC,EAGhCjC,UAAU,CAAC0C,sBAHqB,CAAlC;AAMAuB,IAAAA,UAAU,GAAG;AACXvF,MAAAA,MAAM,EAAE8D,SADG;AAEX3D,MAAAA,MAAM,EAAE,CAFG;AAGXC,MAAAA,IAAI,EAAE0D,SAAS,CAACzD;AAHL,KAAb;AAMAH,IAAAA,MAAM,CAACC,MAAP,GAAgBoD,SAAhB;AACD;;AAED,QAAM1C,SAAS,GAAG,CAAAS,UAAU,SAAV,IAAAA,UAAU,WAAV,qCAAAA,UAAU,CAAEkE,sBAAZ,gFAAoC7B,UAApC,KAAkD,CAApE;AAEA,SAAOR,YAAY,CACjBlD,OAAO,CAACoE,MAAR,CAAea,aADE,EAEjBjF,OAAO,CAACoE,MAAR,CAAejB,QAFE,EAGjBmC,UAHiB,EAIjB1E,SAJiB,EAKjBZ,OALiB,CAAZ,CAMLgB,GANK,CAMAwE,CAAD,IAAOA,CAAC,CAACC,QAAF,EANN,CAAP;AAOD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {\n ParquetCodec,\n ParquetData,\n ParquetOptions,\n ParquetPageData,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from '../schema/declare';\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport {\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n PageHeader,\n PageType,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {decompress} from '../compression';\nimport {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';\nimport {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';\n\n/**\n * Decode data pages\n * @param buffer - input data\n * @param column - parquet column\n * @param compression - compression type\n * @returns parquet data page data\n */\nexport async function decodeDataPages(\n buffer: Buffer,\n options: ParquetOptions\n): Promise<ParquetData> {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n\n let dictionary = options.dictionary || [];\n\n while (\n // @ts-ignore size can be undefined\n cursor.offset < cursor.size &&\n (!options.numValues || data.dlevels.length < Number(options.numValues))\n ) {\n // Looks like we have to decode these in sequence due to cursor updates?\n const page = await decodePage(cursor, options);\n\n if (page.dictionary) {\n dictionary = page.dictionary;\n // eslint-disable-next-line no-continue\n continue;\n }\n\n if (dictionary.length) {\n // eslint-disable-next-line no-loop-func\n page.values = page.values.map((value) => dictionary[value]);\n }\n\n for (let index = 0; index < page.rlevels.length; index++) {\n data.rlevels.push(page.rlevels[index]);\n data.dlevels.push(page.dlevels[index]);\n const value = page.values[index];\n\n if (value !== undefined) {\n data.values.push(value);\n }\n }\n\n data.count += page.count;\n data.pageHeaders.push(page.pageHeader);\n }\n\n return data;\n}\n\n/**\n * Decode parquet page based on page type\n * @param cursor\n * @param options\n */\nexport async function decodePage(\n cursor: CursorBuffer,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n let page;\n const {pageHeader, length} = decodePageHeader(cursor.buffer, cursor.offset);\n cursor.offset += length;\n\n const pageType = getThriftEnum(PageType, pageHeader.type);\n\n switch (pageType) {\n case 'DATA_PAGE':\n page = await decodeDataPage(cursor, pageHeader, options);\n break;\n case 'DATA_PAGE_V2':\n page = await decodeDataPageV2(cursor, pageHeader, options);\n break;\n case 'DICTIONARY_PAGE':\n page = {\n dictionary: await decodeDictionaryPage(cursor, pageHeader, options),\n pageHeader\n };\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n return page;\n}\n\n/**\n * Decode parquet schema\n * @param schemaElements input schema elements data\n * @param offset offset to read from\n * @param len length of data\n * @returns result.offset\n * result.next - offset at the end of function\n * result.schema - schema read from the input data\n * @todo output offset is the same as input - possibly excess output field\n */\nexport function decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n const type = getThriftEnum(Type, schemaElement.type!);\n let logicalType = type;\n\n if (schemaElement.converted_type) {\n logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n switch (logicalType) {\n case 'DECIMAL':\n logicalType = `${logicalType}_${type}` as ParquetType;\n break;\n default:\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n presision: schemaElement.precision,\n scale: schemaElement.scale,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\n/**\n * Do decoding of parquet dataPage from column chunk\n * @param cursor\n * @param header\n * @param options\n */\nasync function decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n options: ParquetOptions\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n /* uncompress page */\n let dataCursor = cursor;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n\n if (options.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (options.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: getBitWidth(options.column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === options.column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) as ParquetCodec;\n const decodeOptions = {\n typeLength: options.column.typeLength,\n bitWidth: options.column.typeLength\n };\n\n const values = decodeValues(\n options.column.primitiveType!,\n valueEncoding,\n dataCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of parquet dataPage in version 2 from column chunk\n * @param cursor\n * @param header\n * @param opts\n * @returns\n */\nasync function decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n opts: any\n): Promise<ParquetPageData> {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (opts.column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (opts.column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: getBitWidth(opts.column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = await decompress(\n opts.compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const decodeOptions = {\n typeLength: opts.column.typeLength,\n bitWidth: opts.column.typeLength\n };\n\n const values = decodeValues(\n opts.column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n decodeOptions\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!,\n pageHeader: header\n };\n}\n\n/**\n * Do decoding of dictionary page which helps to iterate over all indexes and get dataPage values.\n * @param cursor\n * @param pageHeader\n * @param options\n */\nasync function decodeDictionaryPage(\n cursor: CursorBuffer,\n pageHeader: PageHeader,\n options: ParquetOptions\n): Promise<string[]> {\n const cursorEnd = cursor.offset + pageHeader.compressed_page_size;\n\n let dictCursor = {\n offset: 0,\n buffer: cursor.buffer.slice(cursor.offset, cursorEnd),\n size: cursorEnd - cursor.offset\n };\n\n cursor.offset = cursorEnd;\n\n if (options.compression !== 'UNCOMPRESSED') {\n const valuesBuf = await decompress(\n options.compression,\n dictCursor.buffer.slice(dictCursor.offset, cursorEnd),\n pageHeader.uncompressed_page_size\n );\n\n dictCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const numValues = pageHeader?.dictionary_page_header?.num_values || 0;\n\n return decodeValues(\n options.column.primitiveType!,\n options.column.encoding!,\n dictCursor,\n numValues,\n options as ParquetCodecOptions\n ).map((d) => d.toString());\n}\n"],"file":"decoders.js"}
@@ -1,19 +1,10 @@
1
1
  import _defineProperty from "@babel/runtime/helpers/esm/defineProperty";
2
2
  import { PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED } from '../../constants';
3
3
  import { CompressionCodec, Type } from '../parquet-thrift';
4
- import { fstat, fopen, fread, fclose } from '../utils/file-utils';
5
4
  import { decodeFileMetadata, getThriftEnum, fieldIndexOf } from '../utils/read-utils';
6
5
  import { decodeDataPages, decodePage } from './decoders';
7
6
  const DEFAULT_DICTIONARY_SIZE = 1e6;
8
7
  export class ParquetEnvelopeReader {
9
- static async openFile(filePath) {
10
- const fileStat = await fstat(filePath);
11
- const fileDescriptor = await fopen(filePath);
12
- const readFn = fread.bind(undefined, fileDescriptor);
13
- const closeFn = fclose.bind(undefined, fileDescriptor);
14
- return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);
15
- }
16
-
17
8
  static async openBuffer(buffer) {
18
9
  const readFn = (position, length) => Promise.resolve(buffer.slice(position, position + length));
19
10
 
@@ -49,7 +40,7 @@ export class ParquetEnvelopeReader {
49
40
  throw new Error('Encrypted parquet file not supported');
50
41
 
51
42
  default:
52
- throw new Error(`Invalid parquet file (magic=${magic})`);
43
+ throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
53
44
  }
54
45
  }
55
46
 
@@ -84,7 +75,7 @@ export class ParquetEnvelopeReader {
84
75
  const type = getThriftEnum(Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
85
76
 
86
77
  if (type !== field.primitiveType) {
87
- throw new Error(`chunk type not matching schema: ${type}`);
78
+ throw new Error("chunk type not matching schema: ".concat(type));
88
79
  }
89
80
 
90
81
  const compression = getThriftEnum(CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
@@ -143,14 +134,14 @@ export class ParquetEnvelopeReader {
143
134
  const magic = trailerBuf.slice(4).toString();
144
135
 
145
136
  if (magic !== PARQUET_MAGIC) {
146
- throw new Error(`Not a valid parquet file (magic="${magic})`);
137
+ throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
147
138
  }
148
139
 
149
140
  const metadataSize = trailerBuf.readUInt32LE(0);
150
141
  const metadataOffset = this.fileSize - metadataSize - trailerLen;
151
142
 
152
143
  if (metadataOffset < PARQUET_MAGIC.length) {
153
- throw new Error(`Invalid metadata size ${metadataOffset}`);
144
+ throw new Error("Invalid metadata size ".concat(metadataOffset));
154
145
  }
155
146
 
156
147
  const metadataBuf = await this.read(metadataOffset, metadataSize);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/parser/parquet-envelope-reader.ts"],"names":["PARQUET_MAGIC","PARQUET_MAGIC_ENCRYPTED","CompressionCodec","Type","fstat","fopen","fread","fclose","decodeFileMetadata","getThriftEnum","fieldIndexOf","decodeDataPages","decodePage","DEFAULT_DICTIONARY_SIZE","ParquetEnvelopeReader","openFile","filePath","fileStat","fileDescriptor","readFn","bind","undefined","closeFn","size","openBuffer","buffer","position","length","Promise","resolve","slice","constructor","read","close","fileSize","options","defaultDictionarySize","readHeader","magic","toString","Error","readRowGroup","schema","rowGroup","columnList","rowCount","Number","num_rows","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","join","readColumnChunk","file_path","field","findField","type","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","Math","min","rLevelMax","dLevelMax","column","numValues","num_values","dictionary","dictionaryPageOffset","dictionary_page_offset","dictionaryOffset","getDictionary","pagesBuf","dictionarySize","cursor","offset","decodedPage","readFooter","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","metadata"],"mappings":";AAEA,SAAQA,aAAR,EAAuBC,uBAAvB,QAAqD,iBAArD;AACA,SAAqBC,gBAArB,EAA+DC,IAA/D,QAA0E,mBAA1E;AAQA,SAAQC,KAAR,EAAeC,KAAf,EAAsBC,KAAtB,EAA6BC,MAA7B,QAA0C,qBAA1C;AACA,SAAQC,kBAAR,EAA4BC,aAA5B,EAA2CC,YAA3C,QAA8D,qBAA9D;AACA,SAAQC,eAAR,EAAyBC,UAAzB,QAA0C,YAA1C;AAEA,MAAMC,uBAAuB,GAAG,GAAhC;AAQA,OAAO,MAAMC,qBAAN,CAA4B;AAUZ,eAARC,QAAQ,CAACC,QAAD,EAAmD;AACtE,UAAMC,QAAQ,GAAG,MAAMb,KAAK,CAACY,QAAD,CAA5B;AACA,UAAME,cAAc,GAAG,MAAMb,KAAK,CAACW,QAAD,CAAlC;AAEA,UAAMG,MAAM,GAAGb,KAAK,CAACc,IAAN,CAAWC,SAAX,EAAsBH,cAAtB,CAAf;AACA,UAAMI,OAAO,GAAGf,MAAM,CAACa,IAAP,CAAYC,SAAZ,EAAuBH,cAAvB,CAAhB;AAEA,WAAO,IAAIJ,qBAAJ,CAA0BK,MAA1B,EAAkCG,OAAlC,EAA2CL,QAAQ,CAACM,IAApD,CAAP;AACD;;AAEsB,eAAVC,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMN,MAAM,GAAG,CAACO,QAAD,EAAmBC,MAAnB,KACbC,OAAO,CAACC,OAAR,CAAgBJ,MAAM,CAACK,KAAP,CAAaJ,QAAb,EAAuBA,QAAQ,GAAGC,MAAlC,CAAhB,CADF;;AAEA,UAAML,OAAO,GAAG,MAAMM,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAIf,qBAAJ,CAA0BK,MAA1B,EAAkCG,OAAlC,EAA2CG,MAAM,CAACE,MAAlD,CAAP;AACD;;AAEDI,EAAAA,WAAW,CACTC,IADS,EAETC,KAFS,EAGTC,QAHS,EAITC,OAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,IAAL,GAAYA,IAAZ;AACA,SAAKC,KAAL,GAAaA,KAAb;AACA,SAAKC,QAAL,GAAgBA,QAAhB;AACA,SAAKE,qBAAL,GAA6B,CAAAD,OAAO,SAAP,IAAAA,OAAO,WAAP,YAAAA,OAAO,CAAEC,qBAAT,KAAkCvB,uBAA/D;AACD;;AAEe,QAAVwB,UAAU,GAAkB;AAChC,UAAMZ,MAAM,GAAG,MAAM,KAAKO,IAAL,CAAU,CAAV,EAAahC,aAAa,CAAC2B,MAA3B,CAArB;AAEA,UAAMW,KAAK,GAAGb,MAAM,CAACc,QAAP,EAAd;;AACA,YAAQD,KAAR;AACE,WAAKtC,aAAL;AACE;;AACF,WAAKC,uBAAL;AACE,cAAM,IAAIuC,KAAJ,CAAU,sCAAV,CAAN;;AACF;AACE,cAAM,IAAIA,KAAJ,CAAW,+BAA8BF,KAAM,GAA/C,CAAN;AANJ;AAQD;;AAEiB,QAAZG,YAAY,CAChBC,MADgB,EAEhBC,QAFgB,EAGhBC,UAHgB,EAIQ;AACxB,UAAMnB,MAAqB,GAAG;AAC5BoB,MAAAA,QAAQ,EAAEC,MAAM,CAACH,QAAQ,CAACI,QAAV,CADY;AAE5BC,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBN,QAAQ,CAACO,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAIV,UAAU,CAACjB,MAAX,GAAoB,CAApB,IAAyBjB,YAAY,CAACkC,UAAD,EAAaS,MAAb,CAAZ,GAAoC,CAAjE,EAAoE;AAClE;AACD;;AACD5B,MAAAA,MAAM,CAACuB,UAAP,CAAkBK,MAAM,CAAEE,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBd,MAArB,EAA6BO,QAA7B,CAA1C;AACD;;AACD,WAAOxB,MAAP;AACD;;AAOoB,QAAf+B,eAAe,CAACd,MAAD,EAAwBO,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACQ,SAAT,KAAuBpC,SAAvB,IAAoC4B,QAAQ,CAACQ,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAIjB,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAMkB,KAAK,GAAGhB,MAAM,CAACiB,SAAP,wBAAiBV,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMM,IAAmB,GAAGnD,aAAa,CAACN,IAAD,0BAAO8C,QAAQ,CAACG,SAAhB,yDAAO,qBAAoBQ,IAA3B,CAAzC;;AAEA,QAAIA,IAAI,KAAKF,KAAK,CAACG,aAAnB,EAAkC;AAChC,YAAM,IAAIrB,KAAJ,CAAW,mCAAkCoB,IAAK,EAAlD,CAAN;AACD;;AAED,UAAME,WAA+B,GAAGrD,aAAa,CACnDP,gBADmD,0BAEnD+C,QAAQ,CAACG,SAF0C,yDAEnD,qBAAoBW,KAF+B,CAArD;AAKA,UAAMC,WAAW,GAAGlB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBa,gBAArB,CAA1B;AACA,QAAIC,SAAS,GAAGpB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBe,qBAArB,CAAtB;;AAEA,QAAI,CAAClB,QAAQ,CAACQ,SAAd,EAAyB;AAAA;;AACvBS,MAAAA,SAAS,GAAGE,IAAI,CAACC,GAAL,CACV,KAAKnC,QAAL,GAAgB8B,WADN,EAEVlB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBe,qBAArB,CAFI,CAAZ;AAID;;AAED,UAAMhC,OAAuB,GAAG;AAC9ByB,MAAAA,IAD8B;AAE9BU,MAAAA,SAAS,EAAEZ,KAAK,CAACY,SAFa;AAG9BC,MAAAA,SAAS,EAAEb,KAAK,CAACa,SAHa;AAI9BT,MAAAA,WAJ8B;AAK9BU,MAAAA,MAAM,EAAEd,KALsB;AAM9Be,MAAAA,SAAS,0BAAExB,QAAQ,CAACG,SAAX,yDAAE,qBAAoBsB,UAND;AAO9BC,MAAAA,UAAU,EAAE;AAPkB,KAAhC;AAUA,QAAIA,UAAJ;AAEA,UAAMC,oBAAoB,GAAG3B,QAAH,aAAGA,QAAH,+CAAGA,QAAQ,CAAEG,SAAb,yDAAG,qBAAqByB,sBAAlD;;AAEA,QAAID,oBAAJ,EAA0B;AACxB,YAAME,gBAAgB,GAAGhC,MAAM,CAAC8B,oBAAD,CAA/B;AAEAD,MAAAA,UAAU,GAAG,MAAM,KAAKI,aAAL,CAAmBD,gBAAnB,EAAqC3C,OAArC,EAA8C6B,WAA9C,CAAnB;AACD;;AAEDW,IAAAA,UAAU,GAAG,uBAAAxC,OAAO,CAACwC,UAAR,oEAAoBhD,MAApB,GAA6BQ,OAAO,CAACwC,UAArC,GAAkDA,UAA/D;AACA,UAAMK,QAAQ,GAAG,MAAM,KAAKhD,IAAL,CAAUgC,WAAV,EAAuBE,SAAvB,CAAvB;AACA,WAAO,MAAMvD,eAAe,CAACqE,QAAD,EAAW,EAAC,GAAG7C,OAAJ;AAAawC,MAAAA;AAAb,KAAX,CAA5B;AACD;;AASkB,QAAbI,aAAa,CACjBH,oBADiB,EAEjBzC,OAFiB,EAGjB6B,WAHiB,EAIE;AACnB,QAAIY,oBAAoB,KAAK,CAA7B,EAAgC;AAQ9B,aAAO,EAAP;AACD;;AAED,UAAMK,cAAc,GAAGb,IAAI,CAACC,GAAL,CACrB,KAAKnC,QAAL,GAAgB0C,oBADK,EAErB,KAAKxC,qBAFgB,CAAvB;AAIA,UAAM4C,QAAQ,GAAG,MAAM,KAAKhD,IAAL,CAAU4C,oBAAV,EAAgCK,cAAhC,CAAvB;AAEA,UAAMC,MAAM,GAAG;AAACzD,MAAAA,MAAM,EAAEuD,QAAT;AAAmBG,MAAAA,MAAM,EAAE,CAA3B;AAA8B5D,MAAAA,IAAI,EAAEyD,QAAQ,CAACrD;AAA7C,KAAf;AACA,UAAMyD,WAAW,GAAG,MAAMxE,UAAU,CAACsE,MAAD,EAAS/C,OAAT,CAApC;AAEA,WAAOiD,WAAW,CAACT,UAAnB;AACD;;AAEe,QAAVU,UAAU,GAA0B;AACxC,UAAMC,UAAU,GAAGtF,aAAa,CAAC2B,MAAd,GAAuB,CAA1C;AACA,UAAM4D,UAAU,GAAG,MAAM,KAAKvD,IAAL,CAAU,KAAKE,QAAL,GAAgBoD,UAA1B,EAAsCA,UAAtC,CAAzB;AAEA,UAAMhD,KAAK,GAAGiD,UAAU,CAACzD,KAAX,CAAiB,CAAjB,EAAoBS,QAApB,EAAd;;AACA,QAAID,KAAK,KAAKtC,aAAd,EAA6B;AAC3B,YAAM,IAAIwC,KAAJ,CAAW,oCAAmCF,KAAM,GAApD,CAAN;AACD;;AAED,UAAMkD,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAKxD,QAAL,GAAgBsD,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAG1F,aAAa,CAAC2B,MAAnC,EAA2C;AACzC,YAAM,IAAIa,KAAJ,CAAW,yBAAwBkD,cAAe,EAAlD,CAAN;AACD;;AAED,UAAMC,WAAW,GAAG,MAAM,KAAK3D,IAAL,CAAU0D,cAAV,EAA0BF,YAA1B,CAA1B;AAGA,UAAM;AAACI,MAAAA;AAAD,QAAapF,kBAAkB,CAACmF,WAAD,CAArC;AACA,WAAOC,QAAP;AACD;;AAzLgC","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetSchema} from '../schema/schema';\nimport {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';\nimport {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';\nimport {\n ParquetBuffer,\n ParquetCompression,\n ParquetData,\n PrimitiveType,\n ParquetOptions\n} from '../schema/declare';\nimport {fstat, fopen, fread, fclose} from '../utils/file-utils';\nimport {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';\nimport {decodeDataPages, decodePage} from './decoders';\n\nconst DEFAULT_DICTIONARY_SIZE = 1e6;\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n public defaultDictionarySize: number;\n\n static async openFile(filePath: string): Promise<ParquetEnvelopeReader> {\n const fileStat = await fstat(filePath);\n const fileDescriptor = await fopen(filePath);\n\n const readFn = fread.bind(undefined, fileDescriptor);\n const closeFn = fclose.bind(undefined, fileDescriptor);\n\n return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);\n }\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number,\n options?: any\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n this.defaultDictionarySize = options?.defaultDictionarySize || DEFAULT_DICTIONARY_SIZE;\n }\n\n async readHeader(): Promise<void> {\n const buffer = await this.read(0, PARQUET_MAGIC.length);\n\n const magic = buffer.toString();\n switch (magic) {\n case PARQUET_MAGIC:\n break;\n case PARQUET_MAGIC_ENCRYPTED:\n throw new Error('Encrypted parquet file not supported');\n default:\n throw new Error(`Invalid parquet file (magic=${magic})`);\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n /**\n * Do reading of parquet file's column chunk\n * @param schema\n * @param colChunk\n */\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n\n if (type !== field.primitiveType) {\n throw new Error(`chunk type not matching schema: ${type}`);\n }\n\n const compression: ParquetCompression = getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n\n if (!colChunk.file_path) {\n pagesSize = Math.min(\n this.fileSize - pagesOffset,\n Number(colChunk.meta_data?.total_compressed_size)\n );\n }\n\n const options: ParquetOptions = {\n type,\n rLevelMax: field.rLevelMax,\n dLevelMax: field.dLevelMax,\n compression,\n column: field,\n numValues: colChunk.meta_data?.num_values,\n dictionary: []\n };\n\n let dictionary;\n\n const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;\n\n if (dictionaryPageOffset) {\n const dictionaryOffset = Number(dictionaryPageOffset);\n // Getting dictionary from column chunk to iterate all over indexes to get dataPage values.\n dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);\n }\n\n dictionary = options.dictionary?.length ? options.dictionary : dictionary;\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n return await decodeDataPages(pagesBuf, {...options, dictionary});\n }\n\n /**\n * Getting dictionary for allows to flatten values by indices.\n * @param dictionaryPageOffset\n * @param options\n * @param pagesOffset\n * @returns\n */\n async getDictionary(\n dictionaryPageOffset: number,\n options: ParquetOptions,\n pagesOffset: number\n ): Promise<string[]> {\n if (dictionaryPageOffset === 0) {\n // dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);\n // pagesBuf = await this.read(pagesOffset, dictionarySize);\n\n // In this case we are working with parquet-mr files format. Problem is described below:\n // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding\n // We need to get dictionary page from column chunk if it exists.\n // Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.\n return [];\n }\n\n const dictionarySize = Math.min(\n this.fileSize - dictionaryPageOffset,\n this.defaultDictionarySize\n );\n const pagesBuf = await this.read(dictionaryPageOffset, dictionarySize);\n\n const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};\n const decodedPage = await decodePage(cursor, options);\n\n return decodedPage.dictionary!;\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n const magic = trailerBuf.slice(4).toString();\n if (magic !== PARQUET_MAGIC) {\n throw new Error(`Not a valid parquet file (magic=\"${magic})`);\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error(`Invalid metadata size ${metadataOffset}`);\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n"],"file":"parquet-envelope-reader.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/parser/parquet-envelope-reader.ts"],"names":["PARQUET_MAGIC","PARQUET_MAGIC_ENCRYPTED","CompressionCodec","Type","decodeFileMetadata","getThriftEnum","fieldIndexOf","decodeDataPages","decodePage","DEFAULT_DICTIONARY_SIZE","ParquetEnvelopeReader","openBuffer","buffer","readFn","position","length","Promise","resolve","slice","closeFn","constructor","read","close","fileSize","options","defaultDictionarySize","readHeader","magic","toString","Error","readRowGroup","schema","rowGroup","columnList","rowCount","Number","num_rows","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","join","readColumnChunk","file_path","undefined","field","findField","type","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","Math","min","rLevelMax","dLevelMax","column","numValues","num_values","dictionary","dictionaryPageOffset","dictionary_page_offset","dictionaryOffset","getDictionary","pagesBuf","dictionarySize","cursor","offset","size","decodedPage","readFooter","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","metadata"],"mappings":";AAEA,SAAQA,aAAR,EAAuBC,uBAAvB,QAAqD,iBAArD;AACA,SAAqBC,gBAArB,EAA+DC,IAA/D,QAA0E,mBAA1E;AAQA,SAAQC,kBAAR,EAA4BC,aAA5B,EAA2CC,YAA3C,QAA8D,qBAA9D;AACA,SAAQC,eAAR,EAAyBC,UAAzB,QAA0C,YAA1C;AAEA,MAAMC,uBAAuB,GAAG,GAAhC;AAQA,OAAO,MAAMC,qBAAN,CAA4B;AAUV,eAAVC,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMC,MAAM,GAAG,CAACC,QAAD,EAAmBC,MAAnB,KACbC,OAAO,CAACC,OAAR,CAAgBL,MAAM,CAACM,KAAP,CAAaJ,QAAb,EAAuBA,QAAQ,GAAGC,MAAlC,CAAhB,CADF;;AAEA,UAAMI,OAAO,GAAG,MAAMH,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAIP,qBAAJ,CAA0BG,MAA1B,EAAkCM,OAAlC,EAA2CP,MAAM,CAACG,MAAlD,CAAP;AACD;;AAEDK,EAAAA,WAAW,CACTC,IADS,EAETC,KAFS,EAGTC,QAHS,EAITC,OAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,IAAL,GAAYA,IAAZ;AACA,SAAKC,KAAL,GAAaA,KAAb;AACA,SAAKC,QAAL,GAAgBA,QAAhB;AACA,SAAKE,qBAAL,GAA6B,CAAAD,OAAO,SAAP,IAAAA,OAAO,WAAP,YAAAA,OAAO,CAAEC,qBAAT,KAAkChB,uBAA/D;AACD;;AAEe,QAAViB,UAAU,GAAkB;AAChC,UAAMd,MAAM,GAAG,MAAM,KAAKS,IAAL,CAAU,CAAV,EAAarB,aAAa,CAACe,MAA3B,CAArB;AAEA,UAAMY,KAAK,GAAGf,MAAM,CAACgB,QAAP,EAAd;;AACA,YAAQD,KAAR;AACE,WAAK3B,aAAL;AACE;;AACF,WAAKC,uBAAL;AACE,cAAM,IAAI4B,KAAJ,CAAU,sCAAV,CAAN;;AACF;AACE,cAAM,IAAIA,KAAJ,uCAAyCF,KAAzC,OAAN;AANJ;AAQD;;AAEiB,QAAZG,YAAY,CAChBC,MADgB,EAEhBC,QAFgB,EAGhBC,UAHgB,EAIQ;AACxB,UAAMrB,MAAqB,GAAG;AAC5BsB,MAAAA,QAAQ,EAAEC,MAAM,CAACH,QAAQ,CAACI,QAAV,CADY;AAE5BC,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBN,QAAQ,CAACO,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAIV,UAAU,CAAClB,MAAX,GAAoB,CAApB,IAAyBT,YAAY,CAAC2B,UAAD,EAAaS,MAAb,CAAZ,GAAoC,CAAjE,EAAoE;AAClE;AACD;;AACD9B,MAAAA,MAAM,CAACyB,UAAP,CAAkBK,MAAM,CAAEE,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBd,MAArB,EAA6BO,QAA7B,CAA1C;AACD;;AACD,WAAO1B,MAAP;AACD;;AAOoB,QAAfiC,eAAe,CAACd,MAAD,EAAwBO,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACQ,SAAT,KAAuBC,SAAvB,IAAoCT,QAAQ,CAACQ,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAIjB,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAMmB,KAAK,GAAGjB,MAAM,CAACkB,SAAP,wBAAiBX,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMO,IAAmB,GAAG7C,aAAa,CAACF,IAAD,0BAAOmC,QAAQ,CAACG,SAAhB,yDAAO,qBAAoBS,IAA3B,CAAzC;;AAEA,QAAIA,IAAI,KAAKF,KAAK,CAACG,aAAnB,EAAkC;AAChC,YAAM,IAAItB,KAAJ,2CAA6CqB,IAA7C,EAAN;AACD;;AAED,UAAME,WAA+B,GAAG/C,aAAa,CACnDH,gBADmD,0BAEnDoC,QAAQ,CAACG,SAF0C,yDAEnD,qBAAoBY,KAF+B,CAArD;AAKA,UAAMC,WAAW,GAAGnB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBc,gBAArB,CAA1B;AACA,QAAIC,SAAS,GAAGrB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBgB,qBAArB,CAAtB;;AAEA,QAAI,CAACnB,QAAQ,CAACQ,SAAd,EAAyB;AAAA;;AACvBU,MAAAA,SAAS,GAAGE,IAAI,CAACC,GAAL,CACV,KAAKpC,QAAL,GAAgB+B,WADN,EAEVnB,MAAM,yBAACG,QAAQ,CAACG,SAAV,yDAAC,qBAAoBgB,qBAArB,CAFI,CAAZ;AAID;;AAED,UAAMjC,OAAuB,GAAG;AAC9B0B,MAAAA,IAD8B;AAE9BU,MAAAA,SAAS,EAAEZ,KAAK,CAACY,SAFa;AAG9BC,MAAAA,SAAS,EAAEb,KAAK,CAACa,SAHa;AAI9BT,MAAAA,WAJ8B;AAK9BU,MAAAA,MAAM,EAAEd,KALsB;AAM9Be,MAAAA,SAAS,0BAAEzB,QAAQ,CAACG,SAAX,yDAAE,qBAAoBuB,UAND;AAO9BC,MAAAA,UAAU,EAAE;AAPkB,KAAhC;AAUA,QAAIA,UAAJ;AAEA,UAAMC,oBAAoB,GAAG5B,QAAH,aAAGA,QAAH,+CAAGA,QAAQ,CAAEG,SAAb,yDAAG,qBAAqB0B,sBAAlD;;AAEA,QAAID,oBAAJ,EAA0B;AACxB,YAAME,gBAAgB,GAAGjC,MAAM,CAAC+B,oBAAD,CAA/B;AAEAD,MAAAA,UAAU,GAAG,MAAM,KAAKI,aAAL,CAAmBD,gBAAnB,EAAqC5C,OAArC,EAA8C8B,WAA9C,CAAnB;AACD;;AAEDW,IAAAA,UAAU,GAAG,uBAAAzC,OAAO,CAACyC,UAAR,oEAAoBlD,MAApB,GAA6BS,OAAO,CAACyC,UAArC,GAAkDA,UAA/D;AACA,UAAMK,QAAQ,GAAG,MAAM,KAAKjD,IAAL,CAAUiC,WAAV,EAAuBE,SAAvB,CAAvB;AACA,WAAO,MAAMjD,eAAe,CAAC+D,QAAD,EAAW,EAAC,GAAG9C,OAAJ;AAAayC,MAAAA;AAAb,KAAX,CAA5B;AACD;;AASkB,QAAbI,aAAa,CACjBH,oBADiB,EAEjB1C,OAFiB,EAGjB8B,WAHiB,EAIE;AACnB,QAAIY,oBAAoB,KAAK,CAA7B,EAAgC;AAQ9B,aAAO,EAAP;AACD;;AAED,UAAMK,cAAc,GAAGb,IAAI,CAACC,GAAL,CACrB,KAAKpC,QAAL,GAAgB2C,oBADK,EAErB,KAAKzC,qBAFgB,CAAvB;AAIA,UAAM6C,QAAQ,GAAG,MAAM,KAAKjD,IAAL,CAAU6C,oBAAV,EAAgCK,cAAhC,CAAvB;AAEA,UAAMC,MAAM,GAAG;AAAC5D,MAAAA,MAAM,EAAE0D,QAAT;AAAmBG,MAAAA,MAAM,EAAE,CAA3B;AAA8BC,MAAAA,IAAI,EAAEJ,QAAQ,CAACvD;AAA7C,KAAf;AACA,UAAM4D,WAAW,GAAG,MAAMnE,UAAU,CAACgE,MAAD,EAAShD,OAAT,CAApC;AAEA,WAAOmD,WAAW,CAACV,UAAnB;AACD;;AAEe,QAAVW,UAAU,GAA0B;AACxC,UAAMC,UAAU,GAAG7E,aAAa,CAACe,MAAd,GAAuB,CAA1C;AACA,UAAM+D,UAAU,GAAG,MAAM,KAAKzD,IAAL,CAAU,KAAKE,QAAL,GAAgBsD,UAA1B,EAAsCA,UAAtC,CAAzB;AAEA,UAAMlD,KAAK,GAAGmD,UAAU,CAAC5D,KAAX,CAAiB,CAAjB,EAAoBU,QAApB,EAAd;;AACA,QAAID,KAAK,KAAK3B,aAAd,EAA6B;AAC3B,YAAM,IAAI6B,KAAJ,6CAA8CF,KAA9C,OAAN;AACD;;AAED,UAAMoD,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAK1D,QAAL,GAAgBwD,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAGjF,aAAa,CAACe,MAAnC,EAA2C;AACzC,YAAM,IAAIc,KAAJ,iCAAmCoD,cAAnC,EAAN;AACD;;AAED,UAAMC,WAAW,GAAG,MAAM,KAAK7D,IAAL,CAAU4D,cAAV,EAA0BF,YAA1B,CAA1B;AAGA,UAAM;AAACI,MAAAA;AAAD,QAAa/E,kBAAkB,CAAC8E,WAAD,CAArC;AACA,WAAOC,QAAP;AACD;;AA/KgC","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetSchema} from '../schema/schema';\nimport {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';\nimport {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';\nimport {\n ParquetBuffer,\n ParquetCompression,\n ParquetData,\n PrimitiveType,\n ParquetOptions\n} from '../schema/declare';\nimport {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';\nimport {decodeDataPages, decodePage} from './decoders';\n\nconst DEFAULT_DICTIONARY_SIZE = 1e6;\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n public defaultDictionarySize: number;\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number,\n options?: any\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n this.defaultDictionarySize = options?.defaultDictionarySize || DEFAULT_DICTIONARY_SIZE;\n }\n\n async readHeader(): Promise<void> {\n const buffer = await this.read(0, PARQUET_MAGIC.length);\n\n const magic = buffer.toString();\n switch (magic) {\n case PARQUET_MAGIC:\n break;\n case PARQUET_MAGIC_ENCRYPTED:\n throw new Error('Encrypted parquet file not supported');\n default:\n throw new Error(`Invalid parquet file (magic=${magic})`);\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n /**\n * Do reading of parquet file's column chunk\n * @param schema\n * @param colChunk\n */\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n\n if (type !== field.primitiveType) {\n throw new Error(`chunk type not matching schema: ${type}`);\n }\n\n const compression: ParquetCompression = getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n\n if (!colChunk.file_path) {\n pagesSize = Math.min(\n this.fileSize - pagesOffset,\n Number(colChunk.meta_data?.total_compressed_size)\n );\n }\n\n const options: ParquetOptions = {\n type,\n rLevelMax: field.rLevelMax,\n dLevelMax: field.dLevelMax,\n compression,\n column: field,\n numValues: colChunk.meta_data?.num_values,\n dictionary: []\n };\n\n let dictionary;\n\n const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;\n\n if (dictionaryPageOffset) {\n const dictionaryOffset = Number(dictionaryPageOffset);\n // Getting dictionary from column chunk to iterate all over indexes to get dataPage values.\n dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);\n }\n\n dictionary = options.dictionary?.length ? options.dictionary : dictionary;\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n return await decodeDataPages(pagesBuf, {...options, dictionary});\n }\n\n /**\n * Getting dictionary for allows to flatten values by indices.\n * @param dictionaryPageOffset\n * @param options\n * @param pagesOffset\n * @returns\n */\n async getDictionary(\n dictionaryPageOffset: number,\n options: ParquetOptions,\n pagesOffset: number\n ): Promise<string[]> {\n if (dictionaryPageOffset === 0) {\n // dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);\n // pagesBuf = await this.read(pagesOffset, dictionarySize);\n\n // In this case we are working with parquet-mr files format. Problem is described below:\n // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding\n // We need to get dictionary page from column chunk if it exists.\n // Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.\n return [];\n }\n\n const dictionarySize = Math.min(\n this.fileSize - dictionaryPageOffset,\n this.defaultDictionarySize\n );\n const pagesBuf = await this.read(dictionaryPageOffset, dictionarySize);\n\n const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};\n const decodedPage = await decodePage(cursor, options);\n\n return decodedPage.dictionary!;\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n const magic = trailerBuf.slice(4).toString();\n if (magic !== PARQUET_MAGIC) {\n throw new Error(`Not a valid parquet file (magic=\"${magic})`);\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error(`Invalid metadata size ${metadataOffset}`);\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n"],"file":"parquet-envelope-reader.js"}
@@ -48,19 +48,6 @@ export class ParquetReader {
48
48
  }
49
49
  }
50
50
 
51
- static async openFile(filePath) {
52
- const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);
53
-
54
- try {
55
- await envelopeReader.readHeader();
56
- const metadata = await envelopeReader.readFooter();
57
- return new ParquetReader(metadata, envelopeReader);
58
- } catch (err) {
59
- await envelopeReader.close();
60
- throw err;
61
- }
62
- }
63
-
64
51
  static async openBuffer(buffer) {
65
52
  const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);
66
53
 
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/parser/parquet-reader.ts"],"names":["ParquetEnvelopeReader","ParquetSchema","ParquetCursor","PARQUET_VERSION","decodeSchema","Symbol","asyncIterator","ParquetReader","openBlob","blob","readFn","start","length","arrayBuffer","slice","Buffer","from","closeFn","size","envelopeReader","readHeader","metadata","readFooter","err","close","openArrayBuffer","byteLength","openFile","filePath","openBuffer","buffer","constructor","version","Error","root","schema","num_children","getCursor","columnList","map","x","Array","isArray","getRowCount","Number","num_rows","getSchema","getMetadata","md","kv","key_value_metadata","key","value"],"mappings":";;;;AACA,SAAQA,qBAAR,QAAoC,2BAApC;AAEA,SAAQC,aAAR,QAA4B,kBAA5B;AACA,SAAQC,aAAR,QAA4B,kBAA5B;AACA,SAAQC,eAAR,QAA8B,iBAA9B;AACA,SAAQC,YAAR,QAA2B,YAA3B;wBAyKGC,MAAM,CAACC,a;AAhKV,OAAO,MAAMC,aAAN,CAAmD;AAInC,eAARC,QAAQ,CAAIC,IAAJ,EAA2C;AAC9D,UAAMC,MAAM,GAAG,OAAOC,KAAP,EAAsBC,MAAtB,KAAyC;AACtD,YAAMC,WAAW,GAAG,MAAMJ,IAAI,CAACK,KAAL,CAAWH,KAAX,EAAkBA,KAAK,GAAGC,MAA1B,EAAkCC,WAAlC,EAA1B;AACA,aAAOE,MAAM,CAACC,IAAP,CAAYH,WAAZ,CAAP;AACD,KAHD;;AAIA,UAAMI,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGT,IAAI,CAACS,IAAlB;AACA,UAAMC,cAAc,GAAG,IAAInB,qBAAJ,CAA0BU,MAA1B,EAAkCO,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMC,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAkBc,QAAlB,EAA4BF,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAK2B,eAAfE,eAAe,CAAIZ,WAAJ,EAAyD;AACnF,UAAMH,MAAM,GAAG,OAAOC,KAAP,EAAsBC,MAAtB,KAAyCG,MAAM,CAACC,IAAP,CAAYH,WAAZ,EAAyBF,KAAzB,EAAgCC,MAAhC,CAAxD;;AACA,UAAMK,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGL,WAAW,CAACa,UAAzB;AACA,UAAMP,cAAc,GAAG,IAAInB,qBAAJ,CAA0BU,MAA1B,EAAkCO,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMC,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAkBc,QAAlB,EAA4BF,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAMoB,eAARI,QAAQ,CAAIC,QAAJ,EAAiD;AACpE,UAAMT,cAAc,GAAG,MAAMnB,qBAAqB,CAAC2B,QAAtB,CAA+BC,QAA/B,CAA7B;;AACA,QAAI;AACF,YAAMT,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAqBc,QAArB,EAA+BF,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAEsB,eAAVM,UAAU,CAAIC,MAAJ,EAA+C;AACpE,UAAMX,cAAc,GAAG,MAAMnB,qBAAqB,CAAC6B,UAAtB,CAAiCC,MAAjC,CAA7B;;AACA,QAAI;AACF,YAAMX,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAqBc,QAArB,EAA+BF,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAYDQ,EAAAA,WAAW,CAACV,QAAD,EAAyBF,cAAzB,EAAgE;AAAA;;AAAA;;AAAA;;AACzE,QAAIE,QAAQ,CAACW,OAAT,KAAqB7B,eAAzB,EAA0C;AACxC,YAAM,IAAI8B,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAED,SAAKZ,QAAL,GAAgBA,QAAhB;AACA,SAAKF,cAAL,GAAsBA,cAAtB;AACA,UAAMe,IAAI,GAAG,KAAKb,QAAL,CAAcc,MAAd,CAAqB,CAArB,CAAb;AACA,UAAM;AAACA,MAAAA;AAAD,QAAW/B,YAAY,CAAC,KAAKiB,QAAL,CAAcc,MAAf,EAAuB,CAAvB,EAA0BD,IAAI,CAACE,YAA/B,CAA7B;AACA,SAAKD,MAAL,GAAc,IAAIlC,aAAJ,CAAkBkC,MAAlB,CAAd;AACD;;AAMU,QAALX,KAAK,GAAkB;AAC3B,UAAM,KAAKL,cAAL,CAAoBK,KAApB,EAAN;AAGD;;AAeDa,EAAAA,SAAS,CAACC,UAAD,EAAgE;AACvE,QAAI,CAACA,UAAL,EAAiB;AAEfA,MAAAA,UAAU,GAAG,EAAb;AACD;;AAGDA,IAAAA,UAAU,GAAGA,UAAU,CAACC,GAAX,CAAgBC,CAAD,IAAQC,KAAK,CAACC,OAAN,CAAcF,CAAd,IAAmBA,CAAnB,GAAuB,CAACA,CAAD,CAA9C,CAAb;AAEA,WAAO,IAAItC,aAAJ,CACL,KAAKmB,QADA,EAEL,KAAKF,cAFA,EAGL,KAAKgB,MAHA,EAILG,UAJK,CAAP;AAMD;;AAMDK,EAAAA,WAAW,GAAW;AACpB,WAAOC,MAAM,CAAC,KAAKvB,QAAL,CAAcwB,QAAf,CAAb;AACD;;AAKDC,EAAAA,SAAS,GAAkB;AACzB,WAAO,KAAKX,MAAZ;AACD;;AAKDY,EAAAA,WAAW,GAA2B;AACpC,UAAMC,EAA0B,GAAG,EAAnC;;AACA,SAAK,MAAMC,EAAX,IAAiB,KAAK5B,QAAL,CAAc6B,kBAA/B,EAAoD;AAClDF,MAAAA,EAAE,CAACC,EAAE,CAACE,GAAJ,CAAF,GAAaF,EAAE,CAACG,KAAhB;AACD;;AACD,WAAOJ,EAAP;AACD;;AAMD,4BAA2C;AACzC,WAAO,KAAKX,SAAL,GAAiBhC,MAAM,CAACC,aAAxB,GAAP;AACD;;AAlKuD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetEnvelopeReader} from './parquet-envelope-reader';\nimport {FileMetaData} from '../parquet-thrift';\nimport {ParquetSchema} from '../schema/schema';\nimport {ParquetCursor} from './parquet-cursor';\nimport {PARQUET_VERSION} from '../../constants';\nimport {decodeSchema} from './decoders';\n\n/**\n * A parquet reader allows retrieving the rows from a parquet file in order.\n * The basic usage is to create a reader and then retrieve a cursor/iterator\n * which allows you to consume row after row until all rows have been read. It is\n * important that you call close() after you are finished reading the file to\n * avoid leaking file descriptors.\n */\nexport class ParquetReader<T> implements AsyncIterable<T> {\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openBlob<T>(blob: Blob): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => {\n const arrayBuffer = await blob.slice(start, start + length).arrayBuffer();\n return Buffer.from(arrayBuffer);\n };\n const closeFn = async () => {};\n const size = blob.size;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);\n const closeFn = async () => {};\n const size = arrayBuffer.byteLength;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * Open the parquet file pointed to by the specified path and return a new\n * parquet reader\n */\n static async openFile<T>(filePath: string): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is not recommended to call this constructor directly except for advanced\n * and internal use cases. Consider using one of the open{File,Buffer} methods\n * instead\n */\n constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {\n if (metadata.version !== PARQUET_VERSION) {\n throw new Error('invalid parquet version');\n }\n\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n const root = this.metadata.schema[0];\n const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);\n this.schema = new ParquetSchema(schema);\n }\n\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n async close(): Promise<void> {\n await this.envelopeReader.close();\n // this.envelopeReader = null;\n // this.metadata = null;\n }\n\n /**\n * Return a cursor to the file. You may open more than one cursor and use\n * them concurrently. All cursors become invalid once close() is called on\n * the reader object.\n *\n * The required_columns parameter controls which columns are actually read\n * from disk. An empty array or no value implies all columns. A list of column\n * names means that only those columns should be loaded from disk.\n */\n getCursor(): ParquetCursor<T>;\n // @ts-ignore\n getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;\n getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;\n getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {\n if (!columnList) {\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = [];\n }\n\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));\n\n return new ParquetCursor<T>(\n this.metadata,\n this.envelopeReader,\n this.schema,\n columnList as string[][]\n );\n }\n\n /**\n * Return the number of rows in this file. Note that the number of rows is\n * not neccessarily equal to the number of rows in each column.\n */\n getRowCount(): number {\n return Number(this.metadata.num_rows);\n }\n\n /**\n * Returns the ParquetSchema for this file\n */\n getSchema(): ParquetSchema {\n return this.schema;\n }\n\n /**\n * Returns the user (key/value) metadata for this file\n */\n getMetadata(): Record<string, string> {\n const md: Record<string, string> = {};\n for (const kv of this.metadata.key_value_metadata!) {\n md[kv.key] = kv.value!;\n }\n return md;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.getCursor()[Symbol.asyncIterator]();\n }\n}\n"],"file":"parquet-reader.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/parser/parquet-reader.ts"],"names":["ParquetEnvelopeReader","ParquetSchema","ParquetCursor","PARQUET_VERSION","decodeSchema","Symbol","asyncIterator","ParquetReader","openBlob","blob","readFn","start","length","arrayBuffer","slice","Buffer","from","closeFn","size","envelopeReader","readHeader","metadata","readFooter","err","close","openArrayBuffer","byteLength","openBuffer","buffer","constructor","version","Error","root","schema","num_children","getCursor","columnList","map","x","Array","isArray","getRowCount","Number","num_rows","getSchema","getMetadata","md","kv","key_value_metadata","key","value"],"mappings":";;;;AACA,SAAQA,qBAAR,QAAoC,2BAApC;AAEA,SAAQC,aAAR,QAA4B,kBAA5B;AACA,SAAQC,aAAR,QAA4B,kBAA5B;AACA,SAAQC,eAAR,QAA8B,iBAA9B;AACA,SAAQC,YAAR,QAA2B,YAA3B;wBAyJGC,MAAM,CAACC,a;AAhJV,OAAO,MAAMC,aAAN,CAAmD;AAInC,eAARC,QAAQ,CAAIC,IAAJ,EAA2C;AAC9D,UAAMC,MAAM,GAAG,OAAOC,KAAP,EAAsBC,MAAtB,KAAyC;AACtD,YAAMC,WAAW,GAAG,MAAMJ,IAAI,CAACK,KAAL,CAAWH,KAAX,EAAkBA,KAAK,GAAGC,MAA1B,EAAkCC,WAAlC,EAA1B;AACA,aAAOE,MAAM,CAACC,IAAP,CAAYH,WAAZ,CAAP;AACD,KAHD;;AAIA,UAAMI,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGT,IAAI,CAACS,IAAlB;AACA,UAAMC,cAAc,GAAG,IAAInB,qBAAJ,CAA0BU,MAA1B,EAAkCO,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMC,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAkBc,QAAlB,EAA4BF,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAK2B,eAAfE,eAAe,CAAIZ,WAAJ,EAAyD;AACnF,UAAMH,MAAM,GAAG,OAAOC,KAAP,EAAsBC,MAAtB,KAAyCG,MAAM,CAACC,IAAP,CAAYH,WAAZ,EAAyBF,KAAzB,EAAgCC,MAAhC,CAAxD;;AACA,UAAMK,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGL,WAAW,CAACa,UAAzB;AACA,UAAMP,cAAc,GAAG,IAAInB,qBAAJ,CAA0BU,MAA1B,EAAkCO,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMC,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAkBc,QAAlB,EAA4BF,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAEsB,eAAVI,UAAU,CAAIC,MAAJ,EAA+C;AACpE,UAAMT,cAAc,GAAG,MAAMnB,qBAAqB,CAAC2B,UAAtB,CAAiCC,MAAjC,CAA7B;;AACA,QAAI;AACF,YAAMT,cAAc,CAACC,UAAf,EAAN;AACA,YAAMC,QAAQ,GAAG,MAAMF,cAAc,CAACG,UAAf,EAAvB;AACA,aAAO,IAAIf,aAAJ,CAAqBc,QAArB,EAA+BF,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOI,GAAP,EAAY;AACZ,YAAMJ,cAAc,CAACK,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAYDM,EAAAA,WAAW,CAACR,QAAD,EAAyBF,cAAzB,EAAgE;AAAA;;AAAA;;AAAA;;AACzE,QAAIE,QAAQ,CAACS,OAAT,KAAqB3B,eAAzB,EAA0C;AACxC,YAAM,IAAI4B,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAED,SAAKV,QAAL,GAAgBA,QAAhB;AACA,SAAKF,cAAL,GAAsBA,cAAtB;AACA,UAAMa,IAAI,GAAG,KAAKX,QAAL,CAAcY,MAAd,CAAqB,CAArB,CAAb;AACA,UAAM;AAACA,MAAAA;AAAD,QAAW7B,YAAY,CAAC,KAAKiB,QAAL,CAAcY,MAAf,EAAuB,CAAvB,EAA0BD,IAAI,CAACE,YAA/B,CAA7B;AACA,SAAKD,MAAL,GAAc,IAAIhC,aAAJ,CAAkBgC,MAAlB,CAAd;AACD;;AAMU,QAALT,KAAK,GAAkB;AAC3B,UAAM,KAAKL,cAAL,CAAoBK,KAApB,EAAN;AAGD;;AAeDW,EAAAA,SAAS,CAACC,UAAD,EAAgE;AACvE,QAAI,CAACA,UAAL,EAAiB;AAEfA,MAAAA,UAAU,GAAG,EAAb;AACD;;AAGDA,IAAAA,UAAU,GAAGA,UAAU,CAACC,GAAX,CAAgBC,CAAD,IAAQC,KAAK,CAACC,OAAN,CAAcF,CAAd,IAAmBA,CAAnB,GAAuB,CAACA,CAAD,CAA9C,CAAb;AAEA,WAAO,IAAIpC,aAAJ,CACL,KAAKmB,QADA,EAEL,KAAKF,cAFA,EAGL,KAAKc,MAHA,EAILG,UAJK,CAAP;AAMD;;AAMDK,EAAAA,WAAW,GAAW;AACpB,WAAOC,MAAM,CAAC,KAAKrB,QAAL,CAAcsB,QAAf,CAAb;AACD;;AAKDC,EAAAA,SAAS,GAAkB;AACzB,WAAO,KAAKX,MAAZ;AACD;;AAKDY,EAAAA,WAAW,GAA2B;AACpC,UAAMC,EAA0B,GAAG,EAAnC;;AACA,SAAK,MAAMC,EAAX,IAAiB,KAAK1B,QAAL,CAAc2B,kBAA/B,EAAoD;AAClDF,MAAAA,EAAE,CAACC,EAAE,CAACE,GAAJ,CAAF,GAAaF,EAAE,CAACG,KAAhB;AACD;;AACD,WAAOJ,EAAP;AACD;;AAMD,4BAA2C;AACzC,WAAO,KAAKX,SAAL,GAAiB9B,MAAM,CAACC,aAAxB,GAAP;AACD;;AAlJuD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {ParquetEnvelopeReader} from './parquet-envelope-reader';\nimport {FileMetaData} from '../parquet-thrift';\nimport {ParquetSchema} from '../schema/schema';\nimport {ParquetCursor} from './parquet-cursor';\nimport {PARQUET_VERSION} from '../../constants';\nimport {decodeSchema} from './decoders';\n\n/**\n * A parquet reader allows retrieving the rows from a parquet file in order.\n * The basic usage is to create a reader and then retrieve a cursor/iterator\n * which allows you to consume row after row until all rows have been read. It is\n * important that you call close() after you are finished reading the file to\n * avoid leaking file descriptors.\n */\nexport class ParquetReader<T> implements AsyncIterable<T> {\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openBlob<T>(blob: Blob): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => {\n const arrayBuffer = await blob.slice(start, start + length).arrayBuffer();\n return Buffer.from(arrayBuffer);\n };\n const closeFn = async () => {};\n const size = blob.size;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);\n const closeFn = async () => {};\n const size = arrayBuffer.byteLength;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is not recommended to call this constructor directly except for advanced\n * and internal use cases. Consider using one of the open{File,Buffer} methods\n * instead\n */\n constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {\n if (metadata.version !== PARQUET_VERSION) {\n throw new Error('invalid parquet version');\n }\n\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n const root = this.metadata.schema[0];\n const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);\n this.schema = new ParquetSchema(schema);\n }\n\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n async close(): Promise<void> {\n await this.envelopeReader.close();\n // this.envelopeReader = null;\n // this.metadata = null;\n }\n\n /**\n * Return a cursor to the file. You may open more than one cursor and use\n * them concurrently. All cursors become invalid once close() is called on\n * the reader object.\n *\n * The required_columns parameter controls which columns are actually read\n * from disk. An empty array or no value implies all columns. A list of column\n * names means that only those columns should be loaded from disk.\n */\n getCursor(): ParquetCursor<T>;\n // @ts-ignore\n getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;\n getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;\n getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {\n if (!columnList) {\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = [];\n }\n\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));\n\n return new ParquetCursor<T>(\n this.metadata,\n this.envelopeReader,\n this.schema,\n columnList as string[][]\n );\n }\n\n /**\n * Return the number of rows in this file. Note that the number of rows is\n * not neccessarily equal to the number of rows in each column.\n */\n getRowCount(): number {\n return Number(this.metadata.num_rows);\n }\n\n /**\n * Returns the ParquetSchema for this file\n */\n getSchema(): ParquetSchema {\n return this.schema;\n }\n\n /**\n * Returns the user (key/value) metadata for this file\n */\n getMetadata(): Record<string, string> {\n const md: Record<string, string> = {};\n for (const kv of this.metadata.key_value_metadata!) {\n md[kv.key] = kv.value!;\n }\n return md;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.getCursor()[Symbol.asyncIterator]();\n }\n}\n"],"file":"parquet-reader.js"}
@@ -124,19 +124,19 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
124
124
  const typeDef = PARQUET_LOGICAL_TYPES[opts.type];
125
125
 
126
126
  if (!typeDef) {
127
- throw new Error(`invalid parquet type: ${opts.type}`);
127
+ throw new Error("invalid parquet type: ".concat(opts.type));
128
128
  }
129
129
 
130
130
  opts.encoding = opts.encoding || 'PLAIN';
131
131
 
132
132
  if (!(opts.encoding in PARQUET_CODECS)) {
133
- throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
133
+ throw new Error("unsupported parquet encoding: ".concat(opts.encoding));
134
134
  }
135
135
 
136
136
  opts.compression = opts.compression || 'UNCOMPRESSED';
137
137
 
138
138
  if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {
139
- throw new Error(`unsupported compression method: ${opts.compression}`);
139
+ throw new Error("unsupported compression method: ".concat(opts.compression));
140
140
  }
141
141
 
142
142
  const cpath = path.concat([name]);