@loaders.gl/parquet 3.3.0-alpha.5 → 3.3.0-alpha.7

This diff compares the contents of publicly available package versions as released to one of the supported registries, and is provided for informational purposes only.
Files changed (361)
  1. package/dist/dist.min.js +14 -14
  2. package/dist/dist.min.js.map +2 -2
  3. package/dist/es5/bundle.js +0 -1
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +3 -1
  6. package/dist/es5/constants.js.map +1 -1
  7. package/dist/es5/index.js +23 -39
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +2 -11
  10. package/dist/es5/lib/convert-schema.js.map +1 -1
  11. package/dist/es5/lib/parse-parquet.js +29 -72
  12. package/dist/es5/lib/parse-parquet.js.map +1 -1
  13. package/dist/es5/lib/read-array-buffer.js +0 -10
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -1
  15. package/dist/es5/lib/wasm/encode-parquet-wasm.js +0 -11
  16. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
  17. package/dist/es5/lib/wasm/load-wasm/index.js +0 -1
  18. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -1
  19. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +0 -14
  20. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  21. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +0 -10
  22. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  23. package/dist/es5/lib/wasm/parse-parquet-wasm.js +1 -19
  24. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  25. package/dist/es5/parquet-loader.js +2 -1
  26. package/dist/es5/parquet-loader.js.map +1 -1
  27. package/dist/es5/parquet-wasm-loader.js +2 -1
  28. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  29. package/dist/es5/parquet-wasm-writer.js +1 -3
  30. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  31. package/dist/es5/parquet-writer.js +1 -2
  32. package/dist/es5/parquet-writer.js.map +1 -1
  33. package/dist/es5/parquetjs/codecs/declare.js.map +1 -1
  34. package/dist/es5/parquetjs/codecs/dictionary.js +2 -9
  35. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  36. package/dist/es5/parquetjs/codecs/index.js +0 -8
  37. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  38. package/dist/es5/parquetjs/codecs/plain.js +1 -77
  39. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  40. package/dist/es5/parquetjs/codecs/rle.js +1 -39
  41. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  42. package/dist/es5/parquetjs/compression.js +5 -30
  43. package/dist/es5/parquetjs/compression.js.map +1 -1
  44. package/dist/es5/parquetjs/encoder/writer.js +31 -149
  45. package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
  46. package/dist/es5/parquetjs/file.js +3 -12
  47. package/dist/es5/parquetjs/file.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +0 -1
  49. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +0 -15
  51. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  52. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +0 -48
  53. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  54. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +0 -47
  55. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +0 -82
  57. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +0 -25
  59. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +0 -1
  61. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +0 -1
  63. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +0 -39
  65. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -51
  67. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/DateType.js +0 -15
  69. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  70. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +0 -26
  71. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -30
  73. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +0 -1
  75. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +0 -15
  77. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -1
  79. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +0 -59
  81. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +0 -15
  83. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/IntType.js +0 -26
  85. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +0 -15
  87. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +0 -26
  89. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/ListType.js +0 -15
  91. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +0 -85
  93. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/MapType.js +0 -15
  95. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +0 -15
  97. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  98. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +0 -15
  99. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  100. package/dist/es5/parquetjs/parquet-thrift/NullType.js +0 -15
  101. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  102. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +0 -25
  103. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  104. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +0 -30
  105. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  106. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +0 -54
  107. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  108. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +0 -31
  109. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  110. package/dist/es5/parquetjs/parquet-thrift/PageType.js +0 -1
  111. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -1
  112. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +0 -41
  113. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  114. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +0 -59
  115. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  116. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +0 -30
  117. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  118. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +0 -42
  119. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  120. package/dist/es5/parquetjs/parquet-thrift/StringType.js +0 -15
  121. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  122. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +0 -27
  123. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  124. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +0 -30
  125. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  126. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +0 -27
  127. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  128. package/dist/es5/parquetjs/parquet-thrift/Type.js +0 -1
  129. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -1
  130. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -15
  131. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  132. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +0 -15
  133. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  134. package/dist/es5/parquetjs/parquet-thrift/index.js +0 -86
  135. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
  136. package/dist/es5/parquetjs/parser/decoders.js +3 -82
  137. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  138. package/dist/es5/parquetjs/parser/parquet-cursor.js +5 -37
  139. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
  140. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +2 -88
  141. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  142. package/dist/es5/parquetjs/parser/parquet-reader.js +14 -67
  143. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  144. package/dist/es5/parquetjs/schema/declare.js +3 -7
  145. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  146. package/dist/es5/parquetjs/schema/schema.js +6 -34
  147. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  148. package/dist/es5/parquetjs/schema/shred.js +11 -41
  149. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  150. package/dist/es5/parquetjs/schema/types.js +3 -84
  151. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  152. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -2
  153. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
  154. package/dist/es5/parquetjs/utils/file-utils.js +1 -7
  155. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  156. package/dist/es5/parquetjs/utils/read-utils.js +6 -38
  157. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  158. package/dist/es5/workers/parquet-worker.js +0 -2
  159. package/dist/es5/workers/parquet-worker.js.map +1 -1
  160. package/dist/esm/bundle.js +1 -1
  161. package/dist/esm/bundle.js.map +1 -1
  162. package/dist/esm/constants.js +3 -0
  163. package/dist/esm/constants.js.map +1 -1
  164. package/dist/esm/index.js +10 -2
  165. package/dist/esm/index.js.map +1 -1
  166. package/dist/esm/lib/convert-schema.js +1 -7
  167. package/dist/esm/lib/convert-schema.js.map +1 -1
  168. package/dist/esm/lib/parse-parquet.js +2 -5
  169. package/dist/esm/lib/parse-parquet.js.map +1 -1
  170. package/dist/esm/lib/read-array-buffer.js +2 -1
  171. package/dist/esm/lib/read-array-buffer.js.map +1 -1
  172. package/dist/esm/lib/wasm/encode-parquet-wasm.js +1 -1
  173. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -1
  174. package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -1
  175. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  176. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  177. package/dist/esm/lib/wasm/parse-parquet-wasm.js +2 -3
  178. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
  179. package/dist/esm/parquet-loader.js +3 -1
  180. package/dist/esm/parquet-loader.js.map +1 -1
  181. package/dist/esm/parquet-wasm-loader.js +3 -1
  182. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  183. package/dist/esm/parquet-wasm-writer.js +2 -1
  184. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  185. package/dist/esm/parquet-writer.js +2 -2
  186. package/dist/esm/parquet-writer.js.map +1 -1
  187. package/dist/esm/parquetjs/codecs/declare.js.map +1 -1
  188. package/dist/esm/parquetjs/codecs/dictionary.js +2 -1
  189. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -1
  190. package/dist/esm/parquetjs/codecs/index.js +2 -0
  191. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  192. package/dist/esm/parquetjs/codecs/plain.js +2 -68
  193. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  194. package/dist/esm/parquetjs/codecs/rle.js +3 -29
  195. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  196. package/dist/esm/parquetjs/compression.js +9 -5
  197. package/dist/esm/parquetjs/compression.js.map +1 -1
  198. package/dist/esm/parquetjs/encoder/writer.js +21 -51
  199. package/dist/esm/parquetjs/encoder/writer.js.map +1 -1
  200. package/dist/esm/parquetjs/file.js +1 -0
  201. package/dist/esm/parquetjs/file.js.map +1 -1
  202. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  203. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  204. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +1 -8
  205. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  206. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +0 -44
  207. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  208. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +0 -42
  209. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  210. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +0 -82
  211. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  212. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +0 -18
  213. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  214. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  215. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  216. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  217. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  218. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +0 -34
  219. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  220. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -49
  221. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  222. package/dist/esm/parquetjs/parquet-thrift/DateType.js +1 -8
  223. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -1
  224. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +0 -19
  225. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  226. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -24
  227. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  228. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +1 -1
  229. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  230. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +1 -8
  231. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  232. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  233. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  234. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +2 -53
  235. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  236. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +1 -8
  237. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  238. package/dist/esm/parquetjs/parquet-thrift/IntType.js +0 -19
  239. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -1
  240. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +1 -8
  241. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  242. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +0 -19
  243. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  244. package/dist/esm/parquetjs/parquet-thrift/ListType.js +1 -8
  245. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -1
  246. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +0 -90
  247. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  248. package/dist/esm/parquetjs/parquet-thrift/MapType.js +1 -8
  249. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -1
  250. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +1 -8
  251. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  252. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +1 -8
  253. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  254. package/dist/esm/parquetjs/parquet-thrift/NullType.js +1 -8
  255. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -1
  256. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +0 -16
  257. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  258. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +0 -24
  259. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  260. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +0 -49
  261. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  262. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +0 -24
  263. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  264. package/dist/esm/parquetjs/parquet-thrift/PageType.js +1 -1
  265. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -1
  266. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +0 -33
  267. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  268. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +0 -59
  269. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  270. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +0 -24
  271. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  272. package/dist/esm/parquetjs/parquet-thrift/Statistics.js +0 -38
  273. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  274. package/dist/esm/parquetjs/parquet-thrift/StringType.js +1 -8
  275. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -1
  276. package/dist/esm/parquetjs/parquet-thrift/TimeType.js +0 -19
  277. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  278. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +0 -24
  279. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  280. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +0 -19
  281. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  282. package/dist/esm/parquetjs/parquet-thrift/Type.js +1 -1
  283. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -1
  284. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +1 -8
  285. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  286. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +1 -8
  287. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  288. package/dist/esm/parquetjs/parquet-thrift/index.js +1 -0
  289. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
  290. package/dist/esm/parquetjs/parser/decoders.js +9 -39
  291. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  292. package/dist/esm/parquetjs/parser/parquet-cursor.js +1 -13
  293. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -1
  294. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +6 -32
  295. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  296. package/dist/esm/parquetjs/parser/parquet-reader.js +1 -18
  297. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  298. package/dist/esm/parquetjs/schema/declare.js +4 -4
  299. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  300. package/dist/esm/parquetjs/schema/schema.js +3 -29
  301. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  302. package/dist/esm/parquetjs/schema/shred.js +7 -22
  303. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  304. package/dist/esm/parquetjs/schema/types.js +3 -78
  305. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  306. package/dist/esm/parquetjs/utils/buffer-utils.js +2 -1
  307. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -1
  308. package/dist/esm/parquetjs/utils/file-utils.js +1 -0
  309. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  310. package/dist/esm/parquetjs/utils/read-utils.js +5 -12
  311. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
  312. package/dist/esm/workers/parquet-worker.js.map +1 -1
  313. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +5 -1
  314. package/dist/lib/wasm/load-wasm/load-wasm-node.js +5 -1
  315. package/dist/parquet-worker.js +14 -14
  316. package/dist/parquet-worker.js.map +2 -2
  317. package/dist/parquetjs/codecs/index.js +5 -1
  318. package/dist/parquetjs/encoder/writer.d.ts +1 -0
  319. package/dist/parquetjs/encoder/writer.d.ts.map +1 -1
  320. package/dist/parquetjs/encoder/writer.js +5 -1
  321. package/dist/parquetjs/parquet-thrift/BsonType.js +5 -1
  322. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +5 -1
  323. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -1
  324. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +5 -1
  325. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +5 -1
  326. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -1
  327. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +5 -1
  328. package/dist/parquetjs/parquet-thrift/DateType.js +5 -1
  329. package/dist/parquetjs/parquet-thrift/DecimalType.js +5 -1
  330. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +5 -1
  331. package/dist/parquetjs/parquet-thrift/EnumType.js +5 -1
  332. package/dist/parquetjs/parquet-thrift/FileMetaData.js +5 -1
  333. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +5 -1
  334. package/dist/parquetjs/parquet-thrift/IntType.js +5 -1
  335. package/dist/parquetjs/parquet-thrift/JsonType.js +5 -1
  336. package/dist/parquetjs/parquet-thrift/KeyValue.js +5 -1
  337. package/dist/parquetjs/parquet-thrift/ListType.js +5 -1
  338. package/dist/parquetjs/parquet-thrift/LogicalType.js +5 -1
  339. package/dist/parquetjs/parquet-thrift/MapType.js +5 -1
  340. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +5 -1
  341. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +5 -1
  342. package/dist/parquetjs/parquet-thrift/NullType.js +5 -1
  343. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +5 -1
  344. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +5 -1
  345. package/dist/parquetjs/parquet-thrift/PageHeader.js +5 -1
  346. package/dist/parquetjs/parquet-thrift/PageLocation.js +5 -1
  347. package/dist/parquetjs/parquet-thrift/RowGroup.js +5 -1
  348. package/dist/parquetjs/parquet-thrift/SchemaElement.js +5 -1
  349. package/dist/parquetjs/parquet-thrift/SortingColumn.js +5 -1
  350. package/dist/parquetjs/parquet-thrift/Statistics.js +5 -1
  351. package/dist/parquetjs/parquet-thrift/StringType.js +5 -1
  352. package/dist/parquetjs/parquet-thrift/TimeType.js +5 -1
  353. package/dist/parquetjs/parquet-thrift/TimeUnit.js +5 -1
  354. package/dist/parquetjs/parquet-thrift/TimestampType.js +5 -1
  355. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +5 -1
  356. package/dist/parquetjs/parquet-thrift/UUIDType.js +5 -1
  357. package/dist/parquetjs/parquet-thrift/index.js +5 -1
  358. package/dist/parquetjs/schema/shred.js +5 -1
  359. package/dist/parquetjs/utils/file-utils.d.ts +2 -0
  360. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  361. package/package.json +6 -6
package/dist/esm/parquetjs/codecs/rle.js
@@ -1,31 +1,29 @@
+
+
 import varint from 'varint';
+
 export function encodeValues(type, values, opts) {
   if (!('bitWidth' in opts)) {
     throw new Error('bitWidth is required');
   }
-
   switch (type) {
     case 'BOOLEAN':
     case 'INT32':
     case 'INT64':
       values = values.map(x => parseInt(x, 10));
       break;
-
     default:
       throw new Error("unsupported type: ".concat(type));
   }
-
   let buf = Buffer.alloc(0);
   let run = [];
   let repeats = 0;
-
   for (let i = 0; i < values.length; i++) {
     if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
       if (run.length) {
         buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
         run = [];
       }
-
       repeats = 1;
     } else if (repeats > 0 && values[i] === values[i - 1]) {
       repeats += 1;
@@ -34,21 +32,17 @@ export function encodeValues(type, values, opts) {
         buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);
         repeats = 0;
       }
-
       run.push(values[i]);
     }
   }
-
   if (repeats) {
     buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);
   } else if (run.length) {
     buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
   }
-
   if (opts.disableEnvelope) {
     return buf;
   }
-
   const envelope = Buffer.alloc(buf.length + 4);
   envelope.writeUInt32LE(buf.length, undefined);
   buf.copy(envelope, 4);
@@ -58,17 +52,13 @@ export function decodeValues(type, cursor, count, opts) {
   if (!('bitWidth' in opts)) {
     throw new Error('bitWidth is required');
   }
-
   if (!opts.disableEnvelope) {
     cursor.offset += 4;
   }
-
   let values = [];
-
   while (values.length < count) {
     const header = varint.decode(cursor.buffer, cursor.offset);
     cursor.offset += varint.encodingLength(header);
-
     if (header & 1) {
       const count = (header >> 1) * 8;
       values.push(...decodeRunBitpacked(cursor, count, opts));
@@ -77,39 +67,30 @@ export function decodeValues(type, cursor, count, opts) {
       values.push(...decodeRunRepeated(cursor, count, opts));
     }
   }
-
   values = values.slice(0, count);
-
   if (values.length !== count) {
     throw new Error('invalid RLE encoding');
   }
-
   return values;
 }
-
 function decodeRunBitpacked(cursor, count, opts) {
   const bitWidth = opts.bitWidth;
-
   if (count % 8 !== 0) {
     throw new Error('must be a multiple of 8');
   }
 
   const values = new Array(count).fill(0);
-
   for (let b = 0; b < bitWidth * count; b++) {
     if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & 1 << b % 8) {
       values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;
     }
   }
-
   cursor.offset += bitWidth * (count / 8);
   return values;
 }
-
 function decodeRunRepeated(cursor, count, opts) {
   const bitWidth = opts.bitWidth;
   let value = 0;
-
   for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {
     value << 8;
     value += cursor.buffer[cursor.offset];
@@ -118,29 +99,22 @@ function decodeRunRepeated(cursor, count, opts) {
 
   return new Array(count).fill(value);
 }
-
 function encodeRunBitpacked(values, opts) {
   const bitWidth = opts.bitWidth;
-
   for (let i = 0; i < values.length % 8; i++) {
     values.push(0);
   }
-
   const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));
-
   for (let b = 0; b < bitWidth * values.length; b++) {
     if ((values[Math.floor(b / bitWidth)] & 1 << b % bitWidth) > 0) {
       buf[Math.floor(b / 8)] |= 1 << b % 8;
     }
   }
-
   return Buffer.concat([Buffer.from(varint.encode(values.length / 8 << 1 | 1)), buf]);
 }
-
 function encodeRunRepeated(value, count, opts) {
   const bitWidth = opts.bitWidth;
   const buf = Buffer.alloc(Math.ceil(bitWidth / 8));
-
   for (let i = 0; i < buf.length; i++) {
     buf.writeUInt8(value & 0xff, i);
     value >> 8;
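
Note: the only changes in rle.js above are blank lines added or removed; the RLE codec logic itself is untouched. For orientation, here is a minimal round-trip sketch of the encodeValues/decodeValues pair visible in this hunk. The deep import path and the sample values are illustrative assumptions, not a documented entry point of the package.

  // Hypothetical direct import of the internal ESM build shown above.
  import {encodeValues, decodeValues} from '@loaders.gl/parquet/dist/esm/parquetjs/codecs/rle.js';

  // Encode 16 identical INT32 values with a 1-bit width. The result is a Buffer
  // holding a 4-byte length envelope followed by the RLE/bit-packed runs.
  const values = new Array(16).fill(1);
  const encoded = encodeValues('INT32', values, {bitWidth: 1});

  // Decode through a cursor ({buffer, offset}) the way the page decoders do;
  // count and bitWidth must match what the writer used.
  const cursor = {buffer: encoded, offset: 0};
  const decoded = decodeValues('INT32', cursor, 16, {bitWidth: 1});
  // decoded should equal the original array of 16 ones.
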
package/dist/esm/parquetjs/codecs/rle.js.map
@@ -1 +1 @@
- {"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":"AAIA,OAAOA,MAAP,MAAmB,QAAnB;AAGA,OAAO,SAASC,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAYC,CAAD,IAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAA1B,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,QAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;AAED,OAAO,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,UAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAP,CAAcJ,MAAM,CAACK,MAArB,EAA6BL,MAAM,CAACE,MAApC,CAAf;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB5B,MAAM,CAACgC,cAAP,CAAsBH,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AACd,YAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGc,kBAAkB,CAACP,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AACL,YAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAxB;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGe,iBAAiB,CAACR,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACgC,KAAP,CAAa,CAAb,EAAgBR,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS8B,kBAAT,CACEP,MADF,EAEEC,KAFF,EAGEvB,I
AHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,MAAIT,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,QAAMF,MAAM,GAAG,IAAIkC,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGT,KAA/B,EAAsCY,CAAC,EAAvC,EAA2C;AACzC,QAAIb,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAAP,GAAgBY,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnEpC,MAAAA,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDV,EAAAA,MAAM,CAACE,MAAP,IAAiBQ,QAAQ,IAAIT,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAAS+B,iBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI5B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG0B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CtB,CAAC,EAA9C,EAAkD;AAEhD4B,IAAAA,KAAK,IAAI,CAAT;AACAA,IAAAA,KAAK,IAAIhB,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIS,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAASzB,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,QAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIjC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIwB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAtC,EAA8CwB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE3B,MAAAA,GAAG,CAAC+B,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO7B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAgB1C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2BwB,KAA3B,EAA0Cf,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,QAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACqC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B5B,CAA7B;AAEA4B,IAAAA,KAAK,IAAI,CAAT;AACD;;AAED,SAAOhC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAclB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && 
run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = 
Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
+ {"version":3,"file":"rle.js","names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"sources":["../../../../src/parquetjs/codecs/rle.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must 
be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), 
buf]);\n}\n"],"mappings":";;AAIA,OAAOA,MAAM,MAAM,QAAQ;;AAG3B,OAAO,SAASC,YAAY,CAC1BC,IAAmB,EACnBC,MAAa,EACbC,IAAyB,EACjB;EACR,IAAI,EAAE,UAAU,IAAIA,IAAI,CAAC,EAAE;IACzB,MAAM,IAAIC,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,QAAQH,IAAI;IACV,KAAK,SAAS;IACd,KAAK,OAAO;IACZ,KAAK,OAAO;MAEVC,MAAM,GAAGA,MAAM,CAACG,GAAG,CAAEC,CAAC,IAAKC,QAAQ,CAACD,CAAC,EAAE,EAAE,CAAC,CAAC;MAC3C;IAEF;MACE,MAAM,IAAIF,KAAK,6BAAsBH,IAAI,EAAG;EAAC;EAGjD,IAAIO,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC,CAAC,CAAC;EACzB,IAAIC,GAAU,GAAG,EAAE;EACnB,IAAIC,OAAO,GAAG,CAAC;EAEf,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGX,MAAM,CAACY,MAAM,EAAED,CAAC,EAAE,EAAE;IAGtC,IAAID,OAAO,KAAK,CAAC,IAAID,GAAG,CAACG,MAAM,GAAG,CAAC,KAAK,CAAC,IAAIZ,MAAM,CAACW,CAAC,CAAC,KAAKX,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAE;MAExE,IAAIF,GAAG,CAACG,MAAM,EAAE;QACdN,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAEQ,kBAAkB,CAACL,GAAG,EAAER,IAAI,CAAC,CAAC,CAAC;QACzDQ,GAAG,GAAG,EAAE;MACV;MACAC,OAAO,GAAG,CAAC;IACb,CAAC,MAAM,IAAIA,OAAO,GAAG,CAAC,IAAIV,MAAM,CAACW,CAAC,CAAC,KAAKX,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAE;MACrDD,OAAO,IAAI,CAAC;IACd,CAAC,MAAM;MAEL,IAAIA,OAAO,EAAE;QACXJ,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAES,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAC,CAAC,EAAED,OAAO,EAAET,IAAI,CAAC,CAAC,CAAC;QAC3ES,OAAO,GAAG,CAAC;MACb;MACAD,GAAG,CAACO,IAAI,CAAChB,MAAM,CAACW,CAAC,CAAC,CAAC;IACrB;EACF;EAEA,IAAID,OAAO,EAAE;IACXJ,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAES,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAM,GAAG,CAAC,CAAC,EAAEF,OAAO,EAAET,IAAI,CAAC,CAAC,CAAC;EACzF,CAAC,MAAM,IAAIQ,GAAG,CAACG,MAAM,EAAE;IACrBN,GAAG,GAAGC,MAAM,CAACM,MAAM,CAAC,CAACP,GAAG,EAAEQ,kBAAkB,CAACL,GAAG,EAAER,IAAI,CAAC,CAAC,CAAC;EAC3D;EAEA,IAAIA,IAAI,CAACgB,eAAe,EAAE;IACxB,OAAOX,GAAG;EACZ;EAEA,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAK,CAACF,GAAG,CAACM,MAAM,GAAG,CAAC,CAAC;EAC7CM,QAAQ,CAACC,aAAa,CAACb,GAAG,CAACM,MAAM,EAAEQ,SAAS,CAAC;EAC7Cd,GAAG,CAACe,IAAI,CAACH,QAAQ,EAAE,CAAC,CAAC;EAErB,OAAOA,QAAQ;AACjB;AAEA,OAAO,SAASI,YAAY,CAC1BvB,IAAmB,EACnBwB,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EACV,IAAI,EAAE,UAAU,IAAIA,IAAI,CAAC,EAAE;IACzB,MAAM,IAAIC,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,IAAI,CAACD,IAAI,CAACgB,eAAe,EAAE;IACzBM,MAAM,CAACE,MAAM,IAAI,CAAC;EACpB;EAEA,IAAIzB,MAAgB,GAAG,EAAE;EACzB,OAAOA,MAAM,CAACY,MAAM,GAAGY,KAAK,EAAE;IAC5B,MAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAM,CAACJ,MAAM,CAACK,MAAM,EAAEL,MAAM,CAACE,MAAM,CAAC;IAC1DF,MAAM,CAACE,MAAM,IAAI5B,MAAM,CAACgC,cAAc,CAACH,MAAM,CAAC;IAC9C,IAAIA,MAAM,GAAG,CAAC,EAAE;MACd,MAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAC,IAAI,CAAC;MAC/B1B,MAAM,CAACgB,IAAI,CAAC,GAAGc,kBAAkB,CAACP,MAAM,EAAEC,KAAK,EAAEvB,IAAI,CAAC,CAAC;IACzD,CAAC,MAAM;MACL,MAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAC;MACzB1B,MAAM,CAACgB,IAAI,CAAC,GAAGe,iBAAiB,CAACR,MAAM,EAAEC,KAAK,EAAEvB,IAAI,CAAC,CAAC;IACxD;EACF;EACAD,MAAM,GAAGA,MAAM,CAACgC,KAAK,CAAC,CAAC,EAAER,KAAK,CAAC;EAE/B,IAAIxB,MAAM,CAACY,MAAM,KAAKY,KAAK,EAAE;IAC3B,MAAM,IAAItB,KAAK,CAAC,sBAAsB,CAAC;EACzC;EAEA,OAAOF,MAAM;AACf;AAEA,SAAS8B,kBAAkB,CACzBP,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EAEV,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,IAAIT,KAAK,GAAG,CAAC,KAAK,CAAC,EAAE;IACnB,MAAM,IAAItB,KAAK,CAAC,yBAAyB,CAAC;EAC5C;;EAGA,MAAMF,MAAM,GAAG,IAAIkC,KAAK,CAACV,KAAK,CAAC,CAACW,IAAI,CAAC,CAAC,CAAC;EACvC,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,QAAQ,GAAGT,KAAK,EAAEY,CAAC,EAAE,EAAE;IACzC,IAAIb,MAAM,CAACK,MAAM,CAACL,MAAM,CAACE,MAAM,GAAGY,IAAI,CAACC,KAAK,CAACF,CAAC,GAAG,CAAC,CAAC,CAAC,GAAI,CAAC,IAAIA,CAAC,GAAG,CAAE,EAAE;MACnEpC,MAAM,CAACqC,IAAI,CAACC,KAAK,CAACF,CAAC,GAAGH,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAIG,CAAC,GAAGH,QAAQ;IACvD;EACF;EAEAV,MAAM,CAACE,MAAM,IAAIQ,QAAQ,IAAIT,KAAK,GAAG,CAAC,CAAC;EACvC,OAAOxB,MAAM;AACf;AAEA,SAAS+B,iBAA
iB,CACxBR,MAAoB,EACpBC,KAAa,EACbvB,IAAyB,EACf;EAEV,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,IAAIM,KAAK,GAAG,CAAC;EACb,KAAK,IAAI5B,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG0B,IAAI,CAACG,IAAI,CAACP,QAAQ,GAAG,CAAC,CAAC,EAAEtB,CAAC,EAAE,EAAE;IAEhD4B,KAAK,IAAI,CAAC;IACVA,KAAK,IAAIhB,MAAM,CAACK,MAAM,CAACL,MAAM,CAACE,MAAM,CAAC;IACrCF,MAAM,CAACE,MAAM,IAAI,CAAC;EACpB;;EAGA,OAAO,IAAIS,KAAK,CAACV,KAAK,CAAC,CAACW,IAAI,CAACI,KAAK,CAAC;AACrC;AAEA,SAASzB,kBAAkB,CAACd,MAAgB,EAAEC,IAAyB,EAAU;EAE/E,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,KAAK,IAAItB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGX,MAAM,CAACY,MAAM,GAAG,CAAC,EAAED,CAAC,EAAE,EAAE;IAC1CX,MAAM,CAACgB,IAAI,CAAC,CAAC,CAAC;EAChB;EAEA,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC6B,IAAI,CAACG,IAAI,CAACP,QAAQ,IAAIjC,MAAM,CAACY,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;EACnE,KAAK,IAAIwB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAM,EAAEwB,CAAC,EAAE,EAAE;IACjD,IAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAK,CAACF,CAAC,GAAGH,QAAQ,CAAC,CAAC,GAAI,CAAC,IAAIG,CAAC,GAAGH,QAAS,IAAI,CAAC,EAAE;MAChE3B,GAAG,CAAC+B,IAAI,CAACC,KAAK,CAACF,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAIA,CAAC,GAAG,CAAC;IACtC;EACF;EAEA,OAAO7B,MAAM,CAACM,MAAM,CAAC,CAACN,MAAM,CAACkC,IAAI,CAAC5C,MAAM,CAAC6C,MAAM,CAAG1C,MAAM,CAACY,MAAM,GAAG,CAAC,IAAK,CAAC,GAAI,CAAC,CAAC,CAAC,EAAEN,GAAG,CAAC,CAAC;AACzF;AAEA,SAASS,iBAAiB,CAACwB,KAAa,EAAEf,KAAa,EAAEvB,IAAyB,EAAU;EAE1F,MAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAAQ;EAEtC,MAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAK,CAAC6B,IAAI,CAACG,IAAI,CAACP,QAAQ,GAAG,CAAC,CAAC,CAAC;EAEjD,KAAK,IAAItB,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGL,GAAG,CAACM,MAAM,EAAED,CAAC,EAAE,EAAE;IACnCL,GAAG,CAACqC,UAAU,CAACJ,KAAK,GAAG,IAAI,EAAE5B,CAAC,CAAC;IAE/B4B,KAAK,IAAI,CAAC;EACZ;;EAEA,OAAOhC,MAAM,CAACM,MAAM,CAAC,CAACN,MAAM,CAACkC,IAAI,CAAC5C,MAAM,CAAC6C,MAAM,CAAClB,KAAK,IAAI,CAAC,CAAC,CAAC,EAAElB,GAAG,CAAC,CAAC;AACrE"}
package/dist/esm/parquetjs/compression.js
@@ -1,11 +1,16 @@
+
+
 import { NoCompression, GZipCompression, SnappyCompression, BrotliCompression, LZOCompression, LZ4Compression, ZstdCompression } from '@loaders.gl/compression';
 import { toArrayBuffer, toBuffer } from './utils/buffer-utils';
+
 import lz4js from 'lz4js';
 import lzo from 'lzo';
+
 const modules = {
   lz4js,
   lzo
 };
+
 export const PARQUET_COMPRESSION_METHODS = {
   UNCOMPRESSED: new NoCompression(),
   GZIP: new GZipCompression(),
@@ -26,37 +31,36 @@ export const PARQUET_COMPRESSION_METHODS = {
     modules
   })
 };
+
 export async function preloadCompressions(options) {
   const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
   return await Promise.all(compressions.map(compression => compression.preload()));
 }
+
 export async function deflate(method, value) {
   const compression = PARQUET_COMPRESSION_METHODS[method];
-
   if (!compression) {
     throw new Error("parquet: invalid compression method: ".concat(method));
   }
-
   const inputArrayBuffer = toArrayBuffer(value);
   const compressedArrayBuffer = await compression.compress(inputArrayBuffer);
   return toBuffer(compressedArrayBuffer);
 }
+
 export async function decompress(method, value, size) {
   const compression = PARQUET_COMPRESSION_METHODS[method];
-
   if (!compression) {
     throw new Error("parquet: invalid compression method: ".concat(method));
   }
-
   const inputArrayBuffer = toArrayBuffer(value);
   const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);
   return toBuffer(compressedArrayBuffer);
 }
+
 export function inflate(method, value, size) {
   if (!(method in PARQUET_COMPRESSION_METHODS)) {
     throw new Error("invalid compression method: ".concat(method));
   }
-
   return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
 }
 //# sourceMappingURL=compression.js.map
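
Note: the compression.js changes are likewise formatting-only; the exported API is fully visible in the hunk above. A minimal sketch of exercising it, assuming the internal module is imported directly from the ESM build; GZIP typically works without extra preloading, while methods such as LZ4 and LZO rely on the modules object injected above.

  // Hypothetical direct import of the internal ESM build shown above.
  import {preloadCompressions, deflate, decompress} from '@loaders.gl/parquet/dist/esm/parquetjs/compression.js';

  // Optionally preload codecs that depend on large external libraries.
  await preloadCompressions();

  // Round-trip a Buffer through one of the methods registered in
  // PARQUET_COMPRESSION_METHODS ('UNCOMPRESSED', 'GZIP', 'SNAPPY', ...).
  const original = Buffer.from('hello parquet');
  const compressed = await deflate('GZIP', original);
  const restored = await decompress('GZIP', compressed, original.length);
  // restored should equal the original Buffer.
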
package/dist/esm/parquetjs/compression.js.map
@@ -1 +1 @@
- {"version":3,"sources":["../../../src/parquetjs/compression.ts"],"names":["NoCompression","GZipCompression","SnappyCompression","BrotliCompression","LZOCompression","LZ4Compression","ZstdCompression","toArrayBuffer","toBuffer","lz4js","lzo","modules","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","GZIP","SNAPPY","BROTLI","LZ4","LZ4_RAW","LZO","ZSTD","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","Error","inputArrayBuffer","compressedArrayBuffer","compress","decompress","size","inflate"],"mappings":"AAIA,SAEEA,aAFF,EAGEC,eAHF,EAIEC,iBAJF,EAKEC,iBALF,EAMEC,cANF,EAOEC,cAPF,EAQEC,eARF,QASO,yBATP;AAYA,SAAQC,aAAR,EAAuBC,QAAvB,QAAsC,sBAAtC;AAMA,OAAOC,KAAP,MAAkB,OAAlB;AACA,OAAOC,GAAP,MAAgB,KAAhB;AAIA,MAAMC,OAAO,GAAG;AAQdF,EAAAA,KARc;AASdC,EAAAA;AATc,CAAhB;AAcA,OAAO,MAAME,2BAAoE,GAAG;AAClFC,EAAAA,YAAY,EAAE,IAAIb,aAAJ,EADoE;AAElFc,EAAAA,IAAI,EAAE,IAAIb,eAAJ,EAF4E;AAGlFc,EAAAA,MAAM,EAAE,IAAIb,iBAAJ,EAH0E;AAIlFc,EAAAA,MAAM,EAAE,IAAIb,iBAAJ,CAAsB;AAACQ,IAAAA;AAAD,GAAtB,CAJ0E;AAMlFM,EAAAA,GAAG,EAAE,IAAIZ,cAAJ,CAAmB;AAACM,IAAAA;AAAD,GAAnB,CAN6E;AAOlFO,EAAAA,OAAO,EAAE,IAAIb,cAAJ,CAAmB;AAACM,IAAAA;AAAD,GAAnB,CAPyE;AAQlFQ,EAAAA,GAAG,EAAE,IAAIf,cAAJ,CAAmB;AAACO,IAAAA;AAAD,GAAnB,CAR6E;AASlFS,EAAAA,IAAI,EAAE,IAAId,eAAJ,CAAoB;AAACK,IAAAA;AAAD,GAApB;AAT4E,CAA7E;AAgBP,OAAO,eAAeU,mBAAf,CAAmCC,OAAnC,EAA8E;AACnF,QAAMC,YAAY,GAAGC,MAAM,CAACC,MAAP,CAAcb,2BAAd,CAArB;AACA,SAAO,MAAMc,OAAO,CAACC,GAAR,CAAYJ,YAAY,CAACK,GAAb,CAAkBC,WAAD,IAAiBA,WAAW,CAACC,OAAZ,EAAlC,CAAZ,CAAb;AACD;AAKD,OAAO,eAAeC,OAAf,CAAuBC,MAAvB,EAAmDC,KAAnD,EAAmF;AACxF,QAAMJ,WAAW,GAAGjB,2BAA2B,CAACoB,MAAD,CAA/C;;AACA,MAAI,CAACH,WAAL,EAAkB;AAChB,UAAM,IAAIK,KAAJ,gDAAkDF,MAAlD,EAAN;AACD;;AACD,QAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAD,CAAtC;AACA,QAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACQ,QAAZ,CAAqBF,gBAArB,CAApC;AACA,SAAO3B,QAAQ,CAAC4B,qBAAD,CAAf;AACD;AAKD,OAAO,eAAeE,UAAf,CACLN,MADK,EAELC,KAFK,EAGLM,IAHK,EAIY;AACjB,QAAMV,WAAW,GAAGjB,2BAA2B,CAACoB,MAAD,CAA/C;;AACA,MAAI,CAACH,WAAL,EAAkB;AAChB,UAAM,IAAIK,KAAJ,gDAAkDF,MAAlD,EAAN;AACD;;AACD,QAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAD,CAAtC;AACA,QAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACS,UAAZ,CAAuBH,gBAAvB,EAAyCI,IAAzC,CAApC;AACA,SAAO/B,QAAQ,CAAC4B,qBAAD,CAAf;AACD;AAKD,OAAO,SAASI,OAAT,CAAiBR,MAAjB,EAA6CC,KAA7C,EAA4DM,IAA5D,EAAkF;AACvF,MAAI,EAAEP,MAAM,IAAIpB,2BAAZ,CAAJ,EAA8C;AAC5C,UAAM,IAAIsB,KAAJ,uCAAyCF,MAAzC,EAAN;AACD;;AAED,SAAOpB,2BAA2B,CAACoB,MAAD,CAA3B,CAAoCQ,OAApC,CAA4CP,KAA5C,EAAmDM,IAAnD,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\n// Forked from https://github.com/ironSource/parquetjs under MIT license\n\nimport {\n Compression,\n NoCompression,\n GZipCompression,\n SnappyCompression,\n BrotliCompression,\n LZOCompression,\n LZ4Compression,\n ZstdCompression\n} from '@loaders.gl/compression';\n\nimport {ParquetCompression} from './schema/declare';\nimport {toArrayBuffer, toBuffer} from './utils/buffer-utils';\n\n// TODO switch to worker compression to avoid bundling...\n\n// import brotli from 'brotli'; - brotli has problems with decompress in browsers\n// import brotliDecompress from 'brotli/decompress';\nimport lz4js from 'lz4js';\nimport lzo from 'lzo';\n// import {ZstdCodec} from 'zstd-codec';\n\n// Inject large dependencies through Compression constructor options\nconst modules = {\n // brotli has problems with decompress in browsers\n // brotli: {\n // decompress: brotliDecompress,\n // compress: () => {\n // throw new 
Error('brotli compress');\n // }\n // },\n lz4js,\n lzo\n // 'zstd-codec': ZstdCodec\n};\n\n// See https://github.com/apache/parquet-format/blob/master/Compression.md\nexport const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression> = {\n UNCOMPRESSED: new NoCompression(),\n GZIP: new GZipCompression(),\n SNAPPY: new SnappyCompression(),\n BROTLI: new BrotliCompression({modules}),\n // TODO: Understand difference between LZ4 and LZ4_RAW\n LZ4: new LZ4Compression({modules}),\n LZ4_RAW: new LZ4Compression({modules}),\n LZO: new LZOCompression({modules}),\n ZSTD: new ZstdCompression({modules})\n};\n\n/**\n * Register compressions that have big external libraries\n * @param options.modules External library dependencies\n */\nexport async function preloadCompressions(options?: {modules: {[key: string]: any}}) {\n const compressions = Object.values(PARQUET_COMPRESSION_METHODS);\n return await Promise.all(compressions.map((compression) => compression.preload()));\n}\n\n/**\n * Deflate a value using compression method `method`\n */\nexport async function deflate(method: ParquetCompression, value: Buffer): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.compress(inputArrayBuffer);\n return toBuffer(compressedArrayBuffer);\n}\n\n/**\n * Inflate a value using compression method `method`\n */\nexport async function decompress(\n method: ParquetCompression,\n value: Buffer,\n size: number\n): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);\n return toBuffer(compressedArrayBuffer);\n}\n\n/*\n * Inflate a value using compression method `method`\n */\nexport function inflate(method: ParquetCompression, value: Buffer, size: number): Buffer {\n if (!(method in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`invalid compression method: ${method}`);\n }\n // @ts-ignore\n return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);\n}\n\n/*\nfunction deflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction deflate_gzip(value: Buffer): Buffer {\n return zlib.gzipSync(value);\n}\n\nfunction deflate_snappy(value: Buffer): Buffer {\n return snappyjs.compress(value);\n}\n\nfunction deflate_lzo(value: Buffer): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.compress(value);\n}\n\nfunction deflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n const result = brotli.compress(value, {\n mode: 0,\n quality: 8,\n lgwin: 22\n });\n return result ? 
Buffer.from(result) : Buffer.alloc(0);\n}\n\nfunction deflate_lz4(value: Buffer): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(lz4js.encodeBound(value.length));\n // const compressedSize = lz4.encodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, compressedSize);\n // return result;\n return Buffer.from(lz4js.compress(value));\n } catch (err) {\n throw err;\n }\n}\nfunction inflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction inflate_gzip(value: Buffer): Buffer {\n return zlib.gunzipSync(value);\n}\n\nfunction inflate_snappy(value: Buffer): Buffer {\n return snappyjs.uncompress(value);\n}\n\nfunction inflate_lzo(value: Buffer, size: number): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.decompress(value, size);\n}\n\nfunction inflate_lz4(value: Buffer, size: number): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(size);\n // const uncompressedSize = lz4js.decodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, uncompressedSize);\n // return result;\n return Buffer.from(lz4js.decompress(value, size));\n } catch (err) {\n throw err;\n }\n}\n\nfunction inflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n if (!value.length) {\n return Buffer.alloc(0);\n }\n return Buffer.from(brotli.decompress(value));\n}\n*/\n"],"file":"compression.js"}
+ {"version":3,"file":"compression.js","names":["NoCompression","GZipCompression","SnappyCompression","BrotliCompression","LZOCompression","LZ4Compression","ZstdCompression","toArrayBuffer","toBuffer","lz4js","lzo","modules","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","GZIP","SNAPPY","BROTLI","LZ4","LZ4_RAW","LZO","ZSTD","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","Error","inputArrayBuffer","compressedArrayBuffer","compress","decompress","size","inflate"],"sources":["../../../src/parquetjs/compression.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\n// Forked from https://github.com/ironSource/parquetjs under MIT license\n\nimport {\n Compression,\n NoCompression,\n GZipCompression,\n SnappyCompression,\n BrotliCompression,\n LZOCompression,\n LZ4Compression,\n ZstdCompression\n} from '@loaders.gl/compression';\n\nimport {ParquetCompression} from './schema/declare';\nimport {toArrayBuffer, toBuffer} from './utils/buffer-utils';\n\n// TODO switch to worker compression to avoid bundling...\n\n// import brotli from 'brotli'; - brotli has problems with decompress in browsers\n// import brotliDecompress from 'brotli/decompress';\nimport lz4js from 'lz4js';\nimport lzo from 'lzo';\n// import {ZstdCodec} from 'zstd-codec';\n\n// Inject large dependencies through Compression constructor options\nconst modules = {\n // brotli has problems with decompress in browsers\n // brotli: {\n // decompress: brotliDecompress,\n // compress: () => {\n // throw new Error('brotli compress');\n // }\n // },\n lz4js,\n lzo\n // 'zstd-codec': ZstdCodec\n};\n\n// See https://github.com/apache/parquet-format/blob/master/Compression.md\nexport const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression> = {\n UNCOMPRESSED: new NoCompression(),\n GZIP: new GZipCompression(),\n SNAPPY: new SnappyCompression(),\n BROTLI: new BrotliCompression({modules}),\n // TODO: Understand difference between LZ4 and LZ4_RAW\n LZ4: new LZ4Compression({modules}),\n LZ4_RAW: new LZ4Compression({modules}),\n LZO: new LZOCompression({modules}),\n ZSTD: new ZstdCompression({modules})\n};\n\n/**\n * Register compressions that have big external libraries\n * @param options.modules External library dependencies\n */\nexport async function preloadCompressions(options?: {modules: {[key: string]: any}}) {\n const compressions = Object.values(PARQUET_COMPRESSION_METHODS);\n return await Promise.all(compressions.map((compression) => compression.preload()));\n}\n\n/**\n * Deflate a value using compression method `method`\n */\nexport async function deflate(method: ParquetCompression, value: Buffer): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.compress(inputArrayBuffer);\n return toBuffer(compressedArrayBuffer);\n}\n\n/**\n * Inflate a value using compression method `method`\n */\nexport async function decompress(\n method: ParquetCompression,\n value: Buffer,\n size: number\n): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const 
compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);\n return toBuffer(compressedArrayBuffer);\n}\n\n/*\n * Inflate a value using compression method `method`\n */\nexport function inflate(method: ParquetCompression, value: Buffer, size: number): Buffer {\n if (!(method in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`invalid compression method: ${method}`);\n }\n // @ts-ignore\n return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);\n}\n\n/*\nfunction deflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction deflate_gzip(value: Buffer): Buffer {\n return zlib.gzipSync(value);\n}\n\nfunction deflate_snappy(value: Buffer): Buffer {\n return snappyjs.compress(value);\n}\n\nfunction deflate_lzo(value: Buffer): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.compress(value);\n}\n\nfunction deflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n const result = brotli.compress(value, {\n mode: 0,\n quality: 8,\n lgwin: 22\n });\n return result ? Buffer.from(result) : Buffer.alloc(0);\n}\n\nfunction deflate_lz4(value: Buffer): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(lz4js.encodeBound(value.length));\n // const compressedSize = lz4.encodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, compressedSize);\n // return result;\n return Buffer.from(lz4js.compress(value));\n } catch (err) {\n throw err;\n }\n}\nfunction inflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction inflate_gzip(value: Buffer): Buffer {\n return zlib.gunzipSync(value);\n}\n\nfunction inflate_snappy(value: Buffer): Buffer {\n return snappyjs.uncompress(value);\n}\n\nfunction inflate_lzo(value: Buffer, size: number): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.decompress(value, size);\n}\n\nfunction inflate_lz4(value: Buffer, size: number): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(size);\n // const uncompressedSize = lz4js.decodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, uncompressedSize);\n // return result;\n return Buffer.from(lz4js.decompress(value, size));\n } catch (err) {\n throw err;\n }\n}\n\nfunction inflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n if (!value.length) {\n return Buffer.alloc(0);\n }\n return 
Buffer.from(brotli.decompress(value));\n}\n*/\n"],"mappings":";;AAIA,SAEEA,aAAa,EACbC,eAAe,EACfC,iBAAiB,EACjBC,iBAAiB,EACjBC,cAAc,EACdC,cAAc,EACdC,eAAe,QACV,yBAAyB;AAGhC,SAAQC,aAAa,EAAEC,QAAQ,QAAO,sBAAsB;;AAM5D,OAAOC,KAAK,MAAM,OAAO;AACzB,OAAOC,GAAG,MAAM,KAAK;;AAIrB,MAAMC,OAAO,GAAG;EAQdF,KAAK;EACLC;AAEF,CAAC;;AAGD,OAAO,MAAME,2BAAoE,GAAG;EAClFC,YAAY,EAAE,IAAIb,aAAa,EAAE;EACjCc,IAAI,EAAE,IAAIb,eAAe,EAAE;EAC3Bc,MAAM,EAAE,IAAIb,iBAAiB,EAAE;EAC/Bc,MAAM,EAAE,IAAIb,iBAAiB,CAAC;IAACQ;EAAO,CAAC,CAAC;EAExCM,GAAG,EAAE,IAAIZ,cAAc,CAAC;IAACM;EAAO,CAAC,CAAC;EAClCO,OAAO,EAAE,IAAIb,cAAc,CAAC;IAACM;EAAO,CAAC,CAAC;EACtCQ,GAAG,EAAE,IAAIf,cAAc,CAAC;IAACO;EAAO,CAAC,CAAC;EAClCS,IAAI,EAAE,IAAId,eAAe,CAAC;IAACK;EAAO,CAAC;AACrC,CAAC;;AAMD,OAAO,eAAeU,mBAAmB,CAACC,OAAyC,EAAE;EACnF,MAAMC,YAAY,GAAGC,MAAM,CAACC,MAAM,CAACb,2BAA2B,CAAC;EAC/D,OAAO,MAAMc,OAAO,CAACC,GAAG,CAACJ,YAAY,CAACK,GAAG,CAAEC,WAAW,IAAKA,WAAW,CAACC,OAAO,EAAE,CAAC,CAAC;AACpF;;AAKA,OAAO,eAAeC,OAAO,CAACC,MAA0B,EAAEC,KAAa,EAAmB;EACxF,MAAMJ,WAAW,GAAGjB,2BAA2B,CAACoB,MAAM,CAAC;EACvD,IAAI,CAACH,WAAW,EAAE;IAChB,MAAM,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;EACnE;EACA,MAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAK,CAAC;EAC7C,MAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACQ,QAAQ,CAACF,gBAAgB,CAAC;EAC1E,OAAO3B,QAAQ,CAAC4B,qBAAqB,CAAC;AACxC;;AAKA,OAAO,eAAeE,UAAU,CAC9BN,MAA0B,EAC1BC,KAAa,EACbM,IAAY,EACK;EACjB,MAAMV,WAAW,GAAGjB,2BAA2B,CAACoB,MAAM,CAAC;EACvD,IAAI,CAACH,WAAW,EAAE;IAChB,MAAM,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;EACnE;EACA,MAAMG,gBAAgB,GAAG5B,aAAa,CAAC0B,KAAK,CAAC;EAC7C,MAAMG,qBAAqB,GAAG,MAAMP,WAAW,CAACS,UAAU,CAACH,gBAAgB,EAAEI,IAAI,CAAC;EAClF,OAAO/B,QAAQ,CAAC4B,qBAAqB,CAAC;AACxC;;AAKA,OAAO,SAASI,OAAO,CAACR,MAA0B,EAAEC,KAAa,EAAEM,IAAY,EAAU;EACvF,IAAI,EAAEP,MAAM,IAAIpB,2BAA2B,CAAC,EAAE;IAC5C,MAAM,IAAIsB,KAAK,uCAAgCF,MAAM,EAAG;EAC1D;EAEA,OAAOpB,2BAA2B,CAACoB,MAAM,CAAC,CAACQ,OAAO,CAACP,KAAK,EAAEM,IAAI,CAAC;AACjE"}
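
Note: the source map above embeds the TypeScript source of the package's internal parquetjs/compression module, which exports preloadCompressions(), deflate(method, value) and decompress(method, value, size). The sketch below only illustrates how those helpers are meant to be called; the deep import path and the roundTrip wrapper are assumptions for illustration, not part of the package's documented public API.

// Minimal usage sketch (TypeScript, Node). The import path is an assumption --
// adjust it to however the internal compression module is exposed in your build.
import {preloadCompressions, deflate, decompress} from '@loaders.gl/parquet/dist/esm/parquetjs/compression';

async function roundTrip(): Promise<void> {
  // Codecs backed by large external libraries (BROTLI, LZ4, LZO, ZSTD) are
  // injected via constructor options; preload them once before first use.
  await preloadCompressions();

  const original = Buffer.from('hello parquet compression');

  // Compress with one of the PARQUET_COMPRESSION_METHODS keys, e.g. 'GZIP'.
  const compressed = await deflate('GZIP', original);

  // decompress() also takes the expected uncompressed size (needed by some codecs).
  const restored = await decompress('GZIP', compressed, original.length);

  console.log(restored.equals(original)); // expected: true
}

roundTrip().catch(console.error);
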
@@ -7,10 +7,14 @@ import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageH
  import { osopen, oswrite, osclose } from '../utils/file-utils';
  import { getBitWidth, serializeThrift } from '../utils/read-utils';
  import Int64 from 'node-int64';
+
  const PARQUET_MAGIC = 'PAR1';
+
  const PARQUET_VERSION = 1;
+
  const PARQUET_DEFAULT_PAGE_SIZE = 8192;
  const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
+
  const PARQUET_RDLVL_TYPE = 'INT32';
  const PARQUET_RDLVL_ENCODING = 'RLE';
  export class ParquetWriter {
@@ -23,33 +27,25 @@ export class ParquetWriter {
  if (!opts) {
  opts = {};
  }
-
  const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
  return new ParquetWriter(schema, envelopeWriter, opts);
  }
-
  constructor(schema, envelopeWriter, opts) {
  _defineProperty(this, "schema", void 0);
-
  _defineProperty(this, "envelopeWriter", void 0);
-
  _defineProperty(this, "rowBuffer", void 0);
-
  _defineProperty(this, "rowGroupSize", void 0);
-
  _defineProperty(this, "closed", void 0);
-
  _defineProperty(this, "userMetadata", void 0);
-
  this.schema = schema;
  this.envelopeWriter = envelopeWriter;
  this.rowBuffer = {};
  this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
  this.closed = false;
  this.userMetadata = {};
+
  this.writeHeader();
  }
-
  async writeHeader() {
  try {
  await this.envelopeWriter.writeHeader();
@@ -63,9 +59,7 @@ export class ParquetWriter {
  if (this.closed) {
  throw new Error('writer was closed');
  }
-
  Shred.shredRecord(this.schema, row, this.rowBuffer);
-
  if (this.rowBuffer.rowCount >= this.rowGroupSize) {
  this.rowBuffer = {};
  }
@@ -75,13 +69,10 @@ export class ParquetWriter {
  if (this.closed) {
  throw new Error('writer was closed');
  }
-
  this.closed = true;
-
  if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
  this.rowBuffer = {};
  }
-
  await this.envelopeWriter.writeFooter(this.userMetadata);
  await this.envelopeWriter.close();
 
@@ -101,32 +92,23 @@ export class ParquetWriter {
  setPageSize(cnt) {
  this.envelopeWriter.setPageSize(cnt);
  }
-
  }
+
  export class ParquetEnvelopeWriter {
  static async openStream(schema, outputStream, opts) {
  const writeFn = oswrite.bind(undefined, outputStream);
  const closeFn = osclose.bind(undefined, outputStream);
  return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
  }
-
  constructor(schema, writeFn, closeFn, fileOffset, opts) {
  _defineProperty(this, "schema", void 0);
-
  _defineProperty(this, "write", void 0);
-
  _defineProperty(this, "close", void 0);
-
  _defineProperty(this, "offset", void 0);
-
  _defineProperty(this, "rowCount", void 0);
-
  _defineProperty(this, "rowGroups", void 0);
-
  _defineProperty(this, "pageSize", void 0);
-
  _defineProperty(this, "useDataPageV2", void 0);
-
  this.schema = schema;
  this.write = writeFn;
  this.close = closeFn;
@@ -136,7 +118,6 @@ export class ParquetEnvelopeWriter {
  this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
  this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
  }
-
  writeSection(buf) {
  this.offset += buf.length;
  return this.write(buf);
@@ -161,29 +142,26 @@ export class ParquetEnvelopeWriter {
  if (!userMetadata) {
  userMetadata = {};
  }
-
  return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
  }
 
  setPageSize(cnt) {
  this.pageSize = cnt;
  }
-
  }
+
  export class ParquetTransformer extends Transform {
- constructor(schema, opts = {}) {
+ constructor(schema) {
+ let opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  super({
  objectMode: true
  });
-
  _defineProperty(this, "writer", void 0);
-
  const writeProxy = function (t) {
  return async function (b) {
  t.push(b);
  };
  }(this);
-
  this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts), opts);
  }
 
@@ -191,7 +169,6 @@ export class ParquetTransformer extends Transform {
  if (row) {
  return this.writer.appendRow(row).then(callback);
  }
-
  callback();
  return Promise.resolve();
  }
@@ -199,20 +176,17 @@ export class ParquetTransformer extends Transform {
  async _flush(callback) {
  await this.writer.close(callback);
  }
-
  }
 
  function encodeValues(type, encoding, values, opts) {
  if (!(encoding in PARQUET_CODECS)) {
  throw new Error("invalid encoding: ".concat(encoding));
  }
-
  return PARQUET_CODECS[encoding].encodeValues(type, values, opts);
  }
 
  async function encodeDataPage(column, data) {
  let rLevelsBuf = Buffer.alloc(0);
-
  if (column.rLevelMax > 0) {
  rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
  bitWidth: getBitWidth(column.rLevelMax)
@@ -220,7 +194,6 @@ async function encodeDataPage(column, data) {
  }
 
  let dLevelsBuf = Buffer.alloc(0);
-
  if (column.dLevelMax > 0) {
  dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
  bitWidth: getBitWidth(column.dLevelMax)
@@ -232,7 +205,9 @@ async function encodeDataPage(column, data) {
  bitWidth: column.typeLength
  });
  const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
+
  const compressedBuf = await Compression.deflate(column.compression, dataBuf);
+
  const header = new PageHeader({
  type: PageType.DATA_PAGE,
  data_page_header: new DataPageHeader({
@@ -241,9 +216,11 @@ async function encodeDataPage(column, data) {
  definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING],
  repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING]
  }),
+
  uncompressed_page_size: dataBuf.length,
  compressed_page_size: compressedBuf.length
  });
+
  const headerBuf = serializeThrift(header);
  const page = Buffer.concat([headerBuf, compressedBuf]);
  return {
@@ -258,18 +235,17 @@ async function encodeDataPageV2(column, data, rowCount) {
  typeLength: column.typeLength,
  bitWidth: column.typeLength
  });
+
  const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
- let rLevelsBuf = Buffer.alloc(0);
 
+ let rLevelsBuf = Buffer.alloc(0);
  if (column.rLevelMax > 0) {
  rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
  bitWidth: getBitWidth(column.rLevelMax),
  disableEnvelope: true
  });
  }
-
  let dLevelsBuf = Buffer.alloc(0);
-
  if (column.dLevelMax > 0) {
  dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
  bitWidth: getBitWidth(column.dLevelMax),
@@ -291,6 +267,7 @@ async function encodeDataPageV2(column, data, rowCount) {
  uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
  compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
  });
+
  const headerBuf = serializeThrift(header);
  const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
  return {
@@ -312,6 +289,7 @@ async function encodeColumnChunk(column, buffer, offset, opts) {
  total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
  total_compressed_size += result.header.compressed_page_size + result.headerSize;
  }
+
  const metadata = new ColumnMetaData({
  path_in_schema: column.path,
  num_values: data.count,
@@ -322,8 +300,10 @@ async function encodeColumnChunk(column, buffer, offset, opts) {
  type: Type[column.primitiveType],
  codec: CompressionCodec[column.compression]
  });
+
  metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);
  metadata.encodings.push(Encoding[column.encoding]);
+
  const metadataOffset = baseOffset + pageBuf.length;
  const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
  return {
@@ -340,7 +320,6 @@ async function encodeRowGroup(schema, data, opts) {
  total_byte_size: 0
  });
  let body = Buffer.alloc(0);
-
  for (const field of schema.fieldList) {
  if (field.isNested) {
  continue;
@@ -355,7 +334,6 @@ async function encodeRowGroup(schema, data, opts) {
  metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
  body = Buffer.concat([body, cchunkData.body]);
  }
-
  return {
  body,
  metadata
@@ -371,17 +349,14 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
  schema: [],
  key_value_metadata: []
  });
-
  for (const key in userMetadata) {
- var _metadata$key_value_m, _metadata$key_value_m2;
-
+ var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
  const kv = new KeyValue({
  key,
  value: userMetadata[key]
  });
- (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
+ (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
  }
-
  {
  const schemaRoot = new SchemaElement({
  name: 'root',
@@ -389,28 +364,23 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
  });
  metadata.schema.push(schemaRoot);
  }
-
  for (const field of schema.fieldList) {
  const relt = FieldRepetitionType[field.repetitionType];
  const schemaElem = new SchemaElement({
  name: field.name,
  repetition_type: relt
  });
-
  if (field.isNested) {
  schemaElem.num_children = field.fieldCount;
  } else {
  schemaElem.type = Type[field.primitiveType];
  }
-
  if (field.originalType) {
  schemaElem.converted_type = ConvertedType[field.originalType];
  }
-
  schemaElem.type_length = field.typeLength;
  metadata.schema.push(schemaElem);
  }
-
  const metadataEncoded = serializeThrift(metadata);
  const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
  metadataEncoded.copy(footerEncoded);