@loaders.gl/parquet 3.1.3 → 4.0.0-alpha.5

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (438)
  1. package/dist/bundle.js +2 -2
  2. package/dist/bundle.js.map +1 -0
  3. package/dist/constants.js +6 -18
  4. package/dist/constants.js.map +1 -0
  5. package/dist/dist.min.js +17 -8
  6. package/dist/dist.min.js.map +3 -3
  7. package/dist/index.js +14 -29
  8. package/dist/index.js.map +1 -0
  9. package/dist/lib/convert-schema.js +63 -62
  10. package/dist/lib/convert-schema.js.map +1 -0
  11. package/dist/lib/parse-parquet.js +25 -25
  12. package/dist/lib/parse-parquet.js.map +1 -0
  13. package/dist/lib/read-array-buffer.js +8 -28
  14. package/dist/lib/read-array-buffer.js.map +1 -0
  15. package/dist/parquet-loader.js +19 -24
  16. package/dist/parquet-loader.js.map +1 -0
  17. package/dist/parquet-worker.js +18 -9
  18. package/dist/parquet-worker.js.map +3 -3
  19. package/dist/parquet-writer.js +14 -17
  20. package/dist/parquet-writer.js.map +1 -0
  21. package/dist/{es5/parquetjs → parquetjs}/LICENSE +0 -0
  22. package/dist/parquetjs/codecs/declare.js +2 -2
  23. package/dist/{es5/parquetjs → parquetjs}/codecs/declare.js.map +0 -0
  24. package/dist/parquetjs/codecs/dictionary.js +10 -12
  25. package/dist/parquetjs/codecs/dictionary.js.map +1 -0
  26. package/dist/parquetjs/codecs/index.js +22 -50
  27. package/dist/parquetjs/codecs/index.js.map +1 -0
  28. package/dist/parquetjs/codecs/plain.js +232 -173
  29. package/dist/parquetjs/codecs/plain.js.map +1 -0
  30. package/dist/parquetjs/codecs/rle.js +140 -134
  31. package/dist/parquetjs/codecs/rle.js.map +1 -0
  32. package/dist/parquetjs/compression.js +48 -154
  33. package/dist/parquetjs/compression.js.map +1 -0
  34. package/dist/parquetjs/encoder/writer.js +383 -440
  35. package/dist/parquetjs/encoder/writer.js.map +1 -0
  36. package/dist/parquetjs/file.js +66 -85
  37. package/dist/parquetjs/file.js.map +1 -0
  38. package/dist/{es5/parquetjs → parquetjs}/modules.d.ts +0 -0
  39. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +7 -14
  40. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
  41. package/dist/parquetjs/parquet-thrift/BsonType.js +37 -56
  42. package/dist/parquetjs/parquet-thrift/BsonType.js.map +1 -0
  43. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +215 -205
  44. package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
  45. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +212 -207
  46. package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
  47. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +422 -391
  48. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
  49. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +90 -99
  50. package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
  51. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +12 -19
  52. package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
  53. package/dist/parquetjs/parquet-thrift/ConvertedType.js +26 -33
  54. package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
  55. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +162 -162
  56. package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
  57. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +234 -224
  58. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
  59. package/dist/parquetjs/parquet-thrift/DateType.js +37 -56
  60. package/dist/parquetjs/parquet-thrift/DateType.js.map +1 -0
  61. package/dist/parquetjs/parquet-thrift/DecimalType.js +91 -101
  62. package/dist/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
  63. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -118
  64. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
  65. package/dist/parquetjs/parquet-thrift/Encoding.js +12 -19
  66. package/dist/parquetjs/parquet-thrift/Encoding.js.map +1 -0
  67. package/dist/parquetjs/parquet-thrift/EnumType.js +37 -56
  68. package/dist/parquetjs/parquet-thrift/EnumType.js.map +1 -0
  69. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -14
  70. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
  71. package/dist/parquetjs/parquet-thrift/FileMetaData.js +264 -250
  72. package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
  73. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +37 -56
  74. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
  75. package/dist/parquetjs/parquet-thrift/IntType.js +91 -101
  76. package/dist/parquetjs/parquet-thrift/IntType.js.map +1 -0
  77. package/dist/parquetjs/parquet-thrift/JsonType.js +37 -56
  78. package/dist/parquetjs/parquet-thrift/JsonType.js.map +1 -0
  79. package/dist/parquetjs/parquet-thrift/KeyValue.js +89 -98
  80. package/dist/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
  81. package/dist/parquetjs/parquet-thrift/ListType.js +37 -56
  82. package/dist/parquetjs/parquet-thrift/ListType.js.map +1 -0
  83. package/dist/parquetjs/parquet-thrift/LogicalType.js +450 -363
  84. package/dist/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
  85. package/dist/parquetjs/parquet-thrift/MapType.js +37 -56
  86. package/dist/parquetjs/parquet-thrift/MapType.js.map +1 -0
  87. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +37 -56
  88. package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
  89. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +37 -56
  90. package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
  91. package/dist/parquetjs/parquet-thrift/NullType.js +37 -56
  92. package/dist/parquetjs/parquet-thrift/NullType.js.map +1 -0
  93. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +80 -92
  94. package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
  95. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +115 -123
  96. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
  97. package/dist/parquetjs/parquet-thrift/PageHeader.js +231 -214
  98. package/dist/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
  99. package/dist/parquetjs/parquet-thrift/PageLocation.js +124 -137
  100. package/dist/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
  101. package/dist/parquetjs/parquet-thrift/PageType.js +8 -15
  102. package/dist/parquetjs/parquet-thrift/PageType.js.map +1 -0
  103. package/dist/parquetjs/parquet-thrift/RowGroup.js +172 -176
  104. package/dist/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
  105. package/dist/parquetjs/parquet-thrift/SchemaElement.js +268 -237
  106. package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
  107. package/dist/parquetjs/parquet-thrift/SortingColumn.js +115 -123
  108. package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
  109. package/dist/parquetjs/parquet-thrift/Statistics.js +179 -172
  110. package/dist/parquetjs/parquet-thrift/Statistics.js.map +1 -0
  111. package/dist/parquetjs/parquet-thrift/StringType.js +37 -56
  112. package/dist/parquetjs/parquet-thrift/StringType.js.map +1 -0
  113. package/dist/parquetjs/parquet-thrift/TimeType.js +92 -102
  114. package/dist/parquetjs/parquet-thrift/TimeType.js.map +1 -0
  115. package/dist/parquetjs/parquet-thrift/TimeUnit.js +120 -121
  116. package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
  117. package/dist/parquetjs/parquet-thrift/TimestampType.js +92 -102
  118. package/dist/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
  119. package/dist/parquetjs/parquet-thrift/Type.js +12 -19
  120. package/dist/parquetjs/parquet-thrift/Type.js.map +1 -0
  121. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +37 -56
  122. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
  123. package/dist/parquetjs/parquet-thrift/UUIDType.js +37 -56
  124. package/dist/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
  125. package/dist/parquetjs/parquet-thrift/index.js +44 -61
  126. package/dist/parquetjs/parquet-thrift/index.js.map +1 -0
  127. package/dist/parquetjs/parser/decoders.js +283 -301
  128. package/dist/{es5/parquetjs → parquetjs}/parser/decoders.js.map +1 -1
  129. package/dist/parquetjs/parser/parquet-cursor.js +85 -69
  130. package/dist/parquetjs/parser/parquet-cursor.js.map +1 -0
  131. package/dist/parquetjs/parser/parquet-envelope-reader.js +146 -127
  132. package/dist/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  133. package/dist/parquetjs/parser/parquet-reader.js +113 -127
  134. package/dist/parquetjs/parser/parquet-reader.js.map +1 -0
  135. package/dist/parquetjs/schema/declare.js +12 -9
  136. package/dist/parquetjs/schema/declare.js.map +1 -0
  137. package/dist/parquetjs/schema/schema.js +162 -148
  138. package/dist/{es5/parquetjs → parquetjs}/schema/schema.js.map +1 -1
  139. package/dist/parquetjs/schema/shred.js +151 -214
  140. package/dist/parquetjs/schema/shred.js.map +1 -0
  141. package/dist/parquetjs/schema/types.js +415 -357
  142. package/dist/parquetjs/schema/types.js.map +1 -0
  143. package/dist/parquetjs/utils/buffer-utils.js +10 -20
  144. package/dist/parquetjs/utils/buffer-utils.js.map +1 -0
  145. package/dist/parquetjs/utils/file-utils.js +28 -40
  146. package/dist/parquetjs/utils/file-utils.js.map +1 -0
  147. package/dist/parquetjs/utils/read-utils.js +95 -99
  148. package/dist/parquetjs/utils/read-utils.js.map +1 -0
  149. package/dist/workers/parquet-worker.js +4 -5
  150. package/dist/workers/parquet-worker.js.map +1 -0
  151. package/package.json +8 -8
  152. package/dist/es5/bundle.js +0 -7
  153. package/dist/es5/bundle.js.map +0 -1
  154. package/dist/es5/constants.js +0 -17
  155. package/dist/es5/constants.js.map +0 -1
  156. package/dist/es5/index.js +0 -82
  157. package/dist/es5/index.js.map +0 -1
  158. package/dist/es5/lib/convert-schema.js +0 -82
  159. package/dist/es5/lib/convert-schema.js.map +0 -1
  160. package/dist/es5/lib/parse-parquet.js +0 -173
  161. package/dist/es5/lib/parse-parquet.js.map +0 -1
  162. package/dist/es5/lib/read-array-buffer.js +0 -53
  163. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  164. package/dist/es5/parquet-loader.js +0 -30
  165. package/dist/es5/parquet-loader.js.map +0 -1
  166. package/dist/es5/parquet-writer.js +0 -25
  167. package/dist/es5/parquet-writer.js.map +0 -1
  168. package/dist/es5/parquetjs/codecs/declare.js +0 -2
  169. package/dist/es5/parquetjs/codecs/dictionary.js +0 -30
  170. package/dist/es5/parquetjs/codecs/dictionary.js.map +0 -1
  171. package/dist/es5/parquetjs/codecs/index.js +0 -56
  172. package/dist/es5/parquetjs/codecs/index.js.map +0 -1
  173. package/dist/es5/parquetjs/codecs/plain.js +0 -287
  174. package/dist/es5/parquetjs/codecs/plain.js.map +0 -1
  175. package/dist/es5/parquetjs/codecs/rle.js +0 -174
  176. package/dist/es5/parquetjs/codecs/rle.js.map +0 -1
  177. package/dist/es5/parquetjs/compression.js +0 -167
  178. package/dist/es5/parquetjs/compression.js.map +0 -1
  179. package/dist/es5/parquetjs/encoder/writer.js +0 -875
  180. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  181. package/dist/es5/parquetjs/file.js +0 -103
  182. package/dist/es5/parquetjs/file.js.map +0 -1
  183. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
  184. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  185. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +0 -67
  186. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  187. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +0 -241
  188. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  189. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +0 -245
  190. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  191. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +0 -449
  192. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  193. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +0 -124
  194. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  195. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
  196. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  197. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +0 -34
  198. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  199. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +0 -191
  200. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  201. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -258
  202. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  203. package/dist/es5/parquetjs/parquet-thrift/DateType.js +0 -67
  204. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +0 -1
  205. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +0 -122
  206. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  207. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -143
  208. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  209. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +0 -20
  210. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  211. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +0 -67
  212. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  213. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
  214. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  215. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +0 -298
  216. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  217. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +0 -67
  218. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  219. package/dist/es5/parquetjs/parquet-thrift/IntType.js +0 -122
  220. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +0 -1
  221. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +0 -67
  222. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  223. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +0 -120
  224. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  225. package/dist/es5/parquetjs/parquet-thrift/ListType.js +0 -67
  226. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +0 -1
  227. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +0 -508
  228. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  229. package/dist/es5/parquetjs/parquet-thrift/MapType.js +0 -67
  230. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +0 -1
  231. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +0 -67
  232. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  233. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +0 -67
  234. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  235. package/dist/es5/parquetjs/parquet-thrift/NullType.js +0 -67
  236. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +0 -1
  237. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +0 -114
  238. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  239. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +0 -145
  240. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  241. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +0 -258
  242. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  243. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +0 -155
  244. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  245. package/dist/es5/parquetjs/parquet-thrift/PageType.js +0 -16
  246. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +0 -1
  247. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +0 -206
  248. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  249. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +0 -290
  250. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  251. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +0 -145
  252. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  253. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +0 -207
  254. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  255. package/dist/es5/parquetjs/parquet-thrift/StringType.js +0 -67
  256. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +0 -1
  257. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +0 -124
  258. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  259. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +0 -156
  260. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  261. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +0 -124
  262. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  263. package/dist/es5/parquetjs/parquet-thrift/Type.js +0 -20
  264. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +0 -1
  265. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -67
  266. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  267. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +0 -67
  268. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  269. package/dist/es5/parquetjs/parquet-thrift/index.js +0 -565
  270. package/dist/es5/parquetjs/parquet-thrift/index.js.map +0 -1
  271. package/dist/es5/parquetjs/parser/decoders.js +0 -489
  272. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -215
  273. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  274. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -413
  275. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  276. package/dist/es5/parquetjs/parser/parquet-reader.js +0 -364
  277. package/dist/es5/parquetjs/parser/parquet-reader.js.map +0 -1
  278. package/dist/es5/parquetjs/schema/declare.js +0 -25
  279. package/dist/es5/parquetjs/schema/declare.js.map +0 -1
  280. package/dist/es5/parquetjs/schema/schema.js +0 -203
  281. package/dist/es5/parquetjs/schema/shred.js +0 -223
  282. package/dist/es5/parquetjs/schema/shred.js.map +0 -1
  283. package/dist/es5/parquetjs/schema/types.js +0 -492
  284. package/dist/es5/parquetjs/schema/types.js.map +0 -1
  285. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -21
  286. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  287. package/dist/es5/parquetjs/utils/file-utils.js +0 -55
  288. package/dist/es5/parquetjs/utils/file-utils.js.map +0 -1
  289. package/dist/es5/parquetjs/utils/read-utils.js +0 -159
  290. package/dist/es5/parquetjs/utils/read-utils.js.map +0 -1
  291. package/dist/es5/workers/parquet-worker.js +0 -8
  292. package/dist/es5/workers/parquet-worker.js.map +0 -1
  293. package/dist/esm/bundle.js +0 -5
  294. package/dist/esm/bundle.js.map +0 -1
  295. package/dist/esm/constants.js +0 -6
  296. package/dist/esm/constants.js.map +0 -1
  297. package/dist/esm/index.js +0 -15
  298. package/dist/esm/index.js.map +0 -1
  299. package/dist/esm/lib/convert-schema.js +0 -71
  300. package/dist/esm/lib/convert-schema.js.map +0 -1
  301. package/dist/esm/lib/parse-parquet.js +0 -28
  302. package/dist/esm/lib/parse-parquet.js.map +0 -1
  303. package/dist/esm/lib/read-array-buffer.js +0 -9
  304. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  305. package/dist/esm/parquet-loader.js +0 -22
  306. package/dist/esm/parquet-loader.js.map +0 -1
  307. package/dist/esm/parquet-writer.js +0 -18
  308. package/dist/esm/parquet-writer.js.map +0 -1
  309. package/dist/esm/parquetjs/LICENSE +0 -20
  310. package/dist/esm/parquetjs/codecs/declare.js +0 -2
  311. package/dist/esm/parquetjs/codecs/declare.js.map +0 -1
  312. package/dist/esm/parquetjs/codecs/dictionary.js +0 -12
  313. package/dist/esm/parquetjs/codecs/dictionary.js.map +0 -1
  314. package/dist/esm/parquetjs/codecs/index.js +0 -23
  315. package/dist/esm/parquetjs/codecs/index.js.map +0 -1
  316. package/dist/esm/parquetjs/codecs/plain.js +0 -270
  317. package/dist/esm/parquetjs/codecs/plain.js.map +0 -1
  318. package/dist/esm/parquetjs/codecs/rle.js +0 -151
  319. package/dist/esm/parquetjs/codecs/rle.js.map +0 -1
  320. package/dist/esm/parquetjs/compression.js +0 -62
  321. package/dist/esm/parquetjs/compression.js.map +0 -1
  322. package/dist/esm/parquetjs/encoder/writer.js +0 -421
  323. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  324. package/dist/esm/parquetjs/file.js +0 -80
  325. package/dist/esm/parquetjs/file.js.map +0 -1
  326. package/dist/esm/parquetjs/modules.d.ts +0 -21
  327. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +0 -8
  328. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  329. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -39
  330. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  331. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +0 -217
  332. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  333. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +0 -218
  334. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  335. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +0 -429
  336. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  337. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +0 -95
  338. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  339. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +0 -13
  340. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  341. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +0 -27
  342. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  343. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +0 -166
  344. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  345. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -236
  346. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  347. package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -39
  348. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +0 -1
  349. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +0 -95
  350. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  351. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -117
  352. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  353. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +0 -13
  354. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  355. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -39
  356. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  357. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -8
  358. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  359. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +0 -270
  360. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  361. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -39
  362. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  363. package/dist/esm/parquetjs/parquet-thrift/IntType.js +0 -95
  364. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +0 -1
  365. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -39
  366. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  367. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +0 -93
  368. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  369. package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -39
  370. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +0 -1
  371. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +0 -467
  372. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  373. package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -39
  374. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +0 -1
  375. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -39
  376. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  377. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -39
  378. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  379. package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -39
  380. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +0 -1
  381. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +0 -85
  382. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  383. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +0 -119
  384. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  385. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +0 -233
  386. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  387. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +0 -128
  388. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  389. package/dist/esm/parquetjs/parquet-thrift/PageType.js +0 -9
  390. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +0 -1
  391. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +0 -178
  392. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  393. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +0 -270
  394. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  395. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +0 -119
  396. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  397. package/dist/esm/parquetjs/parquet-thrift/Statistics.js +0 -183
  398. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  399. package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -39
  400. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +0 -1
  401. package/dist/esm/parquetjs/parquet-thrift/TimeType.js +0 -96
  402. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  403. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +0 -126
  404. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  405. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +0 -96
  406. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  407. package/dist/esm/parquetjs/parquet-thrift/Type.js +0 -13
  408. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +0 -1
  409. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -39
  410. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  411. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -39
  412. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  413. package/dist/esm/parquetjs/parquet-thrift/index.js +0 -44
  414. package/dist/esm/parquetjs/parquet-thrift/index.js.map +0 -1
  415. package/dist/esm/parquetjs/parser/decoders.js +0 -300
  416. package/dist/esm/parquetjs/parser/decoders.js.map +0 -1
  417. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -90
  418. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  419. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -155
  420. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  421. package/dist/esm/parquetjs/parser/parquet-reader.js +0 -120
  422. package/dist/esm/parquetjs/parser/parquet-reader.js.map +0 -1
  423. package/dist/esm/parquetjs/schema/declare.js +0 -13
  424. package/dist/esm/parquetjs/schema/declare.js.map +0 -1
  425. package/dist/esm/parquetjs/schema/schema.js +0 -176
  426. package/dist/esm/parquetjs/schema/schema.js.map +0 -1
  427. package/dist/esm/parquetjs/schema/shred.js +0 -162
  428. package/dist/esm/parquetjs/schema/shred.js.map +0 -1
  429. package/dist/esm/parquetjs/schema/types.js +0 -476
  430. package/dist/esm/parquetjs/schema/types.js.map +0 -1
  431. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -12
  432. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  433. package/dist/esm/parquetjs/utils/file-utils.js +0 -34
  434. package/dist/esm/parquetjs/utils/file-utils.js.map +0 -1
  435. package/dist/esm/parquetjs/utils/read-utils.js +0 -105
  436. package/dist/esm/parquetjs/utils/read-utils.js.map +0 -1
  437. package/dist/esm/workers/parquet-worker.js +0 -4
  438. package/dist/esm/workers/parquet-worker.js.map +0 -1
package/dist/parquetjs/encoder/writer.js +383 -440
@@ -1,478 +1,421 @@
- "use strict";
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
- }) : (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- o[k2] = m[k];
- }));
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
- Object.defineProperty(o, "default", { enumerable: true, value: v });
- }) : function(o, v) {
- o["default"] = v;
- });
- var __importStar = (this && this.__importStar) || function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
- __setModuleDefault(result, mod);
- return result;
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
- /* eslint-disable camelcase */
- const stream_1 = require("stream");
- const codecs_1 = require("../codecs");
- const Compression = __importStar(require("../compression"));
- const Shred = __importStar(require("../schema/shred"));
- const parquet_thrift_1 = require("../parquet-thrift");
- const file_utils_1 = require("../utils/file-utils");
- const read_utils_1 = require("../utils/read-utils");
- const node_int64_1 = __importDefault(require("node-int64"));
- /**
- * Parquet File Magic String
- */
+ import _defineProperty from "@babel/runtime/helpers/esm/defineProperty";
+ import { Transform } from 'stream';
+ import { PARQUET_CODECS } from '../codecs';
+ import * as Compression from '../compression';
+ import * as Shred from '../schema/shred';
+ import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageHeader, DataPageHeaderV2, Encoding, FieldRepetitionType, FileMetaData, KeyValue, PageHeader, PageType, RowGroup, SchemaElement, Type } from '../parquet-thrift';
+ import { osopen, oswrite, osclose } from '../utils/file-utils';
+ import { getBitWidth, serializeThrift } from '../utils/read-utils';
+ import Int64 from 'node-int64';
  const PARQUET_MAGIC = 'PAR1';
- /**
- * Parquet File Format Version
- */
  const PARQUET_VERSION = 1;
- /**
- * Default Page and Row Group sizes
- */
  const PARQUET_DEFAULT_PAGE_SIZE = 8192;
  const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
- /**
- * Repetition and Definition Level Encoding
- */
  const PARQUET_RDLVL_TYPE = 'INT32';
  const PARQUET_RDLVL_ENCODING = 'RLE';
- /**
- * Write a parquet file to an output stream. The ParquetWriter will perform
- * buffering/batching for performance, so close() must be called after all rows
- * are written.
- */
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
- class ParquetWriter {
- /**
- * Create a new buffered parquet writer for a given envelope writer
- */
- constructor(schema, envelopeWriter, opts) {
- this.schema = schema;
- this.envelopeWriter = envelopeWriter;
- // @ts-ignore Row buffer typings...
- this.rowBuffer = {};
- this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
- this.closed = false;
- this.userMetadata = {};
- // eslint-disable-next-line @typescript-eslint/no-floating-promises
- this.writeHeader();
+ export class ParquetWriter {
+ static async openFile(schema, path, opts) {
+ const outputStream = await osopen(path, opts);
+ return ParquetWriter.openStream(schema, outputStream, opts);
+ }
+
+ static async openStream(schema, outputStream, opts) {
+ if (!opts) {
+ opts = {};
  }
- /**
- * Convenience method to create a new buffered parquet writer that writes to
- * the specified file
- */
- static async openFile(schema, path, opts) {
- const outputStream = await (0, file_utils_1.osopen)(path, opts);
- return ParquetWriter.openStream(schema, outputStream, opts);
+
+ const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
+ return new ParquetWriter(schema, envelopeWriter, opts);
+ }
+
+ constructor(schema, envelopeWriter, opts) {
+ _defineProperty(this, "schema", void 0);
+
+ _defineProperty(this, "envelopeWriter", void 0);
+
+ _defineProperty(this, "rowBuffer", void 0);
+
+ _defineProperty(this, "rowGroupSize", void 0);
+
+ _defineProperty(this, "closed", void 0);
+
+ _defineProperty(this, "userMetadata", void 0);
+
+ this.schema = schema;
+ this.envelopeWriter = envelopeWriter;
+ this.rowBuffer = {};
+ this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
+ this.closed = false;
+ this.userMetadata = {};
+ this.writeHeader();
+ }
+
+ async writeHeader() {
+ try {
+ await this.envelopeWriter.writeHeader();
+ } catch (err) {
+ await this.envelopeWriter.close();
+ throw err;
  }
- /**
- * Convenience method to create a new buffered parquet writer that writes to
- * the specified stream
- */
- static async openStream(schema, outputStream, opts) {
- if (!opts) {
- // tslint:disable-next-line:no-parameter-reassignment
- opts = {};
- }
- const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
- return new ParquetWriter(schema, envelopeWriter, opts);
+ }
+
+ async appendRow(row) {
+ if (this.closed) {
+ throw new Error('writer was closed');
  }
- async writeHeader() {
- // TODO - better not mess with promises in the constructor
- try {
- await this.envelopeWriter.writeHeader();
- }
- catch (err) {
- await this.envelopeWriter.close();
- throw err;
- }
+
+ Shred.shredRecord(this.schema, row, this.rowBuffer);
+
+ if (this.rowBuffer.rowCount >= this.rowGroupSize) {
+ this.rowBuffer = {};
  }
- /**
- * Append a single row to the parquet file. Rows are buffered in memory until
- * rowGroupSize rows are in the buffer or close() is called
- */
- async appendRow(row) {
- if (this.closed) {
- throw new Error('writer was closed');
- }
- Shred.shredRecord(this.schema, row, this.rowBuffer);
- if (this.rowBuffer.rowCount >= this.rowGroupSize) {
- // @ts-ignore
- this.rowBuffer = {};
- }
+ }
+
+ async close(callback) {
+ if (this.closed) {
+ throw new Error('writer was closed');
  }
- /**
- * Finish writing the parquet file and commit the footer to disk. This method
- * MUST be called after you are finished adding rows. You must not call this
- * method twice on the same object or add any rows after the close() method has
- * been called
- */
- async close(callback) {
- if (this.closed) {
- throw new Error('writer was closed');
- }
- this.closed = true;
- if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
- // @ts-ignore
- this.rowBuffer = {};
- }
- await this.envelopeWriter.writeFooter(this.userMetadata);
- await this.envelopeWriter.close();
- // this.envelopeWriter = null;
- if (callback) {
- callback();
- }
+
+ this.closed = true;
+
+ if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
+ this.rowBuffer = {};
  }
- /**
- * Add key<>value metadata to the file
- */
- setMetadata(key, value) {
- // TODO: value to be any, obj -> JSON
- this.userMetadata[String(key)] = String(value);
- }
- /**
- * Set the parquet row group size. This values controls the maximum number
- * of rows that are buffered in memory at any given time as well as the number
- * of rows that are co-located on disk. A higher value is generally better for
- * read-time I/O performance at the tradeoff of write-time memory usage.
- */
- setRowGroupSize(cnt) {
- this.rowGroupSize = cnt;
- }
- /**
- * Set the parquet data page size. The data page size controls the maximum
- * number of column values that are written to disk as a consecutive array
- */
- setPageSize(cnt) {
- this.envelopeWriter.setPageSize(cnt);
+
+ await this.envelopeWriter.writeFooter(this.userMetadata);
+ await this.envelopeWriter.close();
+
+ if (callback) {
+ callback();
  }
+ }
+
+ setMetadata(key, value) {
+ this.userMetadata[String(key)] = String(value);
+ }
+
+ setRowGroupSize(cnt) {
+ this.rowGroupSize = cnt;
+ }
+
+ setPageSize(cnt) {
+ this.envelopeWriter.setPageSize(cnt);
+ }
+
  }
- exports.ParquetWriter = ParquetWriter;
- /**
- * Create a parquet file from a schema and a number of row groups. This class
- * performs direct, unbuffered writes to the underlying output stream and is
- * intendend for advanced and internal users; the writeXXX methods must be
- * called in the correct order to produce a valid file.
- */
- class ParquetEnvelopeWriter {
- constructor(schema, writeFn, closeFn, fileOffset, opts) {
- this.schema = schema;
- this.write = writeFn;
- this.close = closeFn;
- this.offset = fileOffset;
- this.rowCount = 0;
- this.rowGroups = [];
- this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
- this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
- }
- /**
- * Create a new parquet envelope writer that writes to the specified stream
- */
- static async openStream(schema, outputStream, opts) {
- const writeFn = file_utils_1.oswrite.bind(undefined, outputStream);
- const closeFn = file_utils_1.osclose.bind(undefined, outputStream);
- return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
- }
- writeSection(buf) {
- this.offset += buf.length;
- return this.write(buf);
- }
- /**
- * Encode the parquet file header
- */
- writeHeader() {
- return this.writeSection(Buffer.from(PARQUET_MAGIC));
- }
- /**
- * Encode a parquet row group. The records object should be created using the
- * shredRecord method
- */
- async writeRowGroup(records) {
- const rgroup = await encodeRowGroup(this.schema, records, {
- baseOffset: this.offset,
- pageSize: this.pageSize,
- useDataPageV2: this.useDataPageV2
- });
- this.rowCount += records.rowCount;
- this.rowGroups.push(rgroup.metadata);
- return await this.writeSection(rgroup.body);
- }
- /**
- * Write the parquet file footer
- */
- writeFooter(userMetadata) {
- if (!userMetadata) {
- // tslint:disable-next-line:no-parameter-reassignment
- userMetadata = {};
- }
- return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
- }
- /**
- * Set the parquet data page size. The data page size controls the maximum
- * number of column values that are written to disk as a consecutive array
- */
- setPageSize(cnt) {
- this.pageSize = cnt;
+ export class ParquetEnvelopeWriter {
+ static async openStream(schema, outputStream, opts) {
+ const writeFn = oswrite.bind(undefined, outputStream);
+ const closeFn = osclose.bind(undefined, outputStream);
+ return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
+ }
+
+ constructor(schema, writeFn, closeFn, fileOffset, opts) {
+ _defineProperty(this, "schema", void 0);
+
+ _defineProperty(this, "write", void 0);
+
+ _defineProperty(this, "close", void 0);
+
+ _defineProperty(this, "offset", void 0);
+
+ _defineProperty(this, "rowCount", void 0);
+
+ _defineProperty(this, "rowGroups", void 0);
+
+ _defineProperty(this, "pageSize", void 0);
+
+ _defineProperty(this, "useDataPageV2", void 0);
+
+ this.schema = schema;
+ this.write = writeFn;
+ this.close = closeFn;
+ this.offset = fileOffset;
+ this.rowCount = 0;
+ this.rowGroups = [];
+ this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
+ this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
+ }
+
+ writeSection(buf) {
+ this.offset += buf.length;
+ return this.write(buf);
+ }
+
+ writeHeader() {
+ return this.writeSection(Buffer.from(PARQUET_MAGIC));
+ }
+
+ async writeRowGroup(records) {
+ const rgroup = await encodeRowGroup(this.schema, records, {
+ baseOffset: this.offset,
+ pageSize: this.pageSize,
+ useDataPageV2: this.useDataPageV2
+ });
+ this.rowCount += records.rowCount;
+ this.rowGroups.push(rgroup.metadata);
+ return await this.writeSection(rgroup.body);
+ }
+
+ writeFooter(userMetadata) {
+ if (!userMetadata) {
+ userMetadata = {};
  }
+
+ return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
+ }
+
+ setPageSize(cnt) {
+ this.pageSize = cnt;
+ }
+
  }
- exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
- /**
- * Create a parquet transform stream
- */
- class ParquetTransformer extends stream_1.Transform {
- constructor(schema, opts = {}) {
- super({ objectMode: true });
- const writeProxy = (function (t) {
- return async function (b) {
- t.push(b);
- };
- })(this);
- this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => { }, 0, opts), opts);
- }
- // tslint:disable-next-line:function-name
- _transform(row, encoding, callback) {
- if (row) {
- return this.writer.appendRow(row).then(callback);
- }
- callback();
- return Promise.resolve();
- }
- // tslint:disable-next-line:function-name
- async _flush(callback) {
- await this.writer.close(callback);
+ export class ParquetTransformer extends Transform {
+ constructor(schema, opts = {}) {
+ super({
+ objectMode: true
+ });
+
+ _defineProperty(this, "writer", void 0);
+
+ const writeProxy = function (t) {
+ return async function (b) {
+ t.push(b);
+ };
+ }(this);
+
+ this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts), opts);
+ }
+
+ _transform(row, encoding, callback) {
+ if (row) {
+ return this.writer.appendRow(row).then(callback);
  }
+
+ callback();
+ return Promise.resolve();
+ }
+
+ async _flush(callback) {
+ await this.writer.close(callback);
+ }
+
  }
- exports.ParquetTransformer = ParquetTransformer;
- /**
- * Encode a consecutive array of data using one of the parquet encodings
- */
+
  function encodeValues(type, encoding, values, opts) {
- if (!(encoding in codecs_1.PARQUET_CODECS)) {
- throw new Error(`invalid encoding: ${encoding}`);
- }
- return codecs_1.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
+ if (!(encoding in PARQUET_CODECS)) {
+ throw new Error("invalid encoding: ".concat(encoding));
+ }
+
+ return PARQUET_CODECS[encoding].encodeValues(type, values, opts);
  }
- /**
- * Encode a parquet data page
- */
+
  async function encodeDataPage(column, data) {
- /* encode repetition and definition levels */
- let rLevelsBuf = Buffer.alloc(0);
- if (column.rLevelMax > 0) {
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
- bitWidth: (0, read_utils_1.getBitWidth)(column.rLevelMax)
- // disableEnvelope: false
- });
- }
- let dLevelsBuf = Buffer.alloc(0);
- if (column.dLevelMax > 0) {
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
- bitWidth: (0, read_utils_1.getBitWidth)(column.dLevelMax)
- // disableEnvelope: false
- });
- }
- /* encode values */
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
- typeLength: column.typeLength,
- bitWidth: column.typeLength
+ let rLevelsBuf = Buffer.alloc(0);
+
+ if (column.rLevelMax > 0) {
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+ bitWidth: getBitWidth(column.rLevelMax)
  });
- const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
- // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
- const compressedBuf = await Compression.deflate(column.compression, dataBuf);
- /* build page header */
- const header = new parquet_thrift_1.PageHeader({
- type: parquet_thrift_1.PageType.DATA_PAGE,
- data_page_header: new parquet_thrift_1.DataPageHeader({
- num_values: data.count,
- encoding: parquet_thrift_1.Encoding[column.encoding],
- definition_level_encoding: parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
- repetition_level_encoding: parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]
- }),
- uncompressed_page_size: dataBuf.length,
- compressed_page_size: compressedBuf.length
+ }
+
+ let dLevelsBuf = Buffer.alloc(0);
+
+ if (column.dLevelMax > 0) {
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+ bitWidth: getBitWidth(column.dLevelMax)
  });
- /* concat page header, repetition and definition levels and values */
- const headerBuf = (0, read_utils_1.serializeThrift)(header);
- const page = Buffer.concat([headerBuf, compressedBuf]);
- return { header, headerSize: headerBuf.length, page };
+ }
+
+ const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+ typeLength: column.typeLength,
+ bitWidth: column.typeLength
+ });
+ const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
+ const compressedBuf = await Compression.deflate(column.compression, dataBuf);
+ const header = new PageHeader({
+ type: PageType.DATA_PAGE,
+ data_page_header: new DataPageHeader({
+ num_values: data.count,
+ encoding: Encoding[column.encoding],
+ definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING],
+ repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING]
+ }),
+ uncompressed_page_size: dataBuf.length,
+ compressed_page_size: compressedBuf.length
+ });
+ const headerBuf = serializeThrift(header);
+ const page = Buffer.concat([headerBuf, compressedBuf]);
+ return {
+ header,
+ headerSize: headerBuf.length,
+ page
+ };
  }
- /**
- * Encode a parquet data page (v2)
- */
+
  async function encodeDataPageV2(column, data, rowCount) {
- /* encode values */
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
- typeLength: column.typeLength,
- bitWidth: column.typeLength
+ const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+ typeLength: column.typeLength,
+ bitWidth: column.typeLength
+ });
+ const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
+ let rLevelsBuf = Buffer.alloc(0);
+
+ if (column.rLevelMax > 0) {
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+ bitWidth: getBitWidth(column.rLevelMax),
+ disableEnvelope: true
  });
- // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
- const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
- /* encode repetition and definition levels */
- let rLevelsBuf = Buffer.alloc(0);
- if (column.rLevelMax > 0) {
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
- bitWidth: (0, read_utils_1.getBitWidth)(column.rLevelMax),
- disableEnvelope: true
- });
- }
- let dLevelsBuf = Buffer.alloc(0);
- if (column.dLevelMax > 0) {
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
- bitWidth: (0, read_utils_1.getBitWidth)(column.dLevelMax),
- disableEnvelope: true
- });
- }
- /* build page header */
- const header = new parquet_thrift_1.PageHeader({
- type: parquet_thrift_1.PageType.DATA_PAGE_V2,
- data_page_header_v2: new parquet_thrift_1.DataPageHeaderV2({
- num_values: data.count,
- num_nulls: data.count - data.values.length,
- num_rows: rowCount,
- encoding: parquet_thrift_1.Encoding[column.encoding],
- definition_levels_byte_length: dLevelsBuf.length,
- repetition_levels_byte_length: rLevelsBuf.length,
- is_compressed: column.compression !== 'UNCOMPRESSED'
- }),
- uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
- compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
+ }
+
+ let dLevelsBuf = Buffer.alloc(0);
+
+ if (column.dLevelMax > 0) {
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+ bitWidth: getBitWidth(column.dLevelMax),
+ disableEnvelope: true
  });
- /* concat page header, repetition and definition levels and values */
- const headerBuf = (0, read_utils_1.serializeThrift)(header);
- const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
- return { header, headerSize: headerBuf.length, page };
+ }
+
+ const header = new PageHeader({
+ type: PageType.DATA_PAGE_V2,
+ data_page_header_v2: new DataPageHeaderV2({
+ num_values: data.count,
+ num_nulls: data.count - data.values.length,
+ num_rows: rowCount,
+ encoding: Encoding[column.encoding],
+ definition_levels_byte_length: dLevelsBuf.length,
+ repetition_levels_byte_length: rLevelsBuf.length,
+ is_compressed: column.compression !== 'UNCOMPRESSED'
+ }),
+ uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
+ compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
+ });
+ const headerBuf = serializeThrift(header);
+ const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
+ return {
+ header,
+ headerSize: headerBuf.length,
+ page
+ };
  }
- /**
- * Encode an array of values into a parquet column chunk
- */
+
  async function encodeColumnChunk(column, buffer, offset, opts) {
- const data = buffer.columnData[column.path.join()];
- const baseOffset = (opts.baseOffset || 0) + offset;
- /* encode data page(s) */
- // const pages: Buffer[] = [];
- let pageBuf;
- // tslint:disable-next-line:variable-name
- let total_uncompressed_size = 0;
- // tslint:disable-next-line:variable-name
- let total_compressed_size = 0;
- {
- const result = opts.useDataPageV2
- ? await encodeDataPageV2(column, data, buffer.rowCount)
- : await encodeDataPage(column, data);
- // pages.push(result.page);
- pageBuf = result.page;
- total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
- total_compressed_size += result.header.compressed_page_size + result.headerSize;
- }
- // const pagesBuf = Buffer.concat(pages);
- // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;
- /* prepare metadata header */
- const metadata = new parquet_thrift_1.ColumnMetaData({
- path_in_schema: column.path,
- num_values: data.count,
- data_page_offset: baseOffset,
- encodings: [],
- total_uncompressed_size,
- total_compressed_size,
- type: parquet_thrift_1.Type[column.primitiveType],
- codec: parquet_thrift_1.CompressionCodec[column.compression]
- });
- /* list encodings */
- metadata.encodings.push(parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING]);
- metadata.encodings.push(parquet_thrift_1.Encoding[column.encoding]);
- /* concat metadata header and data pages */
- const metadataOffset = baseOffset + pageBuf.length;
- const body = Buffer.concat([pageBuf, (0, read_utils_1.serializeThrift)(metadata)]);
- return { body, metadata, metadataOffset };
+ const data = buffer.columnData[column.path.join()];
+ const baseOffset = (opts.baseOffset || 0) + offset;
+ let pageBuf;
+ let total_uncompressed_size = 0;
+ let total_compressed_size = 0;
+ {
+ const result = opts.useDataPageV2 ? await encodeDataPageV2(column, data, buffer.rowCount) : await encodeDataPage(column, data);
+ pageBuf = result.page;
+ total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
+ total_compressed_size += result.header.compressed_page_size + result.headerSize;
+ }
+ const metadata = new ColumnMetaData({
+ path_in_schema: column.path,
+ num_values: data.count,
+ data_page_offset: baseOffset,
+ encodings: [],
+ total_uncompressed_size,
+ total_compressed_size,
+ type: Type[column.primitiveType],
+ codec: CompressionCodec[column.compression]
+ });
+ metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);
+ metadata.encodings.push(Encoding[column.encoding]);
+ const metadataOffset = baseOffset + pageBuf.length;
+ const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
+ return {
+ body,
+ metadata,
+ metadataOffset
+ };
  }
- /**
- * Encode a list of column values into a parquet row group
- */
+
  async function encodeRowGroup(schema, data, opts) {
- const metadata = new parquet_thrift_1.RowGroup({
- num_rows: data.rowCount,
- columns: [],
- total_byte_size: 0
- });
- let body = Buffer.alloc(0);
- for (const field of schema.fieldList) {
- if (field.isNested) {
- continue; // eslint-disable-line no-continue
- }
- const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
- const cchunk = new parquet_thrift_1.ColumnChunk({
- file_offset: cchunkData.metadataOffset,
- meta_data: cchunkData.metadata
- });
- metadata.columns.push(cchunk);
- metadata.total_byte_size = new node_int64_1.default(Number(metadata.total_byte_size) + cchunkData.body.length);
- body = Buffer.concat([body, cchunkData.body]);
+ const metadata = new RowGroup({
+ num_rows: data.rowCount,
+ columns: [],
+ total_byte_size: 0
+ });
+ let body = Buffer.alloc(0);
+
+ for (const field of schema.fieldList) {
+ if (field.isNested) {
+ continue;
  }
- return { body, metadata };
+
+ const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
+ const cchunk = new ColumnChunk({
+ file_offset: cchunkData.metadataOffset,
+ meta_data: cchunkData.metadata
+ });
+ metadata.columns.push(cchunk);
+ metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
+ body = Buffer.concat([body, cchunkData.body]);
+ }
+
+ return {
+ body,
+ metadata
+ };
  }
- /**
- * Encode a parquet file metadata footer
- */
+
  function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
- const metadata = new parquet_thrift_1.FileMetaData({
- version: PARQUET_VERSION,
- created_by: 'parquets',
- num_rows: rowCount,
- row_groups: rowGroups,
- schema: [],
- key_value_metadata: []
+ const metadata = new FileMetaData({
+ version: PARQUET_VERSION,
+ created_by: 'parquets',
+ num_rows: rowCount,
+ row_groups: rowGroups,
+ schema: [],
+ key_value_metadata: []
+ });
+
+ for (const key in userMetadata) {
+ var _metadata$key_value_m, _metadata$key_value_m2;
+
+ const kv = new KeyValue({
+ key,
+ value: userMetadata[key]
  });
- for (const key in userMetadata) {
- const kv = new parquet_thrift_1.KeyValue({
- key,
- value: userMetadata[key]
- });
- metadata.key_value_metadata?.push?.(kv);
- }
- {
- const schemaRoot = new parquet_thrift_1.SchemaElement({
- name: 'root',
- num_children: Object.keys(schema.fields).length
- });
- metadata.schema.push(schemaRoot);
+ (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
+ }
+
+ {
+ const schemaRoot = new SchemaElement({
+ name: 'root',
+ num_children: Object.keys(schema.fields).length
+ });
+ metadata.schema.push(schemaRoot);
+ }
+
+ for (const field of schema.fieldList) {
+ const relt = FieldRepetitionType[field.repetitionType];
+ const schemaElem = new SchemaElement({
+ name: field.name,
+ repetition_type: relt
+ });
+
+ if (field.isNested) {
+ schemaElem.num_children = field.fieldCount;
+ } else {
+ schemaElem.type = Type[field.primitiveType];
  }
- for (const field of schema.fieldList) {
- const relt = parquet_thrift_1.FieldRepetitionType[field.repetitionType];
- const schemaElem = new parquet_thrift_1.SchemaElement({
- name: field.name,
- repetition_type: relt
- });
- if (field.isNested) {
- schemaElem.num_children = field.fieldCount;
- }
- else {
- schemaElem.type = parquet_thrift_1.Type[field.primitiveType];
- }
- if (field.originalType) {
- schemaElem.converted_type = parquet_thrift_1.ConvertedType[field.originalType];
- }
- schemaElem.type_length = field.typeLength;
- metadata.schema.push(schemaElem);
+
+ if (field.originalType) {
+ schemaElem.converted_type = ConvertedType[field.originalType];
  }
- const metadataEncoded = (0, read_utils_1.serializeThrift)(metadata);
- const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
- metadataEncoded.copy(footerEncoded);
- footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
- footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
- return footerEncoded;
+
+ schemaElem.type_length = field.typeLength;
+ metadata.schema.push(schemaElem);
+ }
+
+ const metadataEncoded = serializeThrift(metadata);
+ const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
+ metadataEncoded.copy(footerEncoded);
+ footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
+ footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
+ return footerEncoded;
  }
+ //# sourceMappingURL=writer.js.map
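
Note on the diff above: the 4.0.0-alpha.5 build keeps the same public surface as 3.1.3 (ParquetWriter.openFile/openStream, appendRow, setMetadata, setRowGroupSize, setPageSize, close, plus ParquetEnvelopeWriter and ParquetTransformer); only the module format changes from CommonJS output to native ESM classes. The sketch below is a hypothetical usage example based solely on the methods visible in this diff; the deep import specifier and the schema object are assumptions, not something this diff documents.

  // Hypothetical sketch; import path and schema construction are assumptions.
  import {ParquetWriter} from '@loaders.gl/parquet/dist/parquetjs/encoder/writer';

  // `schema` is assumed to be a parquet schema instance accepted by Shred.shredRecord.
  async function writeRows(schema, rows) {
    // openFile() resolves an output stream via osopen() and returns a buffered writer;
    // the constructor immediately writes the 'PAR1' header through the envelope writer.
    const writer = await ParquetWriter.openFile(schema, '/tmp/example.parquet', {});
    writer.setRowGroupSize(1024);            // rows buffered per row group (default 4096)
    writer.setMetadata('generator', 'demo'); // stored as key/value metadata in the footer
    for (const row of rows) {
      await writer.appendRow(row);           // shredded into the in-memory row buffer
    }
    await writer.close();                    // writes the thrift footer and closes the stream
  }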