@loaders.gl/parquet 3.1.0-alpha.4 → 3.1.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387) hide show
  1. package/dist/bundle.d.ts +2 -0
  2. package/dist/bundle.d.ts.map +1 -0
  3. package/dist/bundle.js +5 -0
  4. package/dist/constants.d.ts +15 -0
  5. package/dist/constants.d.ts.map +1 -0
  6. package/dist/constants.js +18 -0
  7. package/dist/dist.min.js +27 -13
  8. package/dist/dist.min.js.map +7 -1
  9. package/dist/es5/bundle.js +1 -1
  10. package/dist/es5/bundle.js.map +1 -1
  11. package/dist/es5/constants.js +5 -5
  12. package/dist/es5/constants.js.map +1 -1
  13. package/dist/es5/index.js +16 -45
  14. package/dist/es5/index.js.map +1 -1
  15. package/dist/es5/lib/convert-schema.js +13 -13
  16. package/dist/es5/lib/convert-schema.js.map +1 -1
  17. package/dist/es5/lib/parse-parquet.js +19 -154
  18. package/dist/es5/lib/parse-parquet.js.map +1 -1
  19. package/dist/es5/lib/read-array-buffer.js +6 -43
  20. package/dist/es5/lib/read-array-buffer.js.map +1 -1
  21. package/dist/es5/parquet-loader.js +4 -4
  22. package/dist/es5/parquet-loader.js.map +1 -1
  23. package/dist/es5/parquet-writer.js +4 -4
  24. package/dist/es5/parquet-writer.js.map +1 -1
  25. package/dist/es5/parquetjs/codecs/dictionary.js +2 -10
  26. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  27. package/dist/es5/parquetjs/codecs/index.js +4 -6
  28. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  29. package/dist/es5/parquetjs/codecs/plain.js +41 -43
  30. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  31. package/dist/es5/parquetjs/codecs/rle.js +25 -35
  32. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  33. package/dist/es5/parquetjs/compression.js +28 -122
  34. package/dist/es5/parquetjs/compression.js.map +1 -1
  35. package/dist/es5/parquetjs/encoder/writer.js +301 -737
  36. package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
  37. package/dist/es5/parquetjs/file.js +15 -15
  38. package/dist/es5/parquetjs/file.js.map +1 -1
  39. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  40. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +31 -45
  41. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  42. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +141 -152
  43. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  44. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +147 -160
  45. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +248 -259
  47. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +67 -79
  49. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  51. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  52. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +113 -124
  53. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  54. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +158 -169
  55. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/DateType.js +31 -45
  57. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +68 -79
  59. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +83 -94
  61. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  63. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +31 -45
  64. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +170 -182
  67. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +31 -45
  69. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  70. package/dist/es5/parquetjs/parquet-thrift/IntType.js +68 -79
  71. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +31 -45
  73. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +68 -79
  75. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/ListType.js +31 -45
  77. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +319 -343
  79. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/MapType.js +31 -45
  81. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +31 -45
  83. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +31 -45
  85. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/NullType.js +31 -45
  87. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +64 -75
  89. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +83 -94
  91. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +158 -169
  93. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +83 -94
  95. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +113 -124
  98. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +188 -199
  100. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +83 -94
  102. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +124 -135
  104. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  105. package/dist/es5/parquetjs/parquet-thrift/StringType.js +31 -45
  106. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  107. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +68 -79
  108. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  109. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +88 -101
  110. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  111. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +68 -79
  112. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  113. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  114. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -45
  115. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  116. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +31 -45
  117. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  118. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  119. package/dist/es5/parquetjs/parser/decoders.js +218 -397
  120. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  121. package/dist/es5/parquetjs/parser/parquet-cursor.js +62 -180
  122. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
  123. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +124 -408
  124. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  125. package/dist/es5/parquetjs/parser/parquet-reader.js +91 -369
  126. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  127. package/dist/es5/parquetjs/schema/declare.js +9 -11
  128. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  129. package/dist/es5/parquetjs/schema/schema.js +73 -87
  130. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  131. package/dist/es5/parquetjs/schema/shred.js +56 -96
  132. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  133. package/dist/es5/parquetjs/schema/types.js +39 -40
  134. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  135. package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
  136. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
  137. package/dist/es5/parquetjs/utils/file-utils.js +8 -65
  138. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  139. package/dist/es5/parquetjs/utils/read-utils.js +22 -50
  140. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  141. package/dist/esm/index.js +2 -3
  142. package/dist/esm/index.js.map +1 -1
  143. package/dist/esm/parquet-loader.js +1 -1
  144. package/dist/esm/parquet-loader.js.map +1 -1
  145. package/dist/esm/parquet-writer.js +1 -1
  146. package/dist/esm/parquet-writer.js.map +1 -1
  147. package/dist/esm/parquetjs/codecs/plain.js +3 -3
  148. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  149. package/dist/esm/parquetjs/codecs/rle.js +1 -1
  150. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  151. package/dist/esm/parquetjs/compression.js +4 -13
  152. package/dist/esm/parquetjs/compression.js.map +1 -1
  153. package/dist/esm/parquetjs/encoder/writer.js +1 -1
  154. package/dist/esm/parquetjs/encoder/writer.js.map +1 -1
  155. package/dist/esm/parquetjs/parser/decoders.js +4 -4
  156. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  157. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +4 -13
  158. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  159. package/dist/esm/parquetjs/parser/parquet-reader.js +0 -13
  160. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  161. package/dist/esm/parquetjs/schema/schema.js +3 -3
  162. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  163. package/dist/esm/parquetjs/schema/shred.js +2 -2
  164. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  165. package/dist/esm/parquetjs/schema/types.js +20 -20
  166. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  167. package/dist/esm/parquetjs/utils/file-utils.js +0 -45
  168. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  169. package/dist/index.d.ts +28 -0
  170. package/dist/index.d.ts.map +1 -0
  171. package/dist/index.js +30 -0
  172. package/dist/lib/convert-schema.d.ts +8 -0
  173. package/dist/lib/convert-schema.d.ts.map +1 -0
  174. package/dist/lib/convert-schema.js +70 -0
  175. package/dist/lib/parse-parquet.d.ts +4 -0
  176. package/dist/lib/parse-parquet.d.ts.map +1 -0
  177. package/dist/lib/parse-parquet.js +28 -0
  178. package/dist/lib/read-array-buffer.d.ts +19 -0
  179. package/dist/lib/read-array-buffer.d.ts.map +1 -0
  180. package/dist/lib/read-array-buffer.js +29 -0
  181. package/dist/parquet-loader.d.ts +23 -0
  182. package/dist/parquet-loader.d.ts.map +1 -0
  183. package/dist/parquet-loader.js +27 -0
  184. package/dist/parquet-worker.js +27 -13
  185. package/dist/parquet-worker.js.map +7 -1
  186. package/dist/parquet-writer.d.ts +4 -0
  187. package/dist/parquet-writer.d.ts.map +1 -0
  188. package/dist/parquet-writer.js +21 -0
  189. package/dist/parquetjs/codecs/declare.d.ts +17 -0
  190. package/dist/parquetjs/codecs/declare.d.ts.map +1 -0
  191. package/dist/parquetjs/codecs/declare.js +2 -0
  192. package/dist/parquetjs/codecs/dictionary.d.ts +3 -0
  193. package/dist/parquetjs/codecs/dictionary.d.ts.map +1 -0
  194. package/dist/parquetjs/codecs/dictionary.js +14 -0
  195. package/dist/parquetjs/codecs/index.d.ts +5 -0
  196. package/dist/parquetjs/codecs/index.d.ts.map +1 -0
  197. package/dist/parquetjs/codecs/index.js +51 -0
  198. package/dist/parquetjs/codecs/plain.d.ts +6 -0
  199. package/dist/parquetjs/codecs/plain.d.ts.map +1 -0
  200. package/dist/parquetjs/codecs/plain.js +211 -0
  201. package/dist/parquetjs/codecs/rle.d.ts +6 -0
  202. package/dist/parquetjs/codecs/rle.d.ts.map +1 -0
  203. package/dist/parquetjs/codecs/rle.js +145 -0
  204. package/dist/parquetjs/compression.d.ts +23 -0
  205. package/dist/parquetjs/compression.d.ts.map +1 -0
  206. package/dist/parquetjs/compression.js +168 -0
  207. package/dist/parquetjs/encoder/writer.d.ts +123 -0
  208. package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
  209. package/dist/parquetjs/encoder/writer.js +478 -0
  210. package/dist/parquetjs/file.d.ts +10 -0
  211. package/dist/parquetjs/file.d.ts.map +1 -0
  212. package/dist/parquetjs/file.js +99 -0
  213. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts +6 -0
  214. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +1 -0
  215. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +15 -0
  216. package/dist/parquetjs/parquet-thrift/BsonType.d.ts +9 -0
  217. package/dist/parquetjs/parquet-thrift/BsonType.d.ts.map +1 -0
  218. package/dist/parquetjs/parquet-thrift/BsonType.js +58 -0
  219. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts +25 -0
  220. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -0
  221. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +207 -0
  222. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts +22 -0
  223. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -0
  224. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +213 -0
  225. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts +42 -0
  226. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -0
  227. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +398 -0
  228. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts +13 -0
  229. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -0
  230. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +104 -0
  231. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts +11 -0
  232. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +1 -0
  233. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +20 -0
  234. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts +25 -0
  235. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts.map +1 -0
  236. package/dist/parquetjs/parquet-thrift/ConvertedType.js +34 -0
  237. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts +21 -0
  238. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -0
  239. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +166 -0
  240. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts +27 -0
  241. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -0
  242. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +226 -0
  243. package/dist/parquetjs/parquet-thrift/DateType.d.ts +9 -0
  244. package/dist/parquetjs/parquet-thrift/DateType.d.ts.map +1 -0
  245. package/dist/parquetjs/parquet-thrift/DateType.js +58 -0
  246. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts +13 -0
  247. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts.map +1 -0
  248. package/dist/parquetjs/parquet-thrift/DecimalType.js +105 -0
  249. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts +16 -0
  250. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -0
  251. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +122 -0
  252. package/dist/parquetjs/parquet-thrift/Encoding.d.ts +11 -0
  253. package/dist/parquetjs/parquet-thrift/Encoding.d.ts.map +1 -0
  254. package/dist/parquetjs/parquet-thrift/Encoding.js +20 -0
  255. package/dist/parquetjs/parquet-thrift/EnumType.d.ts +9 -0
  256. package/dist/parquetjs/parquet-thrift/EnumType.d.ts.map +1 -0
  257. package/dist/parquetjs/parquet-thrift/EnumType.js +58 -0
  258. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts +6 -0
  259. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +1 -0
  260. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +15 -0
  261. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts +28 -0
  262. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -0
  263. package/dist/parquetjs/parquet-thrift/FileMetaData.js +256 -0
  264. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts +9 -0
  265. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +1 -0
  266. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +58 -0
  267. package/dist/parquetjs/parquet-thrift/IntType.d.ts +13 -0
  268. package/dist/parquetjs/parquet-thrift/IntType.d.ts.map +1 -0
  269. package/dist/parquetjs/parquet-thrift/IntType.js +105 -0
  270. package/dist/parquetjs/parquet-thrift/JsonType.d.ts +9 -0
  271. package/dist/parquetjs/parquet-thrift/JsonType.d.ts.map +1 -0
  272. package/dist/parquetjs/parquet-thrift/JsonType.js +58 -0
  273. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts +13 -0
  274. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts.map +1 -0
  275. package/dist/parquetjs/parquet-thrift/KeyValue.js +102 -0
  276. package/dist/parquetjs/parquet-thrift/ListType.d.ts +9 -0
  277. package/dist/parquetjs/parquet-thrift/ListType.d.ts.map +1 -0
  278. package/dist/parquetjs/parquet-thrift/ListType.js +58 -0
  279. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts +61 -0
  280. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -0
  281. package/dist/parquetjs/parquet-thrift/LogicalType.js +380 -0
  282. package/dist/parquetjs/parquet-thrift/MapType.d.ts +9 -0
  283. package/dist/parquetjs/parquet-thrift/MapType.d.ts.map +1 -0
  284. package/dist/parquetjs/parquet-thrift/MapType.js +58 -0
  285. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts +9 -0
  286. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +1 -0
  287. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +58 -0
  288. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts +9 -0
  289. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +1 -0
  290. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +58 -0
  291. package/dist/parquetjs/parquet-thrift/NullType.d.ts +9 -0
  292. package/dist/parquetjs/parquet-thrift/NullType.d.ts.map +1 -0
  293. package/dist/parquetjs/parquet-thrift/NullType.js +58 -0
  294. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts +12 -0
  295. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -0
  296. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +97 -0
  297. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts +17 -0
  298. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -0
  299. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +127 -0
  300. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts +30 -0
  301. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -0
  302. package/dist/parquetjs/parquet-thrift/PageHeader.js +216 -0
  303. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts +16 -0
  304. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts.map +1 -0
  305. package/dist/parquetjs/parquet-thrift/PageLocation.js +141 -0
  306. package/dist/parquetjs/parquet-thrift/PageType.d.ts +7 -0
  307. package/dist/parquetjs/parquet-thrift/PageType.d.ts.map +1 -0
  308. package/dist/parquetjs/parquet-thrift/PageType.js +16 -0
  309. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts +20 -0
  310. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -0
  311. package/dist/parquetjs/parquet-thrift/RowGroup.js +182 -0
  312. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts +33 -0
  313. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -0
  314. package/dist/parquetjs/parquet-thrift/SchemaElement.js +239 -0
  315. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts +15 -0
  316. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts.map +1 -0
  317. package/dist/parquetjs/parquet-thrift/SortingColumn.js +127 -0
  318. package/dist/parquetjs/parquet-thrift/Statistics.d.ts +23 -0
  319. package/dist/parquetjs/parquet-thrift/Statistics.d.ts.map +1 -0
  320. package/dist/parquetjs/parquet-thrift/Statistics.js +176 -0
  321. package/dist/parquetjs/parquet-thrift/StringType.d.ts +9 -0
  322. package/dist/parquetjs/parquet-thrift/StringType.d.ts.map +1 -0
  323. package/dist/parquetjs/parquet-thrift/StringType.js +58 -0
  324. package/dist/parquetjs/parquet-thrift/TimeType.d.ts +14 -0
  325. package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -0
  326. package/dist/parquetjs/parquet-thrift/TimeType.js +106 -0
  327. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts +17 -0
  328. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -0
  329. package/dist/parquetjs/parquet-thrift/TimeUnit.js +127 -0
  330. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts +14 -0
  331. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -0
  332. package/dist/parquetjs/parquet-thrift/TimestampType.js +106 -0
  333. package/dist/parquetjs/parquet-thrift/Type.d.ts +11 -0
  334. package/dist/parquetjs/parquet-thrift/Type.d.ts.map +1 -0
  335. package/dist/parquetjs/parquet-thrift/Type.js +20 -0
  336. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts +9 -0
  337. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +1 -0
  338. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +58 -0
  339. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts +9 -0
  340. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts.map +1 -0
  341. package/dist/parquetjs/parquet-thrift/UUIDType.js +58 -0
  342. package/dist/parquetjs/parquet-thrift/index.d.ts +44 -0
  343. package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -0
  344. package/dist/parquetjs/parquet-thrift/index.js +61 -0
  345. package/dist/parquetjs/parser/decoders.d.ts +34 -0
  346. package/dist/parquetjs/parser/decoders.d.ts.map +1 -0
  347. package/dist/parquetjs/parser/decoders.js +318 -0
  348. package/dist/parquetjs/parser/parquet-cursor.d.ts +36 -0
  349. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
  350. package/dist/parquetjs/parser/parquet-cursor.js +74 -0
  351. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +40 -0
  352. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
  353. package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
  354. package/dist/parquetjs/parser/parquet-reader.d.ts +68 -0
  355. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -0
  356. package/dist/parquetjs/parser/parquet-reader.js +134 -0
  357. package/dist/parquetjs/schema/declare.d.ts +80 -0
  358. package/dist/parquetjs/schema/declare.d.ts.map +1 -0
  359. package/dist/parquetjs/schema/declare.js +10 -0
  360. package/dist/parquetjs/schema/schema.d.ts +26 -0
  361. package/dist/parquetjs/schema/schema.d.ts.map +1 -0
  362. package/dist/parquetjs/schema/schema.js +162 -0
  363. package/dist/parquetjs/schema/shred.d.ts +48 -0
  364. package/dist/parquetjs/schema/shred.d.ts.map +1 -0
  365. package/dist/parquetjs/schema/shred.js +225 -0
  366. package/dist/parquetjs/schema/types.d.ts +20 -0
  367. package/dist/parquetjs/schema/types.d.ts.map +1 -0
  368. package/dist/parquetjs/schema/types.js +418 -0
  369. package/dist/parquetjs/utils/buffer-utils.d.ts +10 -0
  370. package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
  371. package/dist/parquetjs/utils/buffer-utils.js +22 -0
  372. package/dist/parquetjs/utils/file-utils.d.ts +16 -0
  373. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -0
  374. package/dist/parquetjs/utils/file-utils.js +46 -0
  375. package/dist/parquetjs/utils/read-utils.d.ts +25 -0
  376. package/dist/parquetjs/utils/read-utils.d.ts.map +1 -0
  377. package/dist/parquetjs/utils/read-utils.js +109 -0
  378. package/dist/workers/parquet-worker.d.ts +2 -0
  379. package/dist/workers/parquet-worker.d.ts.map +1 -0
  380. package/dist/workers/parquet-worker.js +5 -0
  381. package/package.json +8 -8
  382. package/src/index.ts +3 -3
  383. package/src/parquetjs/compression.ts +10 -10
  384. package/src/parquetjs/parser/decoders.ts +1 -1
  385. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -11
  386. package/src/parquetjs/parser/parquet-reader.ts +0 -16
  387. package/src/parquetjs/utils/file-utils.ts +0 -49
@@ -0,0 +1,478 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
5
+ }) : (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ o[k2] = m[k];
8
+ }));
9
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
10
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
11
+ }) : function(o, v) {
12
+ o["default"] = v;
13
+ });
14
+ var __importStar = (this && this.__importStar) || function (mod) {
15
+ if (mod && mod.__esModule) return mod;
16
+ var result = {};
17
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
18
+ __setModuleDefault(result, mod);
19
+ return result;
20
+ };
21
+ var __importDefault = (this && this.__importDefault) || function (mod) {
22
+ return (mod && mod.__esModule) ? mod : { "default": mod };
23
+ };
24
+ Object.defineProperty(exports, "__esModule", { value: true });
25
+ exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
26
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
27
+ /* eslint-disable camelcase */
28
+ const stream_1 = require("stream");
29
+ const codecs_1 = require("../codecs");
30
+ const Compression = __importStar(require("../compression"));
31
+ const Shred = __importStar(require("../schema/shred"));
32
+ const parquet_thrift_1 = require("../parquet-thrift");
33
+ const file_utils_1 = require("../utils/file-utils");
34
+ const read_utils_1 = require("../utils/read-utils");
35
+ const node_int64_1 = __importDefault(require("node-int64"));
36
+ /**
37
+ * Parquet File Magic String
38
+ */
39
+ const PARQUET_MAGIC = 'PAR1';
40
+ /**
41
+ * Parquet File Format Version
42
+ */
43
+ const PARQUET_VERSION = 1;
44
+ /**
45
+ * Default Page and Row Group sizes
46
+ */
47
+ const PARQUET_DEFAULT_PAGE_SIZE = 8192;
48
+ const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
49
+ /**
50
+ * Repetition and Definition Level Encoding
51
+ */
52
+ const PARQUET_RDLVL_TYPE = 'INT32';
53
+ const PARQUET_RDLVL_ENCODING = 'RLE';
54
+ /**
55
+ * Write a parquet file to an output stream. The ParquetWriter will perform
56
+ * buffering/batching for performance, so close() must be called after all rows
57
+ * are written.
58
+ */
59
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
60
class ParquetWriter {
    /**
     * Create a new buffered parquet writer for a given envelope writer.
     *
     * Writing of the file header is started immediately (see writeHeader()).
     *
     * @param schema - schema passed to `Shred.shredRecord` for every appended row
     * @param envelopeWriter - object providing `writeHeader`, `writeRowGroup`,
     *   `writeFooter`, `close` and `setPageSize`
     * @param opts - optional settings; a truthy `opts.rowGroupSize` overrides
     *   PARQUET_DEFAULT_ROW_GROUP_SIZE
     */
    constructor(schema, envelopeWriter, opts = {}) {
        this.schema = schema;
        this.envelopeWriter = envelopeWriter;
        // Shredded rows accumulate here until they are flushed as one row group.
        this.rowBuffer = {};
        this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
        this.closed = false;
        this.userMetadata = {};
        // eslint-disable-next-line @typescript-eslint/no-floating-promises
        this.writeHeader();
    }
    /**
     * Convenience method to create a new buffered parquet writer that writes to
     * the specified file.
     *
     * @param schema - schema of the rows that will be appended
     * @param path - passed to `osopen` together with `opts`
     * @param opts - options forwarded to `osopen` and to openStream()
     * @returns promise resolving to the new ParquetWriter
     */
    static async openFile(schema, path, opts) {
        const outputStream = await (0, file_utils_1.osopen)(path, opts);
        return ParquetWriter.openStream(schema, outputStream, opts);
    }
    /**
     * Convenience method to create a new buffered parquet writer that writes to
     * the specified stream.
     *
     * @param schema - schema of the rows that will be appended
     * @param outputStream - stream handed to ParquetEnvelopeWriter.openStream
     * @param opts - optional settings; defaults to an empty object
     * @returns promise resolving to the new ParquetWriter
     */
    static async openStream(schema, outputStream, opts) {
        if (!opts) {
            // tslint:disable-next-line:no-parameter-reassignment
            opts = {};
        }
        const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
        return new ParquetWriter(schema, envelopeWriter, opts);
    }
    /**
     * Write the file header through the envelope writer. If that fails, the
     * envelope writer is closed and the original error is rethrown.
     */
    async writeHeader() {
        // TODO - better not mess with promises in the constructor
        try {
            await this.envelopeWriter.writeHeader();
        }
        catch (err) {
            await this.envelopeWriter.close();
            throw err;
        }
    }
    /**
     * Append a single row to the parquet file. Rows are buffered in memory until
     * rowGroupSize rows are in the buffer or close() is called; a full buffer is
     * written out as one row group.
     *
     * @param row - record to shred into the row buffer
     * @throws Error if the writer has already been closed
     */
    async appendRow(row) {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        Shred.shredRecord(this.schema, row, this.rowBuffer);
        if (this.rowBuffer.rowCount >= this.rowGroupSize) {
            // Detach the full buffer before awaiting the write, so the buffered
            // rows are written exactly once and later rows start a fresh group.
            const fullGroup = this.rowBuffer;
            this.rowBuffer = {};
            await this.envelopeWriter.writeRowGroup(fullGroup);
        }
    }
    /**
     * Finish writing the parquet file and commit the footer to disk. This method
     * MUST be called after you are finished adding rows. You must not call this
     * method twice on the same object or add any rows after the close() method has
     * been called.
     *
     * @param callback - optional function invoked (without arguments) once the
     *   footer is written and the envelope writer is closed
     * @throws Error if the writer has already been closed
     */
    async close(callback) {
        if (this.closed) {
            throw new Error('writer was closed');
        }
        this.closed = true;
        // Flush rows that never filled a complete row group; without this they
        // would be discarded and missing from the file.
        if (this.rowBuffer.rowCount > 0) {
            const pendingGroup = this.rowBuffer;
            this.rowBuffer = {};
            await this.envelopeWriter.writeRowGroup(pendingGroup);
        }
        await this.envelopeWriter.writeFooter(this.userMetadata);
        await this.envelopeWriter.close();
        // this.envelopeWriter = null;
        if (callback) {
            callback();
        }
    }
    /**
     * Add key<>value metadata to the file. Both key and value are converted to
     * strings.
     */
    setMetadata(key, value) {
        // TODO: value to be any, obj -> JSON
        this.userMetadata[String(key)] = String(value);
    }
    /**
     * Set the parquet row group size. This value controls the maximum number
     * of rows that are buffered in memory at any given time as well as the number
     * of rows that are co-located on disk. A higher value is generally better for
     * read-time I/O performance at the tradeoff of write-time memory usage.
     */
    setRowGroupSize(cnt) {
        this.rowGroupSize = cnt;
    }
    /**
     * Set the parquet data page size. The data page size controls the maximum
     * number of column values that are written to disk as a consecutive array.
     * The value is forwarded to the envelope writer.
     */
    setPageSize(cnt) {
        this.envelopeWriter.setPageSize(cnt);
    }
}
165
+ exports.ParquetWriter = ParquetWriter;
166
+ /**
167
+ * Create a parquet file from a schema and a number of row groups. This class
168
+ * performs direct, unbuffered writes to the underlying output stream and is
169
+ * intended for advanced and internal users; the writeXXX methods must be
170
+ * called in the correct order to produce a valid file.
171
+ */
172
class ParquetEnvelopeWriter {
    /**
     * @param schema - schema handed to the row group and footer encoders
     * @param writeFn - function called with each encoded section buffer
     * @param closeFn - function exposed as `close`
     * @param fileOffset - starting value of the running byte offset
     * @param opts - settings object; `pageSize` and `useDataPageV2` are read
     */
    constructor(schema, writeFn, closeFn, fileOffset, opts) {
        Object.assign(this, {
            schema,
            write: writeFn,
            close: closeFn,
            offset: fileOffset,
            rowCount: 0,
            rowGroups: []
        });
        this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
        // Only an explicitly present, truthy flag enables V2 data pages.
        this.useDataPageV2 = 'useDataPageV2' in opts && Boolean(opts.useDataPageV2);
    }
    /**
     * Build an envelope writer whose write/close functions are `oswrite` and
     * `osclose` bound to the given output stream. The offset starts at 0.
     */
    static async openStream(schema, outputStream, opts) {
        const boundWrite = file_utils_1.oswrite.bind(undefined, outputStream);
        const boundClose = file_utils_1.osclose.bind(undefined, outputStream);
        const startOffset = 0;
        return new ParquetEnvelopeWriter(schema, boundWrite, boundClose, startOffset, opts);
    }
    /**
     * Advance the running offset by the buffer length, then pass the buffer to
     * the write function and return its result.
     */
    writeSection(buf) {
        const sectionLength = buf.length;
        this.offset += sectionLength;
        const pendingWrite = this.write(buf);
        return pendingWrite;
    }
    /**
     * Write the parquet file header (the magic string) as a section.
     */
    writeHeader() {
        const magicBytes = Buffer.from(PARQUET_MAGIC);
        return this.writeSection(magicBytes);
    }
    /**
     * Encode one row group at the current offset, record its row count and
     * metadata, and write its body. The records object should come from the
     * shredRecord method.
     */
    async writeRowGroup(records) {
        const encodeOptions = {
            baseOffset: this.offset,
            pageSize: this.pageSize,
            useDataPageV2: this.useDataPageV2
        };
        const encoded = await encodeRowGroup(this.schema, records, encodeOptions);
        this.rowCount = this.rowCount + records.rowCount;
        this.rowGroups.push(encoded.metadata);
        const writeResult = await this.writeSection(encoded.body);
        return writeResult;
    }
    /**
     * Encode and write the parquet file footer; a missing userMetadata is
     * treated as an empty object.
     */
    writeFooter(userMetadata) {
        const metadata = userMetadata || {};
        const footerBytes = encodeFooter(this.schema, this.rowCount, this.rowGroups, metadata);
        return this.writeSection(footerBytes);
    }
    /**
     * Set the parquet data page size, i.e. the maximum number of column values
     * written to disk as a consecutive array.
     */
    setPageSize(cnt) {
        this.pageSize = cnt;
    }
}
233
+ exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
234
/**
 * Create a parquet transform stream.
 *
 * Rows written to the stream (object mode) are appended to an internal
 * ParquetWriter; the encoded bytes produced by that writer are pushed to the
 * readable side of the stream.
 */
class ParquetTransformer extends stream_1.Transform {
  /**
   * @param schema - schema passed to the internal writer and envelope writer
   * @param opts - writer options, forwarded unchanged (defaults to `{}`)
   */
  constructor(schema, opts = {}) {
    super({ objectMode: true });
    // The envelope writer "writes" by pushing each buffer downstream.
    const writeProxy = async (chunk) => {
      this.push(chunk);
    };
    // Nothing to close: the stream itself ends after _flush.
    const closeProxy = async () => { };
    const envelopeWriter = new ParquetEnvelopeWriter(schema, writeProxy, closeProxy, 0, opts);
    this.writer = new ParquetWriter(schema, envelopeWriter, opts);
  }

  /**
   * Append one row to the writer and signal completion through `callback`.
   * A failed append is reported as `callback(error)` so the stream emits
   * 'error' instead of leaving an unhandled rejection and a stalled stream.
   * Falsy rows are ignored.
   *
   * @returns a promise that settles once `callback` has been invoked
   */
  // tslint:disable-next-line:function-name
  _transform(row, encoding, callback) {
    if (!row) {
      callback();
      return Promise.resolve();
    }
    return this.writer.appendRow(row).then(
      () => callback(),
      (error) => callback(error)
    );
  }

  /**
   * Close the writer when the writable side ends, then signal completion.
   * A failure while closing is forwarded as `callback(error)`.
   */
  // tslint:disable-next-line:function-name
  async _flush(callback) {
    try {
      await this.writer.close();
    } catch (error) {
      callback(error);
      return;
    }
    callback();
  }
}
260
+ exports.ParquetTransformer = ParquetTransformer;
261
/**
 * Encode a consecutive array of data using one of the parquet encodings.
 *
 * @param type - primitive type name forwarded to the codec
 * @param encoding - key of the codec in `PARQUET_CODECS`
 * @param values - values to encode
 * @param opts - codec options, forwarded unchanged
 * @returns whatever the selected codec's `encodeValues` returns
 * @throws {Error} `invalid encoding: <encoding>` when no codec is registered under that key
 */
function encodeValues(type, encoding, values, opts) {
  // Own-property check: the `in` operator would also accept inherited keys
  // such as 'toString' or 'constructor' and then fail later with an opaque
  // "encodeValues is not a function" TypeError.
  if (!Object.prototype.hasOwnProperty.call(codecs_1.PARQUET_CODECS, encoding)) {
    throw new Error(`invalid encoding: ${encoding}`);
  }
  return codecs_1.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
}
270
/**
 * Encode a parquet data page (v1).
 *
 * The page payload is the repetition levels, the definition levels and the
 * values concatenated in that order and then compressed as one unit with the
 * column's compression codec.
 *
 * @param column - column descriptor (rLevelMax, dLevelMax, primitiveType, encoding, typeLength, compression)
 * @param data - column data (rlevels, dlevels, values, count)
 * @returns `{ header, headerSize, page }` where `page` is the serialized header followed by the compressed payload
 */
async function encodeDataPage(column, data) {
  const { getBitWidth, serializeThrift } = read_utils_1;

  // Levels are only written when the column can actually repeat / be undefined.
  const encodeLevels = (levelMax, levels) => {
    if (!(levelMax > 0)) {
      return Buffer.alloc(0);
    }
    return encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, levels, {
      bitWidth: getBitWidth(levelMax)
      // disableEnvelope: false
    });
  };

  const repetitionBuf = encodeLevels(column.rLevelMax, data.rlevels);
  const definitionBuf = encodeLevels(column.dLevelMax, data.dlevels);
  const encodedValues = encodeValues(column.primitiveType, column.encoding, data.values, {
    typeLength: column.typeLength,
    bitWidth: column.typeLength
  });

  const rawPayload = Buffer.concat([repetitionBuf, definitionBuf, encodedValues]);
  const compressedPayload = await Compression.deflate(column.compression, rawPayload);

  const levelEncoding = parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING];
  const dataPageHeader = new parquet_thrift_1.DataPageHeader({
    num_values: data.count,
    encoding: parquet_thrift_1.Encoding[column.encoding],
    definition_level_encoding: levelEncoding,
    repetition_level_encoding: levelEncoding
  });
  const header = new parquet_thrift_1.PageHeader({
    type: parquet_thrift_1.PageType.DATA_PAGE,
    data_page_header: dataPageHeader,
    uncompressed_page_size: rawPayload.length,
    compressed_page_size: compressedPayload.length
  });

  // Final page layout: serialized header, then the compressed payload.
  const headerBuf = serializeThrift(header);
  return {
    header,
    headerSize: headerBuf.length,
    page: Buffer.concat([headerBuf, compressedPayload])
  };
}
314
/**
 * Encode a parquet data page (v2).
 *
 * Only the values are compressed; the repetition and definition levels are
 * encoded with `disableEnvelope: true` and stored uncompressed between the
 * page header and the compressed values.
 *
 * @param column - column descriptor (rLevelMax, dLevelMax, primitiveType, encoding, typeLength, compression)
 * @param data - column data (rlevels, dlevels, values, count)
 * @param rowCount - number of rows recorded in the page header
 * @returns `{ header, headerSize, page }`
 */
async function encodeDataPageV2(column, data, rowCount) {
  const { getBitWidth, serializeThrift } = read_utils_1;

  // Values first, then compress them on their own.
  const encodedValues = encodeValues(column.primitiveType, column.encoding, data.values, {
    typeLength: column.typeLength,
    bitWidth: column.typeLength
  });
  const compressedValues = await Compression.deflate(column.compression, encodedValues);

  // Levels are only written when the column can actually repeat / be undefined.
  const encodeLevels = (levelMax, levels) => {
    if (!(levelMax > 0)) {
      return Buffer.alloc(0);
    }
    return encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, levels, {
      bitWidth: getBitWidth(levelMax),
      disableEnvelope: true
    });
  };
  const repetitionBuf = encodeLevels(column.rLevelMax, data.rlevels);
  const definitionBuf = encodeLevels(column.dLevelMax, data.dlevels);
  const levelsLength = repetitionBuf.length + definitionBuf.length;

  const pageHeaderV2 = new parquet_thrift_1.DataPageHeaderV2({
    num_values: data.count,
    num_nulls: data.count - data.values.length,
    num_rows: rowCount,
    encoding: parquet_thrift_1.Encoding[column.encoding],
    definition_levels_byte_length: definitionBuf.length,
    repetition_levels_byte_length: repetitionBuf.length,
    is_compressed: column.compression !== 'UNCOMPRESSED'
  });
  const header = new parquet_thrift_1.PageHeader({
    type: parquet_thrift_1.PageType.DATA_PAGE_V2,
    data_page_header_v2: pageHeaderV2,
    uncompressed_page_size: levelsLength + encodedValues.length,
    compressed_page_size: levelsLength + compressedValues.length
  });

  // Final page layout: header, repetition levels, definition levels, compressed values.
  const headerBuf = serializeThrift(header);
  return {
    header,
    headerSize: headerBuf.length,
    page: Buffer.concat([headerBuf, repetitionBuf, definitionBuf, compressedValues])
  };
}
360
/**
 * Encode an array of values into a parquet column chunk.
 *
 * The chunk consists of a single data page (v1 or v2, depending on
 * `opts.useDataPageV2`) followed by the serialized column metadata.
 *
 * @param column - column descriptor; `column.path.join()` is the key into `buffer.columnData`
 * @param buffer - row buffer holding `columnData` and `rowCount`
 * @param offset - offset of this chunk relative to the start of the row group body
 * @param opts - `baseOffset` (defaults to 0) and `useDataPageV2`
 * @returns `{ body, metadata, metadataOffset }`
 */
async function encodeColumnChunk(column, buffer, offset, opts) {
  const columnKey = column.path.join();
  const data = buffer.columnData[columnKey];
  const baseOffset = (opts.baseOffset || 0) + offset;

  /* encode the single data page */
  let encodedPage;
  if (opts.useDataPageV2) {
    encodedPage = await encodeDataPageV2(column, data, buffer.rowCount);
  } else {
    encodedPage = await encodeDataPage(column, data);
  }
  const pageBuf = encodedPage.page;
  // Chunk totals include the page header in both the compressed and uncompressed figures.
  const totalUncompressed = 0 + (encodedPage.header.uncompressed_page_size + encodedPage.headerSize);
  const totalCompressed = 0 + (encodedPage.header.compressed_page_size + encodedPage.headerSize);

  /* prepare metadata header */
  const metadata = new parquet_thrift_1.ColumnMetaData({
    path_in_schema: column.path,
    num_values: data.count,
    data_page_offset: baseOffset,
    encodings: [],
    total_uncompressed_size: totalUncompressed,
    total_compressed_size: totalCompressed,
    type: parquet_thrift_1.Type[column.primitiveType],
    codec: parquet_thrift_1.CompressionCodec[column.compression]
  });

  /* list encodings: level encoding first, then the value encoding */
  for (const encodingName of [PARQUET_RDLVL_ENCODING, column.encoding]) {
    metadata.encodings.push(parquet_thrift_1.Encoding[encodingName]);
  }

  /* the metadata is appended directly after the data page */
  const { serializeThrift } = read_utils_1;
  return {
    body: Buffer.concat([pageBuf, serializeThrift(metadata)]),
    metadata,
    metadataOffset: baseOffset + pageBuf.length
  };
}
403
/**
 * Encode a list of column values into a parquet row group.
 *
 * Every non-nested field of the schema is encoded as one column chunk, in
 * schema order; the chunk bodies are concatenated to form the row group body.
 *
 * @param schema - schema whose `fieldList` is iterated
 * @param data - row buffer (`rowCount` plus the column data read by encodeColumnChunk)
 * @param opts - options forwarded to encodeColumnChunk
 * @returns `{ body, metadata }`
 */
async function encodeRowGroup(schema, data, opts) {
  const metadata = new parquet_thrift_1.RowGroup({
    num_rows: data.rowCount,
    columns: [],
    total_byte_size: 0
  });
  // Collect the chunk bodies and concatenate once at the end, instead of
  // re-copying the whole accumulated body for every column.
  const chunkBodies = [];
  let bodyLength = 0;
  for (const field of schema.fieldList) {
    if (field.isNested) {
      continue; // eslint-disable-line no-continue
    }
    // Each chunk is told its offset within the row group body.
    const cchunkData = await encodeColumnChunk(field, data, bodyLength, opts);
    const cchunk = new parquet_thrift_1.ColumnChunk({
      file_offset: cchunkData.metadataOffset,
      meta_data: cchunkData.metadata
    });
    metadata.columns.push(cchunk);
    metadata.total_byte_size = new node_int64_1.default(Number(metadata.total_byte_size) + cchunkData.body.length);
    chunkBodies.push(cchunkData.body);
    bodyLength += cchunkData.body.length;
  }
  const body = Buffer.concat(chunkBodies, bodyLength);
  return { body, metadata };
}
428
/**
 * Encode a parquet file metadata footer.
 *
 * The returned buffer is the serialized file metadata, followed by the
 * metadata length as a 32-bit little-endian unsigned integer, followed by
 * PARQUET_MAGIC.
 *
 * @param schema - schema providing `fields` (root child count) and `fieldList`
 * @param rowCount - total number of rows in the file
 * @param rowGroups - row group metadata objects collected while writing
 * @param userMetadata - key/value pairs copied into `key_value_metadata`
 * @returns the encoded footer buffer
 */
function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
  const metadata = new parquet_thrift_1.FileMetaData({
    version: PARQUET_VERSION,
    created_by: 'parquets',
    num_rows: rowCount,
    row_groups: rowGroups,
    schema: [],
    key_value_metadata: []
  });

  /* user supplied key/value pairs */
  for (const key in userMetadata) {
    const entry = new parquet_thrift_1.KeyValue({ key, value: userMetadata[key] });
    metadata.key_value_metadata?.push?.(entry);
  }

  /* the schema list starts with a synthetic root element */
  const rootChildCount = Object.keys(schema.fields).length;
  metadata.schema.push(new parquet_thrift_1.SchemaElement({
    name: 'root',
    num_children: rootChildCount
  }));

  /* one schema element per field, in fieldList order */
  for (const field of schema.fieldList) {
    const element = new parquet_thrift_1.SchemaElement({
      name: field.name,
      repetition_type: parquet_thrift_1.FieldRepetitionType[field.repetitionType]
    });
    // Nested fields carry a child count, leaf fields carry a primitive type.
    if (!field.isNested) {
      element.type = parquet_thrift_1.Type[field.primitiveType];
    } else {
      element.num_children = field.fieldCount;
    }
    if (field.originalType) {
      element.converted_type = parquet_thrift_1.ConvertedType[field.originalType];
    }
    element.type_length = field.typeLength;
    metadata.schema.push(element);
  }

  /* layout: <metadata bytes><uint32 LE metadata length><magic> */
  const { serializeThrift } = read_utils_1;
  const metadataEncoded = serializeThrift(metadata);
  const metadataLength = metadataEncoded.length;
  const footerEncoded = Buffer.alloc(metadataLength + 8);
  metadataEncoded.copy(footerEncoded);
  footerEncoded.writeUInt32LE(metadataLength, metadataLength);
  footerEncoded.write(PARQUET_MAGIC, metadataLength + 4);
  return footerEncoded;
}
@@ -0,0 +1,10 @@
1
+ /// <reference types="node" />
2
+ import fs from 'fs';
3
/** Opens `filePath` read-only (flag 'r') and resolves with the file descriptor. */
export declare function fopen(filePath: any): Promise<number>;
/** Resolves with the `fs.Stats` of `filePath`. */
export declare function fstat(filePath: any): Promise<fs.Stats>;
/**
 * Reads exactly `length` bytes from `fd` starting at `position` and resolves
 * with the filled buffer; rejects when the read fails or comes up short.
 */
export declare function fread(fd: any, position: any, length: any): Promise<Buffer>;
/** Closes the file descriptor; the resolved value carries no information. */
export declare function fclose(fd: any): Promise<unknown>;
/** Writes `buf` to the output stream `os`; resolves once the write callback fires without error. */
export declare function oswrite(os: any, buf: any): Promise<void>;
/** Closes the output stream `os`; resolves once the close callback fires without error. */
export declare function osclose(os: any): Promise<void>;
/** Creates a write stream for `path` and resolves with it once the stream emits 'open'. */
export declare function osopen(path: any, opts: any): Promise<fs.WriteStream>;
10
+ //# sourceMappingURL=file.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"file.d.ts","sourceRoot":"","sources":["../../src/parquetjs/file.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,wBAAgB,KAAK,CAAC,QAAQ,KAAA,oBAU7B;AAED,wBAAgB,KAAK,CAAC,QAAQ,KAAA,qBAU7B;AAED,wBAAgB,KAAK,CAAC,EAAE,KAAA,EAAE,QAAQ,KAAA,EAAE,MAAM,KAAA,oBAYzC;AAED,wBAAgB,MAAM,CAAC,EAAE,KAAA,oBAUxB;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,EAAE,GAAG,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAU9C;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAUzC;AAED,wBAAgB,MAAM,CAAC,IAAI,KAAA,EAAE,IAAI,KAAA,oBAYhC"}
@@ -0,0 +1,99 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.osopen = exports.osclose = exports.oswrite = exports.fclose = exports.fread = exports.fstat = exports.fopen = void 0;
7
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
8
+ const fs_1 = __importDefault(require("fs"));
9
/**
 * Open `filePath` for reading (flag 'r').
 *
 * @returns a promise resolving with the file descriptor, or rejecting with the `fs.open` error
 */
function fopen(filePath) {
  return new Promise((resolve, reject) => {
    const onOpened = (err, fd) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(fd);
    };
    fs_1.default.open(filePath, 'r', onOpened);
  });
}
21
+ exports.fopen = fopen;
22
/**
 * Stat `filePath`.
 *
 * @returns a promise resolving with the stats object, or rejecting with the `fs.stat` error
 */
function fstat(filePath) {
  return new Promise((resolve, reject) => {
    const onStat = (err, stat) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stat);
    };
    fs_1.default.stat(filePath, onStat);
  });
}
34
+ exports.fstat = fstat;
35
/**
 * Read exactly `length` bytes from file descriptor `fd`, starting at `position`.
 *
 * @returns a promise resolving with the buffer that was read into; it rejects
 *   with the `fs.read` error, or with `Error('read failed')` when fewer than
 *   `length` bytes were read
 */
function fread(fd, position, length) {
  const target = Buffer.alloc(length);
  return new Promise((resolve, reject) => {
    const onRead = (err, bytesRead, buf) => {
      const complete = !err && bytesRead === length;
      if (complete) {
        resolve(buf);
        return;
      }
      reject(err || Error('read failed'));
    };
    fs_1.default.read(fd, target, 0, length, position, onRead);
  });
}
48
+ exports.fread = fread;
49
/**
 * Close file descriptor `fd`.
 *
 * @returns a promise that rejects with the `fs.close` error, or resolves with
 *   the (falsy) error argument of the callback on success
 */
function fclose(fd) {
  return new Promise((resolve, reject) => {
    const onClosed = (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(err);
    };
    fs_1.default.close(fd, onClosed);
  });
}
61
+ exports.fclose = fclose;
62
/**
 * Write `buf` to the output stream `os`.
 *
 * @returns a promise that resolves (with no value) when the write callback
 *   reports success, or rejects with the error it reports
 */
function oswrite(os, buf) {
  return new Promise((resolve, reject) => {
    const onWritten = (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    };
    os.write(buf, onWritten);
  });
}
74
+ exports.oswrite = oswrite;
75
/**
 * Close the output stream `os`.
 *
 * @returns a promise that resolves (with no value) when the close callback
 *   reports success, or rejects with the error it reports
 */
function osclose(os) {
  return new Promise((resolve, reject) => {
    const onClosed = (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    };
    os.close(onClosed);
  });
}
87
+ exports.osclose = osclose;
88
/**
 * Create a write stream for `path` with the given options.
 *
 * @returns a promise resolving with the stream once it emits 'open', or
 *   rejecting with the error from an 'error' event
 */
function osopen(path, opts) {
  return new Promise((resolve, reject) => {
    const outputStream = fs_1.default.createWriteStream(path, opts);
    // Resolve with the stream itself; the descriptor passed to 'open' is not needed.
    const onOpen = () => {
      resolve(outputStream);
    };
    const onError = (err) => {
      reject(err);
    };
    outputStream.on('open', onOpen);
    outputStream.on('error', onError);
  });
}
99
+ exports.osopen = osopen;
@@ -0,0 +1,6 @@
1
/**
 * Ambient declaration of the numeric `BoundaryOrder` enum with members
 * UNORDERED (0), ASCENDING (1) and DESCENDING (2).
 * NOTE(review): presumably mirrors the enum of the same name in the Parquet
 * thrift definition this module is generated from; confirm against
 * parquet.thrift before relying on the member meanings.
 */
+ export declare enum BoundaryOrder {
2
+ UNORDERED = 0,
3
+ ASCENDING = 1,
4
+ DESCENDING = 2
5
+ }
6
+ //# sourceMappingURL=BoundaryOrder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BoundaryOrder.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parquet-thrift/BoundaryOrder.ts"],"names":[],"mappings":"AAMA,oBAAY,aAAa;IACvB,SAAS,IAAI;IACb,SAAS,IAAI;IACb,UAAU,IAAI;CACf"}
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.BoundaryOrder = void 0;
4
+ /* tslint:disable */
5
+ /* eslint-disable */
6
+ /*
7
+ * Autogenerated by @creditkarma/thrift-typescript v3.7.2
8
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
9
+ */
10
/**
 * Runtime form of the BoundaryOrder enum: a two-way map between member
 * names and their numeric values (name -> number and number -> name),
 * stored on `exports.BoundaryOrder` (reusing an existing object if present).
 */
var BoundaryOrder = exports.BoundaryOrder || (exports.BoundaryOrder = {});
for (const [memberName, memberValue] of [
  ['UNORDERED', 0],
  ['ASCENDING', 1],
  ['DESCENDING', 2]
]) {
  BoundaryOrder[memberName] = memberValue;
  BoundaryOrder[memberValue] = memberName;
}
@@ -0,0 +1,9 @@
1
+ import * as thrift from 'thrift';
2
/** Constructor arguments for BsonType; the interface declares no members. */
+ export interface IBsonTypeArgs {
3
+ }
/**
 * Declaration of the thrift struct class `BsonType`.
 * `write` serializes the struct to a thrift protocol and `read` is the
 * static counterpart that returns a new instance read from a protocol.
 */
4
+ export declare class BsonType {
5
+ constructor();
6
+ write(output: thrift.TProtocol): void;
7
+ static read(input: thrift.TProtocol): BsonType;
8
+ }
9
+ //# sourceMappingURL=BsonType.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BsonType.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parquet-thrift/BsonType.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,MAAM,MAAM,QAAQ,CAAC;AACjC,MAAM,WAAW,aAAa;CAAG;AACjC,qBAAa,QAAQ;;IAEZ,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,SAAS,GAAG,IAAI;WAM9B,IAAI,CAAC,KAAK,EAAE,MAAM,CAAC,SAAS,GAAG,QAAQ;CAmBtD"}
@@ -0,0 +1,58 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
5
+ }) : (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ o[k2] = m[k];
8
+ }));
9
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
10
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
11
+ }) : function(o, v) {
12
+ o["default"] = v;
13
+ });
14
+ var __importStar = (this && this.__importStar) || function (mod) {
15
+ if (mod && mod.__esModule) return mod;
16
+ var result = {};
17
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
18
+ __setModuleDefault(result, mod);
19
+ return result;
20
+ };
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.BsonType = void 0;
23
+ /* tslint:disable */
24
+ /* eslint-disable */
25
+ /*
26
+ * Autogenerated by @creditkarma/thrift-typescript v3.7.2
27
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
28
+ */
29
+ const thrift = __importStar(require("thrift"));
30
/**
 * Thrift struct `BsonType`. The struct has no fields: writing emits an empty
 * struct and reading skips every field it encounters.
 */
class BsonType {
  constructor() { }

  /**
   * Serialize this struct to `output` as an empty struct named 'BsonType'.
   */
  write(output) {
    output.writeStructBegin('BsonType');
    output.writeFieldStop();
    output.writeStructEnd();
  }

  /**
   * Read a BsonType from `input`, skipping every field up to the STOP marker.
   *
   * @returns a new BsonType instance
   */
  static read(input) {
    input.readStructBegin();
    let field = input.readFieldBegin();
    let fieldType = field.ftype;
    while (fieldType !== thrift.Thrift.Type.STOP) {
      // No known field ids: every field is skipped.
      input.skip(fieldType);
      input.readFieldEnd();
      field = input.readFieldBegin();
      fieldType = field.ftype;
    }
    input.readStructEnd();
    return new BsonType();
  }
}
58
+ exports.BsonType = BsonType;