@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. package/dist/dist.min.js +22 -29
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/constants.js +0 -2
  4. package/dist/es5/constants.js.map +1 -1
  5. package/dist/es5/index.js +47 -9
  6. package/dist/es5/index.js.map +1 -1
  7. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  8. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
  10. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  11. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  12. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  13. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +4 -32
  14. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  15. package/dist/es5/lib/geo/decode-geo-metadata.js +77 -0
  16. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  17. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  18. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  19. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +173 -0
  20. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  21. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +150 -0
  22. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  23. package/dist/es5/lib/wasm/encode-parquet-wasm.js +14 -16
  24. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
  25. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -1
  26. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +16 -18
  27. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  28. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +6 -8
  29. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  30. package/dist/es5/lib/wasm/parse-parquet-wasm.js +16 -18
  31. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  32. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  33. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  34. package/dist/es5/parquet-loader.js +4 -3
  35. package/dist/es5/parquet-loader.js.map +1 -1
  36. package/dist/es5/parquet-wasm-loader.js +1 -2
  37. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  38. package/dist/es5/parquet-wasm-writer.js +1 -1
  39. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  40. package/dist/es5/parquet-writer.js +1 -1
  41. package/dist/es5/parquet-writer.js.map +1 -1
  42. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  43. package/dist/es5/parquetjs/codecs/index.js +0 -1
  44. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  45. package/dist/es5/parquetjs/codecs/plain.js +0 -3
  46. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  47. package/dist/es5/parquetjs/codecs/rle.js +0 -4
  48. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  49. package/dist/es5/parquetjs/compression.js +58 -58
  50. package/dist/es5/parquetjs/compression.js.map +1 -1
  51. package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
  52. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  53. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +4 -4
  54. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +4 -4
  61. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +4 -4
  63. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +4 -4
  70. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +4 -4
  73. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  75. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  81. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  83. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  85. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  87. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  89. package/dist/es5/parquetjs/parquet-thrift/PageType.js +4 -4
  90. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  98. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/Type.js +4 -4
  100. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  102. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
  104. package/dist/es5/parquetjs/parser/decoders.js +244 -261
  105. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  106. package/dist/es5/parquetjs/parser/parquet-reader.js +555 -256
  107. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  108. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  109. package/dist/es5/parquetjs/schema/schema.js +2 -12
  110. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  111. package/dist/es5/parquetjs/schema/shred.js +40 -46
  112. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  113. package/dist/es5/parquetjs/schema/types.js +6 -11
  114. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  115. package/dist/es5/parquetjs/utils/file-utils.js +2 -4
  116. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  117. package/dist/es5/parquetjs/utils/read-utils.js +0 -7
  118. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  119. package/dist/es5/workers/parquet-worker.js.map +1 -1
  120. package/dist/esm/bundle.js +0 -1
  121. package/dist/esm/bundle.js.map +1 -1
  122. package/dist/esm/constants.js +0 -3
  123. package/dist/esm/constants.js.map +1 -1
  124. package/dist/esm/index.js +11 -9
  125. package/dist/esm/index.js.map +1 -1
  126. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  127. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  128. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
  129. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  130. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -16
  131. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  132. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +37 -0
  133. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  134. package/dist/esm/lib/geo/decode-geo-metadata.js +58 -0
  135. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  136. package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
  137. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  138. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +35 -0
  139. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  140. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +18 -0
  141. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  142. package/dist/esm/lib/wasm/encode-parquet-wasm.js +0 -1
  143. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -1
  144. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +0 -1
  145. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  146. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  147. package/dist/esm/lib/wasm/parse-parquet-wasm.js +0 -3
  148. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
  149. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  150. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  151. package/dist/esm/parquet-loader.js +4 -4
  152. package/dist/esm/parquet-loader.js.map +1 -1
  153. package/dist/esm/parquet-wasm-loader.js +1 -3
  154. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  155. package/dist/esm/parquet-wasm-writer.js +1 -2
  156. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  157. package/dist/esm/parquet-writer.js +1 -2
  158. package/dist/esm/parquet-writer.js.map +1 -1
  159. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -1
  160. package/dist/esm/parquetjs/codecs/index.js +0 -2
  161. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  162. package/dist/esm/parquetjs/codecs/plain.js +0 -4
  163. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  164. package/dist/esm/parquetjs/codecs/rle.js +0 -6
  165. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  166. package/dist/esm/parquetjs/compression.js +10 -10
  167. package/dist/esm/parquetjs/compression.js.map +1 -1
  168. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +6 -74
  169. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  170. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +3 -4
  171. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  172. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -1
  173. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  174. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  175. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  176. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  177. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  178. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +3 -4
  179. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  180. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +3 -4
  181. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  182. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  183. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  184. package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -1
  185. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -1
  186. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  187. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  188. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +3 -4
  189. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  190. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -1
  191. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  192. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +3 -4
  193. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  194. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  195. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -1
  196. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  197. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -1
  198. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -1
  199. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  200. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  201. package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -1
  202. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -1
  203. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  204. package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -1
  205. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -1
  206. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -1
  207. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  208. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -1
  209. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  210. package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -1
  211. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -1
  212. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  213. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  214. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  215. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  216. package/dist/esm/parquetjs/parquet-thrift/PageType.js +3 -4
  217. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -1
  218. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  219. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  220. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  221. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  222. package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -1
  223. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -1
  224. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  225. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  226. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  227. package/dist/esm/parquetjs/parquet-thrift/Type.js +3 -4
  228. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -1
  229. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -1
  230. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  231. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -1
  232. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  233. package/dist/esm/parquetjs/parquet-thrift/index.js +0 -1
  234. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
  235. package/dist/esm/parquetjs/parser/decoders.js +1 -18
  236. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  237. package/dist/esm/parquetjs/parser/parquet-reader.js +153 -80
  238. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  239. package/dist/esm/parquetjs/schema/declare.js +0 -1
  240. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  241. package/dist/esm/parquetjs/schema/schema.js +0 -10
  242. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  243. package/dist/esm/parquetjs/schema/shred.js +42 -48
  244. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  245. package/dist/esm/parquetjs/schema/types.js +6 -10
  246. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  247. package/dist/esm/parquetjs/utils/file-utils.js +1 -2
  248. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  249. package/dist/esm/parquetjs/utils/read-utils.js +0 -8
  250. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
  251. package/dist/index.d.ts +24 -4
  252. package/dist/index.d.ts.map +1 -1
  253. package/dist/index.js +26 -9
  254. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  255. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  256. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  257. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  258. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  259. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  260. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  261. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  262. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  263. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  264. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  265. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  266. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  267. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  268. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  269. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  270. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  271. package/dist/lib/geo/geoparquet-schema.js +69 -0
  272. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  273. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  274. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  275. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  276. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  277. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  278. package/dist/parquet-loader.d.ts +2 -0
  279. package/dist/parquet-loader.d.ts.map +1 -1
  280. package/dist/parquet-loader.js +3 -1
  281. package/dist/parquet-worker.js +25 -32
  282. package/dist/parquet-worker.js.map +3 -3
  283. package/dist/parquetjs/compression.d.ts.map +1 -1
  284. package/dist/parquetjs/compression.js +16 -5
  285. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  286. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  287. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  288. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  289. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  290. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  291. package/dist/parquetjs/schema/declare.d.ts +14 -7
  292. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  293. package/dist/parquetjs/schema/declare.js +2 -0
  294. package/dist/parquetjs/schema/shred.d.ts +115 -0
  295. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  296. package/dist/parquetjs/schema/shred.js +161 -43
  297. package/dist/parquetjs/schema/types.d.ts +2 -2
  298. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  299. package/dist/parquetjs/schema/types.js +4 -6
  300. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  301. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  302. package/dist/parquetjs/utils/file-utils.js +2 -5
  303. package/package.json +8 -7
  304. package/src/index.ts +24 -4
  305. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  306. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  307. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  308. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  309. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  310. package/src/lib/geo/geoparquet-schema.ts +69 -0
  311. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  312. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  313. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  314. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  315. package/src/parquet-loader.ts +5 -1
  316. package/src/parquetjs/compression.ts +14 -1
  317. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  318. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  319. package/src/parquetjs/schema/declare.ts +17 -9
  320. package/src/parquetjs/schema/shred.ts +157 -28
  321. package/src/parquetjs/schema/types.ts +25 -30
  322. package/src/parquetjs/utils/file-utils.ts +3 -4
  323. package/dist/es5/lib/convert-schema.js.map +0 -1
  324. package/dist/es5/lib/parse-parquet.js +0 -130
  325. package/dist/es5/lib/parse-parquet.js.map +0 -1
  326. package/dist/es5/lib/read-array-buffer.js +0 -43
  327. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  328. package/dist/es5/parquetjs/encoder/writer.js +0 -757
  329. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  330. package/dist/es5/parquetjs/file.js +0 -94
  331. package/dist/es5/parquetjs/file.js.map +0 -1
  332. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  333. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  334. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  335. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  336. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  337. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  338. package/dist/esm/lib/convert-schema.js.map +0 -1
  339. package/dist/esm/lib/parse-parquet.js +0 -25
  340. package/dist/esm/lib/parse-parquet.js.map +0 -1
  341. package/dist/esm/lib/read-array-buffer.js +0 -10
  342. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  343. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  344. package/dist/esm/parquetjs/file.js +0 -81
  345. package/dist/esm/parquetjs/file.js.map +0 -1
  346. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  347. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  348. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  349. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  350. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  351. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  352. package/dist/lib/convert-schema.d.ts +0 -8
  353. package/dist/lib/convert-schema.d.ts.map +0 -1
  354. package/dist/lib/parse-parquet.d.ts +0 -4
  355. package/dist/lib/parse-parquet.d.ts.map +0 -1
  356. package/dist/lib/parse-parquet.js +0 -28
  357. package/dist/lib/read-array-buffer.d.ts +0 -19
  358. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  359. package/dist/lib/read-array-buffer.js +0 -29
  360. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  361. package/dist/parquetjs/file.d.ts +0 -10
  362. package/dist/parquetjs/file.d.ts.map +0 -1
  363. package/dist/parquetjs/file.js +0 -99
  364. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  365. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  366. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  367. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  368. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  369. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  370. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  371. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  372. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  373. package/src/lib/parse-parquet.ts +0 -27
  374. package/src/lib/read-array-buffer.ts +0 -31
  375. package/src/parquetjs/file.ts +0 -90
  376. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  377. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  378. package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -1,757 +0,0 @@
1
- "use strict";
2
-
3
- var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
- var _typeof = require("@babel/runtime/helpers/typeof");
5
- Object.defineProperty(exports, "__esModule", {
6
- value: true
7
- });
8
- exports.ParquetWriter = exports.ParquetTransformer = exports.ParquetEnvelopeWriter = void 0;
9
- var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
10
- var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
11
- var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
12
- var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
13
- var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
14
- var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
15
- var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
16
- var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
17
- var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
18
- var _stream = require("stream");
19
- var _codecs = require("../codecs");
20
- var Compression = _interopRequireWildcard(require("../compression"));
21
- var Shred = _interopRequireWildcard(require("../schema/shred"));
22
- var _parquetThrift = require("../parquet-thrift");
23
- var _fileUtils = require("../utils/file-utils");
24
- var _readUtils = require("../utils/read-utils");
25
- var _nodeInt = _interopRequireDefault(require("node-int64"));
26
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
27
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
28
- function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
29
- function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
30
- function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
31
- function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
32
- function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
33
- var PARQUET_MAGIC = 'PAR1';
34
-
35
- var PARQUET_VERSION = 1;
36
-
37
- var PARQUET_DEFAULT_PAGE_SIZE = 8192;
38
- var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
39
-
40
- var PARQUET_RDLVL_TYPE = 'INT32';
41
- var PARQUET_RDLVL_ENCODING = 'RLE';
42
- var ParquetWriter = function () {
43
- function ParquetWriter(schema, envelopeWriter, opts) {
44
- (0, _classCallCheck2.default)(this, ParquetWriter);
45
- (0, _defineProperty2.default)(this, "schema", void 0);
46
- (0, _defineProperty2.default)(this, "envelopeWriter", void 0);
47
- (0, _defineProperty2.default)(this, "rowBuffer", void 0);
48
- (0, _defineProperty2.default)(this, "rowGroupSize", void 0);
49
- (0, _defineProperty2.default)(this, "closed", void 0);
50
- (0, _defineProperty2.default)(this, "userMetadata", void 0);
51
- this.schema = schema;
52
- this.envelopeWriter = envelopeWriter;
53
- this.rowBuffer = {};
54
- this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
55
- this.closed = false;
56
- this.userMetadata = {};
57
-
58
- this.writeHeader();
59
- }
60
- (0, _createClass2.default)(ParquetWriter, [{
61
- key: "writeHeader",
62
- value: function () {
63
- var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
64
- return _regenerator.default.wrap(function _callee$(_context) {
65
- while (1) {
66
- switch (_context.prev = _context.next) {
67
- case 0:
68
- _context.prev = 0;
69
- _context.next = 3;
70
- return this.envelopeWriter.writeHeader();
71
- case 3:
72
- _context.next = 10;
73
- break;
74
- case 5:
75
- _context.prev = 5;
76
- _context.t0 = _context["catch"](0);
77
- _context.next = 9;
78
- return this.envelopeWriter.close();
79
- case 9:
80
- throw _context.t0;
81
- case 10:
82
- case "end":
83
- return _context.stop();
84
- }
85
- }
86
- }, _callee, this, [[0, 5]]);
87
- }));
88
- function writeHeader() {
89
- return _writeHeader.apply(this, arguments);
90
- }
91
- return writeHeader;
92
- }()
93
- }, {
94
- key: "appendRow",
95
- value: function () {
96
- var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
97
- return _regenerator.default.wrap(function _callee2$(_context2) {
98
- while (1) {
99
- switch (_context2.prev = _context2.next) {
100
- case 0:
101
- if (!this.closed) {
102
- _context2.next = 2;
103
- break;
104
- }
105
- throw new Error('writer was closed');
106
- case 2:
107
- Shred.shredRecord(this.schema, row, this.rowBuffer);
108
- if (this.rowBuffer.rowCount >= this.rowGroupSize) {
109
- this.rowBuffer = {};
110
- }
111
- case 4:
112
- case "end":
113
- return _context2.stop();
114
- }
115
- }
116
- }, _callee2, this);
117
- }));
118
- function appendRow(_x) {
119
- return _appendRow.apply(this, arguments);
120
- }
121
- return appendRow;
122
- }()
123
- }, {
124
- key: "close",
125
- value: function () {
126
- var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
127
- return _regenerator.default.wrap(function _callee3$(_context3) {
128
- while (1) {
129
- switch (_context3.prev = _context3.next) {
130
- case 0:
131
- if (!this.closed) {
132
- _context3.next = 2;
133
- break;
134
- }
135
- throw new Error('writer was closed');
136
- case 2:
137
- this.closed = true;
138
- if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
139
- this.rowBuffer = {};
140
- }
141
- _context3.next = 6;
142
- return this.envelopeWriter.writeFooter(this.userMetadata);
143
- case 6:
144
- _context3.next = 8;
145
- return this.envelopeWriter.close();
146
- case 8:
147
-
148
- if (callback) {
149
- callback();
150
- }
151
- case 9:
152
- case "end":
153
- return _context3.stop();
154
- }
155
- }
156
- }, _callee3, this);
157
- }));
158
- function close(_x2) {
159
- return _close.apply(this, arguments);
160
- }
161
- return close;
162
- }()
163
- }, {
164
- key: "setMetadata",
165
- value:
166
- function setMetadata(key, value) {
167
- this.userMetadata[String(key)] = String(value);
168
- }
169
-
170
- }, {
171
- key: "setRowGroupSize",
172
- value:
173
- function setRowGroupSize(cnt) {
174
- this.rowGroupSize = cnt;
175
- }
176
-
177
- }, {
178
- key: "setPageSize",
179
- value:
180
- function setPageSize(cnt) {
181
- this.envelopeWriter.setPageSize(cnt);
182
- }
183
- }], [{
184
- key: "openFile",
185
- value: function () {
186
- var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, path, opts) {
187
- var outputStream;
188
- return _regenerator.default.wrap(function _callee4$(_context4) {
189
- while (1) {
190
- switch (_context4.prev = _context4.next) {
191
- case 0:
192
- _context4.next = 2;
193
- return (0, _fileUtils.osopen)(path, opts);
194
- case 2:
195
- outputStream = _context4.sent;
196
- return _context4.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
197
- case 4:
198
- case "end":
199
- return _context4.stop();
200
- }
201
- }
202
- }, _callee4);
203
- }));
204
- function openFile(_x3, _x4, _x5) {
205
- return _openFile.apply(this, arguments);
206
- }
207
- return openFile;
208
- }()
209
- }, {
210
- key: "openStream",
211
- value: function () {
212
- var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream, opts) {
213
- var envelopeWriter;
214
- return _regenerator.default.wrap(function _callee5$(_context5) {
215
- while (1) {
216
- switch (_context5.prev = _context5.next) {
217
- case 0:
218
- if (!opts) {
219
- opts = {};
220
- }
221
- _context5.next = 3;
222
- return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
223
- case 3:
224
- envelopeWriter = _context5.sent;
225
- return _context5.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
226
- case 5:
227
- case "end":
228
- return _context5.stop();
229
- }
230
- }
231
- }, _callee5);
232
- }));
233
- function openStream(_x6, _x7, _x8) {
234
- return _openStream.apply(this, arguments);
235
- }
236
- return openStream;
237
- }()
238
- }]);
239
- return ParquetWriter;
240
- }();
241
- exports.ParquetWriter = ParquetWriter;
242
- var ParquetEnvelopeWriter = function () {
243
- function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
244
- (0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
245
- (0, _defineProperty2.default)(this, "schema", void 0);
246
- (0, _defineProperty2.default)(this, "write", void 0);
247
- (0, _defineProperty2.default)(this, "close", void 0);
248
- (0, _defineProperty2.default)(this, "offset", void 0);
249
- (0, _defineProperty2.default)(this, "rowCount", void 0);
250
- (0, _defineProperty2.default)(this, "rowGroups", void 0);
251
- (0, _defineProperty2.default)(this, "pageSize", void 0);
252
- (0, _defineProperty2.default)(this, "useDataPageV2", void 0);
253
- this.schema = schema;
254
- this.write = writeFn;
255
- this.close = closeFn;
256
- this.offset = fileOffset;
257
- this.rowCount = 0;
258
- this.rowGroups = [];
259
- this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
260
- this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
261
- }
262
- (0, _createClass2.default)(ParquetEnvelopeWriter, [{
263
- key: "writeSection",
264
- value: function writeSection(buf) {
265
- this.offset += buf.length;
266
- return this.write(buf);
267
- }
268
-
269
- }, {
270
- key: "writeHeader",
271
- value:
272
- function writeHeader() {
273
- return this.writeSection(Buffer.from(PARQUET_MAGIC));
274
- }
275
-
276
- }, {
277
- key: "writeRowGroup",
278
- value: function () {
279
- var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(records) {
280
- var rgroup;
281
- return _regenerator.default.wrap(function _callee6$(_context6) {
282
- while (1) {
283
- switch (_context6.prev = _context6.next) {
284
- case 0:
285
- _context6.next = 2;
286
- return encodeRowGroup(this.schema, records, {
287
- baseOffset: this.offset,
288
- pageSize: this.pageSize,
289
- useDataPageV2: this.useDataPageV2
290
- });
291
- case 2:
292
- rgroup = _context6.sent;
293
- this.rowCount += records.rowCount;
294
- this.rowGroups.push(rgroup.metadata);
295
- _context6.next = 7;
296
- return this.writeSection(rgroup.body);
297
- case 7:
298
- return _context6.abrupt("return", _context6.sent);
299
- case 8:
300
- case "end":
301
- return _context6.stop();
302
- }
303
- }
304
- }, _callee6, this);
305
- }));
306
- function writeRowGroup(_x9) {
307
- return _writeRowGroup.apply(this, arguments);
308
- }
309
- return writeRowGroup;
310
- }()
311
- }, {
312
- key: "writeFooter",
313
- value:
314
- function writeFooter(userMetadata) {
315
- if (!userMetadata) {
316
- userMetadata = {};
317
- }
318
- return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
319
- }
320
-
321
- }, {
322
- key: "setPageSize",
323
- value:
324
- function setPageSize(cnt) {
325
- this.pageSize = cnt;
326
- }
327
- }], [{
328
- key: "openStream",
329
- value: function () {
330
- var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(schema, outputStream, opts) {
331
- var writeFn, closeFn;
332
- return _regenerator.default.wrap(function _callee7$(_context7) {
333
- while (1) {
334
- switch (_context7.prev = _context7.next) {
335
- case 0:
336
- writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
337
- closeFn = _fileUtils.osclose.bind(undefined, outputStream);
338
- return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
339
- case 3:
340
- case "end":
341
- return _context7.stop();
342
- }
343
- }
344
- }, _callee7);
345
- }));
346
- function openStream(_x10, _x11, _x12) {
347
- return _openStream2.apply(this, arguments);
348
- }
349
- return openStream;
350
- }()
351
- }]);
352
- return ParquetEnvelopeWriter;
353
- }();
354
- exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
355
- var ParquetTransformer = function (_Transform) {
356
- (0, _inherits2.default)(ParquetTransformer, _Transform);
357
- var _super = _createSuper(ParquetTransformer);
358
- function ParquetTransformer(schema) {
359
- var _this;
360
- var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
361
- (0, _classCallCheck2.default)(this, ParquetTransformer);
362
- _this = _super.call(this, {
363
- objectMode: true
364
- });
365
- (0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
366
- var writeProxy = function (t) {
367
- return function () {
368
- var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(b) {
369
- return _regenerator.default.wrap(function _callee8$(_context8) {
370
- while (1) {
371
- switch (_context8.prev = _context8.next) {
372
- case 0:
373
- t.push(b);
374
- case 1:
375
- case "end":
376
- return _context8.stop();
377
- }
378
- }
379
- }, _callee8);
380
- }));
381
- return function (_x13) {
382
- return _ref.apply(this, arguments);
383
- };
384
- }();
385
- }((0, _assertThisInitialized2.default)(_this));
386
- _this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
387
- return _regenerator.default.wrap(function _callee9$(_context9) {
388
- while (1) {
389
- switch (_context9.prev = _context9.next) {
390
- case 0:
391
- case "end":
392
- return _context9.stop();
393
- }
394
- }
395
- }, _callee9);
396
- })), 0, opts), opts);
397
- return _this;
398
- }
399
-
400
- (0, _createClass2.default)(ParquetTransformer, [{
401
- key: "_transform",
402
- value:
403
- function _transform(row, encoding, callback) {
404
- if (row) {
405
- return this.writer.appendRow(row).then(callback);
406
- }
407
- callback();
408
- return Promise.resolve();
409
- }
410
-
411
- }, {
412
- key: "_flush",
413
- value: function () {
414
- var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(callback) {
415
- return _regenerator.default.wrap(function _callee10$(_context10) {
416
- while (1) {
417
- switch (_context10.prev = _context10.next) {
418
- case 0:
419
- _context10.next = 2;
420
- return this.writer.close(callback);
421
- case 2:
422
- case "end":
423
- return _context10.stop();
424
- }
425
- }
426
- }, _callee10, this);
427
- }));
428
- function _flush(_x14) {
429
- return _flush2.apply(this, arguments);
430
- }
431
- return _flush;
432
- }()
433
- }]);
434
- return ParquetTransformer;
435
- }(_stream.Transform);
436
- exports.ParquetTransformer = ParquetTransformer;
437
- function encodeValues(type, encoding, values, opts) {
438
- if (!(encoding in _codecs.PARQUET_CODECS)) {
439
- throw new Error("invalid encoding: ".concat(encoding));
440
- }
441
- return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
442
- }
443
-
444
- function encodeDataPage(_x15, _x16) {
445
- return _encodeDataPage.apply(this, arguments);
446
- }
447
- function _encodeDataPage() {
448
- _encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(column, data) {
449
- var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
450
- return _regenerator.default.wrap(function _callee11$(_context11) {
451
- while (1) {
452
- switch (_context11.prev = _context11.next) {
453
- case 0:
454
- rLevelsBuf = Buffer.alloc(0);
455
- if (column.rLevelMax > 0) {
456
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
457
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
458
- });
459
- }
460
- dLevelsBuf = Buffer.alloc(0);
461
- if (column.dLevelMax > 0) {
462
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
463
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
464
- });
465
- }
466
-
467
- valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
468
- typeLength: column.typeLength,
469
- bitWidth: column.typeLength
470
- });
471
- dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
472
- _context11.next = 8;
473
- return Compression.deflate(column.compression, dataBuf);
474
- case 8:
475
- compressedBuf = _context11.sent;
476
- header = new _parquetThrift.PageHeader({
477
- type: _parquetThrift.PageType.DATA_PAGE,
478
- data_page_header: new _parquetThrift.DataPageHeader({
479
- num_values: data.count,
480
- encoding: _parquetThrift.Encoding[column.encoding],
481
- definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
482
- repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
483
- }),
484
-
485
- uncompressed_page_size: dataBuf.length,
486
- compressed_page_size: compressedBuf.length
487
- });
488
- headerBuf = (0, _readUtils.serializeThrift)(header);
489
- page = Buffer.concat([headerBuf, compressedBuf]);
490
- return _context11.abrupt("return", {
491
- header: header,
492
- headerSize: headerBuf.length,
493
- page: page
494
- });
495
- case 13:
496
- case "end":
497
- return _context11.stop();
498
- }
499
- }
500
- }, _callee11);
501
- }));
502
- return _encodeDataPage.apply(this, arguments);
503
- }
504
- function encodeDataPageV2(_x17, _x18, _x19) {
505
- return _encodeDataPageV.apply(this, arguments);
506
- }
507
- function _encodeDataPageV() {
508
- _encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(column, data, rowCount) {
509
- var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
510
- return _regenerator.default.wrap(function _callee12$(_context12) {
511
- while (1) {
512
- switch (_context12.prev = _context12.next) {
513
- case 0:
514
- valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
515
- typeLength: column.typeLength,
516
- bitWidth: column.typeLength
517
- });
518
- _context12.next = 3;
519
- return Compression.deflate(column.compression, valuesBuf);
520
- case 3:
521
- compressedBuf = _context12.sent;
522
- rLevelsBuf = Buffer.alloc(0);
523
- if (column.rLevelMax > 0) {
524
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
525
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
526
- disableEnvelope: true
527
- });
528
- }
529
- dLevelsBuf = Buffer.alloc(0);
530
- if (column.dLevelMax > 0) {
531
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
532
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
533
- disableEnvelope: true
534
- });
535
- }
536
-
537
- header = new _parquetThrift.PageHeader({
538
- type: _parquetThrift.PageType.DATA_PAGE_V2,
539
- data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
540
- num_values: data.count,
541
- num_nulls: data.count - data.values.length,
542
- num_rows: rowCount,
543
- encoding: _parquetThrift.Encoding[column.encoding],
544
- definition_levels_byte_length: dLevelsBuf.length,
545
- repetition_levels_byte_length: rLevelsBuf.length,
546
- is_compressed: column.compression !== 'UNCOMPRESSED'
547
- }),
548
- uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
549
- compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
550
- });
551
- headerBuf = (0, _readUtils.serializeThrift)(header);
552
- page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
553
- return _context12.abrupt("return", {
554
- header: header,
555
- headerSize: headerBuf.length,
556
- page: page
557
- });
558
- case 12:
559
- case "end":
560
- return _context12.stop();
561
- }
562
- }
563
- }, _callee12);
564
- }));
565
- return _encodeDataPageV.apply(this, arguments);
566
- }
567
- function encodeColumnChunk(_x20, _x21, _x22, _x23) {
568
- return _encodeColumnChunk.apply(this, arguments);
569
- }
570
- function _encodeColumnChunk() {
571
- _encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(column, buffer, offset, opts) {
572
- var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
573
- return _regenerator.default.wrap(function _callee13$(_context13) {
574
- while (1) {
575
- switch (_context13.prev = _context13.next) {
576
- case 0:
577
- data = buffer.columnData[column.path.join()];
578
- baseOffset = (opts.baseOffset || 0) + offset;
579
- total_uncompressed_size = 0;
580
- total_compressed_size = 0;
581
- if (!opts.useDataPageV2) {
582
- _context13.next = 10;
583
- break;
584
- }
585
- _context13.next = 7;
586
- return encodeDataPageV2(column, data, buffer.rowCount);
587
- case 7:
588
- _context13.t0 = _context13.sent;
589
- _context13.next = 13;
590
- break;
591
- case 10:
592
- _context13.next = 12;
593
- return encodeDataPage(column, data);
594
- case 12:
595
- _context13.t0 = _context13.sent;
596
- case 13:
597
- result = _context13.t0;
598
- pageBuf = result.page;
599
- total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
600
- total_compressed_size += result.header.compressed_page_size + result.headerSize;
601
- metadata = new _parquetThrift.ColumnMetaData({
602
- path_in_schema: column.path,
603
- num_values: data.count,
604
- data_page_offset: baseOffset,
605
- encodings: [],
606
- total_uncompressed_size: total_uncompressed_size,
607
- total_compressed_size: total_compressed_size,
608
- type: _parquetThrift.Type[column.primitiveType],
609
- codec: _parquetThrift.CompressionCodec[column.compression]
610
- });
611
- metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
612
- metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
613
-
614
- metadataOffset = baseOffset + pageBuf.length;
615
- body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
616
- return _context13.abrupt("return", {
617
- body: body,
618
- metadata: metadata,
619
- metadataOffset: metadataOffset
620
- });
621
- case 23:
622
- case "end":
623
- return _context13.stop();
624
- }
625
- }
626
- }, _callee13);
627
- }));
628
- return _encodeColumnChunk.apply(this, arguments);
629
- }
630
- function encodeRowGroup(_x24, _x25, _x26) {
631
- return _encodeRowGroup.apply(this, arguments);
632
- }
633
- function _encodeRowGroup() {
634
- _encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14(schema, data, opts) {
635
- var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
636
- return _regenerator.default.wrap(function _callee14$(_context14) {
637
- while (1) {
638
- switch (_context14.prev = _context14.next) {
639
- case 0:
640
- metadata = new _parquetThrift.RowGroup({
641
- num_rows: data.rowCount,
642
- columns: [],
643
- total_byte_size: 0
644
- });
645
- body = Buffer.alloc(0);
646
- _iterator2 = _createForOfIteratorHelper(schema.fieldList);
647
- _context14.prev = 3;
648
- _iterator2.s();
649
- case 5:
650
- if ((_step2 = _iterator2.n()).done) {
651
- _context14.next = 18;
652
- break;
653
- }
654
- field = _step2.value;
655
- if (!field.isNested) {
656
- _context14.next = 9;
657
- break;
658
- }
659
- return _context14.abrupt("continue", 16);
660
- case 9:
661
- _context14.next = 11;
662
- return encodeColumnChunk(field, data, body.length, opts);
663
- case 11:
664
- cchunkData = _context14.sent;
665
- cchunk = new _parquetThrift.ColumnChunk({
666
- file_offset: cchunkData.metadataOffset,
667
- meta_data: cchunkData.metadata
668
- });
669
- metadata.columns.push(cchunk);
670
- metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
671
- body = Buffer.concat([body, cchunkData.body]);
672
- case 16:
673
- _context14.next = 5;
674
- break;
675
- case 18:
676
- _context14.next = 23;
677
- break;
678
- case 20:
679
- _context14.prev = 20;
680
- _context14.t0 = _context14["catch"](3);
681
- _iterator2.e(_context14.t0);
682
- case 23:
683
- _context14.prev = 23;
684
- _iterator2.f();
685
- return _context14.finish(23);
686
- case 26:
687
- return _context14.abrupt("return", {
688
- body: body,
689
- metadata: metadata
690
- });
691
- case 27:
692
- case "end":
693
- return _context14.stop();
694
- }
695
- }
696
- }, _callee14, null, [[3, 20, 23, 26]]);
697
- }));
698
- return _encodeRowGroup.apply(this, arguments);
699
- }
700
- function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
701
- var metadata = new _parquetThrift.FileMetaData({
702
- version: PARQUET_VERSION,
703
- created_by: 'parquets',
704
- num_rows: rowCount,
705
- row_groups: rowGroups,
706
- schema: [],
707
- key_value_metadata: []
708
- });
709
- for (var key in userMetadata) {
710
- var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
711
- var kv = new _parquetThrift.KeyValue({
712
- key: key,
713
- value: userMetadata[key]
714
- });
715
- (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
716
- }
717
- {
718
- var schemaRoot = new _parquetThrift.SchemaElement({
719
- name: 'root',
720
- num_children: Object.keys(schema.fields).length
721
- });
722
- metadata.schema.push(schemaRoot);
723
- }
724
- var _iterator = _createForOfIteratorHelper(schema.fieldList),
725
- _step;
726
- try {
727
- for (_iterator.s(); !(_step = _iterator.n()).done;) {
728
- var field = _step.value;
729
- var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
730
- var schemaElem = new _parquetThrift.SchemaElement({
731
- name: field.name,
732
- repetition_type: relt
733
- });
734
- if (field.isNested) {
735
- schemaElem.num_children = field.fieldCount;
736
- } else {
737
- schemaElem.type = _parquetThrift.Type[field.primitiveType];
738
- }
739
- if (field.originalType) {
740
- schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
741
- }
742
- schemaElem.type_length = field.typeLength;
743
- metadata.schema.push(schemaElem);
744
- }
745
- } catch (err) {
746
- _iterator.e(err);
747
- } finally {
748
- _iterator.f();
749
- }
750
- var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
751
- var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
752
- metadataEncoded.copy(footerEncoded);
753
- footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
754
- footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
755
- return footerEncoded;
756
- }
757
- //# sourceMappingURL=writer.js.map