@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.3

This diff compares the contents of two publicly available package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (378)
  1. package/dist/dist.min.js +22 -29
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/constants.js +0 -2
  4. package/dist/es5/constants.js.map +1 -1
  5. package/dist/es5/index.js +47 -9
  6. package/dist/es5/index.js.map +1 -1
  7. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  8. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
  10. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  11. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  12. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  13. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +4 -32
  14. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  15. package/dist/es5/lib/geo/decode-geo-metadata.js +77 -0
  16. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  17. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  18. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  19. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +173 -0
  20. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  21. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +150 -0
  22. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  23. package/dist/es5/lib/wasm/encode-parquet-wasm.js +14 -16
  24. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
  25. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -1
  26. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +16 -18
  27. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  28. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +6 -8
  29. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  30. package/dist/es5/lib/wasm/parse-parquet-wasm.js +16 -18
  31. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  32. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  33. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  34. package/dist/es5/parquet-loader.js +4 -3
  35. package/dist/es5/parquet-loader.js.map +1 -1
  36. package/dist/es5/parquet-wasm-loader.js +1 -2
  37. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  38. package/dist/es5/parquet-wasm-writer.js +1 -1
  39. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  40. package/dist/es5/parquet-writer.js +1 -1
  41. package/dist/es5/parquet-writer.js.map +1 -1
  42. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  43. package/dist/es5/parquetjs/codecs/index.js +0 -1
  44. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  45. package/dist/es5/parquetjs/codecs/plain.js +0 -3
  46. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  47. package/dist/es5/parquetjs/codecs/rle.js +0 -4
  48. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  49. package/dist/es5/parquetjs/compression.js +58 -58
  50. package/dist/es5/parquetjs/compression.js.map +1 -1
  51. package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
  52. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  53. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +4 -4
  54. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +4 -4
  61. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +4 -4
  63. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +4 -4
  70. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +4 -4
  73. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  75. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  81. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  83. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  85. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  87. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  89. package/dist/es5/parquetjs/parquet-thrift/PageType.js +4 -4
  90. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  98. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/Type.js +4 -4
  100. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  102. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
  104. package/dist/es5/parquetjs/parser/decoders.js +244 -261
  105. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  106. package/dist/es5/parquetjs/parser/parquet-reader.js +555 -256
  107. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  108. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  109. package/dist/es5/parquetjs/schema/schema.js +2 -12
  110. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  111. package/dist/es5/parquetjs/schema/shred.js +40 -46
  112. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  113. package/dist/es5/parquetjs/schema/types.js +6 -11
  114. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  115. package/dist/es5/parquetjs/utils/file-utils.js +2 -4
  116. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  117. package/dist/es5/parquetjs/utils/read-utils.js +0 -7
  118. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  119. package/dist/es5/workers/parquet-worker.js.map +1 -1
  120. package/dist/esm/bundle.js +0 -1
  121. package/dist/esm/bundle.js.map +1 -1
  122. package/dist/esm/constants.js +0 -3
  123. package/dist/esm/constants.js.map +1 -1
  124. package/dist/esm/index.js +11 -9
  125. package/dist/esm/index.js.map +1 -1
  126. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  127. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  128. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
  129. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  130. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -16
  131. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  132. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +37 -0
  133. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  134. package/dist/esm/lib/geo/decode-geo-metadata.js +58 -0
  135. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  136. package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
  137. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  138. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +35 -0
  139. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  140. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +18 -0
  141. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  142. package/dist/esm/lib/wasm/encode-parquet-wasm.js +0 -1
  143. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -1
  144. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +0 -1
  145. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  146. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  147. package/dist/esm/lib/wasm/parse-parquet-wasm.js +0 -3
  148. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
  149. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  150. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  151. package/dist/esm/parquet-loader.js +4 -4
  152. package/dist/esm/parquet-loader.js.map +1 -1
  153. package/dist/esm/parquet-wasm-loader.js +1 -3
  154. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  155. package/dist/esm/parquet-wasm-writer.js +1 -2
  156. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  157. package/dist/esm/parquet-writer.js +1 -2
  158. package/dist/esm/parquet-writer.js.map +1 -1
  159. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -1
  160. package/dist/esm/parquetjs/codecs/index.js +0 -2
  161. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  162. package/dist/esm/parquetjs/codecs/plain.js +0 -4
  163. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  164. package/dist/esm/parquetjs/codecs/rle.js +0 -6
  165. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  166. package/dist/esm/parquetjs/compression.js +10 -10
  167. package/dist/esm/parquetjs/compression.js.map +1 -1
  168. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +6 -74
  169. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  170. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +3 -4
  171. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  172. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -1
  173. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  174. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  175. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  176. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  177. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  178. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +3 -4
  179. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  180. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +3 -4
  181. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  182. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  183. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  184. package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -1
  185. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -1
  186. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  187. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  188. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +3 -4
  189. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  190. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -1
  191. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  192. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +3 -4
  193. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  194. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  195. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -1
  196. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  197. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -1
  198. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -1
  199. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  200. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  201. package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -1
  202. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -1
  203. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  204. package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -1
  205. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -1
  206. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -1
  207. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  208. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -1
  209. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  210. package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -1
  211. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -1
  212. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  213. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  214. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  215. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  216. package/dist/esm/parquetjs/parquet-thrift/PageType.js +3 -4
  217. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -1
  218. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  219. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  220. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  221. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  222. package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -1
  223. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -1
  224. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  225. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  226. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  227. package/dist/esm/parquetjs/parquet-thrift/Type.js +3 -4
  228. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -1
  229. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -1
  230. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  231. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -1
  232. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  233. package/dist/esm/parquetjs/parquet-thrift/index.js +0 -1
  234. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
  235. package/dist/esm/parquetjs/parser/decoders.js +1 -18
  236. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  237. package/dist/esm/parquetjs/parser/parquet-reader.js +153 -80
  238. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  239. package/dist/esm/parquetjs/schema/declare.js +0 -1
  240. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  241. package/dist/esm/parquetjs/schema/schema.js +0 -10
  242. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  243. package/dist/esm/parquetjs/schema/shred.js +42 -48
  244. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  245. package/dist/esm/parquetjs/schema/types.js +6 -10
  246. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  247. package/dist/esm/parquetjs/utils/file-utils.js +1 -2
  248. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  249. package/dist/esm/parquetjs/utils/read-utils.js +0 -8
  250. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
  251. package/dist/index.d.ts +24 -4
  252. package/dist/index.d.ts.map +1 -1
  253. package/dist/index.js +26 -9
  254. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  255. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  256. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  257. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  258. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  259. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  260. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  261. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  262. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  263. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  264. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  265. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  266. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  267. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  268. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  269. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  270. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  271. package/dist/lib/geo/geoparquet-schema.js +69 -0
  272. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  273. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  274. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  275. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  276. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  277. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  278. package/dist/parquet-loader.d.ts +2 -0
  279. package/dist/parquet-loader.d.ts.map +1 -1
  280. package/dist/parquet-loader.js +3 -1
  281. package/dist/parquet-worker.js +25 -32
  282. package/dist/parquet-worker.js.map +3 -3
  283. package/dist/parquetjs/compression.d.ts.map +1 -1
  284. package/dist/parquetjs/compression.js +16 -5
  285. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  286. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  287. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  288. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  289. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  290. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  291. package/dist/parquetjs/schema/declare.d.ts +14 -7
  292. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  293. package/dist/parquetjs/schema/declare.js +2 -0
  294. package/dist/parquetjs/schema/shred.d.ts +115 -0
  295. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  296. package/dist/parquetjs/schema/shred.js +161 -43
  297. package/dist/parquetjs/schema/types.d.ts +2 -2
  298. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  299. package/dist/parquetjs/schema/types.js +4 -6
  300. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  301. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  302. package/dist/parquetjs/utils/file-utils.js +2 -5
  303. package/package.json +8 -7
  304. package/src/index.ts +24 -4
  305. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  306. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  307. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  308. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  309. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  310. package/src/lib/geo/geoparquet-schema.ts +69 -0
  311. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  312. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  313. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  314. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  315. package/src/parquet-loader.ts +5 -1
  316. package/src/parquetjs/compression.ts +14 -1
  317. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  318. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  319. package/src/parquetjs/schema/declare.ts +17 -9
  320. package/src/parquetjs/schema/shred.ts +157 -28
  321. package/src/parquetjs/schema/types.ts +25 -30
  322. package/src/parquetjs/utils/file-utils.ts +3 -4
  323. package/dist/es5/lib/convert-schema.js.map +0 -1
  324. package/dist/es5/lib/parse-parquet.js +0 -130
  325. package/dist/es5/lib/parse-parquet.js.map +0 -1
  326. package/dist/es5/lib/read-array-buffer.js +0 -43
  327. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  328. package/dist/es5/parquetjs/encoder/writer.js +0 -757
  329. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  330. package/dist/es5/parquetjs/file.js +0 -94
  331. package/dist/es5/parquetjs/file.js.map +0 -1
  332. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  333. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  334. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  335. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  336. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  337. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  338. package/dist/esm/lib/convert-schema.js.map +0 -1
  339. package/dist/esm/lib/parse-parquet.js +0 -25
  340. package/dist/esm/lib/parse-parquet.js.map +0 -1
  341. package/dist/esm/lib/read-array-buffer.js +0 -10
  342. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  343. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  344. package/dist/esm/parquetjs/file.js +0 -81
  345. package/dist/esm/parquetjs/file.js.map +0 -1
  346. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  347. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  348. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  349. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  350. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  351. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  352. package/dist/lib/convert-schema.d.ts +0 -8
  353. package/dist/lib/convert-schema.d.ts.map +0 -1
  354. package/dist/lib/parse-parquet.d.ts +0 -4
  355. package/dist/lib/parse-parquet.d.ts.map +0 -1
  356. package/dist/lib/parse-parquet.js +0 -28
  357. package/dist/lib/read-array-buffer.d.ts +0 -19
  358. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  359. package/dist/lib/read-array-buffer.js +0 -29
  360. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  361. package/dist/parquetjs/file.d.ts +0 -10
  362. package/dist/parquetjs/file.d.ts.map +0 -1
  363. package/dist/parquetjs/file.js +0 -99
  364. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  365. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  366. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  367. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  368. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  369. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  370. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  371. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  372. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  373. package/src/lib/parse-parquet.ts +0 -27
  374. package/src/lib/read-array-buffer.ts +0 -31
  375. package/src/parquetjs/file.ts +0 -90
  376. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  377. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  378. package/src/parquetjs/utils/buffer-utils.ts +0 -18
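The list above shows the parser being reorganized rather than just patched: `lib/parse-parquet` and `lib/read-array-buffer` are removed in favor of `lib/parsers/parse-parquet-to-rows` and `lib/parsers/parse-parquet-to-columns`, new `lib/arrow` schema converters and `lib/geo` GeoParquet-metadata helpers are added, and `parquetjs/encoder/writer` is renamed to `parquet-encoder`. For orientation only, the sketch below shows the usual way the package is consumed through the loaders.gl core API; it assumes the `ParquetLoader` export and row-shaped output are unchanged by this release, which the file list alone does not confirm.

```typescript
// Minimal consumption sketch (assumptions: `ParquetLoader` is still exported from
// '@loaders.gl/parquet' and still yields row objects; the columnar and GeoParquet
// paths added in this release are not exercised here).
import {load} from '@loaders.gl/core';
import {ParquetLoader} from '@loaders.gl/parquet';

export async function loadParquetRows(url: string): Promise<unknown[]> {
  // `load` fetches the resource and dispatches to the loader's parser
  // (which, per the file list, now lives in lib/parsers/parse-parquet-to-rows).
  const data = await load(url, ParquetLoader);
  return data as unknown[];
}
```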
package/dist/es5/parquetjs/parser/parquet-reader.js
@@ -10,299 +10,598 @@ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/
  var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
  var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
  var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
- var _parquetEnvelopeReader = require("./parquet-envelope-reader");
+ var _awaitAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/awaitAsyncGenerator"));
+ var _wrapAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/wrapAsyncGenerator"));
  var _schema = require("../schema/schema");
- var _parquetCursor = require("./parquet-cursor");
- var _constants = require("../../constants");
  var _decoders = require("./decoders");
- var _Symbol$asyncIterator;
+ var _shred = require("../schema/shred");
+ var _constants = require("../../constants");
+ var _parquetThrift = require("../parquet-thrift");
+ var _readUtils = require("../utils/read-utils");
  function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
  function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
- function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
- _Symbol$asyncIterator = Symbol.asyncIterator;
+ function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; return arr2; }
+ function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); enumerableOnly && (symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; })), keys.push.apply(keys, symbols); } return keys; }
+ function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = null != arguments[i] ? arguments[i] : {}; i % 2 ? ownKeys(Object(source), !0).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } return target; }
+ function _asyncIterator(iterable) { var method, async, sync, retry = 2; for ("undefined" != typeof Symbol && (async = Symbol.asyncIterator, sync = Symbol.iterator); retry--;) { if (async && null != (method = iterable[async])) return method.call(iterable); if (sync && null != (method = iterable[sync])) return new AsyncFromSyncIterator(method.call(iterable)); async = "@@asyncIterator", sync = "@@iterator"; } throw new TypeError("Object is not async iterable"); }
+ function AsyncFromSyncIterator(s) { function AsyncFromSyncIteratorContinuation(r) { if (Object(r) !== r) return Promise.reject(new TypeError(r + " is not an object.")); var done = r.done; return Promise.resolve(r.value).then(function (value) { return { value: value, done: done }; }); } return AsyncFromSyncIterator = function AsyncFromSyncIterator(s) { this.s = s, this.n = s.next; }, AsyncFromSyncIterator.prototype = { s: null, n: null, next: function next() { return AsyncFromSyncIteratorContinuation(this.n.apply(this.s, arguments)); }, return: function _return(value) { var ret = this.s.return; return void 0 === ret ? Promise.resolve({ value: value, done: !0 }) : AsyncFromSyncIteratorContinuation(ret.apply(this.s, arguments)); }, throw: function _throw(value) { var thr = this.s.return; return void 0 === thr ? Promise.reject(value) : AsyncFromSyncIteratorContinuation(thr.apply(this.s, arguments)); } }, new AsyncFromSyncIterator(s); }
+ var DEFAULT_PROPS = {
+ defaultDictionarySize: 1e6
+ };
  var ParquetReader = function () {
- function ParquetReader(metadata, envelopeReader) {
+ function ParquetReader(file, props) {
  (0, _classCallCheck2.default)(this, ParquetReader);
- (0, _defineProperty2.default)(this, "metadata", void 0);
- (0, _defineProperty2.default)(this, "envelopeReader", void 0);
- (0, _defineProperty2.default)(this, "schema", void 0);
- if (metadata.version !== _constants.PARQUET_VERSION) {
- throw new Error('invalid parquet version');
- }
- this.metadata = metadata;
- this.envelopeReader = envelopeReader;
- var root = this.metadata.schema[0];
- var _decodeSchema = (0, _decoders.decodeSchema)(this.metadata.schema, 1, root.num_children),
- schema = _decodeSchema.schema;
- this.schema = new _schema.ParquetSchema(schema);
+ (0, _defineProperty2.default)(this, "props", void 0);
+ (0, _defineProperty2.default)(this, "file", void 0);
+ (0, _defineProperty2.default)(this, "metadata", null);
+ this.file = file;
+ this.props = _objectSpread(_objectSpread({}, DEFAULT_PROPS), props);
  }
-
  (0, _createClass2.default)(ParquetReader, [{
  key: "close",
- value: function () {
- var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
+ value: function close() {
+ this.file.close();
+ }
+ }, {
+ key: "rowIterator",
+ value: function rowIterator(props) {
+ var _this = this;
+ return (0, _wrapAsyncGenerator2.default)(_regenerator.default.mark(function _callee() {
+ var _iteratorAbruptCompletion, _didIteratorError, _iteratorError, _iterator, _step, rows, _iterator3, _step3, row;
  return _regenerator.default.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- _context.next = 2;
- return this.envelopeReader.close();
- case 2:
- case "end":
- return _context.stop();
- }
+ while (1) switch (_context.prev = _context.next) {
+ case 0:
+ _iteratorAbruptCompletion = false;
+ _didIteratorError = false;
+ _context.prev = 2;
+ _iterator = _asyncIterator(_this.rowBatchIterator(props));
+ case 4:
+ _context.next = 6;
+ return (0, _awaitAsyncGenerator2.default)(_iterator.next());
+ case 6:
+ if (!(_iteratorAbruptCompletion = !(_step = _context.sent).done)) {
+ _context.next = 28;
+ break;
+ }
+ rows = _step.value;
+ _iterator3 = _createForOfIteratorHelper(rows);
+ _context.prev = 9;
+ _iterator3.s();
+ case 11:
+ if ((_step3 = _iterator3.n()).done) {
+ _context.next = 17;
+ break;
+ }
+ row = _step3.value;
+ _context.next = 15;
+ return row;
+ case 15:
+ _context.next = 11;
+ break;
+ case 17:
+ _context.next = 22;
+ break;
+ case 19:
+ _context.prev = 19;
+ _context.t0 = _context["catch"](9);
+ _iterator3.e(_context.t0);
+ case 22:
+ _context.prev = 22;
+ _iterator3.f();
+ return _context.finish(22);
+ case 25:
+ _iteratorAbruptCompletion = false;
+ _context.next = 4;
+ break;
+ case 28:
+ _context.next = 34;
+ break;
+ case 30:
+ _context.prev = 30;
+ _context.t1 = _context["catch"](2);
+ _didIteratorError = true;
+ _iteratorError = _context.t1;
+ case 34:
+ _context.prev = 34;
+ _context.prev = 35;
+ if (!(_iteratorAbruptCompletion && _iterator.return != null)) {
+ _context.next = 39;
+ break;
+ }
+ _context.next = 39;
+ return (0, _awaitAsyncGenerator2.default)(_iterator.return());
+ case 39:
+ _context.prev = 39;
+ if (!_didIteratorError) {
+ _context.next = 42;
+ break;
+ }
+ throw _iteratorError;
+ case 42:
+ return _context.finish(39);
+ case 43:
+ return _context.finish(34);
+ case 44:
+ case "end":
+ return _context.stop();
  }
- }, _callee, this);
- }));
- function close() {
- return _close.apply(this, arguments);
- }
- return close;
- }()
- }, {
- key: "getCursor",
- value: function getCursor(columnList) {
- if (!columnList) {
- columnList = [];
- }
-
- columnList = columnList.map(function (x) {
- return Array.isArray(x) ? x : [x];
- });
- return new _parquetCursor.ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList);
+ }, _callee, null, [[2, 30, 34, 44], [9, 19, 22, 25], [35,, 39, 43]]);
+ }))();
  }
-
  }, {
- key: "getRowCount",
- value:
- function getRowCount() {
- return Number(this.metadata.num_rows);
+ key: "rowBatchIterator",
+ value: function rowBatchIterator(props) {
+ var _this2 = this;
+ return (0, _wrapAsyncGenerator2.default)(_regenerator.default.mark(function _callee2() {
+ var schema, _iteratorAbruptCompletion2, _didIteratorError2, _iteratorError2, _iterator2, _step2, rowGroup;
+ return _regenerator.default.wrap(function _callee2$(_context2) {
+ while (1) switch (_context2.prev = _context2.next) {
+ case 0:
+ _context2.next = 2;
+ return (0, _awaitAsyncGenerator2.default)(_this2.getSchema());
+ case 2:
+ schema = _context2.sent;
+ _iteratorAbruptCompletion2 = false;
+ _didIteratorError2 = false;
+ _context2.prev = 5;
+ _iterator2 = _asyncIterator(_this2.rowGroupIterator(props));
+ case 7:
+ _context2.next = 9;
+ return (0, _awaitAsyncGenerator2.default)(_iterator2.next());
+ case 9:
+ if (!(_iteratorAbruptCompletion2 = !(_step2 = _context2.sent).done)) {
+ _context2.next = 16;
+ break;
+ }
+ rowGroup = _step2.value;
+ _context2.next = 13;
+ return (0, _shred.materializeRecords)(schema, rowGroup);
+ case 13:
+ _iteratorAbruptCompletion2 = false;
+ _context2.next = 7;
+ break;
+ case 16:
+ _context2.next = 22;
+ break;
+ case 18:
+ _context2.prev = 18;
+ _context2.t0 = _context2["catch"](5);
+ _didIteratorError2 = true;
+ _iteratorError2 = _context2.t0;
+ case 22:
+ _context2.prev = 22;
+ _context2.prev = 23;
+ if (!(_iteratorAbruptCompletion2 && _iterator2.return != null)) {
+ _context2.next = 27;
+ break;
+ }
+ _context2.next = 27;
+ return (0, _awaitAsyncGenerator2.default)(_iterator2.return());
+ case 27:
+ _context2.prev = 27;
+ if (!_didIteratorError2) {
+ _context2.next = 30;
+ break;
+ }
+ throw _iteratorError2;
+ case 30:
+ return _context2.finish(27);
+ case 31:
+ return _context2.finish(22);
+ case 32:
+ case "end":
+ return _context2.stop();
+ }
+ }, _callee2, null, [[5, 18, 22, 32], [23,, 27, 31]]);
+ }))();
  }
-
  }, {
- key: "getSchema",
- value:
- function getSchema() {
- return this.schema;
+ key: "rowGroupIterator",
+ value: function rowGroupIterator(props) {
+ var _this3 = this;
+ return (0, _wrapAsyncGenerator2.default)(_regenerator.default.mark(function _callee3() {
+ var columnList, metadata, schema, rowGroupCount, rowGroupIndex, rowGroup;
+ return _regenerator.default.wrap(function _callee3$(_context3) {
+ while (1) switch (_context3.prev = _context3.next) {
+ case 0:
+ columnList = ((props === null || props === void 0 ? void 0 : props.columnList) || []).map(function (x) {
+ return Array.isArray(x) ? x : [x];
+ });
+ _context3.next = 3;
+ return (0, _awaitAsyncGenerator2.default)(_this3.getFileMetadata());
+ case 3:
+ metadata = _context3.sent;
+ _context3.next = 6;
+ return (0, _awaitAsyncGenerator2.default)(_this3.getSchema());
+ case 6:
+ schema = _context3.sent;
+ rowGroupCount = (metadata === null || metadata === void 0 ? void 0 : metadata.row_groups.length) || 0;
+ rowGroupIndex = 0;
+ case 9:
+ if (!(rowGroupIndex < rowGroupCount)) {
+ _context3.next = 18;
+ break;
+ }
+ _context3.next = 12;
+ return (0, _awaitAsyncGenerator2.default)(_this3.readRowGroup(schema, metadata.row_groups[rowGroupIndex], columnList));
+ case 12:
+ rowGroup = _context3.sent;
+ _context3.next = 15;
+ return rowGroup;
+ case 15:
+ rowGroupIndex++;
+ _context3.next = 9;
+ break;
+ case 18:
+ case "end":
+ return _context3.stop();
+ }
+ }, _callee3);
+ }))();
  }
-
  }, {
- key: "getMetadata",
- value:
- function getMetadata() {
- var md = {};
- var _iterator = _createForOfIteratorHelper(this.metadata.key_value_metadata),
- _step;
- try {
- for (_iterator.s(); !(_step = _iterator.n()).done;) {
- var kv = _step.value;
- md[kv.key] = kv.value;
- }
- } catch (err) {
- _iterator.e(err);
- } finally {
- _iterator.f();
+ key: "getRowCount",
+ value: function () {
+ var _getRowCount = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4() {
+ var metadata;
+ return _regenerator.default.wrap(function _callee4$(_context4) {
+ while (1) switch (_context4.prev = _context4.next) {
+ case 0:
+ _context4.next = 2;
+ return this.getFileMetadata();
+ case 2:
+ metadata = _context4.sent;
+ return _context4.abrupt("return", Number(metadata.num_rows));
+ case 4:
+ case "end":
+ return _context4.stop();
+ }
+ }, _callee4, this);
+ }));
+ function getRowCount() {
+ return _getRowCount.apply(this, arguments);
  }
- return md;
- }
-
+ return getRowCount;
+ }()
  }, {
- key: _Symbol$asyncIterator,
- value:
- function value() {
- return this.getCursor()[Symbol.asyncIterator]();
- }
- }], [{
- key: "openBlob",
+ key: "getSchema",
  value: function () {
- var _openBlob = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(blob) {
- var readFn, closeFn, size, envelopeReader, metadata;
- return _regenerator.default.wrap(function _callee4$(_context4) {
- while (1) {
- switch (_context4.prev = _context4.next) {
- case 0:
- readFn = function () {
- var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(start, length) {
- var arrayBuffer;
- return _regenerator.default.wrap(function _callee2$(_context2) {
- while (1) {
- switch (_context2.prev = _context2.next) {
- case 0:
- _context2.next = 2;
- return blob.slice(start, start + length).arrayBuffer();
- case 2:
- arrayBuffer = _context2.sent;
- return _context2.abrupt("return", Buffer.from(arrayBuffer));
- case 4:
- case "end":
- return _context2.stop();
- }
- }
- }, _callee2);
- }));
- return function readFn(_x2, _x3) {
- return _ref.apply(this, arguments);
- };
- }();
- closeFn = function () {
- var _ref2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3() {
- return _regenerator.default.wrap(function _callee3$(_context3) {
- while (1) {
- switch (_context3.prev = _context3.next) {
- case 0:
- case "end":
- return _context3.stop();
- }
- }
- }, _callee3);
- }));
- return function closeFn() {
- return _ref2.apply(this, arguments);
- };
- }();
- size = blob.size;
- envelopeReader = new _parquetEnvelopeReader.ParquetEnvelopeReader(readFn, closeFn, size);
- _context4.prev = 4;
- _context4.next = 7;
- return envelopeReader.readHeader();
- case 7:
- _context4.next = 9;
- return envelopeReader.readFooter();
- case 9:
- metadata = _context4.sent;
- return _context4.abrupt("return", new ParquetReader(metadata, envelopeReader));
- case 13:
- _context4.prev = 13;
- _context4.t0 = _context4["catch"](4);
- _context4.next = 17;
- return envelopeReader.close();
- case 17:
- throw _context4.t0;
- case 18:
- case "end":
- return _context4.stop();
- }
+ var _getSchema = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5() {
+ var metadata, root, _decodeSchema, schemaDefinition, schema;
+ return _regenerator.default.wrap(function _callee5$(_context5) {
+ while (1) switch (_context5.prev = _context5.next) {
+ case 0:
+ _context5.next = 2;
+ return this.getFileMetadata();
+ case 2:
+ metadata = _context5.sent;
+ root = metadata.schema[0];
+ _decodeSchema = (0, _decoders.decodeSchema)(metadata.schema, 1, root.num_children), schemaDefinition = _decodeSchema.schema;
+ schema = new _schema.ParquetSchema(schemaDefinition);
+ return _context5.abrupt("return", schema);
+ case 7:
+ case "end":
+ return _context5.stop();
  }
- }, _callee4, null, [[4, 13]]);
+ }, _callee5, this);
  }));
- function openBlob(_x) {
- return _openBlob.apply(this, arguments);
+ function getSchema() {
+ return _getSchema.apply(this, arguments);
  }
- return openBlob;
+ return getSchema;
  }()
  }, {
- key: "openArrayBuffer",
+ key: "getSchemaMetadata",
  value: function () {
- var _openArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(arrayBuffer) {
- var readFn, closeFn, size, envelopeReader, metadata;
+ var _getSchemaMetadata = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6() {
+ var metadata, md, _iterator4, _step4, kv;
+ return _regenerator.default.wrap(function _callee6$(_context6) {
+ while (1) switch (_context6.prev = _context6.next) {
+ case 0:
+ _context6.next = 2;
+ return this.getFileMetadata();
+ case 2:
+ metadata = _context6.sent;
+ md = {};
+ _iterator4 = _createForOfIteratorHelper(metadata.key_value_metadata);
+ try {
+ for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) {
+ kv = _step4.value;
+ md[kv.key] = kv.value;
+ }
+ } catch (err) {
+ _iterator4.e(err);
+ } finally {
+ _iterator4.f();
+ }
+ return _context6.abrupt("return", md);
+ case 7:
+ case "end":
+ return _context6.stop();
+ }
+ }, _callee6, this);
+ }));
+ function getSchemaMetadata() {
+ return _getSchemaMetadata.apply(this, arguments);
+ }
+ return getSchemaMetadata;
+ }()
+ }, {
+ key: "getFileMetadata",
+ value: function () {
+ var _getFileMetadata = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7() {
  return _regenerator.default.wrap(function _callee7$(_context7) {
- while (1) {
- switch (_context7.prev = _context7.next) {
- case 0:
- readFn = function () {
- var _ref3 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(start, length) {
- return _regenerator.default.wrap(function _callee5$(_context5) {
- while (1) {
- switch (_context5.prev = _context5.next) {
- case 0:
- return _context5.abrupt("return", Buffer.from(arrayBuffer, start, length));
- case 1:
- case "end":
- return _context5.stop();
- }
- }
- }, _callee5);
- }));
- return function readFn(_x5, _x6) {
- return _ref3.apply(this, arguments);
- };
- }();
- closeFn = function () {
- var _ref4 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6() {
- return _regenerator.default.wrap(function _callee6$(_context6) {
- while (1) {
- switch (_context6.prev = _context6.next) {
- case 0:
- case "end":
- return _context6.stop();
- }
- }
- }, _callee6);
- }));
- return function closeFn() {
- return _ref4.apply(this, arguments);
- };
- }();
- size = arrayBuffer.byteLength;
- envelopeReader = new _parquetEnvelopeReader.ParquetEnvelopeReader(readFn, closeFn, size);
- _context7.prev = 4;
- _context7.next = 7;
- return envelopeReader.readHeader();
- case 7:
- _context7.next = 9;
- return envelopeReader.readFooter();
- case 9:
- metadata = _context7.sent;
- return _context7.abrupt("return", new ParquetReader(metadata, envelopeReader));
- case 13:
- _context7.prev = 13;
- _context7.t0 = _context7["catch"](4);
- _context7.next = 17;
- return envelopeReader.close();
- case 17:
- throw _context7.t0;
- case 18:
- case "end":
- return _context7.stop();
- }
+ while (1) switch (_context7.prev = _context7.next) {
+ case 0:
+ if (this.metadata) {
+ _context7.next = 4;
+ break;
+ }
+ _context7.next = 3;
+ return this.readHeader();
+ case 3:
+ this.metadata = this.readFooter();
+ case 4:
+ return _context7.abrupt("return", this.metadata);
+ case 5:
+ case "end":
+ return _context7.stop();
  }
- }, _callee7, null, [[4, 13]]);
+ }, _callee7, this);
  }));
- function openArrayBuffer(_x4) {
- return _openArrayBuffer.apply(this, arguments);
+ function getFileMetadata() {
+ return _getFileMetadata.apply(this, arguments);
  }
- return openArrayBuffer;
+ return getFileMetadata;
  }()
  }, {
- key: "openBuffer",
+ key: "readHeader",
  value: function () {
- var _openBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(buffer) {
- var envelopeReader, metadata;
+ var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8() {
+ var buffer, magic;
  return _regenerator.default.wrap(function _callee8$(_context8) {
- while (1) {
- switch (_context8.prev = _context8.next) {
- case 0:
- _context8.next = 2;
- return _parquetEnvelopeReader.ParquetEnvelopeReader.openBuffer(buffer);
- case 2:
- envelopeReader = _context8.sent;
- _context8.prev = 3;
- _context8.next = 6;
- return envelopeReader.readHeader();
- case 6:
- _context8.next = 8;
- return envelopeReader.readFooter();
- case 8:
- metadata = _context8.sent;
- return _context8.abrupt("return", new ParquetReader(metadata, envelopeReader));
- case 12:
- _context8.prev = 12;
- _context8.t0 = _context8["catch"](3);
- _context8.next = 16;
- return envelopeReader.close();
- case 16:
- throw _context8.t0;
- case 17:
- case "end":
- return _context8.stop();
- }
- }
+ while (1) switch (_context8.prev = _context8.next) {
+ case 0:
+ _context8.next = 2;
+ return this.file.read(0, _constants.PARQUET_MAGIC.length);
+ case 2:
+ buffer = _context8.sent;
+ magic = buffer.toString();
+ _context8.t0 = magic;
+ _context8.next = _context8.t0 === _constants.PARQUET_MAGIC ? 7 : _context8.t0 === _constants.PARQUET_MAGIC_ENCRYPTED ? 8 : 9;
+ break;
+ case 7:
+ return _context8.abrupt("break", 10);
+ case 8:
+ throw new Error('Encrypted parquet file not supported');
+ case 9:
+ throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
+ case 10:
+ case "end":
+ return _context8.stop();
+ }
+ }, _callee8, this);
+ }));
+ function readHeader() {
+ return _readHeader.apply(this, arguments);
+ }
+ return readHeader;
+ }()
+ }, {
+ key: "readFooter",
+ value: function () {
+ var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
+ var trailerLen, trailerBuf, magic, metadataSize, metadataOffset, metadataBuf, _decodeFileMetadata, metadata;
+ return _regenerator.default.wrap(function _callee9$(_context9) {
+ while (1) switch (_context9.prev = _context9.next) {
+ case 0:
+ trailerLen = _constants.PARQUET_MAGIC.length + 4;
+ _context9.next = 3;
+ return this.file.read(this.file.size - trailerLen, trailerLen);
+ case 3:
+ trailerBuf = _context9.sent;
+ magic = trailerBuf.slice(4).toString();
+ if (!(magic !== _constants.PARQUET_MAGIC)) {
+ _context9.next = 7;
+ break;
+ }
+ throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
+ case 7:
+ metadataSize = trailerBuf.readUInt32LE(0);
+ metadataOffset = this.file.size - metadataSize - trailerLen;
+ if (!(metadataOffset < _constants.PARQUET_MAGIC.length)) {
+ _context9.next = 11;
+ break;
+ }
+ throw new Error("Invalid metadata size ".concat(metadataOffset));
+ case 11:
+ _context9.next = 13;
+ return this.file.read(metadataOffset, metadataSize);
+ case 13:
+ metadataBuf = _context9.sent;
+ _decodeFileMetadata = (0, _readUtils.decodeFileMetadata)(metadataBuf), metadata = _decodeFileMetadata.metadata;
+ return _context9.abrupt("return", metadata);
+ case 16:
+ case "end":
+ return _context9.stop();
+ }
+ }, _callee9, this);
+ }));
+ function readFooter() {
+ return _readFooter.apply(this, arguments);
+ }
+ return readFooter;
+ }()
+ }, {
+ key: "readRowGroup",
+ value: function () {
+ var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(schema, rowGroup, columnList) {
+ var buffer, _iterator5, _step5, colChunk, colMetadata, colKey;
+ return _regenerator.default.wrap(function _callee10$(_context10) {
+ while (1) switch (_context10.prev = _context10.next) {
+ case 0:
+ buffer = {
+ rowCount: Number(rowGroup.num_rows),
+ columnData: {}
+ };
+ _iterator5 = _createForOfIteratorHelper(rowGroup.columns);
+ _context10.prev = 2;
+ _iterator5.s();
+ case 4:
+ if ((_step5 = _iterator5.n()).done) {
+ _context10.next = 15;
+ break;
+ }
+ colChunk = _step5.value;
+ colMetadata = colChunk.meta_data;
+ colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
+ if (!(columnList.length > 0 && (0, _readUtils.fieldIndexOf)(columnList, colKey) < 0)) {
+ _context10.next = 10;
+ break;
+ }
+ return _context10.abrupt("continue", 13);
+ case 10:
+ _context10.next = 12;
+ return this.readColumnChunk(schema, colChunk);
+ case 12:
+ buffer.columnData[colKey.join()] = _context10.sent;
+ case 13:
+ _context10.next = 4;
+ break;
+ case 15:
+ _context10.next = 20;
+ break;
+ case 17:
+ _context10.prev = 17;
+ _context10.t0 = _context10["catch"](2);
+ _iterator5.e(_context10.t0);
+ case 20:
+ _context10.prev = 20;
+ _iterator5.f();
+ return _context10.finish(20);
+ case 23:
+ return _context10.abrupt("return", buffer);
+ case 24:
+ case "end":
+ return _context10.stop();
+ }
+ }, _callee10, this, [[2, 17, 20, 23]]);
+ }));
+ function readRowGroup(_x, _x2, _x3) {
+ return _readRowGroup.apply(this, arguments);
+ }
+ return readRowGroup;
+ }()
+ }, {
+ key: "readColumnChunk",
+ value: function () {
+ var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(schema, colChunk) {
+ var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
+ var field, type, compression, pagesOffset, pagesSize, _colChunk$meta_data6, options, dictionary, dictionaryPageOffset, dictionaryOffset, pagesBuf;
+ return _regenerator.default.wrap(function _callee11$(_context11) {
+ while (1) switch (_context11.prev = _context11.next) {
+ case 0:
+ if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
+ _context11.next = 2;
+ break;
+ }
+ throw new Error('external references are not supported');
+ case 2:
+ field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
+ type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
+ if (!(type !== field.primitiveType)) {
+ _context11.next = 6;
+ break;
+ }
+ throw new Error("chunk type not matching schema: ".concat(type));
+ case 6:
+ compression = (0, _readUtils.getThriftEnum)(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
+ pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
+ pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
+ if (!colChunk.file_path) {
+ pagesSize = Math.min(this.file.size - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
+ }
+ options = {
+ type: type,
+ rLevelMax: field.rLevelMax,
+ dLevelMax: field.dLevelMax,
+ compression: compression,
+ column: field,
+ numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
+ dictionary: []
+ };
+ dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
+ if (!dictionaryPageOffset) {
+ _context11.next = 17;
+ break;
+ }
+ dictionaryOffset = Number(dictionaryPageOffset);
+ _context11.next = 16;
+ return this.getDictionary(dictionaryOffset, options, pagesOffset);
+ case 16:
+ dictionary = _context11.sent;
+ case 17:
+ dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
+ _context11.next = 20;
+ return this.file.read(pagesOffset, pagesSize);
+ case 20:
+ pagesBuf = _context11.sent;
+ _context11.next = 23;
+ return (0, _decoders.decodeDataPages)(pagesBuf, _objectSpread(_objectSpread({}, options), {}, {
+ dictionary: dictionary
+ }));
+ case 23:
+ return _context11.abrupt("return", _context11.sent);
+ case 24:
+ case "end":
+ return _context11.stop();
+ }
+ }, _callee11, this);
+ }));
+ function readColumnChunk(_x4, _x5) {
+ return _readColumnChunk.apply(this, arguments);
+ }
+ return readColumnChunk;
+ }()
+ }, {
+ key: "getDictionary",
+ value: function () {
+ var _getDictionary = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(dictionaryPageOffset, options, pagesOffset) {
+ var dictionarySize, pagesBuf, cursor, decodedPage;
+ return _regenerator.default.wrap(function _callee12$(_context12) {
+ while (1) switch (_context12.prev = _context12.next) {
+ case 0:
+ if (!(dictionaryPageOffset === 0)) {
+ _context12.next = 2;
+ break;
+ }
+ return _context12.abrupt("return", []);
+ case 2:
+ dictionarySize = Math.min(this.file.size - dictionaryPageOffset, this.props.defaultDictionarySize);
+ _context12.next = 5;
+ return this.file.read(dictionaryPageOffset, dictionarySize);
+ case 5:
+ pagesBuf = _context12.sent;
+ cursor = {
+ buffer: pagesBuf,
+ offset: 0,
+ size: pagesBuf.length
+ };
+ _context12.next = 9;
+ return (0, _decoders.decodePage)(cursor, options);
+ case 9:
+ decodedPage = _context12.sent;
+ return _context12.abrupt("return", decodedPage.dictionary);
+ case 11:
+ case "end":
+ return _context12.stop();
  }
- }, _callee8, null, [[3, 12]]);
+ }, _callee12, this);
  }));
- function openBuffer(_x7) {
- return _openBuffer.apply(this, arguments);
+ function getDictionary(_x6, _x7, _x8) {
+ return _getDictionary.apply(this, arguments);
  }
- return openBuffer;
+ return getDictionary;
  }()
  }]);
  return ParquetReader;
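Taken together, the hunk above replaces the static `openBlob`/`openArrayBuffer`/`openBuffer` constructors and the `ParquetEnvelopeReader`/`ParquetCursor` pair with a single `ParquetReader(file, props)` that reads the header and footer lazily in `getFileMetadata()` and exposes rows through the `rowIterator()`/`rowBatchIterator()`/`rowGroupIterator()` async generators. Below is a rough TypeScript sketch of that surface, reconstructed from the transpiled output; the file abstraction's `read(offset, length)`/`size`/`close()` shape is inferred from the calls in the diff, and the method signatures are approximations rather than the package's published typings.

```typescript
// Inferred shape of the random-access file the new reader drives (assumption based
// on `this.file.read(...)`, `this.file.size` and `this.file.close()` in the diff).
interface ReadableFileLike {
  size: number;
  read(offset: number, length: number): Promise<Buffer>;
  close(): void;
}

// Approximate surface of the rewritten reader, mirroring the keys in the diff.
interface ParquetReaderLike {
  getRowCount(): Promise<number>;
  getSchema(): Promise<unknown>;
  getSchemaMetadata(): Promise<Record<string, string>>;
  rowIterator(props?: {columnList?: (string | string[])[]}): AsyncIterable<Record<string, unknown>>;
  close(): void;
}

// Hypothetical factory signature matching `new ParquetReader(file, props)` and
// DEFAULT_PROPS ({defaultDictionarySize: 1e6}) from the diff.
export type ParquetReaderFactory = (
  file: ReadableFileLike,
  props?: {defaultDictionarySize?: number}
) => ParquetReaderLike;

// Hypothetical usage: iterate all rows, one row group at a time under the hood.
export async function dumpRows(reader: ParquetReaderLike): Promise<void> {
  console.log('rows:', await reader.getRowCount());
  for await (const row of reader.rowIterator()) {
    console.log(row); // rows are materialized per row group via materializeRecords()
  }
  reader.close();
}
```

The dictionary path (`getDictionary()` and `props.defaultDictionarySize`) only comes into play for dictionary-encoded column chunks; callers just consume the async generators with `for await`.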