@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. package/dist/dist.min.js +22 -29
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/constants.js +0 -2
  4. package/dist/es5/constants.js.map +1 -1
  5. package/dist/es5/index.js +47 -9
  6. package/dist/es5/index.js.map +1 -1
  7. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  8. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
  10. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  11. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  12. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  13. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +4 -32
  14. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  15. package/dist/es5/lib/geo/decode-geo-metadata.js +77 -0
  16. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  17. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  18. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  19. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +173 -0
  20. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  21. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +150 -0
  22. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  23. package/dist/es5/lib/wasm/encode-parquet-wasm.js +14 -16
  24. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
  25. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -1
  26. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +16 -18
  27. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  28. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +6 -8
  29. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  30. package/dist/es5/lib/wasm/parse-parquet-wasm.js +16 -18
  31. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  32. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  33. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  34. package/dist/es5/parquet-loader.js +4 -3
  35. package/dist/es5/parquet-loader.js.map +1 -1
  36. package/dist/es5/parquet-wasm-loader.js +1 -2
  37. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  38. package/dist/es5/parquet-wasm-writer.js +1 -1
  39. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  40. package/dist/es5/parquet-writer.js +1 -1
  41. package/dist/es5/parquet-writer.js.map +1 -1
  42. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  43. package/dist/es5/parquetjs/codecs/index.js +0 -1
  44. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  45. package/dist/es5/parquetjs/codecs/plain.js +0 -3
  46. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  47. package/dist/es5/parquetjs/codecs/rle.js +0 -4
  48. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  49. package/dist/es5/parquetjs/compression.js +58 -58
  50. package/dist/es5/parquetjs/compression.js.map +1 -1
  51. package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
  52. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  53. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +4 -4
  54. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +4 -4
  61. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +4 -4
  63. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +4 -4
  70. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +4 -4
  73. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  75. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  81. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  83. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  85. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  87. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  89. package/dist/es5/parquetjs/parquet-thrift/PageType.js +4 -4
  90. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  98. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/Type.js +4 -4
  100. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  102. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
  104. package/dist/es5/parquetjs/parser/decoders.js +244 -261
  105. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  106. package/dist/es5/parquetjs/parser/parquet-reader.js +555 -256
  107. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  108. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  109. package/dist/es5/parquetjs/schema/schema.js +2 -12
  110. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  111. package/dist/es5/parquetjs/schema/shred.js +40 -46
  112. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  113. package/dist/es5/parquetjs/schema/types.js +6 -11
  114. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  115. package/dist/es5/parquetjs/utils/file-utils.js +2 -4
  116. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  117. package/dist/es5/parquetjs/utils/read-utils.js +0 -7
  118. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  119. package/dist/es5/workers/parquet-worker.js.map +1 -1
  120. package/dist/esm/bundle.js +0 -1
  121. package/dist/esm/bundle.js.map +1 -1
  122. package/dist/esm/constants.js +0 -3
  123. package/dist/esm/constants.js.map +1 -1
  124. package/dist/esm/index.js +11 -9
  125. package/dist/esm/index.js.map +1 -1
  126. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  127. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  128. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
  129. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  130. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -16
  131. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  132. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +37 -0
  133. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  134. package/dist/esm/lib/geo/decode-geo-metadata.js +58 -0
  135. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  136. package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
  137. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  138. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +35 -0
  139. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  140. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +18 -0
  141. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  142. package/dist/esm/lib/wasm/encode-parquet-wasm.js +0 -1
  143. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -1
  144. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +0 -1
  145. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  146. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  147. package/dist/esm/lib/wasm/parse-parquet-wasm.js +0 -3
  148. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
  149. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  150. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  151. package/dist/esm/parquet-loader.js +4 -4
  152. package/dist/esm/parquet-loader.js.map +1 -1
  153. package/dist/esm/parquet-wasm-loader.js +1 -3
  154. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  155. package/dist/esm/parquet-wasm-writer.js +1 -2
  156. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  157. package/dist/esm/parquet-writer.js +1 -2
  158. package/dist/esm/parquet-writer.js.map +1 -1
  159. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -1
  160. package/dist/esm/parquetjs/codecs/index.js +0 -2
  161. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  162. package/dist/esm/parquetjs/codecs/plain.js +0 -4
  163. package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
  164. package/dist/esm/parquetjs/codecs/rle.js +0 -6
  165. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  166. package/dist/esm/parquetjs/compression.js +10 -10
  167. package/dist/esm/parquetjs/compression.js.map +1 -1
  168. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +6 -74
  169. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  170. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +3 -4
  171. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -1
  172. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +0 -1
  173. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  174. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  175. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  176. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  177. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  178. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +3 -4
  179. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  180. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +3 -4
  181. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -1
  182. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  183. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  184. package/dist/esm/parquetjs/parquet-thrift/DateType.js +0 -1
  185. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -1
  186. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  187. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  188. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +3 -4
  189. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -1
  190. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +0 -1
  191. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  192. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +3 -4
  193. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -1
  194. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  195. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +0 -1
  196. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  197. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -1
  198. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +0 -1
  199. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  200. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  201. package/dist/esm/parquetjs/parquet-thrift/ListType.js +0 -1
  202. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -1
  203. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  204. package/dist/esm/parquetjs/parquet-thrift/MapType.js +0 -1
  205. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -1
  206. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +0 -1
  207. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  208. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +0 -1
  209. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  210. package/dist/esm/parquetjs/parquet-thrift/NullType.js +0 -1
  211. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -1
  212. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  213. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  214. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  215. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  216. package/dist/esm/parquetjs/parquet-thrift/PageType.js +3 -4
  217. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -1
  218. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  219. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  220. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  221. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  222. package/dist/esm/parquetjs/parquet-thrift/StringType.js +0 -1
  223. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -1
  224. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  225. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  226. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  227. package/dist/esm/parquetjs/parquet-thrift/Type.js +3 -4
  228. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -1
  229. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -1
  230. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  231. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +0 -1
  232. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  233. package/dist/esm/parquetjs/parquet-thrift/index.js +0 -1
  234. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -1
  235. package/dist/esm/parquetjs/parser/decoders.js +1 -18
  236. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  237. package/dist/esm/parquetjs/parser/parquet-reader.js +153 -80
  238. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  239. package/dist/esm/parquetjs/schema/declare.js +0 -1
  240. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  241. package/dist/esm/parquetjs/schema/schema.js +0 -10
  242. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  243. package/dist/esm/parquetjs/schema/shred.js +42 -48
  244. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  245. package/dist/esm/parquetjs/schema/types.js +6 -10
  246. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  247. package/dist/esm/parquetjs/utils/file-utils.js +1 -2
  248. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  249. package/dist/esm/parquetjs/utils/read-utils.js +0 -8
  250. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -1
  251. package/dist/index.d.ts +24 -4
  252. package/dist/index.d.ts.map +1 -1
  253. package/dist/index.js +26 -9
  254. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  255. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  256. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  257. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  258. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  259. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  260. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  261. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  262. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  263. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  264. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  265. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  266. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  267. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  268. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  269. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  270. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  271. package/dist/lib/geo/geoparquet-schema.js +69 -0
  272. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  273. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  274. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  275. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  276. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  277. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  278. package/dist/parquet-loader.d.ts +2 -0
  279. package/dist/parquet-loader.d.ts.map +1 -1
  280. package/dist/parquet-loader.js +3 -1
  281. package/dist/parquet-worker.js +25 -32
  282. package/dist/parquet-worker.js.map +3 -3
  283. package/dist/parquetjs/compression.d.ts.map +1 -1
  284. package/dist/parquetjs/compression.js +16 -5
  285. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  286. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  287. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  288. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  289. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  290. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  291. package/dist/parquetjs/schema/declare.d.ts +14 -7
  292. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  293. package/dist/parquetjs/schema/declare.js +2 -0
  294. package/dist/parquetjs/schema/shred.d.ts +115 -0
  295. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  296. package/dist/parquetjs/schema/shred.js +161 -43
  297. package/dist/parquetjs/schema/types.d.ts +2 -2
  298. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  299. package/dist/parquetjs/schema/types.js +4 -6
  300. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  301. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  302. package/dist/parquetjs/utils/file-utils.js +2 -5
  303. package/package.json +8 -7
  304. package/src/index.ts +24 -4
  305. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  306. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  307. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  308. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  309. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  310. package/src/lib/geo/geoparquet-schema.ts +69 -0
  311. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  312. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  313. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  314. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  315. package/src/parquet-loader.ts +5 -1
  316. package/src/parquetjs/compression.ts +14 -1
  317. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  318. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  319. package/src/parquetjs/schema/declare.ts +17 -9
  320. package/src/parquetjs/schema/shred.ts +157 -28
  321. package/src/parquetjs/schema/types.ts +25 -30
  322. package/src/parquetjs/utils/file-utils.ts +3 -4
  323. package/dist/es5/lib/convert-schema.js.map +0 -1
  324. package/dist/es5/lib/parse-parquet.js +0 -130
  325. package/dist/es5/lib/parse-parquet.js.map +0 -1
  326. package/dist/es5/lib/read-array-buffer.js +0 -43
  327. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  328. package/dist/es5/parquetjs/encoder/writer.js +0 -757
  329. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  330. package/dist/es5/parquetjs/file.js +0 -94
  331. package/dist/es5/parquetjs/file.js.map +0 -1
  332. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  333. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  334. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  335. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  336. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  337. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  338. package/dist/esm/lib/convert-schema.js.map +0 -1
  339. package/dist/esm/lib/parse-parquet.js +0 -25
  340. package/dist/esm/lib/parse-parquet.js.map +0 -1
  341. package/dist/esm/lib/read-array-buffer.js +0 -10
  342. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  343. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  344. package/dist/esm/parquetjs/file.js +0 -81
  345. package/dist/esm/parquetjs/file.js.map +0 -1
  346. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  347. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  348. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  349. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  350. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  351. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  352. package/dist/lib/convert-schema.d.ts +0 -8
  353. package/dist/lib/convert-schema.d.ts.map +0 -1
  354. package/dist/lib/parse-parquet.d.ts +0 -4
  355. package/dist/lib/parse-parquet.d.ts.map +0 -1
  356. package/dist/lib/parse-parquet.js +0 -28
  357. package/dist/lib/read-array-buffer.d.ts +0 -19
  358. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  359. package/dist/lib/read-array-buffer.js +0 -29
  360. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  361. package/dist/parquetjs/file.d.ts +0 -10
  362. package/dist/parquetjs/file.d.ts.map +0 -1
  363. package/dist/parquetjs/file.js +0 -99
  364. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  365. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  366. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  367. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  368. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  369. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  370. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  371. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  372. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  373. package/src/lib/parse-parquet.ts +0 -27
  374. package/src/lib/read-array-buffer.ts +0 -31
  375. package/src/parquetjs/file.ts +0 -90
  376. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  377. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  378. package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -146,9 +146,14 @@ function shredRecordFields(
146
146
  */
147
147
  export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {
148
148
  const records: ParquetRecord[] = [];
149
- for (let i = 0; i < buffer.rowCount; i++) records.push({});
149
+ for (let i = 0; i < buffer.rowCount; i++) {
150
+ records.push({});
151
+ }
150
152
  for (const key in buffer.columnData) {
151
- materializeColumn(schema, buffer, key, records);
153
+ const columnData = buffer.columnData[key];
154
+ if (columnData.count) {
155
+ materializeColumn(schema, columnData, key, records);
156
+ }
152
157
  }
153
158
  return records;
154
159
  }
@@ -156,33 +161,151 @@ export function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer)
156
161
  // eslint-disable-next-line max-statements, complexity
157
162
  function materializeColumn(
158
163
  schema: ParquetSchema,
159
- buffer: ParquetBuffer,
164
+ columnData: ParquetData,
160
165
  key: string,
161
166
  records: ParquetRecord[]
162
- ) {
163
- const data = buffer.columnData[key];
164
- if (!data.count) return;
165
-
167
+ ): void {
166
168
  const field = schema.findField(key);
167
169
  const branch = schema.findFieldBranch(key);
168
170
 
169
171
  // tslint:disable-next-line:prefer-array-literal
170
172
  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
171
173
  let vIndex = 0;
172
- for (let i = 0; i < data.count; i++) {
173
- const dLevel = data.dlevels[i];
174
- const rLevel = data.rlevels[i];
174
+ for (let i = 0; i < columnData.count; i++) {
175
+ const dLevel = columnData.dlevels[i];
176
+ const rLevel = columnData.rlevels[i];
175
177
  rLevels[rLevel]++;
176
178
  rLevels.fill(0, rLevel + 1);
177
179
 
178
180
  let rIndex = 0;
179
181
  let record = records[rLevels[rIndex++] - 1];
180
182
 
181
- // Internal nodes
183
+ // Internal nodes - Build a nested row object
182
184
  for (const step of branch) {
183
- if (step === field) break;
184
- if (dLevel < step.dLevelMax) break;
185
- if (step.repetitionType === 'REPEATED') {
185
+ if (step === field || dLevel < step.dLevelMax) {
186
+ break;
187
+ }
188
+
189
+ switch (step.repetitionType) {
190
+ case 'REPEATED':
191
+ if (!(step.name in record)) {
192
+ // eslint-disable max-depth
193
+ record[step.name] = [];
194
+ }
195
+ const ix = rLevels[rIndex++];
196
+ while (record[step.name].length <= ix) {
197
+ // eslint-disable max-depth
198
+ record[step.name].push({});
199
+ }
200
+ record = record[step.name][ix];
201
+ break;
202
+
203
+ default:
204
+ record[step.name] = record[step.name] || {};
205
+ record = record[step.name];
206
+ }
207
+ }
208
+
209
+ // Leaf node - Add the value
210
+ if (dLevel === field.dLevelMax) {
211
+ const value = Types.fromPrimitive(
212
+ // @ts-ignore
213
+ field.originalType || field.primitiveType,
214
+ columnData.values[vIndex],
215
+ field
216
+ );
217
+ vIndex++;
218
+
219
+ switch (field.repetitionType) {
220
+ case 'REPEATED':
221
+ if (!(field.name in record)) {
222
+ // eslint-disable max-depth
223
+ record[field.name] = [];
224
+ }
225
+ const ix = rLevels[rIndex];
226
+ while (record[field.name].length <= ix) {
227
+ // eslint-disable max-depth
228
+ record[field.name].push(null);
229
+ }
230
+ record[field.name][ix] = value;
231
+ break;
232
+
233
+ default:
234
+ record[field.name] = value;
235
+ }
236
+ }
237
+ }
238
+ }
239
+
240
+ // Columnar export
241
+
242
+ /**
243
+ * 'Materialize' a list of <value, repetition_level, definition_level>
244
+ * tuples back to nested records (objects/arrays) using the Google Dremel
245
+ * Algorithm..
246
+ *
247
+ * The buffer argument must point to an object with the following structure (i.e.
248
+ * the same structure that is returned by shredRecords):
249
+ *
250
+ * buffer = {
251
+ * columnData: [
252
+ * 'my_col': {
253
+ * dlevels: [d1, d2, .. dN],
254
+ * rlevels: [r1, r2, .. rN],
255
+ * values: [v1, v2, .. vN],
256
+ * }, ...
257
+ * ],
258
+ * rowCount: X,
259
+ * }
260
+ *
261
+ export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
262
+ const columns: ParquetRecord = {};
263
+ for (const key in buffer.columnData) {
264
+ const columnData = buffer.columnData[key];
265
+ if (columnData.count) {
266
+ extractColumn(schema, columnData, key, columns);
267
+ }
268
+ }
269
+ return columns;
270
+ }
271
+
272
+ // eslint-disable-next-line max-statements, complexity
273
+ function extractColumn(
274
+ schema: ParquetSchema,
275
+ columnData: ParquetData,
276
+ key: string,
277
+ columns: Record<string, unknown>
278
+ ) {
279
+ if (columnData.count <= 0) {
280
+ return;
281
+ }
282
+
283
+ const record = columns;
284
+
285
+ const field = schema.findField(key);
286
+ const branch = schema.findFieldBranch(key);
287
+
288
+ // tslint:disable-next-line:prefer-array-literal
289
+ const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
290
+ let vIndex = 0;
291
+
292
+ let i = 0;
293
+ const dLevel = columnData.dlevels[i];
294
+ const rLevel = columnData.rlevels[i];
295
+ rLevels[rLevel]++;
296
+ rLevels.fill(0, rLevel + 1);
297
+
298
+ let rIndex = 0;
299
+ let record = records[rLevels[rIndex++] - 1];
300
+
301
+ // Internal nodes
302
+ for (const step of branch) {
303
+ if (step === field || dLevel < step.dLevelMax) {
304
+ break;
305
+ }
306
+
307
+ switch (step.repetitionType) {
308
+ case 'REPEATED':
186
309
  if (!(step.name in record)) {
187
310
  // eslint-disable max-depth
188
311
  record[step.name] = [];
@@ -193,22 +316,26 @@ function materializeColumn(
193
316
  record[step.name].push({});
194
317
  }
195
318
  record = record[step.name][ix];
196
- } else {
319
+ break;
320
+
321
+ default:
197
322
  record[step.name] = record[step.name] || {};
198
323
  record = record[step.name];
199
- }
200
324
  }
325
+ }
201
326
 
202
- // Leaf node
203
- if (dLevel === field.dLevelMax) {
204
- const value = Types.fromPrimitive(
205
- // @ts-ignore
206
- field.originalType || field.primitiveType,
207
- data.values[vIndex],
208
- field
209
- );
210
- vIndex++;
211
- if (field.repetitionType === 'REPEATED') {
327
+ // Leaf node
328
+ if (dLevel === field.dLevelMax) {
329
+ const value = Types.fromPrimitive(
330
+ // @ts-ignore
331
+ field.originalType || field.primitiveType,
332
+ columnData.values[vIndex],
333
+ field
334
+ );
335
+ vIndex++;
336
+
337
+ switch (field.repetitionType) {
338
+ case 'REPEATED':
212
339
  if (!(field.name in record)) {
213
340
  // eslint-disable max-depth
214
341
  record[field.name] = [];
@@ -219,9 +346,11 @@ function materializeColumn(
219
346
  record[field.name].push(null);
220
347
  }
221
348
  record[field.name][ix] = value;
222
- } else {
349
+ break;
350
+
351
+ default:
223
352
  record[field.name] = value;
224
- }
225
353
  }
226
354
  }
227
355
  }
356
+ */
@@ -1,6 +1,6 @@
1
1
  // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
2
2
  /* eslint-disable camelcase */
3
- import BSON from 'bson';
3
+ import {BSONLoader, BSONWriter} from '@loaders.gl/bson';
4
4
  import {OriginalType, ParquetField, ParquetType, PrimitiveType} from './declare';
5
5
 
6
6
  export interface ParquetTypeKit {
@@ -168,7 +168,7 @@ export const PARQUET_LOGICAL_TYPES: Record<ParquetType, ParquetTypeKit> = {
168
168
  * Convert a value from it's native representation to the internal/underlying
169
169
  * primitive type
170
170
  */
171
- export function toPrimitive(type: ParquetType, value: any, field?: ParquetField) {
171
+ export function toPrimitive(type: ParquetType, value: unknown, field?: ParquetField): unknown {
172
172
  if (!(type in PARQUET_LOGICAL_TYPES)) {
173
173
  throw new Error(`invalid type: ${type}`);
174
174
  }
@@ -180,7 +180,7 @@ export function toPrimitive(type: ParquetType, value: any, field?: ParquetField)
180
180
  * Convert a value from it's internal/underlying primitive representation to
181
181
  * the native representation
182
182
  */
183
- export function fromPrimitive(type: ParquetType, value: any, field?: ParquetField) {
183
+ export function fromPrimitive(type: ParquetType, value: unknown, field?: ParquetField) {
184
184
  if (!(type in PARQUET_LOGICAL_TYPES)) {
185
185
  throw new Error(`invalid type: ${type}`);
186
186
  }
@@ -192,29 +192,27 @@ export function fromPrimitive(type: ParquetType, value: any, field?: ParquetFiel
192
192
  return value;
193
193
  }
194
194
 
195
- function toPrimitive_BOOLEAN(value: any) {
195
+ function toPrimitive_BOOLEAN(value: unknown): boolean {
196
196
  return Boolean(value);
197
197
  }
198
198
 
199
- function fromPrimitive_BOOLEAN(value: any) {
199
+ function fromPrimitive_BOOLEAN(value: any): boolean {
200
200
  return Boolean(value);
201
201
  }
202
202
 
203
- function toPrimitive_FLOAT(value: any) {
203
+ function toPrimitive_FLOAT(value: any): number {
204
204
  const v = parseFloat(value);
205
205
  if (isNaN(v)) {
206
206
  throw new Error(`invalid value for FLOAT: ${value}`);
207
207
  }
208
-
209
208
  return v;
210
209
  }
211
210
 
212
- function toPrimitive_DOUBLE(value: any) {
211
+ function toPrimitive_DOUBLE(value: any): number {
213
212
  const v = parseFloat(value);
214
213
  if (isNaN(v)) {
215
214
  throw new Error(`invalid value for DOUBLE: ${value}`);
216
215
  }
217
-
218
216
  return v;
219
217
  }
220
218
 
@@ -263,31 +261,28 @@ function toPrimitive_INT32(value: any) {
263
261
  return v;
264
262
  }
265
263
 
266
- function decimalToPrimitive_INT32(value: number, field: ParquetField) {
264
+ function decimalToPrimitive_INT32(value: number, field: ParquetField): number {
267
265
  const primitiveValue = value * 10 ** (field.scale || 0);
268
266
  const v = Math.round(((primitiveValue * 10 ** -field.presision!) % 1) * 10 ** field.presision!);
269
267
  if (v < -0x80000000 || v > 0x7fffffff || isNaN(v)) {
270
268
  throw new Error(`invalid value for INT32: ${value}`);
271
269
  }
272
-
273
270
  return v;
274
271
  }
275
272
 
276
- function toPrimitive_UINT32(value: any) {
273
+ function toPrimitive_UINT32(value: any): number {
277
274
  const v = parseInt(value, 10);
278
275
  if (v < 0 || v > 0xffffffffffff || isNaN(v)) {
279
276
  throw new Error(`invalid value for UINT32: ${value}`);
280
277
  }
281
-
282
278
  return v;
283
279
  }
284
280
 
285
- function toPrimitive_INT64(value: any) {
281
+ function toPrimitive_INT64(value: any): number {
286
282
  const v = parseInt(value, 10);
287
283
  if (isNaN(v)) {
288
284
  throw new Error(`invalid value for INT64: ${value}`);
289
285
  }
290
-
291
286
  return v;
292
287
  }
293
288
 
@@ -319,37 +314,38 @@ function toPrimitive_INT96(value: any) {
319
314
  return v;
320
315
  }
321
316
 
322
- function toPrimitive_BYTE_ARRAY(value: any) {
317
+ function toPrimitive_BYTE_ARRAY(value: any): Buffer {
323
318
  return Buffer.from(value);
324
319
  }
325
320
 
326
- function decimalToPrimitive_BYTE_ARRAY(value: any) {
321
+ function decimalToPrimitive_BYTE_ARRAY(value: any): Buffer {
327
322
  // TBD
328
323
  return Buffer.from(value);
329
324
  }
330
325
 
331
- function toPrimitive_UTF8(value: any) {
326
+ function toPrimitive_UTF8(value: any): Buffer {
332
327
  return Buffer.from(value, 'utf8');
333
328
  }
334
329
 
335
- function fromPrimitive_UTF8(value: any) {
330
+ function fromPrimitive_UTF8(value: any): string {
336
331
  return value.toString();
337
332
  }
338
333
 
339
- function toPrimitive_JSON(value: any) {
334
+ function toPrimitive_JSON(value: any): Buffer {
340
335
  return Buffer.from(JSON.stringify(value));
341
336
  }
342
337
 
343
- function fromPrimitive_JSON(value: any) {
338
+ function fromPrimitive_JSON(value: any): unknown {
344
339
  return JSON.parse(value);
345
340
  }
346
341
 
347
- function toPrimitive_BSON(value: any) {
348
- return Buffer.from(BSON.serialize(value));
342
+ function toPrimitive_BSON(value: any): Buffer {
343
+ const arrayBuffer = BSONWriter.encodeSync?.(value) as ArrayBuffer;
344
+ return Buffer.from(arrayBuffer);
349
345
  }
350
346
 
351
347
  function fromPrimitive_BSON(value: any) {
352
- return BSON.deserialize(value);
348
+ return BSONLoader.parseSync?.(value);
353
349
  }
354
350
 
355
351
  function toPrimitive_TIME_MILLIS(value: any) {
@@ -361,18 +357,17 @@ function toPrimitive_TIME_MILLIS(value: any) {
361
357
  return v;
362
358
  }
363
359
 
364
- function toPrimitive_TIME_MICROS(value: any) {
360
+ function toPrimitive_TIME_MICROS(value: any): number {
365
361
  const v = parseInt(value, 10);
366
362
  if (v < 0 || isNaN(v)) {
367
363
  throw new Error(`invalid value for TIME_MICROS: ${value}`);
368
364
  }
369
-
370
365
  return v;
371
366
  }
372
367
 
373
368
  const kMillisPerDay = 86400000;
374
369
 
375
- function toPrimitive_DATE(value: any) {
370
+ function toPrimitive_DATE(value: any): number {
376
371
  /* convert from date */
377
372
  if (value instanceof Date) {
378
373
  return value.getTime() / kMillisPerDay;
@@ -389,11 +384,11 @@ function toPrimitive_DATE(value: any) {
389
384
  }
390
385
  }
391
386
 
392
- function fromPrimitive_DATE(value: any) {
387
+ function fromPrimitive_DATE(value: any): Date {
393
388
  return new Date(value * kMillisPerDay);
394
389
  }
395
390
 
396
- function toPrimitive_TIMESTAMP_MILLIS(value: any) {
391
+ function toPrimitive_TIMESTAMP_MILLIS(value: any): number {
397
392
  /* convert from date */
398
393
  if (value instanceof Date) {
399
394
  return value.getTime();
@@ -410,7 +405,7 @@ function toPrimitive_TIMESTAMP_MILLIS(value: any) {
410
405
  }
411
406
  }
412
407
 
413
- function fromPrimitive_TIMESTAMP_MILLIS(value: any) {
408
+ function fromPrimitive_TIMESTAMP_MILLIS(value: any): Date {
414
409
  return new Date(value);
415
410
  }
416
411
 
@@ -1,6 +1,5 @@
1
1
  // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
2
- import fs from 'fs';
3
- import {Writable} from 'stream';
2
+ import {fs, stream} from '@loaders.gl/loader-utils';
4
3
 
5
4
  export function load(name: string): any {
6
5
  return (module || (global as any)).require(name);
@@ -14,7 +13,7 @@ export interface WriteStreamOptions {
14
13
  start?: number;
15
14
  }
16
15
 
17
- export function oswrite(os: Writable, buf: Buffer): Promise<void> {
16
+ export function oswrite(os: stream.Writable, buf: Buffer): Promise<void> {
18
17
  return new Promise((resolve, reject) => {
19
18
  os.write(buf, (err) => {
20
19
  if (err) {
@@ -26,7 +25,7 @@ export function oswrite(os: Writable, buf: Buffer): Promise<void> {
26
25
  });
27
26
  }
28
27
 
29
- export function osclose(os: Writable): Promise<void> {
28
+ export function osclose(os: stream.Writable): Promise<void> {
30
29
  return new Promise((resolve, reject) => {
31
30
  (os as any).close((err: any) => {
32
31
  if (err) {
@@ -1 +0,0 @@
1
- {"version":3,"file":"convert-schema.js","names":["PARQUET_TYPE_MAPPING","BOOLEAN","Bool","INT32","Int32","INT64","Float64","INT96","FLOAT","Float32","DOUBLE","BYTE_ARRAY","Binary","FIXED_LEN_BYTE_ARRAY","UTF8","Utf8","DATE","TIME_MILLIS","Int64","TIME_MICROS","TIMESTAMP_MILLIS","TIMESTAMP_MICROS","UINT_8","UINT_16","Uint16","UINT_32","Uint32","UINT_64","Uint64","INT_8","Int8","INT_16","Int16","INT_32","INT_64","JSON","BSON","INTERVAL","DECIMAL_INT32","DECIMAL_INT64","DECIMAL_BYTE_ARRAY","DECIMAL_FIXED_LEN_BYTE_ARRAY","convertParquetToArrowSchema","parquetSchema","fields","getFields","schema","Schema","getFieldMetadata","field","metadata","Map","key","value","stringify","set","name","childField","nestedField","Field","Struct","optional","push","FieldType","type","arrowField"],"sources":["../../../src/lib/convert-schema.ts"],"sourcesContent":["import type {ParquetSchema} from '../parquetjs/schema/schema';\nimport type {FieldDefinition, ParquetField, ParquetType} from '../parquetjs/schema/declare';\n\nimport {\n Schema,\n Struct,\n Field,\n DataType,\n Bool,\n Float64,\n Int32,\n Float32,\n Binary,\n Utf8,\n Int64,\n Uint16,\n Uint32,\n Uint64,\n Int8,\n Int16\n} from '@loaders.gl/schema';\n\nexport const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {\n BOOLEAN: Bool,\n INT32: Int32,\n INT64: Float64,\n INT96: Float64,\n FLOAT: Float32,\n DOUBLE: Float64,\n BYTE_ARRAY: Binary,\n FIXED_LEN_BYTE_ARRAY: Binary,\n UTF8: Utf8,\n DATE: Int32,\n TIME_MILLIS: Int64,\n TIME_MICROS: Int64,\n TIMESTAMP_MILLIS: Int64,\n TIMESTAMP_MICROS: Int64,\n UINT_8: Int32,\n UINT_16: Uint16,\n UINT_32: Uint32,\n UINT_64: Uint64,\n INT_8: Int8,\n INT_16: Int16,\n INT_32: Int32,\n INT_64: Int64,\n JSON: Binary,\n BSON: Binary,\n // TODO check interal type\n INTERVAL: Binary,\n DECIMAL_INT32: Float32,\n DECIMAL_INT64: Float64,\n DECIMAL_BYTE_ARRAY: Float64,\n DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64\n};\n\nexport function convertParquetToArrowSchema(parquetSchema: ParquetSchema): Schema {\n const fields = getFields(parquetSchema.schema);\n\n // TODO add metadata if needed.\n return new Schema(fields);\n}\n\nfunction getFieldMetadata(field: ParquetField): Map<string, string> {\n const metadata = new Map();\n\n for (const key in field) {\n if (key !== 'name') {\n const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];\n metadata.set(key, value);\n }\n }\n\n return metadata;\n}\n\nfunction getFields(schema: FieldDefinition): Field[] {\n const fields: Field[] = [];\n\n for (const name in schema) {\n const field = schema[name];\n\n if (field.fields) {\n const childField = getFields(field.fields);\n const nestedField = new Field(name, new Struct(childField), field.optional);\n fields.push(nestedField);\n } else {\n const FieldType = PARQUET_TYPE_MAPPING[field.type];\n const metadata = getFieldMetadata(field);\n const arrowField = new Field(name, new FieldType(), field.optional, metadata);\n fields.push(arrowField);\n }\n }\n\n return fields;\n}\n"],"mappings":";;;;;;;AAGA;AAmBO,IAAMA,oBAA8D,GAAG;EAC5EC,OAAO,EAAEC,YAAI;EACbC,KAAK,EAAEC,aAAK;EACZC,KAAK,EAAEC,eAAO;EACdC,KAAK,EAAED,eAAO;EACdE,KAAK,EAAEC,eAAO;EACdC,MAAM,EAAEJ,eAAO;EACfK,UAAU,EAAEC,cAAM;EAClBC,oBAAoB,EAAED,cAAM;EAC5BE,IAAI,EAAEC,YAAI;EACVC,IAAI,EAAEZ,aAAK;EACXa,WAAW,EAAEC,aAAK;EAClBC,WAAW,EAAED,aAAK;EAClBE,gBAAgB,EAAEF,aAAK;EACvBG,gBAAgB,EAAEH,aAAK;EACvBI,MAAM,EAAElB,aAAK;EACbmB,OAAO,EAAEC,cAAM;EACfC,OAAO,EAAEC,cAAM;EACfC,OAAO,EAAEC,cAAM;EACfC,KAAK,EAAEC,YAAI;EACXC,MAAM,EAAEC,aAAK;EACbC,MAAM,EAAE7B,aAAK;EACb8B,MAAM,EAAEhB,aAAK;EACbiB,IAAI,EAAEvB,cAAM;EACZwB,IAAI,EAAExB,cAAM;EAEZyB,QAAQ,EAAEzB,cAAM;EAChB0B,aAAa,EAAE7B,eAAO;EACtB8B,aAAa,EAAEjC,eAAO;EACtBkC,kBAAkB,EAAElC,eAAO;EAC3BmC,4BAA4B,EAAEnC;AAChC,CAAC;AAAC;AAEK,SAASoC,2BAA2B,CAACC,aAA4B,EAAU;EAChF,IAAMC,MAAM,GAAGC,SAAS,CAACF,aAAa,CAACG,MAAM,CAAC;;EAG9C,OAAO,IAAIC,cAAM,CAACH,MAAM,CAAC;AAC3B;AAEA,SAASI,gBAAgB,CAACC,KAAmB,EAAuB;EAClE,IAAMC,QAAQ,GAAG,IAAIC,GAAG,EAAE;EAE1B,KAAK,IAAMC,GAAG,IAAIH,KAAK,EAAE;IACvB,IAAIG,GAAG,KAAK,MAAM,EAAE;MAClB,IAAMC,KAAK,GAAG,OAAOJ,KAAK,CAACG,GAAG,CAAC,KAAK,QAAQ,GAAGjB,IAAI,CAACmB,SAAS,CAACL,KAAK,CAACG,GAAG,CAAC,CAAC,GAAGH,KAAK,CAACG,GAAG,CAAC;MACtFF,QAAQ,CAACK,GAAG,CAACH,GAAG,EAAEC,KAAK,CAAC;IAC1B;EACF;EAEA,OAAOH,QAAQ;AACjB;AAEA,SAASL,SAAS,CAACC,MAAuB,EAAW;EACnD,IAAMF,MAAe,GAAG,EAAE;EAE1B,KAAK,IAAMY,IAAI,IAAIV,MAAM,EAAE;IACzB,IAAMG,KAAK,GAAGH,MAAM,CAACU,IAAI,CAAC;IAE1B,IAAIP,KAAK,CAACL,MAAM,EAAE;MAChB,IAAMa,UAAU,GAAGZ,SAAS,CAACI,KAAK,CAACL,MAAM,CAAC;MAC1C,IAAMc,WAAW,GAAG,IAAIC,aAAK,CAACH,IAAI,EAAE,IAAII,cAAM,CAACH,UAAU,CAAC,EAAER,KAAK,CAACY,QAAQ,CAAC;MAC3EjB,MAAM,CAACkB,IAAI,CAACJ,WAAW,CAAC;IAC1B,CAAC,MAAM;MACL,IAAMK,SAAS,GAAG/D,oBAAoB,CAACiD,KAAK,CAACe,IAAI,CAAC;MAClD,IAAMd,QAAQ,GAAGF,gBAAgB,CAACC,KAAK,CAAC;MACxC,IAAMgB,UAAU,GAAG,IAAIN,aAAK,CAACH,IAAI,EAAE,IAAIO,SAAS,EAAE,EAAEd,KAAK,CAACY,QAAQ,EAAEX,QAAQ,CAAC;MAC7EN,MAAM,CAACkB,IAAI,CAACG,UAAU,CAAC;IACzB;EACF;EAEA,OAAOrB,MAAM;AACf"}
@@ -1,130 +0,0 @@
1
- "use strict";
2
-
3
- var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
- Object.defineProperty(exports, "__esModule", {
5
- value: true
6
- });
7
- exports.parseParquet = parseParquet;
8
- exports.parseParquetFileInBatches = parseParquetFileInBatches;
9
- var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
10
- var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
11
- var _awaitAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/awaitAsyncGenerator"));
12
- var _wrapAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/wrapAsyncGenerator"));
13
- var _parquetReader = require("../parquetjs/parser/parquet-reader");
14
- function _asyncIterator(iterable) { var method, async, sync, retry = 2; for ("undefined" != typeof Symbol && (async = Symbol.asyncIterator, sync = Symbol.iterator); retry--;) { if (async && null != (method = iterable[async])) return method.call(iterable); if (sync && null != (method = iterable[sync])) return new AsyncFromSyncIterator(method.call(iterable)); async = "@@asyncIterator", sync = "@@iterator"; } throw new TypeError("Object is not async iterable"); }
15
- function AsyncFromSyncIterator(s) { function AsyncFromSyncIteratorContinuation(r) { if (Object(r) !== r) return Promise.reject(new TypeError(r + " is not an object.")); var done = r.done; return Promise.resolve(r.value).then(function (value) { return { value: value, done: done }; }); } return AsyncFromSyncIterator = function AsyncFromSyncIterator(s) { this.s = s, this.n = s.next; }, AsyncFromSyncIterator.prototype = { s: null, n: null, next: function next() { return AsyncFromSyncIteratorContinuation(this.n.apply(this.s, arguments)); }, return: function _return(value) { var ret = this.s.return; return void 0 === ret ? Promise.resolve({ value: value, done: !0 }) : AsyncFromSyncIteratorContinuation(ret.apply(this.s, arguments)); }, throw: function _throw(value) { var thr = this.s.return; return void 0 === thr ? Promise.reject(value) : AsyncFromSyncIteratorContinuation(thr.apply(this.s, arguments)); } }, new AsyncFromSyncIterator(s); }
16
- function parseParquet(_x3, _x4) {
17
- return _parseParquet.apply(this, arguments);
18
- }
19
- function _parseParquet() {
20
- _parseParquet = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(arrayBuffer, options) {
21
- var blob, _iteratorAbruptCompletion, _didIteratorError, _iteratorError, _iterator, _step, batch;
22
- return _regenerator.default.wrap(function _callee2$(_context2) {
23
- while (1) {
24
- switch (_context2.prev = _context2.next) {
25
- case 0:
26
- blob = new Blob([arrayBuffer]);
27
- _iteratorAbruptCompletion = false;
28
- _didIteratorError = false;
29
- _context2.prev = 3;
30
- _iterator = _asyncIterator(parseParquetFileInBatches(blob, options));
31
- case 5:
32
- _context2.next = 7;
33
- return _iterator.next();
34
- case 7:
35
- if (!(_iteratorAbruptCompletion = !(_step = _context2.sent).done)) {
36
- _context2.next = 13;
37
- break;
38
- }
39
- batch = _step.value;
40
- return _context2.abrupt("return", batch);
41
- case 10:
42
- _iteratorAbruptCompletion = false;
43
- _context2.next = 5;
44
- break;
45
- case 13:
46
- _context2.next = 19;
47
- break;
48
- case 15:
49
- _context2.prev = 15;
50
- _context2.t0 = _context2["catch"](3);
51
- _didIteratorError = true;
52
- _iteratorError = _context2.t0;
53
- case 19:
54
- _context2.prev = 19;
55
- _context2.prev = 20;
56
- if (!(_iteratorAbruptCompletion && _iterator.return != null)) {
57
- _context2.next = 24;
58
- break;
59
- }
60
- _context2.next = 24;
61
- return _iterator.return();
62
- case 24:
63
- _context2.prev = 24;
64
- if (!_didIteratorError) {
65
- _context2.next = 27;
66
- break;
67
- }
68
- throw _iteratorError;
69
- case 27:
70
- return _context2.finish(24);
71
- case 28:
72
- return _context2.finish(19);
73
- case 29:
74
- return _context2.abrupt("return", null);
75
- case 30:
76
- case "end":
77
- return _context2.stop();
78
- }
79
- }
80
- }, _callee2, null, [[3, 15, 19, 29], [20,, 24, 28]]);
81
- }));
82
- return _parseParquet.apply(this, arguments);
83
- }
84
- function parseParquetFileInBatches(_x, _x2) {
85
- return _parseParquetFileInBatches.apply(this, arguments);
86
- }
87
- function _parseParquetFileInBatches() {
88
- _parseParquetFileInBatches = (0, _wrapAsyncGenerator2.default)(_regenerator.default.mark(function _callee(blob, options) {
89
- var reader, rows, cursor, record;
90
- return _regenerator.default.wrap(function _callee$(_context) {
91
- while (1) {
92
- switch (_context.prev = _context.next) {
93
- case 0:
94
- _context.next = 2;
95
- return (0, _awaitAsyncGenerator2.default)(_parquetReader.ParquetReader.openBlob(blob));
96
- case 2:
97
- reader = _context.sent;
98
- rows = [];
99
- _context.prev = 4;
100
- cursor = reader.getCursor();
101
- case 6:
102
- _context.next = 8;
103
- return (0, _awaitAsyncGenerator2.default)(cursor.next());
104
- case 8:
105
- if (!(record = _context.sent)) {
106
- _context.next = 12;
107
- break;
108
- }
109
- rows.push(record);
110
- _context.next = 6;
111
- break;
112
- case 12:
113
- _context.prev = 12;
114
- _context.next = 15;
115
- return (0, _awaitAsyncGenerator2.default)(reader.close());
116
- case 15:
117
- return _context.finish(12);
118
- case 16:
119
- _context.next = 18;
120
- return rows;
121
- case 18:
122
- case "end":
123
- return _context.stop();
124
- }
125
- }
126
- }, _callee, null, [[4,, 12, 16]]);
127
- }));
128
- return _parseParquetFileInBatches.apply(this, arguments);
129
- }
130
- //# sourceMappingURL=parse-parquet.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"parse-parquet.js","names":["parseParquet","arrayBuffer","options","blob","Blob","parseParquetFileInBatches","batch","ParquetReader","openBlob","reader","rows","cursor","getCursor","next","record","push","close"],"sources":["../../../src/lib/parse-parquet.ts"],"sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport type {ParquetLoaderOptions} from '../parquet-loader';\n\nimport {ParquetReader} from '../parquetjs/parser/parquet-reader';\n\nexport async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const blob = new Blob([arrayBuffer]);\n for await (const batch of parseParquetFileInBatches(blob, options)) {\n return batch;\n }\n return null;\n}\n\nexport async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openBlob(blob);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n yield rows;\n}\n"],"mappings":";;;;;;;;;;;;AAGA;AAAiE;AAAA;AAAA,SAE3CA,YAAY;EAAA;AAAA;AAAA;EAAA,0EAA3B,kBAA4BC,WAAwB,EAAEC,OAA8B;IAAA;IAAA;MAAA;QAAA;UAAA;YACnFC,IAAI,GAAG,IAAIC,IAAI,CAAC,CAACH,WAAW,CAAC,CAAC;YAAA;YAAA;YAAA;YAAA,2BACVI,yBAAyB,CAACF,IAAI,EAAED,OAAO,CAAC;UAAA;YAAA;YAAA;UAAA;YAAA;cAAA;cAAA;YAAA;YAAjDI,KAAK;YAAA,kCACbA,KAAK;UAAA;YAAA;YAAA;YAAA;UAAA;YAAA;YAAA;UAAA;YAAA;YAAA;YAAA;YAAA;UAAA;YAAA;YAAA;YAAA;cAAA;cAAA;YAAA;YAAA;YAAA;UAAA;YAAA;YAAA;cAAA;cAAA;YAAA;YAAA;UAAA;YAAA;UAAA;YAAA;UAAA;YAAA,kCAEP,IAAI;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CACZ;EAAA;AAAA;AAAA,SAEsBD,yBAAyB;EAAA;AAAA;AAAA;EAAA,yFAAzC,iBAA0CF,IAAU,EAAED,OAA8B;IAAA;IAAA;MAAA;QAAA;UAAA;YAAA;YAAA,0CACpEK,4BAAa,CAACC,QAAQ,CAACL,IAAI,CAAC;UAAA;YAA3CM,MAAM;YACNC,IAAa,GAAG,EAAE;YAAA;YAEhBC,MAAM,GAAGF,MAAM,CAACG,SAAS,EAAE;UAAA;YAAA;YAAA,0CAEVD,MAAM,CAACE,IAAI,EAAE;UAAA;YAAA,MAA5BC,MAAM;cAAA;cAAA;YAAA;YACZJ,IAAI,CAACK,IAAI,CAACD,MAAM,CAAC;YAAC;YAAA;UAAA;YAAA;YAAA;YAAA,0CAGdL,MAAM,CAACO,KAAK,EAAE;UAAA;YAAA;UAAA;YAAA;YAEtB,OAAMN,IAAI;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CACX;EAAA;AAAA"}
@@ -1,43 +0,0 @@
1
- "use strict";
2
-
3
- var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
- Object.defineProperty(exports, "__esModule", {
5
- value: true
6
- });
7
- exports.readArrayBuffer = readArrayBuffer;
8
- var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
9
- var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
10
- function readArrayBuffer(_x, _x2, _x3) {
11
- return _readArrayBuffer.apply(this, arguments);
12
- }
13
- function _readArrayBuffer() {
14
- _readArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(file, start, length) {
15
- var slice;
16
- return _regenerator.default.wrap(function _callee$(_context) {
17
- while (1) {
18
- switch (_context.prev = _context.next) {
19
- case 0:
20
- if (!(file instanceof Blob)) {
21
- _context.next = 5;
22
- break;
23
- }
24
- slice = file.slice(start, start + length);
25
- _context.next = 4;
26
- return slice.arrayBuffer();
27
- case 4:
28
- return _context.abrupt("return", _context.sent);
29
- case 5:
30
- _context.next = 7;
31
- return file.read(start, start + length);
32
- case 7:
33
- return _context.abrupt("return", _context.sent);
34
- case 8:
35
- case "end":
36
- return _context.stop();
37
- }
38
- }
39
- }, _callee);
40
- }));
41
- return _readArrayBuffer.apply(this, arguments);
42
- }
43
- //# sourceMappingURL=read-array-buffer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"read-array-buffer.js","names":["readArrayBuffer","file","start","length","Blob","slice","arrayBuffer","read"],"sources":["../../../src/lib/read-array-buffer.ts"],"sourcesContent":["// Random-Access read\n\nexport async function readArrayBuffer(\n file: Blob | ArrayBuffer | any,\n start: number,\n length: number\n): Promise<ArrayBuffer> {\n if (file instanceof Blob) {\n const slice = file.slice(start, start + length);\n return await slice.arrayBuffer();\n }\n return await file.read(start, start + length);\n}\n\n/**\n * Read a slice of a Blob or File, without loading the entire file into memory\n * The trick when reading File objects is to read successive \"slices\" of the File\n * Per spec https://w3c.github.io/FileAPI/, slicing a File only updates the start and end fields\n * Actually reading from file happens in `readAsArrayBuffer`\n * @param blob to read\n export async function readBlob(blob: Blob): Promise<ArrayBuffer> {\n return await new Promise((resolve, reject) => {\n const fileReader = new FileReader();\n fileReader.onload = (event: ProgressEvent<FileReader>) =>\n resolve(event?.target?.result as ArrayBuffer);\n // TODO - reject with a proper Error\n fileReader.onerror = (error: ProgressEvent<FileReader>) => reject(error);\n fileReader.readAsArrayBuffer(blob);\n });\n}\n*/\n"],"mappings":";;;;;;;;;SAEsBA,eAAe;EAAA;AAAA;AAAA;EAAA,6EAA9B,iBACLC,IAA8B,EAC9BC,KAAa,EACbC,MAAc;IAAA;IAAA;MAAA;QAAA;UAAA;YAAA,MAEVF,IAAI,YAAYG,IAAI;cAAA;cAAA;YAAA;YAChBC,KAAK,GAAGJ,IAAI,CAACI,KAAK,CAACH,KAAK,EAAEA,KAAK,GAAGC,MAAM,CAAC;YAAA;YAAA,OAClCE,KAAK,CAACC,WAAW,EAAE;UAAA;YAAA;UAAA;YAAA;YAAA,OAErBL,IAAI,CAACM,IAAI,CAACL,KAAK,EAAEA,KAAK,GAAGC,MAAM,CAAC;UAAA;YAAA;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CAC9C;EAAA;AAAA"}