@loaders.gl/parquet 4.0.0-alpha.5 → 4.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (596) hide show
  1. package/dist/bundle.js +2 -2
  2. package/dist/constants.js +18 -6
  3. package/dist/dist.min.js +27 -25
  4. package/dist/dist.min.js.map +3 -3
  5. package/dist/es5/bundle.js +6 -0
  6. package/dist/es5/bundle.js.map +1 -0
  7. package/dist/es5/constants.js +17 -0
  8. package/dist/es5/constants.js.map +1 -0
  9. package/dist/es5/index.js +128 -0
  10. package/dist/es5/index.js.map +1 -0
  11. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  12. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  13. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
  14. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  15. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +114 -0
  16. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  17. package/dist/es5/lib/arrow/convert-schema-to-parquet.js +47 -0
  18. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  19. package/dist/es5/lib/geo/decode-geo-metadata.js +81 -0
  20. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  21. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  22. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  23. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  24. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  25. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +172 -0
  26. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  27. package/dist/es5/lib/wasm/encode-parquet-wasm.js +43 -0
  28. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
  29. package/dist/es5/lib/wasm/load-wasm/index.js +13 -0
  30. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
  31. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +42 -0
  32. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  33. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +31 -0
  34. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  35. package/dist/es5/lib/wasm/parse-parquet-wasm.js +60 -0
  36. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
  37. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  38. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  39. package/dist/es5/parquet-loader.js +44 -0
  40. package/dist/es5/parquet-loader.js.map +1 -0
  41. package/dist/es5/parquet-wasm-loader.js +30 -0
  42. package/dist/es5/parquet-wasm-loader.js.map +1 -0
  43. package/dist/es5/parquet-wasm-writer.js +26 -0
  44. package/dist/es5/parquet-wasm-writer.js.map +1 -0
  45. package/dist/es5/parquet-writer.js +24 -0
  46. package/dist/es5/parquet-writer.js.map +1 -0
  47. package/dist/es5/parquetjs/codecs/declare.js +2 -0
  48. package/dist/es5/parquetjs/codecs/declare.js.map +1 -0
  49. package/dist/es5/parquetjs/codecs/dictionary.js +23 -0
  50. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  51. package/dist/es5/parquetjs/codecs/index.js +47 -0
  52. package/dist/es5/parquetjs/codecs/index.js.map +1 -0
  53. package/dist/es5/parquetjs/codecs/plain.js +208 -0
  54. package/dist/es5/parquetjs/codecs/plain.js.map +1 -0
  55. package/dist/es5/parquetjs/codecs/rle.js +132 -0
  56. package/dist/es5/parquetjs/codecs/rle.js.map +1 -0
  57. package/dist/es5/parquetjs/compression.js +137 -0
  58. package/dist/es5/parquetjs/compression.js.map +1 -0
  59. package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
  60. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  61. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +14 -0
  62. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
  63. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +52 -0
  64. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -0
  65. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +193 -0
  66. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
  67. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +198 -0
  68. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
  69. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +367 -0
  70. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
  71. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +99 -0
  72. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
  73. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +19 -0
  74. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
  75. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +33 -0
  76. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
  77. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +152 -0
  78. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
  79. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +207 -0
  80. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
  81. package/dist/es5/parquetjs/parquet-thrift/DateType.js +52 -0
  82. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -0
  83. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +96 -0
  84. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
  85. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -0
  86. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
  87. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +19 -0
  88. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -0
  89. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +52 -0
  90. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -0
  91. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -0
  92. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
  93. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +239 -0
  94. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
  95. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +52 -0
  96. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
  97. package/dist/es5/parquetjs/parquet-thrift/IntType.js +96 -0
  98. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -0
  99. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +52 -0
  100. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -0
  101. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +94 -0
  102. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
  103. package/dist/es5/parquetjs/parquet-thrift/ListType.js +52 -0
  104. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -0
  105. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +423 -0
  106. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
  107. package/dist/es5/parquetjs/parquet-thrift/MapType.js +52 -0
  108. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -0
  109. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +52 -0
  110. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
  111. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +52 -0
  112. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
  113. package/dist/es5/parquetjs/parquet-thrift/NullType.js +52 -0
  114. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -0
  115. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +89 -0
  116. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
  117. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +115 -0
  118. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
  119. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +204 -0
  120. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
  121. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +124 -0
  122. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
  123. package/dist/es5/parquetjs/parquet-thrift/PageType.js +15 -0
  124. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -0
  125. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +165 -0
  126. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
  127. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +231 -0
  128. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
  129. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +115 -0
  130. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
  131. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +165 -0
  132. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -0
  133. package/dist/es5/parquetjs/parquet-thrift/StringType.js +52 -0
  134. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -0
  135. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +97 -0
  136. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -0
  137. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +126 -0
  138. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
  139. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +97 -0
  140. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
  141. package/dist/es5/parquetjs/parquet-thrift/Type.js +19 -0
  142. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -0
  143. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +52 -0
  144. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
  145. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +52 -0
  146. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
  147. package/dist/es5/parquetjs/parquet-thrift/index.js +479 -0
  148. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -0
  149. package/dist/es5/parquetjs/parser/decoders.js +393 -0
  150. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  151. package/dist/es5/parquetjs/parser/parquet-reader.js +610 -0
  152. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  153. package/dist/es5/parquetjs/schema/declare.js +21 -0
  154. package/dist/es5/parquetjs/schema/declare.js.map +1 -0
  155. package/dist/es5/parquetjs/schema/schema.js +165 -0
  156. package/dist/es5/parquetjs/schema/schema.js.map +1 -0
  157. package/dist/es5/parquetjs/schema/shred.js +282 -0
  158. package/dist/es5/parquetjs/schema/shred.js.map +1 -0
  159. package/dist/es5/parquetjs/schema/types.js +406 -0
  160. package/dist/es5/parquetjs/schema/types.js.map +1 -0
  161. package/dist/es5/parquetjs/utils/file-utils.js +47 -0
  162. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  163. package/dist/es5/parquetjs/utils/read-utils.js +120 -0
  164. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  165. package/dist/es5/workers/parquet-worker.js +6 -0
  166. package/dist/es5/workers/parquet-worker.js.map +1 -0
  167. package/dist/esm/bundle.js +4 -0
  168. package/dist/esm/bundle.js.map +1 -0
  169. package/dist/esm/constants.js +6 -0
  170. package/dist/esm/constants.js.map +1 -0
  171. package/dist/esm/index.js +31 -0
  172. package/dist/esm/index.js.map +1 -0
  173. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  174. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  175. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
  176. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  177. package/dist/esm/lib/arrow/convert-schema-from-parquet.js +95 -0
  178. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  179. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +39 -0
  180. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  181. package/dist/esm/lib/geo/decode-geo-metadata.js +62 -0
  182. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  183. package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
  184. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  185. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +39 -0
  186. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  187. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +29 -0
  188. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  189. package/dist/esm/lib/wasm/encode-parquet-wasm.js +15 -0
  190. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
  191. package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
  192. package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
  193. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +11 -0
  194. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  195. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
  196. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  197. package/dist/esm/lib/wasm/parse-parquet-wasm.js +21 -0
  198. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
  199. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  200. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  201. package/dist/esm/parquet-loader.js +36 -0
  202. package/dist/esm/parquet-loader.js.map +1 -0
  203. package/dist/esm/parquet-wasm-loader.js +22 -0
  204. package/dist/esm/parquet-wasm-loader.js.map +1 -0
  205. package/dist/esm/parquet-wasm-writer.js +19 -0
  206. package/dist/esm/parquet-wasm-writer.js.map +1 -0
  207. package/dist/esm/parquet-writer.js +17 -0
  208. package/dist/esm/parquet-writer.js.map +1 -0
  209. package/dist/esm/parquetjs/LICENSE +20 -0
  210. package/dist/esm/parquetjs/codecs/declare.js +2 -0
  211. package/dist/esm/parquetjs/codecs/declare.js.map +1 -0
  212. package/dist/esm/parquetjs/codecs/dictionary.js +13 -0
  213. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  214. package/dist/esm/parquetjs/codecs/index.js +23 -0
  215. package/dist/esm/parquetjs/codecs/index.js.map +1 -0
  216. package/dist/esm/parquetjs/codecs/plain.js +200 -0
  217. package/dist/esm/parquetjs/codecs/plain.js.map +1 -0
  218. package/dist/esm/parquetjs/codecs/rle.js +119 -0
  219. package/dist/esm/parquetjs/codecs/rle.js.map +1 -0
  220. package/dist/esm/parquetjs/compression.js +61 -0
  221. package/dist/esm/parquetjs/compression.js.map +1 -0
  222. package/dist/{parquetjs/encoder/writer.js → esm/parquetjs/encoder/parquet-encoder.js} +8 -106
  223. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  224. package/dist/esm/parquetjs/modules.d.ts +21 -0
  225. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +7 -0
  226. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
  227. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +31 -0
  228. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -0
  229. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +173 -0
  230. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
  231. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +176 -0
  232. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
  233. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +347 -0
  234. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
  235. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +77 -0
  236. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
  237. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +12 -0
  238. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
  239. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +26 -0
  240. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
  241. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +132 -0
  242. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
  243. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +187 -0
  244. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
  245. package/dist/esm/parquetjs/parquet-thrift/DateType.js +31 -0
  246. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -0
  247. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +76 -0
  248. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
  249. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +93 -0
  250. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
  251. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +12 -0
  252. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -0
  253. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +31 -0
  254. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -0
  255. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -0
  256. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
  257. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +219 -0
  258. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
  259. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +31 -0
  260. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
  261. package/dist/esm/parquetjs/parquet-thrift/IntType.js +76 -0
  262. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -0
  263. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +31 -0
  264. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -0
  265. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +74 -0
  266. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
  267. package/dist/esm/parquetjs/parquet-thrift/ListType.js +31 -0
  268. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -0
  269. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +377 -0
  270. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
  271. package/dist/esm/parquetjs/parquet-thrift/MapType.js +31 -0
  272. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -0
  273. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +31 -0
  274. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
  275. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +31 -0
  276. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
  277. package/dist/esm/parquetjs/parquet-thrift/NullType.js +31 -0
  278. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -0
  279. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +69 -0
  280. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
  281. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +95 -0
  282. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
  283. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +184 -0
  284. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
  285. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +104 -0
  286. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
  287. package/dist/esm/parquetjs/parquet-thrift/PageType.js +8 -0
  288. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -0
  289. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +145 -0
  290. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
  291. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +211 -0
  292. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
  293. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +95 -0
  294. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
  295. package/dist/esm/parquetjs/parquet-thrift/Statistics.js +145 -0
  296. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -0
  297. package/dist/esm/parquetjs/parquet-thrift/StringType.js +31 -0
  298. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -0
  299. package/dist/esm/parquetjs/parquet-thrift/TimeType.js +77 -0
  300. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -0
  301. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +102 -0
  302. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
  303. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +77 -0
  304. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
  305. package/dist/esm/parquetjs/parquet-thrift/Type.js +12 -0
  306. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -0
  307. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -0
  308. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
  309. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +31 -0
  310. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
  311. package/dist/esm/parquetjs/parquet-thrift/index.js +44 -0
  312. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -0
  313. package/dist/esm/parquetjs/parser/decoders.js +253 -0
  314. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  315. package/dist/{parquetjs/parser/parquet-envelope-reader.js → esm/parquetjs/parser/parquet-reader.js} +95 -74
  316. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  317. package/dist/esm/parquetjs/schema/declare.js +12 -0
  318. package/dist/esm/parquetjs/schema/declare.js.map +1 -0
  319. package/dist/esm/parquetjs/schema/schema.js +140 -0
  320. package/dist/esm/parquetjs/schema/schema.js.map +1 -0
  321. package/dist/esm/parquetjs/schema/shred.js +228 -0
  322. package/dist/esm/parquetjs/schema/shred.js.map +1 -0
  323. package/dist/esm/parquetjs/schema/types.js +397 -0
  324. package/dist/esm/parquetjs/schema/types.js.map +1 -0
  325. package/dist/esm/parquetjs/utils/file-utils.js +34 -0
  326. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  327. package/dist/esm/parquetjs/utils/read-utils.js +90 -0
  328. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  329. package/dist/esm/workers/parquet-worker.js +4 -0
  330. package/dist/esm/workers/parquet-worker.js.map +1 -0
  331. package/dist/index.d.ts +16 -20
  332. package/dist/index.d.ts.map +1 -1
  333. package/dist/index.js +58 -15
  334. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  335. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  336. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  337. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  338. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  339. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  340. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  341. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  342. package/dist/lib/arrow/convert-schema-from-parquet.js +86 -0
  343. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  344. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  345. package/dist/lib/arrow/convert-schema-to-parquet.js +71 -0
  346. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  347. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  348. package/dist/lib/geo/decode-geo-metadata.js +77 -0
  349. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  350. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  351. package/dist/lib/geo/geoparquet-schema.js +69 -0
  352. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  353. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  354. package/dist/lib/parsers/parse-parquet-to-columns.js +46 -0
  355. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +5 -0
  356. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  357. package/dist/lib/parsers/parse-parquet-to-rows.js +37 -0
  358. package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
  359. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
  360. package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
  361. package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
  362. package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
  363. package/dist/lib/wasm/load-wasm/index.js +5 -0
  364. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
  365. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
  366. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +38 -0
  367. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
  368. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
  369. package/dist/lib/wasm/load-wasm/load-wasm-node.js +31 -0
  370. package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
  371. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
  372. package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
  373. package/dist/parquet-loader.d.ts +6 -15
  374. package/dist/parquet-loader.d.ts.map +1 -1
  375. package/dist/parquet-loader.js +38 -19
  376. package/dist/parquet-wasm-loader.d.ts +23 -0
  377. package/dist/parquet-wasm-loader.d.ts.map +1 -0
  378. package/dist/parquet-wasm-loader.js +27 -0
  379. package/dist/parquet-wasm-writer.d.ts +3 -0
  380. package/dist/parquet-wasm-writer.d.ts.map +1 -0
  381. package/dist/parquet-wasm-writer.js +23 -0
  382. package/dist/parquet-worker.js +27 -25
  383. package/dist/parquet-worker.js.map +3 -3
  384. package/dist/parquet-writer.d.ts +3 -2
  385. package/dist/parquet-writer.d.ts.map +1 -1
  386. package/dist/parquet-writer.js +18 -14
  387. package/dist/parquetjs/codecs/declare.js +2 -2
  388. package/dist/parquetjs/codecs/dictionary.js +12 -10
  389. package/dist/parquetjs/codecs/index.js +54 -22
  390. package/dist/parquetjs/codecs/plain.js +173 -232
  391. package/dist/parquetjs/codecs/rle.js +134 -140
  392. package/dist/parquetjs/compression.d.ts +3 -0
  393. package/dist/parquetjs/compression.d.ts.map +1 -1
  394. package/dist/parquetjs/compression.js +169 -48
  395. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +15 -23
  396. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  397. package/dist/parquetjs/encoder/parquet-encoder.js +484 -0
  398. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +14 -7
  399. package/dist/parquetjs/parquet-thrift/BsonType.js +60 -37
  400. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +209 -215
  401. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +210 -211
  402. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +394 -421
  403. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +102 -89
  404. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +19 -12
  405. package/dist/parquetjs/parquet-thrift/ConvertedType.js +33 -26
  406. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +165 -161
  407. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +228 -234
  408. package/dist/parquetjs/parquet-thrift/DateType.js +60 -37
  409. package/dist/parquetjs/parquet-thrift/DecimalType.js +104 -90
  410. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +121 -112
  411. package/dist/parquetjs/parquet-thrift/Encoding.js +19 -12
  412. package/dist/parquetjs/parquet-thrift/EnumType.js +60 -37
  413. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -7
  414. package/dist/parquetjs/parquet-thrift/FileMetaData.js +253 -263
  415. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +60 -37
  416. package/dist/parquetjs/parquet-thrift/IntType.js +104 -90
  417. package/dist/parquetjs/parquet-thrift/JsonType.js +60 -37
  418. package/dist/parquetjs/parquet-thrift/KeyValue.js +101 -88
  419. package/dist/parquetjs/parquet-thrift/ListType.js +60 -37
  420. package/dist/parquetjs/parquet-thrift/LogicalType.js +366 -449
  421. package/dist/parquetjs/parquet-thrift/MapType.js +60 -37
  422. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +60 -37
  423. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +60 -37
  424. package/dist/parquetjs/parquet-thrift/NullType.js +60 -37
  425. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +96 -80
  426. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +126 -114
  427. package/dist/parquetjs/parquet-thrift/PageHeader.js +218 -231
  428. package/dist/parquetjs/parquet-thrift/PageLocation.js +140 -123
  429. package/dist/parquetjs/parquet-thrift/PageType.js +15 -8
  430. package/dist/parquetjs/parquet-thrift/RowGroup.js +179 -171
  431. package/dist/parquetjs/parquet-thrift/SchemaElement.js +241 -268
  432. package/dist/parquetjs/parquet-thrift/SortingColumn.js +126 -114
  433. package/dist/parquetjs/parquet-thrift/Statistics.js +175 -178
  434. package/dist/parquetjs/parquet-thrift/StringType.js +60 -37
  435. package/dist/parquetjs/parquet-thrift/TimeType.js +105 -91
  436. package/dist/parquetjs/parquet-thrift/TimeUnit.js +124 -119
  437. package/dist/parquetjs/parquet-thrift/TimestampType.js +105 -91
  438. package/dist/parquetjs/parquet-thrift/Type.js +19 -12
  439. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +60 -37
  440. package/dist/parquetjs/parquet-thrift/UUIDType.js +60 -37
  441. package/dist/parquetjs/parquet-thrift/index.js +65 -44
  442. package/dist/parquetjs/parser/decoders.d.ts +2 -2
  443. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  444. package/dist/parquetjs/parser/decoders.js +301 -283
  445. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  446. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  447. package/dist/parquetjs/parser/parquet-reader.js +193 -113
  448. package/dist/parquetjs/schema/declare.d.ts +26 -18
  449. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  450. package/dist/parquetjs/schema/declare.js +11 -12
  451. package/dist/parquetjs/schema/schema.d.ts +4 -4
  452. package/dist/parquetjs/schema/schema.d.ts.map +1 -1
  453. package/dist/parquetjs/schema/schema.js +148 -162
  454. package/dist/parquetjs/schema/shred.d.ts +33 -12
  455. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  456. package/dist/parquetjs/schema/shred.js +340 -147
  457. package/dist/parquetjs/schema/types.d.ts +2 -2
  458. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  459. package/dist/parquetjs/schema/types.js +355 -415
  460. package/dist/parquetjs/utils/file-utils.d.ts +5 -4
  461. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  462. package/dist/parquetjs/utils/file-utils.js +37 -28
  463. package/dist/parquetjs/utils/read-utils.js +99 -95
  464. package/dist/workers/parquet-worker.js +5 -4
  465. package/package.json +17 -12
  466. package/src/index.ts +58 -7
  467. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  468. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  469. package/src/lib/arrow/convert-schema-from-parquet.ts +104 -0
  470. package/src/lib/arrow/convert-schema-to-parquet.ts +90 -0
  471. package/src/lib/geo/decode-geo-metadata.ts +108 -0
  472. package/src/lib/geo/geoparquet-schema.ts +69 -0
  473. package/src/lib/parsers/parse-parquet-to-columns.ts +60 -0
  474. package/src/lib/parsers/parse-parquet-to-rows.ts +45 -0
  475. package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
  476. package/src/lib/wasm/load-wasm/index.ts +1 -0
  477. package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
  478. package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
  479. package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
  480. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  481. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  482. package/src/parquet-loader.ts +30 -3
  483. package/src/parquet-wasm-loader.ts +36 -0
  484. package/src/parquet-wasm-writer.ts +24 -0
  485. package/src/parquet-writer.ts +4 -1
  486. package/src/parquetjs/compression.ts +24 -7
  487. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +33 -38
  488. package/src/parquetjs/parser/decoders.ts +3 -3
  489. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  490. package/src/parquetjs/schema/declare.ts +22 -13
  491. package/src/parquetjs/schema/schema.ts +8 -8
  492. package/src/parquetjs/schema/shred.ts +239 -71
  493. package/src/parquetjs/schema/types.ts +25 -30
  494. package/src/parquetjs/utils/file-utils.ts +3 -4
  495. package/dist/bundle.js.map +0 -1
  496. package/dist/constants.js.map +0 -1
  497. package/dist/index.js.map +0 -1
  498. package/dist/lib/convert-schema.d.ts +0 -8
  499. package/dist/lib/convert-schema.d.ts.map +0 -1
  500. package/dist/lib/convert-schema.js +0 -71
  501. package/dist/lib/convert-schema.js.map +0 -1
  502. package/dist/lib/parse-parquet.d.ts +0 -4
  503. package/dist/lib/parse-parquet.d.ts.map +0 -1
  504. package/dist/lib/parse-parquet.js +0 -28
  505. package/dist/lib/parse-parquet.js.map +0 -1
  506. package/dist/lib/read-array-buffer.d.ts +0 -19
  507. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  508. package/dist/lib/read-array-buffer.js +0 -9
  509. package/dist/lib/read-array-buffer.js.map +0 -1
  510. package/dist/parquet-loader.js.map +0 -1
  511. package/dist/parquet-writer.js.map +0 -1
  512. package/dist/parquetjs/codecs/declare.js.map +0 -1
  513. package/dist/parquetjs/codecs/dictionary.js.map +0 -1
  514. package/dist/parquetjs/codecs/index.js.map +0 -1
  515. package/dist/parquetjs/codecs/plain.js.map +0 -1
  516. package/dist/parquetjs/codecs/rle.js.map +0 -1
  517. package/dist/parquetjs/compression.js.map +0 -1
  518. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  519. package/dist/parquetjs/encoder/writer.js.map +0 -1
  520. package/dist/parquetjs/file.d.ts +0 -10
  521. package/dist/parquetjs/file.d.ts.map +0 -1
  522. package/dist/parquetjs/file.js +0 -80
  523. package/dist/parquetjs/file.js.map +0 -1
  524. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  525. package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  526. package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  527. package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  528. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  529. package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  530. package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  531. package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  532. package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  533. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  534. package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
  535. package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  536. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  537. package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  538. package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  539. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  540. package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  541. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  542. package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
  543. package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  544. package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  545. package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
  546. package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  547. package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
  548. package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  549. package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  550. package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
  551. package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  552. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  553. package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  554. package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  555. package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
  556. package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  557. package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  558. package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  559. package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  560. package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
  561. package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  562. package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  563. package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  564. package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
  565. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  566. package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  567. package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
  568. package/dist/parquetjs/parser/decoders.js.map +0 -1
  569. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  570. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  571. package/dist/parquetjs/parser/parquet-cursor.js +0 -90
  572. package/dist/parquetjs/parser/parquet-cursor.js.map +0 -1
  573. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  574. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  575. package/dist/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  576. package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
  577. package/dist/parquetjs/schema/declare.js.map +0 -1
  578. package/dist/parquetjs/schema/schema.js.map +0 -1
  579. package/dist/parquetjs/schema/shred.js.map +0 -1
  580. package/dist/parquetjs/schema/types.js.map +0 -1
  581. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  582. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  583. package/dist/parquetjs/utils/buffer-utils.js +0 -12
  584. package/dist/parquetjs/utils/buffer-utils.js.map +0 -1
  585. package/dist/parquetjs/utils/file-utils.js.map +0 -1
  586. package/dist/parquetjs/utils/read-utils.js.map +0 -1
  587. package/dist/workers/parquet-worker.js.map +0 -1
  588. package/src/lib/convert-schema.ts +0 -95
  589. package/src/lib/parse-parquet.ts +0 -27
  590. package/src/lib/read-array-buffer.ts +0 -31
  591. package/src/parquetjs/file.ts +0 -90
  592. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  593. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  594. package/src/parquetjs/utils/buffer-utils.ts +0 -18
  595. /package/dist/{parquetjs → es5/parquetjs}/LICENSE +0 -0
  596. /package/dist/{parquetjs → es5/parquetjs}/modules.d.ts +0 -0
@@ -1,163 +1,280 @@
1
1
  // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
2
- import {ParquetEnvelopeReader} from './parquet-envelope-reader';
3
- import {FileMetaData} from '../parquet-thrift';
2
+ import type {ReadableFile} from '@loaders.gl/loader-utils';
3
+
4
4
  import {ParquetSchema} from '../schema/schema';
5
- import {ParquetCursor} from './parquet-cursor';
6
- import {PARQUET_VERSION} from '../../constants';
7
5
  import {decodeSchema} from './decoders';
6
+ import {materializeRows} from '../schema/shred';
7
+
8
+ import {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';
9
+ import {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';
10
+ import {
11
+ ParquetRowGroup,
12
+ ParquetCompression,
13
+ ParquetColumnChunk,
14
+ PrimitiveType,
15
+ ParquetOptions
16
+ } from '../schema/declare';
17
+ import {decodeFileMetadata, getThriftEnum, fieldIndexOf} from '../utils/read-utils';
18
+ import {decodeDataPages, decodePage} from './decoders';
19
+
20
+ export type ParquetReaderProps = {
21
+ defaultDictionarySize?: number;
22
+ };
23
+
24
+ /** Properties for initializing a ParquetRowGroupReader */
25
+ export type ParquetIterationProps = {
26
+ /** Filter allowing some columns to be dropped */
27
+ columnList?: string[] | string[][];
28
+ };
29
+
30
+ const DEFAULT_PROPS: Required<ParquetReaderProps> = {
31
+ defaultDictionarySize: 1e6
32
+ };
8
33
 
9
34
  /**
10
- * A parquet reader allows retrieving the rows from a parquet file in order.
11
- * The basic usage is to create a reader and then retrieve a cursor/iterator
12
- * which allows you to consume row after row until all rows have been read. It is
13
- * important that you call close() after you are finished reading the file to
14
- * avoid leaking file descriptors.
35
+ * The parquet envelope reader allows direct, unbuffered access to the individual
36
+ * sections of the parquet file, namely the header, footer and the row groups.
37
+ * This class is intended for advanced/internal users; if you just want to retrieve
38
+ * rows from a parquet file use the ParquetReader instead
15
39
  */
16
- export class ParquetReader<T> implements AsyncIterable<T> {
17
- /**
18
- * return a new parquet reader initialized with a read function
19
- */
20
- static async openBlob<T>(blob: Blob): Promise<ParquetReader<T>> {
21
- const readFn = async (start: number, length: number) => {
22
- const arrayBuffer = await blob.slice(start, start + length).arrayBuffer();
23
- return Buffer.from(arrayBuffer);
24
- };
25
- const closeFn = async () => {};
26
- const size = blob.size;
27
- const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
28
- try {
29
- await envelopeReader.readHeader();
30
- const metadata = await envelopeReader.readFooter();
31
- return new ParquetReader(metadata, envelopeReader);
32
- } catch (err) {
33
- await envelopeReader.close();
34
- throw err;
35
- }
40
+ export class ParquetReader {
41
+ props: Required<ParquetReaderProps>;
42
+ file: ReadableFile;
43
+ metadata: Promise<FileMetaData> | null = null;
44
+
45
+ constructor(file: ReadableFile, props?: ParquetReaderProps) {
46
+ this.file = file;
47
+ this.props = {...DEFAULT_PROPS, ...props};
36
48
  }
37
49
 
38
- /**
39
- * return a new parquet reader initialized with a read function
40
- */
41
- static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {
42
- const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);
43
- const closeFn = async () => {};
44
- const size = arrayBuffer.byteLength;
45
- const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
46
- try {
47
- await envelopeReader.readHeader();
48
- const metadata = await envelopeReader.readFooter();
49
- return new ParquetReader(metadata, envelopeReader);
50
- } catch (err) {
51
- await envelopeReader.close();
52
- throw err;
50
+ close(): void {
51
+ // eslint-disable-next-line @typescript-eslint/no-floating-promises
52
+ this.file.close();
53
+ }
54
+
55
+ // HIGH LEVEL METHODS
56
+
57
+ /** Yield one row at a time */
58
+ async *rowIterator(props?: ParquetIterationProps) {
59
+ for await (const rows of this.rowBatchIterator(props)) {
60
+ // yield *rows
61
+ for (const row of rows) {
62
+ yield row;
63
+ }
53
64
  }
54
65
  }
55
66
 
56
- static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {
57
- const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);
58
- try {
59
- await envelopeReader.readHeader();
60
- const metadata = await envelopeReader.readFooter();
61
- return new ParquetReader<T>(metadata, envelopeReader);
62
- } catch (err) {
63
- await envelopeReader.close();
64
- throw err;
67
+ /** Yield one batch of rows at a time */
68
+ async *rowBatchIterator(props?: ParquetIterationProps) {
69
+ const schema = await this.getSchema();
70
+ for await (const rowGroup of this.rowGroupIterator(props)) {
71
+ yield materializeRows(schema, rowGroup);
65
72
  }
66
73
  }
67
74
 
68
- public metadata: FileMetaData;
69
- public envelopeReader: ParquetEnvelopeReader;
70
- public schema: ParquetSchema;
75
+ /** Iterate over the raw row groups */
76
+ async *rowGroupIterator(props?: ParquetIterationProps) {
77
+ // Ensure strings are nested in arrays
78
+ const columnList: string[][] = (props?.columnList || []).map((x) =>
79
+ Array.isArray(x) ? x : [x]
80
+ );
71
81
 
72
- /**
73
- * Create a new parquet reader from the file metadata and an envelope reader.
74
- * It is not recommended to call this constructor directly except for advanced
75
- * and internal use cases. Consider using one of the open{File,Buffer} methods
76
- * instead
77
- */
78
- constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {
79
- if (metadata.version !== PARQUET_VERSION) {
80
- throw new Error('invalid parquet version');
82
+ const metadata = await this.getFileMetadata();
83
+ const schema = await this.getSchema();
84
+
85
+ const rowGroupCount = metadata?.row_groups.length || 0;
86
+
87
+ for (let rowGroupIndex = 0; rowGroupIndex < rowGroupCount; rowGroupIndex++) {
88
+ const rowGroup = await this.readRowGroup(
89
+ schema,
90
+ metadata.row_groups[rowGroupIndex],
91
+ columnList
92
+ );
93
+ yield rowGroup;
81
94
  }
95
+ }
82
96
 
83
- this.metadata = metadata;
84
- this.envelopeReader = envelopeReader;
85
- const root = this.metadata.schema[0];
86
- const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);
87
- this.schema = new ParquetSchema(schema);
97
+ async getRowCount(): Promise<number> {
98
+ const metadata = await this.getFileMetadata();
99
+ return Number(metadata.num_rows);
88
100
  }
89
101
 
90
- /**
91
- * Close this parquet reader. You MUST call this method once you're finished
92
- * reading rows
93
- */
94
- async close(): Promise<void> {
95
- await this.envelopeReader.close();
96
- // this.envelopeReader = null;
97
- // this.metadata = null;
102
+ async getSchema(): Promise<ParquetSchema> {
103
+ const metadata = await this.getFileMetadata();
104
+ const root = metadata.schema[0];
105
+ const {schema: schemaDefinition} = decodeSchema(metadata.schema, 1, root.num_children!);
106
+ const schema = new ParquetSchema(schemaDefinition);
107
+ return schema;
98
108
  }
99
109
 
100
110
  /**
101
- * Return a cursor to the file. You may open more than one cursor and use
102
- * them concurrently. All cursors become invalid once close() is called on
103
- * the reader object.
104
- *
105
- * The required_columns parameter controls which columns are actually read
106
- * from disk. An empty array or no value implies all columns. A list of column
107
- * names means that only those columns should be loaded from disk.
111
+ * Returns the user (key/value) metadata for this file
112
+ * In parquet this is not stored on the schema like it is in arrow
108
113
  */
109
- getCursor(): ParquetCursor<T>;
110
- // @ts-ignore
111
- getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;
112
- getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;
113
- getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {
114
- if (!columnList) {
115
- // tslint:disable-next-line:no-parameter-reassignment
116
- columnList = [];
114
+ async getSchemaMetadata(): Promise<Record<string, string>> {
115
+ const metadata = await this.getFileMetadata();
116
+ const md: Record<string, string> = {};
117
+ for (const kv of metadata.key_value_metadata!) {
118
+ md[kv.key] = kv.value!;
117
119
  }
120
+ return md;
121
+ }
122
+
123
+ async getFileMetadata(): Promise<FileMetaData> {
124
+ if (!this.metadata) {
125
+ await this.readHeader();
126
+ this.metadata = this.readFooter();
127
+ }
128
+ return this.metadata;
129
+ }
118
130
 
119
- // tslint:disable-next-line:no-parameter-reassignment
120
- columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));
131
+ // LOW LEVEL METHODS
121
132
 
122
- return new ParquetCursor<T>(
123
- this.metadata,
124
- this.envelopeReader,
125
- this.schema,
126
- columnList as string[][]
127
- );
133
+ /** Metadata is stored in the footer */
134
+ async readHeader(): Promise<void> {
135
+ const buffer = await this.file.read(0, PARQUET_MAGIC.length);
136
+ const magic = buffer.toString();
137
+ switch (magic) {
138
+ case PARQUET_MAGIC:
139
+ break;
140
+ case PARQUET_MAGIC_ENCRYPTED:
141
+ throw new Error('Encrypted parquet file not supported');
142
+ default:
143
+ throw new Error(`Invalid parquet file (magic=${magic})`);
144
+ }
128
145
  }
129
146
 
130
- /**
131
- * Return the number of rows in this file. Note that the number of rows is
132
- * not neccessarily equal to the number of rows in each column.
133
- */
134
- getRowCount(): number {
135
- return Number(this.metadata.num_rows);
147
+ /** Metadata is stored in the footer */
148
+ async readFooter(): Promise<FileMetaData> {
149
+ const trailerLen = PARQUET_MAGIC.length + 4;
150
+ const trailerBuf = await this.file.read(this.file.size - trailerLen, trailerLen);
151
+
152
+ const magic = trailerBuf.slice(4).toString();
153
+ if (magic !== PARQUET_MAGIC) {
154
+ throw new Error(`Not a valid parquet file (magic="${magic})`);
155
+ }
156
+
157
+ const metadataSize = trailerBuf.readUInt32LE(0);
158
+ const metadataOffset = this.file.size - metadataSize - trailerLen;
159
+ if (metadataOffset < PARQUET_MAGIC.length) {
160
+ throw new Error(`Invalid metadata size ${metadataOffset}`);
161
+ }
162
+
163
+ const metadataBuf = await this.file.read(metadataOffset, metadataSize);
164
+ // let metadata = new parquet_thrift.FileMetaData();
165
+ // parquet_util.decodeThrift(metadata, metadataBuf);
166
+ const {metadata} = decodeFileMetadata(metadataBuf);
167
+ return metadata;
136
168
  }
137
169
 
138
- /**
139
- * Returns the ParquetSchema for this file
140
- */
141
- getSchema(): ParquetSchema {
142
- return this.schema;
170
+ /** Data is stored in row groups (similar to Apache Arrow record batches) */
171
+ async readRowGroup(
172
+ schema: ParquetSchema,
173
+ rowGroup: RowGroup,
174
+ columnList: string[][]
175
+ ): Promise<ParquetRowGroup> {
176
+ const buffer: ParquetRowGroup = {
177
+ rowCount: Number(rowGroup.num_rows),
178
+ columnData: {}
179
+ };
180
+ for (const colChunk of rowGroup.columns) {
181
+ const colMetadata = colChunk.meta_data;
182
+ const colKey = colMetadata?.path_in_schema;
183
+ if (columnList.length > 0 && fieldIndexOf(columnList, colKey!) < 0) {
184
+ continue; // eslint-disable-line no-continue
185
+ }
186
+ buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);
187
+ }
188
+ return buffer;
143
189
  }
144
190
 
145
191
  /**
146
- * Returns the user (key/value) metadata for this file
192
+ * Each row group contains column chunks for all the columns.
147
193
  */
148
- getMetadata(): Record<string, string> {
149
- const md: Record<string, string> = {};
150
- for (const kv of this.metadata.key_value_metadata!) {
151
- md[kv.key] = kv.value!;
194
+ async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetColumnChunk> {
195
+ if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
196
+ throw new Error('external references are not supported');
152
197
  }
153
- return md;
198
+
199
+ const field = schema.findField(colChunk.meta_data?.path_in_schema!);
200
+ const type: PrimitiveType = getThriftEnum(Type, colChunk.meta_data?.type!) as any;
201
+
202
+ if (type !== field.primitiveType) {
203
+ throw new Error(`chunk type not matching schema: ${type}`);
204
+ }
205
+
206
+ const compression: ParquetCompression = getThriftEnum(
207
+ CompressionCodec,
208
+ colChunk.meta_data?.codec!
209
+ ) as any;
210
+
211
+ const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);
212
+ let pagesSize = Number(colChunk.meta_data?.total_compressed_size!);
213
+
214
+ if (!colChunk.file_path) {
215
+ pagesSize = Math.min(
216
+ this.file.size - pagesOffset,
217
+ Number(colChunk.meta_data?.total_compressed_size)
218
+ );
219
+ }
220
+
221
+ const options: ParquetOptions = {
222
+ type,
223
+ rLevelMax: field.rLevelMax,
224
+ dLevelMax: field.dLevelMax,
225
+ compression,
226
+ column: field,
227
+ numValues: colChunk.meta_data?.num_values,
228
+ dictionary: []
229
+ };
230
+
231
+ let dictionary;
232
+
233
+ const dictionaryPageOffset = colChunk?.meta_data?.dictionary_page_offset;
234
+
235
+ if (dictionaryPageOffset) {
236
+ const dictionaryOffset = Number(dictionaryPageOffset);
237
+ // Getting dictionary from column chunk to iterate all over indexes to get dataPage values.
238
+ dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);
239
+ }
240
+
241
+ dictionary = options.dictionary?.length ? options.dictionary : dictionary;
242
+ const pagesBuf = await this.file.read(pagesOffset, pagesSize);
243
+ return await decodeDataPages(pagesBuf, {...options, dictionary});
154
244
  }
155
245
 
156
246
  /**
157
- * Implement AsyncIterable
247
+ * Getting dictionary for allows to flatten values by indices.
248
+ * @param dictionaryPageOffset
249
+ * @param options
250
+ * @param pagesOffset
251
+ * @returns
158
252
  */
159
- // tslint:disable-next-line:function-name
160
- [Symbol.asyncIterator](): AsyncIterator<T> {
161
- return this.getCursor()[Symbol.asyncIterator]();
253
+ async getDictionary(
254
+ dictionaryPageOffset: number,
255
+ options: ParquetOptions,
256
+ pagesOffset: number
257
+ ): Promise<string[]> {
258
+ if (dictionaryPageOffset === 0) {
259
+ // dictionarySize = Math.min(this.fileSize - pagesOffset, this.defaultDictionarySize);
260
+ // pagesBuf = await this.read(pagesOffset, dictionarySize);
261
+
262
+ // In this case we are working with parquet-mr files format. Problem is described below:
263
+ // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding
264
+ // We need to get dictionary page from column chunk if it exists.
265
+ // Now if we use code commented above we don't get DICTIONARY_PAGE we get DATA_PAGE instead.
266
+ return [];
267
+ }
268
+
269
+ const dictionarySize = Math.min(
270
+ this.file.size - dictionaryPageOffset,
271
+ this.props.defaultDictionarySize
272
+ );
273
+ const pagesBuf = await this.file.read(dictionaryPageOffset, dictionarySize);
274
+
275
+ const cursor = {buffer: pagesBuf, offset: 0, size: pagesBuf.length};
276
+ const decodedPage = await decodePage(cursor, options);
277
+
278
+ return decodedPage.dictionary!;
162
279
  }
163
280
  }
@@ -98,6 +98,7 @@ export interface ParquetField {
98
98
  fields?: Record<string, ParquetField>;
99
99
  }
100
100
 
101
+ /** @todo better name, this is an internal type? */
101
102
  export interface ParquetOptions {
102
103
  type: ParquetType;
103
104
  rLevelMax: number;
@@ -108,32 +109,40 @@ export interface ParquetOptions {
108
109
  dictionary?: ParquetDictionary;
109
110
  }
110
111
 
111
- export interface ParquetData {
112
- dlevels: number[];
113
- rlevels: number[];
114
- values: any[];
115
- count: number;
116
- pageHeaders: PageHeader[];
117
- }
118
-
119
112
  export interface ParquetPageData {
120
113
  dlevels: number[];
121
114
  rlevels: number[];
122
- values: any[];
115
+ /** Actual column chunks */
116
+ values: any[]; // ArrayLike<any>;
123
117
  count: number;
124
118
  dictionary?: ParquetDictionary;
119
+ /** The "raw" page header from the file */
125
120
  pageHeader: PageHeader;
126
121
  }
127
122
 
128
- export interface ParquetRecord {
123
+ export interface ParquetRow {
129
124
  [key: string]: any;
130
125
  }
131
126
 
132
- export class ParquetBuffer {
127
+ /** @
128
+ * Holds data for one row group (column chunks) */
129
+ export class ParquetRowGroup {
130
+ /** Number of rows in this page */
133
131
  rowCount: number;
134
- columnData: Record<string, ParquetData>;
135
- constructor(rowCount: number = 0, columnData: Record<string, ParquetData> = {}) {
132
+ /** Map of Column chunks */
133
+ columnData: Record<string, ParquetColumnChunk>;
134
+
135
+ constructor(rowCount: number = 0, columnData: Record<string, ParquetColumnChunk> = {}) {
136
136
  this.rowCount = rowCount;
137
137
  this.columnData = columnData;
138
138
  }
139
139
  }
140
+
141
+ /** Holds the data for one column chunk */
142
+ export interface ParquetColumnChunk {
143
+ dlevels: number[];
144
+ rlevels: number[];
145
+ values: any[];
146
+ count: number;
147
+ pageHeaders: PageHeader[];
148
+ }
@@ -4,14 +4,14 @@ import {PARQUET_CODECS} from '../codecs';
4
4
  import {PARQUET_COMPRESSION_METHODS} from '../compression';
5
5
  import {
6
6
  FieldDefinition,
7
- ParquetBuffer,
7
+ ParquetRowGroup,
8
8
  ParquetCompression,
9
9
  ParquetField,
10
- ParquetRecord,
10
+ ParquetRow,
11
11
  RepetitionType,
12
12
  SchemaDefinition
13
13
  } from './declare';
14
- import {materializeRecords, shredBuffer, shredRecord} from './shred';
14
+ import {materializeRows, shredBuffer, shredRecord} from './shred';
15
15
  import {PARQUET_LOGICAL_TYPES} from './types';
16
16
 
17
17
  /**
@@ -70,12 +70,12 @@ export class ParquetSchema {
70
70
  return branch;
71
71
  }
72
72
 
73
- shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {
74
- shredRecord(this, record, buffer);
73
+ shredRecord(row: ParquetRow, rowGroup: ParquetRowGroup): void {
74
+ shredRecord(this, row, rowGroup);
75
75
  }
76
76
 
77
- materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {
78
- return materializeRecords(this, buffer);
77
+ materializeRows(rowGroup: ParquetRowGroup): ParquetRow[] {
78
+ return materializeRows(this, rowGroup);
79
79
  }
80
80
 
81
81
  compress(type: ParquetCompression): this {
@@ -84,7 +84,7 @@ export class ParquetSchema {
84
84
  return this;
85
85
  }
86
86
 
87
- buffer(): ParquetBuffer {
87
+ rowGroup(): ParquetRowGroup {
88
88
  return shredBuffer(this);
89
89
  }
90
90
  }