@loaders.gl/parquet 4.0.0-alpha.5 → 4.0.0-alpha.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (596) hide show
  1. package/dist/bundle.js +2 -2
  2. package/dist/constants.js +18 -6
  3. package/dist/dist.min.js +27 -25
  4. package/dist/dist.min.js.map +3 -3
  5. package/dist/es5/bundle.js +6 -0
  6. package/dist/es5/bundle.js.map +1 -0
  7. package/dist/es5/constants.js +17 -0
  8. package/dist/es5/constants.js.map +1 -0
  9. package/dist/es5/index.js +128 -0
  10. package/dist/es5/index.js.map +1 -0
  11. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  12. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  13. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +19 -0
  14. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  15. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +114 -0
  16. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  17. package/dist/es5/lib/arrow/convert-schema-to-parquet.js +47 -0
  18. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  19. package/dist/es5/lib/geo/decode-geo-metadata.js +81 -0
  20. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  21. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  22. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  23. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  24. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  25. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +172 -0
  26. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  27. package/dist/es5/lib/wasm/encode-parquet-wasm.js +43 -0
  28. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
  29. package/dist/es5/lib/wasm/load-wasm/index.js +13 -0
  30. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
  31. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +42 -0
  32. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  33. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +31 -0
  34. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  35. package/dist/es5/lib/wasm/parse-parquet-wasm.js +60 -0
  36. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
  37. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  38. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  39. package/dist/es5/parquet-loader.js +44 -0
  40. package/dist/es5/parquet-loader.js.map +1 -0
  41. package/dist/es5/parquet-wasm-loader.js +30 -0
  42. package/dist/es5/parquet-wasm-loader.js.map +1 -0
  43. package/dist/es5/parquet-wasm-writer.js +26 -0
  44. package/dist/es5/parquet-wasm-writer.js.map +1 -0
  45. package/dist/es5/parquet-writer.js +24 -0
  46. package/dist/es5/parquet-writer.js.map +1 -0
  47. package/dist/es5/parquetjs/codecs/declare.js +2 -0
  48. package/dist/es5/parquetjs/codecs/declare.js.map +1 -0
  49. package/dist/es5/parquetjs/codecs/dictionary.js +23 -0
  50. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  51. package/dist/es5/parquetjs/codecs/index.js +47 -0
  52. package/dist/es5/parquetjs/codecs/index.js.map +1 -0
  53. package/dist/es5/parquetjs/codecs/plain.js +208 -0
  54. package/dist/es5/parquetjs/codecs/plain.js.map +1 -0
  55. package/dist/es5/parquetjs/codecs/rle.js +132 -0
  56. package/dist/es5/parquetjs/codecs/rle.js.map +1 -0
  57. package/dist/es5/parquetjs/compression.js +137 -0
  58. package/dist/es5/parquetjs/compression.js.map +1 -0
  59. package/dist/es5/parquetjs/encoder/parquet-encoder.js +625 -0
  60. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  61. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +14 -0
  62. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
  63. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +52 -0
  64. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -0
  65. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +193 -0
  66. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
  67. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +198 -0
  68. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
  69. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +367 -0
  70. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
  71. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +99 -0
  72. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
  73. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +19 -0
  74. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
  75. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +33 -0
  76. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
  77. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +152 -0
  78. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
  79. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +207 -0
  80. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
  81. package/dist/es5/parquetjs/parquet-thrift/DateType.js +52 -0
  82. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -0
  83. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +96 -0
  84. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
  85. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +113 -0
  86. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
  87. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +19 -0
  88. package/dist/es5/parquetjs/parquet-thrift/Encoding.js.map +1 -0
  89. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +52 -0
  90. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -0
  91. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -0
  92. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
  93. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +239 -0
  94. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
  95. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +52 -0
  96. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
  97. package/dist/es5/parquetjs/parquet-thrift/IntType.js +96 -0
  98. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -0
  99. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +52 -0
  100. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -0
  101. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +94 -0
  102. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
  103. package/dist/es5/parquetjs/parquet-thrift/ListType.js +52 -0
  104. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -0
  105. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +423 -0
  106. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
  107. package/dist/es5/parquetjs/parquet-thrift/MapType.js +52 -0
  108. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -0
  109. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +52 -0
  110. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
  111. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +52 -0
  112. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
  113. package/dist/es5/parquetjs/parquet-thrift/NullType.js +52 -0
  114. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -0
  115. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +89 -0
  116. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
  117. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +115 -0
  118. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
  119. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +204 -0
  120. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
  121. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +124 -0
  122. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
  123. package/dist/es5/parquetjs/parquet-thrift/PageType.js +15 -0
  124. package/dist/es5/parquetjs/parquet-thrift/PageType.js.map +1 -0
  125. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +165 -0
  126. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
  127. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +231 -0
  128. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
  129. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +115 -0
  130. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
  131. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +165 -0
  132. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -0
  133. package/dist/es5/parquetjs/parquet-thrift/StringType.js +52 -0
  134. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -0
  135. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +97 -0
  136. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -0
  137. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +126 -0
  138. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
  139. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +97 -0
  140. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
  141. package/dist/es5/parquetjs/parquet-thrift/Type.js +19 -0
  142. package/dist/es5/parquetjs/parquet-thrift/Type.js.map +1 -0
  143. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +52 -0
  144. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
  145. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +52 -0
  146. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
  147. package/dist/es5/parquetjs/parquet-thrift/index.js +479 -0
  148. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -0
  149. package/dist/es5/parquetjs/parser/decoders.js +393 -0
  150. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  151. package/dist/es5/parquetjs/parser/parquet-reader.js +610 -0
  152. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  153. package/dist/es5/parquetjs/schema/declare.js +21 -0
  154. package/dist/es5/parquetjs/schema/declare.js.map +1 -0
  155. package/dist/es5/parquetjs/schema/schema.js +165 -0
  156. package/dist/es5/parquetjs/schema/schema.js.map +1 -0
  157. package/dist/es5/parquetjs/schema/shred.js +282 -0
  158. package/dist/es5/parquetjs/schema/shred.js.map +1 -0
  159. package/dist/es5/parquetjs/schema/types.js +406 -0
  160. package/dist/es5/parquetjs/schema/types.js.map +1 -0
  161. package/dist/es5/parquetjs/utils/file-utils.js +47 -0
  162. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  163. package/dist/es5/parquetjs/utils/read-utils.js +120 -0
  164. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  165. package/dist/es5/workers/parquet-worker.js +6 -0
  166. package/dist/es5/workers/parquet-worker.js.map +1 -0
  167. package/dist/esm/bundle.js +4 -0
  168. package/dist/esm/bundle.js.map +1 -0
  169. package/dist/esm/constants.js +6 -0
  170. package/dist/esm/constants.js.map +1 -0
  171. package/dist/esm/index.js +31 -0
  172. package/dist/esm/index.js.map +1 -0
  173. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  174. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  175. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +8 -0
  176. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  177. package/dist/esm/lib/arrow/convert-schema-from-parquet.js +95 -0
  178. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  179. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +39 -0
  180. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  181. package/dist/esm/lib/geo/decode-geo-metadata.js +62 -0
  182. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  183. package/dist/esm/lib/geo/geoparquet-schema.js +76 -0
  184. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  185. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +39 -0
  186. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  187. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +29 -0
  188. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  189. package/dist/esm/lib/wasm/encode-parquet-wasm.js +15 -0
  190. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
  191. package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
  192. package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
  193. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +11 -0
  194. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  195. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
  196. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  197. package/dist/esm/lib/wasm/parse-parquet-wasm.js +21 -0
  198. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
  199. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  200. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  201. package/dist/esm/parquet-loader.js +36 -0
  202. package/dist/esm/parquet-loader.js.map +1 -0
  203. package/dist/esm/parquet-wasm-loader.js +22 -0
  204. package/dist/esm/parquet-wasm-loader.js.map +1 -0
  205. package/dist/esm/parquet-wasm-writer.js +19 -0
  206. package/dist/esm/parquet-wasm-writer.js.map +1 -0
  207. package/dist/esm/parquet-writer.js +17 -0
  208. package/dist/esm/parquet-writer.js.map +1 -0
  209. package/dist/esm/parquetjs/LICENSE +20 -0
  210. package/dist/esm/parquetjs/codecs/declare.js +2 -0
  211. package/dist/esm/parquetjs/codecs/declare.js.map +1 -0
  212. package/dist/esm/parquetjs/codecs/dictionary.js +13 -0
  213. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  214. package/dist/esm/parquetjs/codecs/index.js +23 -0
  215. package/dist/esm/parquetjs/codecs/index.js.map +1 -0
  216. package/dist/esm/parquetjs/codecs/plain.js +200 -0
  217. package/dist/esm/parquetjs/codecs/plain.js.map +1 -0
  218. package/dist/esm/parquetjs/codecs/rle.js +119 -0
  219. package/dist/esm/parquetjs/codecs/rle.js.map +1 -0
  220. package/dist/esm/parquetjs/compression.js +61 -0
  221. package/dist/esm/parquetjs/compression.js.map +1 -0
  222. package/dist/{parquetjs/encoder/writer.js → esm/parquetjs/encoder/parquet-encoder.js} +8 -106
  223. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  224. package/dist/esm/parquetjs/modules.d.ts +21 -0
  225. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js +7 -0
  226. package/dist/esm/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
  227. package/dist/esm/parquetjs/parquet-thrift/BsonType.js +31 -0
  228. package/dist/esm/parquetjs/parquet-thrift/BsonType.js.map +1 -0
  229. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js +173 -0
  230. package/dist/esm/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
  231. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js +176 -0
  232. package/dist/esm/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
  233. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js +347 -0
  234. package/dist/esm/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
  235. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js +77 -0
  236. package/dist/esm/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
  237. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +12 -0
  238. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
  239. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js +26 -0
  240. package/dist/esm/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
  241. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js +132 -0
  242. package/dist/esm/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
  243. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js +187 -0
  244. package/dist/esm/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
  245. package/dist/esm/parquetjs/parquet-thrift/DateType.js +31 -0
  246. package/dist/esm/parquetjs/parquet-thrift/DateType.js.map +1 -0
  247. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js +76 -0
  248. package/dist/esm/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
  249. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js +93 -0
  250. package/dist/esm/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
  251. package/dist/esm/parquetjs/parquet-thrift/Encoding.js +12 -0
  252. package/dist/esm/parquetjs/parquet-thrift/Encoding.js.map +1 -0
  253. package/dist/esm/parquetjs/parquet-thrift/EnumType.js +31 -0
  254. package/dist/esm/parquetjs/parquet-thrift/EnumType.js.map +1 -0
  255. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js +7 -0
  256. package/dist/esm/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
  257. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js +219 -0
  258. package/dist/esm/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
  259. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js +31 -0
  260. package/dist/esm/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
  261. package/dist/esm/parquetjs/parquet-thrift/IntType.js +76 -0
  262. package/dist/esm/parquetjs/parquet-thrift/IntType.js.map +1 -0
  263. package/dist/esm/parquetjs/parquet-thrift/JsonType.js +31 -0
  264. package/dist/esm/parquetjs/parquet-thrift/JsonType.js.map +1 -0
  265. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js +74 -0
  266. package/dist/esm/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
  267. package/dist/esm/parquetjs/parquet-thrift/ListType.js +31 -0
  268. package/dist/esm/parquetjs/parquet-thrift/ListType.js.map +1 -0
  269. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js +377 -0
  270. package/dist/esm/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
  271. package/dist/esm/parquetjs/parquet-thrift/MapType.js +31 -0
  272. package/dist/esm/parquetjs/parquet-thrift/MapType.js.map +1 -0
  273. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js +31 -0
  274. package/dist/esm/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
  275. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js +31 -0
  276. package/dist/esm/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
  277. package/dist/esm/parquetjs/parquet-thrift/NullType.js +31 -0
  278. package/dist/esm/parquetjs/parquet-thrift/NullType.js.map +1 -0
  279. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js +69 -0
  280. package/dist/esm/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
  281. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js +95 -0
  282. package/dist/esm/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
  283. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js +184 -0
  284. package/dist/esm/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
  285. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js +104 -0
  286. package/dist/esm/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
  287. package/dist/esm/parquetjs/parquet-thrift/PageType.js +8 -0
  288. package/dist/esm/parquetjs/parquet-thrift/PageType.js.map +1 -0
  289. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js +145 -0
  290. package/dist/esm/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
  291. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js +211 -0
  292. package/dist/esm/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
  293. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js +95 -0
  294. package/dist/esm/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
  295. package/dist/esm/parquetjs/parquet-thrift/Statistics.js +145 -0
  296. package/dist/esm/parquetjs/parquet-thrift/Statistics.js.map +1 -0
  297. package/dist/esm/parquetjs/parquet-thrift/StringType.js +31 -0
  298. package/dist/esm/parquetjs/parquet-thrift/StringType.js.map +1 -0
  299. package/dist/esm/parquetjs/parquet-thrift/TimeType.js +77 -0
  300. package/dist/esm/parquetjs/parquet-thrift/TimeType.js.map +1 -0
  301. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js +102 -0
  302. package/dist/esm/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
  303. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js +77 -0
  304. package/dist/esm/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
  305. package/dist/esm/parquetjs/parquet-thrift/Type.js +12 -0
  306. package/dist/esm/parquetjs/parquet-thrift/Type.js.map +1 -0
  307. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -0
  308. package/dist/esm/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
  309. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js +31 -0
  310. package/dist/esm/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
  311. package/dist/esm/parquetjs/parquet-thrift/index.js +44 -0
  312. package/dist/esm/parquetjs/parquet-thrift/index.js.map +1 -0
  313. package/dist/esm/parquetjs/parser/decoders.js +253 -0
  314. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  315. package/dist/{parquetjs/parser/parquet-envelope-reader.js → esm/parquetjs/parser/parquet-reader.js} +95 -74
  316. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  317. package/dist/esm/parquetjs/schema/declare.js +12 -0
  318. package/dist/esm/parquetjs/schema/declare.js.map +1 -0
  319. package/dist/esm/parquetjs/schema/schema.js +140 -0
  320. package/dist/esm/parquetjs/schema/schema.js.map +1 -0
  321. package/dist/esm/parquetjs/schema/shred.js +228 -0
  322. package/dist/esm/parquetjs/schema/shred.js.map +1 -0
  323. package/dist/esm/parquetjs/schema/types.js +397 -0
  324. package/dist/esm/parquetjs/schema/types.js.map +1 -0
  325. package/dist/esm/parquetjs/utils/file-utils.js +34 -0
  326. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  327. package/dist/esm/parquetjs/utils/read-utils.js +90 -0
  328. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  329. package/dist/esm/workers/parquet-worker.js +4 -0
  330. package/dist/esm/workers/parquet-worker.js.map +1 -0
  331. package/dist/index.d.ts +16 -20
  332. package/dist/index.d.ts.map +1 -1
  333. package/dist/index.js +58 -15
  334. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  335. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  336. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  337. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  338. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  339. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  340. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  341. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  342. package/dist/lib/arrow/convert-schema-from-parquet.js +86 -0
  343. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  344. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  345. package/dist/lib/arrow/convert-schema-to-parquet.js +71 -0
  346. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  347. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  348. package/dist/lib/geo/decode-geo-metadata.js +77 -0
  349. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  350. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  351. package/dist/lib/geo/geoparquet-schema.js +69 -0
  352. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  353. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  354. package/dist/lib/parsers/parse-parquet-to-columns.js +46 -0
  355. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +5 -0
  356. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  357. package/dist/lib/parsers/parse-parquet-to-rows.js +37 -0
  358. package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
  359. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
  360. package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
  361. package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
  362. package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
  363. package/dist/lib/wasm/load-wasm/index.js +5 -0
  364. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
  365. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
  366. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +38 -0
  367. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
  368. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
  369. package/dist/lib/wasm/load-wasm/load-wasm-node.js +31 -0
  370. package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
  371. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
  372. package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
  373. package/dist/parquet-loader.d.ts +6 -15
  374. package/dist/parquet-loader.d.ts.map +1 -1
  375. package/dist/parquet-loader.js +38 -19
  376. package/dist/parquet-wasm-loader.d.ts +23 -0
  377. package/dist/parquet-wasm-loader.d.ts.map +1 -0
  378. package/dist/parquet-wasm-loader.js +27 -0
  379. package/dist/parquet-wasm-writer.d.ts +3 -0
  380. package/dist/parquet-wasm-writer.d.ts.map +1 -0
  381. package/dist/parquet-wasm-writer.js +23 -0
  382. package/dist/parquet-worker.js +27 -25
  383. package/dist/parquet-worker.js.map +3 -3
  384. package/dist/parquet-writer.d.ts +3 -2
  385. package/dist/parquet-writer.d.ts.map +1 -1
  386. package/dist/parquet-writer.js +18 -14
  387. package/dist/parquetjs/codecs/declare.js +2 -2
  388. package/dist/parquetjs/codecs/dictionary.js +12 -10
  389. package/dist/parquetjs/codecs/index.js +54 -22
  390. package/dist/parquetjs/codecs/plain.js +173 -232
  391. package/dist/parquetjs/codecs/rle.js +134 -140
  392. package/dist/parquetjs/compression.d.ts +3 -0
  393. package/dist/parquetjs/compression.d.ts.map +1 -1
  394. package/dist/parquetjs/compression.js +169 -48
  395. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +15 -23
  396. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  397. package/dist/parquetjs/encoder/parquet-encoder.js +484 -0
  398. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +14 -7
  399. package/dist/parquetjs/parquet-thrift/BsonType.js +60 -37
  400. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +209 -215
  401. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +210 -211
  402. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +394 -421
  403. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +102 -89
  404. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +19 -12
  405. package/dist/parquetjs/parquet-thrift/ConvertedType.js +33 -26
  406. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +165 -161
  407. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +228 -234
  408. package/dist/parquetjs/parquet-thrift/DateType.js +60 -37
  409. package/dist/parquetjs/parquet-thrift/DecimalType.js +104 -90
  410. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +121 -112
  411. package/dist/parquetjs/parquet-thrift/Encoding.js +19 -12
  412. package/dist/parquetjs/parquet-thrift/EnumType.js +60 -37
  413. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +14 -7
  414. package/dist/parquetjs/parquet-thrift/FileMetaData.js +253 -263
  415. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +60 -37
  416. package/dist/parquetjs/parquet-thrift/IntType.js +104 -90
  417. package/dist/parquetjs/parquet-thrift/JsonType.js +60 -37
  418. package/dist/parquetjs/parquet-thrift/KeyValue.js +101 -88
  419. package/dist/parquetjs/parquet-thrift/ListType.js +60 -37
  420. package/dist/parquetjs/parquet-thrift/LogicalType.js +366 -449
  421. package/dist/parquetjs/parquet-thrift/MapType.js +60 -37
  422. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +60 -37
  423. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +60 -37
  424. package/dist/parquetjs/parquet-thrift/NullType.js +60 -37
  425. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +96 -80
  426. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +126 -114
  427. package/dist/parquetjs/parquet-thrift/PageHeader.js +218 -231
  428. package/dist/parquetjs/parquet-thrift/PageLocation.js +140 -123
  429. package/dist/parquetjs/parquet-thrift/PageType.js +15 -8
  430. package/dist/parquetjs/parquet-thrift/RowGroup.js +179 -171
  431. package/dist/parquetjs/parquet-thrift/SchemaElement.js +241 -268
  432. package/dist/parquetjs/parquet-thrift/SortingColumn.js +126 -114
  433. package/dist/parquetjs/parquet-thrift/Statistics.js +175 -178
  434. package/dist/parquetjs/parquet-thrift/StringType.js +60 -37
  435. package/dist/parquetjs/parquet-thrift/TimeType.js +105 -91
  436. package/dist/parquetjs/parquet-thrift/TimeUnit.js +124 -119
  437. package/dist/parquetjs/parquet-thrift/TimestampType.js +105 -91
  438. package/dist/parquetjs/parquet-thrift/Type.js +19 -12
  439. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +60 -37
  440. package/dist/parquetjs/parquet-thrift/UUIDType.js +60 -37
  441. package/dist/parquetjs/parquet-thrift/index.js +65 -44
  442. package/dist/parquetjs/parser/decoders.d.ts +2 -2
  443. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  444. package/dist/parquetjs/parser/decoders.js +301 -283
  445. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  446. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  447. package/dist/parquetjs/parser/parquet-reader.js +193 -113
  448. package/dist/parquetjs/schema/declare.d.ts +26 -18
  449. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  450. package/dist/parquetjs/schema/declare.js +11 -12
  451. package/dist/parquetjs/schema/schema.d.ts +4 -4
  452. package/dist/parquetjs/schema/schema.d.ts.map +1 -1
  453. package/dist/parquetjs/schema/schema.js +148 -162
  454. package/dist/parquetjs/schema/shred.d.ts +33 -12
  455. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  456. package/dist/parquetjs/schema/shred.js +340 -147
  457. package/dist/parquetjs/schema/types.d.ts +2 -2
  458. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  459. package/dist/parquetjs/schema/types.js +355 -415
  460. package/dist/parquetjs/utils/file-utils.d.ts +5 -4
  461. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  462. package/dist/parquetjs/utils/file-utils.js +37 -28
  463. package/dist/parquetjs/utils/read-utils.js +99 -95
  464. package/dist/workers/parquet-worker.js +5 -4
  465. package/package.json +17 -12
  466. package/src/index.ts +58 -7
  467. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  468. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  469. package/src/lib/arrow/convert-schema-from-parquet.ts +104 -0
  470. package/src/lib/arrow/convert-schema-to-parquet.ts +90 -0
  471. package/src/lib/geo/decode-geo-metadata.ts +108 -0
  472. package/src/lib/geo/geoparquet-schema.ts +69 -0
  473. package/src/lib/parsers/parse-parquet-to-columns.ts +60 -0
  474. package/src/lib/parsers/parse-parquet-to-rows.ts +45 -0
  475. package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
  476. package/src/lib/wasm/load-wasm/index.ts +1 -0
  477. package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
  478. package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
  479. package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
  480. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  481. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  482. package/src/parquet-loader.ts +30 -3
  483. package/src/parquet-wasm-loader.ts +36 -0
  484. package/src/parquet-wasm-writer.ts +24 -0
  485. package/src/parquet-writer.ts +4 -1
  486. package/src/parquetjs/compression.ts +24 -7
  487. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +33 -38
  488. package/src/parquetjs/parser/decoders.ts +3 -3
  489. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  490. package/src/parquetjs/schema/declare.ts +22 -13
  491. package/src/parquetjs/schema/schema.ts +8 -8
  492. package/src/parquetjs/schema/shred.ts +239 -71
  493. package/src/parquetjs/schema/types.ts +25 -30
  494. package/src/parquetjs/utils/file-utils.ts +3 -4
  495. package/dist/bundle.js.map +0 -1
  496. package/dist/constants.js.map +0 -1
  497. package/dist/index.js.map +0 -1
  498. package/dist/lib/convert-schema.d.ts +0 -8
  499. package/dist/lib/convert-schema.d.ts.map +0 -1
  500. package/dist/lib/convert-schema.js +0 -71
  501. package/dist/lib/convert-schema.js.map +0 -1
  502. package/dist/lib/parse-parquet.d.ts +0 -4
  503. package/dist/lib/parse-parquet.d.ts.map +0 -1
  504. package/dist/lib/parse-parquet.js +0 -28
  505. package/dist/lib/parse-parquet.js.map +0 -1
  506. package/dist/lib/read-array-buffer.d.ts +0 -19
  507. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  508. package/dist/lib/read-array-buffer.js +0 -9
  509. package/dist/lib/read-array-buffer.js.map +0 -1
  510. package/dist/parquet-loader.js.map +0 -1
  511. package/dist/parquet-writer.js.map +0 -1
  512. package/dist/parquetjs/codecs/declare.js.map +0 -1
  513. package/dist/parquetjs/codecs/dictionary.js.map +0 -1
  514. package/dist/parquetjs/codecs/index.js.map +0 -1
  515. package/dist/parquetjs/codecs/plain.js.map +0 -1
  516. package/dist/parquetjs/codecs/rle.js.map +0 -1
  517. package/dist/parquetjs/compression.js.map +0 -1
  518. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  519. package/dist/parquetjs/encoder/writer.js.map +0 -1
  520. package/dist/parquetjs/file.d.ts +0 -10
  521. package/dist/parquetjs/file.d.ts.map +0 -1
  522. package/dist/parquetjs/file.js +0 -80
  523. package/dist/parquetjs/file.js.map +0 -1
  524. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  525. package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  526. package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  527. package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  528. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  529. package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  530. package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  531. package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  532. package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  533. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  534. package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
  535. package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  536. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  537. package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  538. package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  539. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  540. package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  541. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  542. package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
  543. package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  544. package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  545. package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
  546. package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  547. package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
  548. package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  549. package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  550. package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
  551. package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  552. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  553. package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  554. package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  555. package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
  556. package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  557. package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  558. package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  559. package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  560. package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
  561. package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  562. package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  563. package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  564. package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
  565. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  566. package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  567. package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
  568. package/dist/parquetjs/parser/decoders.js.map +0 -1
  569. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  570. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  571. package/dist/parquetjs/parser/parquet-cursor.js +0 -90
  572. package/dist/parquetjs/parser/parquet-cursor.js.map +0 -1
  573. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  574. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  575. package/dist/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  576. package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
  577. package/dist/parquetjs/schema/declare.js.map +0 -1
  578. package/dist/parquetjs/schema/schema.js.map +0 -1
  579. package/dist/parquetjs/schema/shred.js.map +0 -1
  580. package/dist/parquetjs/schema/types.js.map +0 -1
  581. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  582. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  583. package/dist/parquetjs/utils/buffer-utils.js +0 -12
  584. package/dist/parquetjs/utils/buffer-utils.js.map +0 -1
  585. package/dist/parquetjs/utils/file-utils.js.map +0 -1
  586. package/dist/parquetjs/utils/read-utils.js.map +0 -1
  587. package/dist/workers/parquet-worker.js.map +0 -1
  588. package/src/lib/convert-schema.ts +0 -95
  589. package/src/lib/parse-parquet.ts +0 -27
  590. package/src/lib/read-array-buffer.ts +0 -31
  591. package/src/parquetjs/file.ts +0 -90
  592. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  593. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  594. package/src/parquetjs/utils/buffer-utils.ts +0 -18
  595. /package/dist/{parquetjs → es5/parquetjs}/LICENSE +0 -0
  596. /package/dist/{parquetjs → es5/parquetjs}/modules.d.ts +0 -0
@@ -0,0 +1,910 @@
1
+
2
+ /*
3
+ /*
4
+ * Licensed to the Apache Software Foundation (ASF) under one
5
+ * or more contributor license agreements. See the NOTICE file
6
+ * distributed with this work for additional information
7
+ * regarding copyright ownership. The ASF licenses this file
8
+ * to you under the Apache License, Version 2.0 (the
9
+ * "License"); you may not use this file except in compliance
10
+ * with the License. You may obtain a copy of the License at
11
+ *
12
+ * http://www.apache.org/licenses/LICENSE-2.0
13
+ *
14
+ * Unless required by applicable law or agreed to in writing,
15
+ * software distributed under the License is distributed on an
16
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17
+ * KIND, either express or implied. See the License for the
18
+ * specific language governing permissions and limitations
19
+ * under the License.
20
+ */
21
+ package org.apache.parquet.arrow.schema;
22
+
23
+ import static java.util.Arrays.asList;
24
+ import static java.util.Optional.empty;
25
+ import static java.util.Optional.of;
26
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
27
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
28
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
29
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
30
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
31
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
32
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
33
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
34
+ import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
35
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
36
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
37
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
38
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
39
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
40
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
41
+ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
42
+ import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
43
+ import static org.apache.parquet.schema.Type.Repetition.REPEATED;
44
+ import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
45
+
46
+ import java.util.ArrayList;
47
+ import java.util.List;
48
+ import java.util.Optional;
49
+
50
+ import org.apache.arrow.vector.types.DateUnit;
51
+ import org.apache.arrow.vector.types.FloatingPointPrecision;
52
+ import org.apache.arrow.vector.types.TimeUnit;
53
+ import org.apache.arrow.vector.types.pojo.ArrowType;
54
+ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
55
+ import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
56
+ import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
57
+ import org.apache.arrow.vector.types.pojo.ArrowType.Date;
58
+ import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
59
+ import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
60
+ import org.apache.arrow.vector.types.pojo.ArrowType.Int;
61
+ import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
62
+ import org.apache.arrow.vector.types.pojo.ArrowType.Null;
63
+ import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
64
+ import org.apache.arrow.vector.types.pojo.ArrowType.Time;
65
+ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
66
+ import org.apache.arrow.vector.types.pojo.ArrowType.Union;
67
+ import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
68
+ import org.apache.arrow.vector.types.pojo.Field;
69
+ import org.apache.arrow.vector.types.pojo.Schema;
70
+ import org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping;
71
+ import org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping;
72
+ import org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping;
73
+ import org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping;
74
+ import org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping;
75
+ import org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping;
76
+ import org.apache.parquet.schema.GroupType;
77
+ import org.apache.parquet.schema.LogicalTypeAnnotation;
78
+ import org.apache.parquet.schema.MessageType;
79
+ import org.apache.parquet.schema.PrimitiveType;
80
+ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
81
+ import org.apache.parquet.schema.Type;
82
+ import org.apache.parquet.schema.Type.Repetition;
83
+ import org.apache.parquet.schema.Types;
84
+ import org.apache.parquet.schema.Types.GroupBuilder;
85
+
86
+ /**
87
+ * Logic to convert Parquet and Arrow Schemas back and forth and maintain the mapping
88
+ */
89
+ public class SchemaConverter {
90
+
/** Whether Parquet INT96 columns are exposed as Arrow Timestamp instead of Arrow Binary. */
private final boolean convertInt96ToArrowTimestamp;

/**
 * Creates a converter with the default configuration, in which INT96 is
 * not converted to an Arrow Timestamp.
 */
public SchemaConverter() {
  this.convertInt96ToArrowTimestamp = false;
}

/**
 * Creates a converter with an explicit INT96 policy.
 *
 * @param convertInt96ToArrowTimestamp {@code true} to convert INT96 to an Arrow Timestamp
 */
// TODO(PARQUET-1511): pass the parameters in a configuration object
public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
  this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
}
105
+
106
+ /**
107
+ * Creates a Parquet Schema from an Arrow one and returns the mapping
108
+ * @param arrowSchema the provided Arrow Schema
109
+ * @return the mapping between the 2
110
+ */
111
+ public SchemaMapping fromArrow(Schema arrowSchema) {
112
+ List<Field> fields = arrowSchema.getFields();
113
+ List<TypeMapping> parquetFields = fromArrow(fields);
114
+ MessageType parquetType = addToBuilder(parquetFields, Types.buildMessage()).named("root");
115
+ return new SchemaMapping(arrowSchema, parquetType, parquetFields);
116
+ }
117
+
118
+ private <T> GroupBuilder<T> addToBuilder(List<TypeMapping> parquetFields, GroupBuilder<T> builder) {
119
+ for (TypeMapping type : parquetFields) {
120
+ builder = builder.addField(type.getParquetType());
121
+ }
122
+ return builder;
123
+ }
124
+
125
+ private List<TypeMapping> fromArrow(List<Field> fields) {
126
+ List<TypeMapping> result = new ArrayList<>(fields.size());
127
+ for (Field field : fields) {
128
+ result.add(fromArrow(field));
129
+ }
130
+ return result;
131
+ }
132
+
133
+ private TypeMapping fromArrow(final Field field) {
134
+ return fromArrow(field, field.getName());
135
+ }
136
+
137
+ /**
138
+ * @param field arrow field
139
+ * @param fieldName overrides field.getName()
140
+ * @return mapping
141
+ */
142
+ private TypeMapping fromArrow(final Field field, final String fieldName) {
143
+ final List<Field> children = field.getChildren();
144
+ return field.getType().accept(new ArrowTypeVisitor<TypeMapping>() {
145
+
146
+ @Override
147
+ public TypeMapping visit(Null type) {
148
+ // TODO(PARQUET-757): null original type
149
+ return primitive(BINARY);
150
+ }
151
+
152
+ @Override
153
+ public TypeMapping visit(Struct type) {
154
+ List<TypeMapping> parquetTypes = fromArrow(children);
155
+ return new StructTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
156
+ }
157
+
158
+ @Override
159
+ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
160
+ return createListTypeMapping();
161
+ }
162
+
163
+ @Override
164
+ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
165
+ return createListTypeMapping();
166
+ }
167
+
168
+ private ListTypeMapping createListTypeMapping() {
169
+ if (children.size() != 1) {
170
+ throw new IllegalArgumentException("list fields must have exactly one child: " + field);
171
+ }
172
+ TypeMapping parquetChild = fromArrow(children.get(0), "element");
173
+ GroupType list = Types.optionalList().element(parquetChild.getParquetType()).named(fieldName);
174
+ return new ListTypeMapping(field, new List3Levels(list), parquetChild);
175
+ }
176
+
177
+ @Override
178
+ public TypeMapping visit(Union type) {
179
+ // TODO(PARQUET-756): add Union OriginalType
180
+ List<TypeMapping> parquetTypes = fromArrow(children);
181
+ return new UnionTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
182
+ }
183
+
184
+ @Override
185
+ public TypeMapping visit(Int type) {
186
+ boolean signed = type.getIsSigned();
187
+ switch (type.getBitWidth()) {
188
+ case 8:
189
+ case 16:
190
+ case 32:
191
+ return primitive(INT32, intType(type.getBitWidth(), signed));
192
+ case 64:
193
+ return primitive(INT64, intType(64, signed));
194
+ default:
195
+ throw new IllegalArgumentException("Illegal int type: " + field);
196
+ }
197
+ }
198
+
199
+ @Override
200
+ public TypeMapping visit(FloatingPoint type) {
201
+ switch (type.getPrecision()) {
202
+ case HALF:
203
+ // TODO(PARQUET-757): original type HalfFloat
204
+ return primitive(FLOAT);
205
+ case SINGLE:
206
+ return primitive(FLOAT);
207
+ case DOUBLE:
208
+ return primitive(DOUBLE);
209
+ default:
210
+ throw new IllegalArgumentException("Illegal float type: " + field);
211
+ }
212
+ }
213
+
214
+ @Override
215
+ public TypeMapping visit(Utf8 type) {
216
+ return primitive(BINARY, stringType());
217
+ }
218
+
219
+ @Override
220
+ public TypeMapping visit(Binary type) {
221
+ return primitive(BINARY);
222
+ }
223
+
224
+ @Override
225
+ public TypeMapping visit(Bool type) {
226
+ return primitive(BOOLEAN);
227
+ }
228
+
229
+ /**
230
+ * See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
231
+ * @param type an arrow decimal type
232
+ * @return a mapping from the arrow decimal to the Parquet type
233
+ */
234
+ @Override
235
+ public TypeMapping visit(Decimal type) {
236
+ int precision = type.getPrecision();
237
+ int scale = type.getScale();
238
+ if (1 <= precision && precision <= 9) {
239
+ return decimal(INT32, precision, scale);
240
+ } else if (1 <= precision && precision <= 18) {
241
+ return decimal(INT64, precision, scale);
242
+ } else {
243
+ // Better: FIXED_LENGTH_BYTE_ARRAY with length
244
+ return decimal(BINARY, precision, scale);
245
+ }
246
+ }
247
+
248
+ @Override
249
+ public TypeMapping visit(Date type) {
250
+ return primitive(INT32, dateType());
251
+ }
252
+
253
+ @Override
254
+ public TypeMapping visit(Time type) {
255
+ int bitWidth = type.getBitWidth();
256
+ TimeUnit timeUnit = type.getUnit();
257
+ if (bitWidth == 32 && timeUnit == TimeUnit.MILLISECOND) {
258
+ return primitive(INT32, timeType(false, MILLIS));
259
+ } else if (bitWidth == 64 && timeUnit == TimeUnit.MICROSECOND) {
260
+ return primitive(INT64, timeType(false, MICROS));
261
+ } else if (bitWidth == 64 && timeUnit == TimeUnit.NANOSECOND) {
262
+ return primitive(INT64, timeType(false, NANOS));
263
+ }
264
+ throw new UnsupportedOperationException("Unsupported type " + type);
265
+ }
266
+
267
+ @Override
268
+ public TypeMapping visit(Timestamp type) {
269
+ TimeUnit timeUnit = type.getUnit();
270
+ if (timeUnit == TimeUnit.MILLISECOND) {
271
+ return primitive(INT64, timestampType(isUtcNormalized(type), MILLIS));
272
+ } else if (timeUnit == TimeUnit.MICROSECOND) {
273
+ return primitive(INT64, timestampType(isUtcNormalized(type), MICROS));
274
+ } else if (timeUnit == TimeUnit.NANOSECOND) {
275
+ return primitive(INT64, timestampType(isUtcNormalized(type), NANOS));
276
+ }
277
+ throw new UnsupportedOperationException("Unsupported type " + type);
278
+ }
279
+
280
+ private boolean isUtcNormalized(Timestamp timestamp) {
281
+ String timeZone = timestamp.getTimezone();
282
+ return timeZone != null && !timeZone.isEmpty();
283
+ }
284
+
285
+ /**
286
+ * See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval
287
+ */
288
+ @Override
289
+ public TypeMapping visit(Interval type) {
290
+ // TODO(PARQUET-675): fix interval original types
291
+ return primitiveFLBA(12, LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance());
292
+ }
293
+
294
+ @Override
295
+ public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
296
+ return primitive(BINARY);
297
+ }
298
+
299
+ private TypeMapping mapping(PrimitiveType parquetType) {
300
+ return new PrimitiveTypeMapping(field, parquetType);
301
+ }
302
+
303
+ private TypeMapping decimal(PrimitiveTypeName type, int precision, int scale) {
304
+ return mapping(Types.optional(type).as(decimalType(scale, precision)).named(fieldName));
305
+ }
306
+
307
+ private TypeMapping primitive(PrimitiveTypeName type) {
308
+ return mapping(Types.optional(type).named(fieldName));
309
+ }
310
+
311
+ private TypeMapping primitive(PrimitiveTypeName type, LogicalTypeAnnotation otype) {
312
+ return mapping(Types.optional(type).as(otype).named(fieldName));
313
+ }
314
+
315
+ private TypeMapping primitiveFLBA(int length, LogicalTypeAnnotation otype) {
316
+ return mapping(Types.optional(FIXED_LEN_BYTE_ARRAY).length(length).as(otype).named(fieldName));
317
+ }
318
+ });
319
+ }
320
+
321
+ /**
322
+ * Creates an Arrow Schema from an Parquet one and returns the mapping
323
+ * @param parquetSchema the provided Parquet Schema
324
+ * @return the mapping between the 2
325
+ */
326
+ public SchemaMapping fromParquet(MessageType parquetSchema) {
327
+ List<Type> fields = parquetSchema.getFields();
328
+ List<TypeMapping> mappings = fromParquet(fields);
329
+ List<Field> arrowFields = fields(mappings);
330
+ return new SchemaMapping(new Schema(arrowFields), parquetSchema, mappings);
331
+ }
332
+
333
+ private List<Field> fields(List<TypeMapping> mappings) {
334
+ List<Field> result = new ArrayList<>(mappings.size());
335
+ for (TypeMapping typeMapping : mappings) {
336
+ result.add(typeMapping.getArrowField());
337
+ }
338
+ return result;
339
+ }
340
+
341
+ private List<TypeMapping> fromParquet(List<Type> fields) {
342
+ List<TypeMapping> result = new ArrayList<>(fields.size());
343
+ for (Type type : fields) {
344
+ result.add(fromParquet(type));
345
+ }
346
+ return result;
347
+ }
348
+
349
+ private TypeMapping fromParquet(Type type) {
350
+ return fromParquet(type, type.getName(), type.getRepetition());
351
+ }
352
+
353
+ /**
354
+ * @param type parquet type
355
+ * @param name overrides parquet.getName)
356
+ * @param repetition overrides parquet.getRepetition()
357
+ * @return a type mapping from the Parquet type to an Arrow type
358
+ */
359
+ private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
360
+ if (repetition == REPEATED) {
361
+ // case where we have a repeated field that is not in a List/Map
362
+ TypeMapping child = fromParquet(type, null, REQUIRED);
363
+ Field arrowField = new Field(name, false, new ArrowType.List(), asList(child.getArrowField()));
364
+ return new RepeatedTypeMapping(arrowField, type, child);
365
+ }
366
+ if (type.isPrimitive()) {
367
+ return fromParquetPrimitive(type.asPrimitiveType(), name);
368
+ } else {
369
+ return fromParquetGroup(type.asGroupType(), name);
370
+ }
371
+ }
372
+
373
+ /**
374
+ * @param type parquet types
375
+ * @param name overrides parquet.getName()
376
+ * @return the mapping
377
+ */
378
+ private TypeMapping fromParquetGroup(GroupType type, String name) {
379
+ LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
380
+ if (logicalType == null) {
381
+ List<TypeMapping> typeMappings = fromParquet(type.getFields());
382
+ Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
383
+ return new StructTypeMapping(arrowField, type, typeMappings);
384
+ } else {
385
+ return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
386
+ @Override
387
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
388
+ List3Levels list3Levels = new List3Levels(type);
389
+ TypeMapping child = fromParquet(list3Levels.getElement(), null, list3Levels.getElement().getRepetition());
390
+ Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new ArrowType.List(), asList(child.getArrowField()));
391
+ return of(new ListTypeMapping(arrowField, list3Levels, child));
392
+ }
393
+ }).orElseThrow(() -> new UnsupportedOperationException("Unsupported type " + type));
394
+ }
395
+ }
396
+
397
+ /**
398
+ * @param type parquet types
399
+ * @param name overrides parquet.getName()
400
+ * @return the mapping
401
+ */
402
+ private TypeMapping fromParquetPrimitive(final PrimitiveType type, final String name) {
403
+ return type.getPrimitiveTypeName().convert(new PrimitiveType.PrimitiveTypeNameConverter<TypeMapping, RuntimeException>() {
404
+
405
+ private TypeMapping field(ArrowType arrowType) {
406
+ Field field = new Field(name, type.isRepetition(OPTIONAL), arrowType, null);
407
+ return new PrimitiveTypeMapping(field, type);
408
+ }
409
+
410
+ @Override
411
+ public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
412
+ return field(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
413
+ }
414
+
415
+ @Override
416
+ public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
417
+ return field(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
418
+ }
419
+
420
+ @Override
421
+ public TypeMapping convertINT32(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
422
+ LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
423
+ if (logicalTypeAnnotation == null) {
424
+ return integer(32, true);
425
+ }
426
+ return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
427
+ @Override
428
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
429
+ return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
430
+ }
431
+
432
+ @Override
433
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
434
+ return of(field(new ArrowType.Date(DateUnit.DAY)));
435
+ }
436
+
437
+ @Override
438
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
439
+ return timeLogicalType.getUnit() == MILLIS ? of(field(new ArrowType.Time(TimeUnit.MILLISECOND, 32))) : empty();
440
+ }
441
+
442
+ @Override
443
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
444
+ if (intLogicalType.getBitWidth() == 64) {
445
+ return empty();
446
+ }
447
+ return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
448
+ }
449
+ }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
450
+ }
451
+
452
+ @Override
453
+ public TypeMapping convertINT64(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
454
+ LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
455
+ if (logicalTypeAnnotation == null) {
456
+ return integer(64, true);
457
+ }
458
+
459
+ return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
460
+ @Override
461
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
462
+ return of(field(new ArrowType.Date(DateUnit.DAY)));
463
+ }
464
+
465
+ @Override
466
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
467
+ return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
468
+ }
469
+
470
+ @Override
471
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
472
+ return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
473
+ }
474
+
475
+ @Override
476
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
477
+ if (timeLogicalType.getUnit() == MICROS) {
478
+ return of(field(new ArrowType.Time(TimeUnit.MICROSECOND, 64)));
479
+ } else if (timeLogicalType.getUnit() == NANOS) {
480
+ return of(field(new ArrowType.Time(TimeUnit.NANOSECOND, 64)));
481
+ }
482
+ return empty();
483
+ }
484
+
485
+ @Override
486
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
487
+ switch (timestampLogicalType.getUnit()) {
488
+ case MICROS:
489
+ return of(field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, getTimeZone(timestampLogicalType))));
490
+ case MILLIS:
491
+ return of(field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, getTimeZone(timestampLogicalType))));
492
+ case NANOS:
493
+ return of(field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, getTimeZone(timestampLogicalType))));
494
+ }
495
+ return empty();
496
+ }
497
+
498
+ private String getTimeZone(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
499
+ return timestampLogicalType.isAdjustedToUTC() ? "UTC" : null;
500
+ }
501
+ }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
502
+ }
503
+
504
+ @Override
505
+ public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
506
+ if (convertInt96ToArrowTimestamp) {
507
+ return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
508
+ } else {
509
+ return field(new ArrowType.Binary());
510
+ }
511
+ }
512
+
513
+ @Override
514
+ public TypeMapping convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
515
+ LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
516
+ if (logicalTypeAnnotation == null) {
517
+ return field(new ArrowType.Binary());
518
+ }
519
+
520
+ return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
521
+ @Override
522
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
523
+ return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
524
+ }
525
+ }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
526
+ }
527
+
528
+ @Override
529
+ public TypeMapping convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
530
+ return field(new ArrowType.Bool());
531
+ }
532
+
533
+ @Override
534
+ public TypeMapping convertBINARY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
535
+ LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
536
+ if (logicalTypeAnnotation == null) {
537
+ return field(new ArrowType.Binary());
538
+ }
539
+ return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
540
+ @Override
541
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
542
+ return of(field(new ArrowType.Utf8()));
543
+ }
544
+
545
+ @Override
546
+ public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
547
+ return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
548
+ }
549
+ }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
550
+ }
551
+
552
+ private TypeMapping decimal(int precision, int scale) {
553
+ return field(new ArrowType.Decimal(precision, scale));
554
+ }
555
+
556
+ private TypeMapping integer(int width, boolean signed) {
557
+ return field(new ArrowType.Int(width, signed));
558
+ }
559
+ });
560
+ }
561
+
562
+ /**
563
+ * Maps a Parquet and Arrow Schema
564
+ * For now does not validate primitive type compatibility
565
+ * @param arrowSchema an Arrow schema
566
+ * @param parquetSchema a Parquet message type
567
+ * @return the mapping between the 2
568
+ */
569
+ public SchemaMapping map(Schema arrowSchema, MessageType parquetSchema) {
570
+ List<TypeMapping> children = map(arrowSchema.getFields(), parquetSchema.getFields());
571
+ return new SchemaMapping(arrowSchema, parquetSchema, children);
572
+ }
573
+
574
+ private List<TypeMapping> map(List<Field> arrowFields, List<Type> parquetFields) {
575
+ if (arrowFields.size() != parquetFields.size()) {
576
+ throw new IllegalArgumentException("Can not map schemas as sizes differ: " + arrowFields + " != " + parquetFields);
577
+ }
578
+ List<TypeMapping> result = new ArrayList<>(arrowFields.size());
579
+ for (int i = 0; i < arrowFields.size(); i++) {
580
+ Field arrowField = arrowFields.get(i);
581
+ Type parquetField = parquetFields.get(i);
582
+ result.add(map(arrowField, parquetField));
583
+ }
584
+ return result;
585
+ }
586
+
587
+ private TypeMapping map(final Field arrowField, final Type parquetField) {
588
+ return arrowField.getType().accept(new ArrowTypeVisitor<TypeMapping>() {
589
+
590
+ @Override
591
+ public TypeMapping visit(Null type) {
592
+ if (!parquetField.isRepetition(OPTIONAL)) {
593
+ throw new IllegalArgumentException("Parquet type can't be null: " + parquetField);
594
+ }
595
+ return primitive();
596
+ }
597
+
598
+ @Override
599
+ public TypeMapping visit(Struct type) {
600
+ if (parquetField.isPrimitive()) {
601
+ throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
602
+ }
603
+ GroupType groupType = parquetField.asGroupType();
604
+ return new StructTypeMapping(arrowField, groupType, map(arrowField.getChildren(), groupType.getFields()));
605
+ }
606
+
607
+ @Override
608
+ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
609
+ return createListTypeMapping(type);
610
+ }
611
+
612
+ @Override
613
+ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
614
+ return createListTypeMapping(type);
615
+ }
616
+
617
+ private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
618
+ if (arrowField.getChildren().size() != 1) {
619
+ throw new IllegalArgumentException("Invalid list type: " + type);
620
+ }
621
+ Field arrowChild = arrowField.getChildren().get(0);
622
+ if (parquetField.isRepetition(REPEATED)) {
623
+ return new RepeatedTypeMapping(arrowField, parquetField, map(arrowChild, parquetField));
624
+ }
625
+ if (parquetField.isPrimitive()) {
626
+ throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
627
+ }
628
+ List3Levels list3Levels = new List3Levels(parquetField.asGroupType());
629
+ if (arrowField.getChildren().size() != 1) {
630
+ throw new IllegalArgumentException("invalid arrow list: " + arrowField);
631
+ }
632
+ return new ListTypeMapping(arrowField, list3Levels, map(arrowChild, list3Levels.getElement()));
633
+ }
634
+
635
+ @Override
636
+ public TypeMapping visit(Union type) {
637
+ if (parquetField.isPrimitive()) {
638
+ throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
639
+ }
640
+ GroupType groupType = parquetField.asGroupType();
641
+ return new UnionTypeMapping(arrowField, groupType, map(arrowField.getChildren(), groupType.getFields()));
642
+ }
643
+
644
+ @Override
645
+ public TypeMapping visit(Int type) {
646
+ return primitive();
647
+ }
648
+
649
+ @Override
650
+ public TypeMapping visit(FloatingPoint type) {
651
+ return primitive();
652
+ }
653
+
654
+ @Override
655
+ public TypeMapping visit(Utf8 type) {
656
+ return primitive();
657
+ }
658
+
659
+ @Override
660
+ public TypeMapping visit(Binary type) {
661
+ return primitive();
662
+ }
663
+
664
+ @Override
665
+ public TypeMapping visit(Bool type) {
666
+ return primitive();
667
+ }
668
+
669
+ @Override
670
+ public TypeMapping visit(Decimal type) {
671
+ return primitive();
672
+ }
673
+
674
+ @Override
675
+ public TypeMapping visit(Date type) {
676
+ return primitive();
677
+ }
678
+
679
+ @Override
680
+ public TypeMapping visit(Time type) {
681
+ return primitive();
682
+ }
683
+
684
+ @Override
685
+ public TypeMapping visit(Timestamp type) {
686
+ return primitive();
687
+ }
688
+
689
+ @Override
690
+ public TypeMapping visit(Interval type) {
691
+ return primitive();
692
+ }
693
+
694
+ @Override
695
+ public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
696
+ return primitive();
697
+ }
698
+
699
+ private TypeMapping primitive() {
700
+ if (!parquetField.isPrimitive()) {
701
+ throw new IllegalArgumentException("Can not map schemas as one is primitive and the other is not: " + arrowField + " != " + parquetField);
702
+ }
703
+ return new PrimitiveTypeMapping(arrowField, parquetField.asPrimitiveType());
704
+ }
705
+ });
706
+ }
707
+ }
708
+
709
+ /*
710
+ * Licensed to the Apache Software Foundation (ASF) under one
711
+ * or more contributor license agreements. See the NOTICE file
712
+ * distributed with this work for additional information
713
+ * regarding copyright ownership. The ASF licenses this file
714
+ * to you under the Apache License, Version 2.0 (the
715
+ * "License"); you may not use this file except in compliance
716
+ * with the License. You may obtain a copy of the License at
717
+ *
718
+ * http://www.apache.org/licenses/LICENSE-2.0
719
+ *
720
+ * Unless required by applicable law or agreed to in writing,
721
+ * software distributed under the License is distributed on an
722
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
723
+ * KIND, either express or implied. See the License for the
724
+ * specific language governing permissions and limitations
725
+ * under the License.
726
+ */
727
+ package org.apache.parquet.arrow.schema;
728
+
729
+ import static java.util.Arrays.asList;
730
+
731
+ import java.util.Collections;
732
+ import java.util.List;
733
+
734
+ import org.apache.arrow.vector.types.pojo.Field;
735
+ import org.apache.arrow.vector.types.pojo.Schema;
736
+ import org.apache.parquet.schema.GroupType;
737
+ import org.apache.parquet.schema.MessageType;
738
+ import org.apache.parquet.schema.PrimitiveType;
739
+ import org.apache.parquet.schema.Type;
740
+
741
+ /**
742
+ * The mapping between an Arrow and a Parquet schema
743
+ * @see SchemaConverter
744
+ */
745
+ public class SchemaMapping {
746
+
747
+ private final Schema arrowSchema;
748
+ private final MessageType parquetSchema;
749
+ private final List<TypeMapping> children;
750
+
751
+ SchemaMapping(Schema arrowSchema, MessageType parquetSchema, List<TypeMapping> children) {
752
+ super();
753
+ this.arrowSchema = arrowSchema;
754
+ this.parquetSchema = parquetSchema;
755
+ this.children = Collections.unmodifiableList(children);
756
+ }
757
+
758
+ public Schema getArrowSchema() {
759
+ return arrowSchema;
760
+ }
761
+
762
+ public MessageType getParquetSchema() {
763
+ return parquetSchema;
764
+ }
765
+
766
+ /**
767
+ * @return mapping between individual fields of each of the 2 schemas (should be the same width)
768
+ */
769
+ public List<TypeMapping> getChildren() {
770
+ return children;
771
+ }
772
+
773
+ /**
774
+ * To traverse a schema mapping
775
+ * @param <T> the Java return type of the visitor
776
+ */
777
+ public interface TypeMappingVisitor<T> {
778
+ T visit(PrimitiveTypeMapping primitiveTypeMapping);
779
+ T visit(StructTypeMapping structTypeMapping);
780
+ T visit(UnionTypeMapping unionTypeMapping);
781
+ T visit(ListTypeMapping listTypeMapping);
782
+ T visit(RepeatedTypeMapping repeatedTypeMapping);
783
+ }
784
+
785
+ /**
786
+ * Mapping between an Arrow and a Parquet types
787
+ */
788
+ public abstract static class TypeMapping {
789
+
790
+ private final Field arrowField;
791
+ private final Type parquetType;
792
+ private List<TypeMapping> children;
793
+
794
+ TypeMapping(Field arrowField, Type parquetType, List<TypeMapping> children) {
795
+ super();
796
+ this.arrowField = arrowField;
797
+ this.parquetType = parquetType;
798
+ this.children = children;
799
+ }
800
+
801
+ public Field getArrowField() {
802
+ return arrowField;
803
+ }
804
+
805
+ public Type getParquetType() {
806
+ return parquetType;
807
+ }
808
+
809
+ public List<TypeMapping> getChildren() {
810
+ return children;
811
+ }
812
+
813
+ public abstract <T> T accept(TypeMappingVisitor<T> visitor);
814
+
815
+ }
816
+
817
+ /**
818
+ * mapping between two primitive types
819
+ */
820
+ public static class PrimitiveTypeMapping extends TypeMapping {
821
+ public PrimitiveTypeMapping(Field arrowField, PrimitiveType parquetType) {
822
+ super(arrowField, parquetType, Collections.<TypeMapping>emptyList());
823
+ }
824
+
825
+ @Override
826
+ public <T> T accept(TypeMappingVisitor<T> visitor) {
827
+ return visitor.visit(this);
828
+ }
829
+ }
830
+
831
+ /**
832
+ * mapping of a struct type
833
+ */
834
+ public static class StructTypeMapping extends TypeMapping {
835
+ public StructTypeMapping(Field arrowField, GroupType parquetType, List<TypeMapping> children) {
836
+ super(arrowField, parquetType, children);
837
+ }
838
+
839
+ @Override
840
+ public <T> T accept(TypeMappingVisitor<T> visitor) {
841
+ return visitor.visit(this);
842
+ }
843
+ }
844
+
845
+ /**
846
+ * mapping of a union type
847
+ */
848
+ public static class UnionTypeMapping extends TypeMapping {
849
+ public UnionTypeMapping(Field arrowField, GroupType parquetType, List<TypeMapping> children) {
850
+ super(arrowField, parquetType, children);
851
+ }
852
+
853
+ @Override
854
+ public <T> T accept(TypeMappingVisitor<T> visitor) {
855
+ return visitor.visit(this);
856
+ }
857
+ }
858
+
859
+ /**
860
+ * mapping of a List type and standard 3-level List annotated Parquet type
861
+ */
862
+ public static class ListTypeMapping extends TypeMapping {
863
+ private final List3Levels list3Levels;
864
+ private final TypeMapping child;
865
+
866
+ public ListTypeMapping(Field arrowField, List3Levels list3Levels, TypeMapping child) {
867
+ super(arrowField, list3Levels.getList(), asList(child));
868
+ this.list3Levels = list3Levels;
869
+ this.child = child;
870
+ if (list3Levels.getElement() != child.getParquetType()) {
871
+ throw new IllegalArgumentException(list3Levels + " <=> " + child);
872
+ }
873
+ }
874
+
875
+ public List3Levels getList3Levels() {
876
+ return list3Levels;
877
+ }
878
+
879
+ public TypeMapping getChild() {
880
+ return child;
881
+ }
882
+
883
+ @Override
884
+ public <T> T accept(TypeMappingVisitor<T> visitor) {
885
+ return visitor.visit(this);
886
+ }
887
+ }
888
+
889
+ /**
890
+ * mapping of a List type and repeated Parquet field (non-list annotated)
891
+ */
892
+ public static class RepeatedTypeMapping extends TypeMapping {
893
+ private final TypeMapping child;
894
+
895
+ public RepeatedTypeMapping(Field arrowField, Type parquetType, TypeMapping child) {
896
+ super(arrowField, parquetType, asList(child));
897
+ this.child = child;
898
+ }
899
+
900
+ public TypeMapping getChild() {
901
+ return child;
902
+ }
903
+
904
+ @Override
905
+ public <T> T accept(TypeMappingVisitor<T> visitor) {
906
+ return visitor.visit(this);
907
+ }
908
+ }
909
+ }
910
+ */