@loaders.gl/parquet 4.2.0-alpha.4 → 4.2.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269)
  1. package/dist/index.cjs +385 -598
  2. package/dist/index.cjs.map +7 -0
  3. package/dist/index.d.ts +12 -12
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +6 -1
  6. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -2
  7. package/dist/lib/arrow/convert-row-group-to-columns.js +8 -6
  8. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +3 -3
  9. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
  10. package/dist/lib/arrow/convert-schema-from-parquet.js +71 -82
  11. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
  12. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
  13. package/dist/lib/arrow/convert-schema-to-parquet.js +65 -35
  14. package/dist/lib/constants.js +15 -2
  15. package/dist/lib/parsers/get-parquet-schema.d.ts +1 -1
  16. package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
  17. package/dist/lib/parsers/get-parquet-schema.js +9 -7
  18. package/dist/lib/parsers/parse-geoparquet.d.ts +1 -1
  19. package/dist/lib/parsers/parse-geoparquet.d.ts.map +1 -1
  20. package/dist/lib/parsers/parse-geoparquet.js +47 -45
  21. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +1 -1
  22. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  23. package/dist/lib/parsers/parse-parquet-to-columns.js +36 -25
  24. package/dist/lib/parsers/parse-parquet.d.ts +1 -1
  25. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
  26. package/dist/lib/parsers/parse-parquet.js +62 -50
  27. package/dist/lib/wasm/encode-parquet-wasm.d.ts +1 -1
  28. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -1
  29. package/dist/lib/wasm/encode-parquet-wasm.js +49 -12
  30. package/dist/lib/wasm/load-wasm.js +13 -10
  31. package/dist/lib/wasm/parse-parquet-wasm.d.ts +1 -1
  32. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
  33. package/dist/lib/wasm/parse-parquet-wasm.js +19 -15
  34. package/dist/parquet-loader.d.ts +1 -1
  35. package/dist/parquet-loader.d.ts.map +1 -1
  36. package/dist/parquet-loader.js +78 -65
  37. package/dist/parquet-wasm-loader.js +25 -28
  38. package/dist/parquet-wasm-writer.js +19 -23
  39. package/dist/parquet-writer.js +17 -13
  40. package/dist/parquetjs/codecs/declare.d.ts +1 -1
  41. package/dist/parquetjs/codecs/declare.d.ts.map +1 -1
  42. package/dist/parquetjs/codecs/declare.js +0 -1
  43. package/dist/parquetjs/codecs/dictionary.js +4 -8
  44. package/dist/parquetjs/codecs/index.d.ts +3 -3
  45. package/dist/parquetjs/codecs/index.d.ts.map +1 -1
  46. package/dist/parquetjs/codecs/index.js +20 -17
  47. package/dist/parquetjs/codecs/plain.d.ts +2 -2
  48. package/dist/parquetjs/codecs/plain.d.ts.map +1 -1
  49. package/dist/parquetjs/codecs/plain.js +166 -162
  50. package/dist/parquetjs/codecs/rle.d.ts +2 -2
  51. package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
  52. package/dist/parquetjs/codecs/rle.js +124 -105
  53. package/dist/parquetjs/compression.d.ts +1 -1
  54. package/dist/parquetjs/compression.d.ts.map +1 -1
  55. package/dist/parquetjs/compression.js +157 -43
  56. package/dist/parquetjs/encoder/parquet-encoder.d.ts +3 -3
  57. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
  58. package/dist/parquetjs/encoder/parquet-encoder.js +420 -275
  59. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +12 -7
  60. package/dist/parquetjs/parquet-thrift/BsonType.js +31 -27
  61. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts +1 -1
  62. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -1
  63. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +182 -166
  64. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts +1 -1
  65. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -1
  66. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +183 -166
  67. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts +6 -6
  68. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -1
  69. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +372 -333
  70. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts +1 -1
  71. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -1
  72. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +74 -68
  73. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +17 -12
  74. package/dist/parquetjs/parquet-thrift/ConvertedType.js +31 -26
  75. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts +2 -2
  76. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -1
  77. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +141 -124
  78. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts +2 -2
  79. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -1
  80. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +205 -181
  81. package/dist/parquetjs/parquet-thrift/DateType.js +31 -27
  82. package/dist/parquetjs/parquet-thrift/DecimalType.js +78 -69
  83. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts +1 -1
  84. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -1
  85. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +96 -86
  86. package/dist/parquetjs/parquet-thrift/Encoding.js +17 -12
  87. package/dist/parquetjs/parquet-thrift/EnumType.js +31 -27
  88. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +12 -7
  89. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts +4 -4
  90. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -1
  91. package/dist/parquetjs/parquet-thrift/FileMetaData.js +224 -205
  92. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +31 -27
  93. package/dist/parquetjs/parquet-thrift/IntType.js +78 -69
  94. package/dist/parquetjs/parquet-thrift/JsonType.js +31 -27
  95. package/dist/parquetjs/parquet-thrift/KeyValue.js +75 -67
  96. package/dist/parquetjs/parquet-thrift/ListType.js +31 -27
  97. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts +13 -13
  98. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -1
  99. package/dist/parquetjs/parquet-thrift/LogicalType.js +338 -344
  100. package/dist/parquetjs/parquet-thrift/MapType.js +31 -27
  101. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +31 -27
  102. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +31 -27
  103. package/dist/parquetjs/parquet-thrift/NullType.js +31 -27
  104. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts +1 -1
  105. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -1
  106. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +67 -60
  107. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts +2 -2
  108. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -1
  109. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +101 -88
  110. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts +5 -5
  111. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -1
  112. package/dist/parquetjs/parquet-thrift/PageHeader.js +193 -175
  113. package/dist/parquetjs/parquet-thrift/PageLocation.js +111 -96
  114. package/dist/parquetjs/parquet-thrift/PageType.js +13 -8
  115. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts +2 -2
  116. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -1
  117. package/dist/parquetjs/parquet-thrift/RowGroup.js +149 -133
  118. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts +4 -4
  119. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -1
  120. package/dist/parquetjs/parquet-thrift/SchemaElement.js +221 -205
  121. package/dist/parquetjs/parquet-thrift/SortingColumn.js +101 -88
  122. package/dist/parquetjs/parquet-thrift/Statistics.js +149 -137
  123. package/dist/parquetjs/parquet-thrift/StringType.js +31 -27
  124. package/dist/parquetjs/parquet-thrift/TimeType.d.ts +1 -1
  125. package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -1
  126. package/dist/parquetjs/parquet-thrift/TimeType.js +78 -69
  127. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts +2 -2
  128. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -1
  129. package/dist/parquetjs/parquet-thrift/TimeUnit.js +96 -91
  130. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts +1 -1
  131. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -1
  132. package/dist/parquetjs/parquet-thrift/TimestampType.js +78 -69
  133. package/dist/parquetjs/parquet-thrift/Type.js +17 -12
  134. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -27
  135. package/dist/parquetjs/parquet-thrift/UUIDType.js +31 -27
  136. package/dist/parquetjs/parquet-thrift/index.d.ts +44 -44
  137. package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -1
  138. package/dist/parquetjs/parquet-thrift/index.js +6 -1
  139. package/dist/parquetjs/parser/decoders.d.ts +3 -3
  140. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  141. package/dist/parquetjs/parser/decoders.js +306 -242
  142. package/dist/parquetjs/parser/parquet-reader.d.ts +3 -3
  143. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  144. package/dist/parquetjs/parser/parquet-reader.js +183 -158
  145. package/dist/parquetjs/schema/declare.d.ts +1 -1
  146. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  147. package/dist/parquetjs/schema/declare.js +10 -9
  148. package/dist/parquetjs/schema/schema.d.ts +1 -1
  149. package/dist/parquetjs/schema/schema.d.ts.map +1 -1
  150. package/dist/parquetjs/schema/schema.js +142 -120
  151. package/dist/parquetjs/schema/shred.d.ts +2 -2
  152. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  153. package/dist/parquetjs/schema/shred.js +301 -204
  154. package/dist/parquetjs/schema/types.d.ts +1 -1
  155. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  156. package/dist/parquetjs/schema/types.js +329 -314
  157. package/dist/parquetjs/utils/file-utils.js +24 -23
  158. package/dist/parquetjs/utils/read-utils.d.ts +1 -1
  159. package/dist/parquetjs/utils/read-utils.d.ts.map +1 -1
  160. package/dist/parquetjs/utils/read-utils.js +76 -69
  161. package/dist/polyfills/buffer/buffer-polyfill.browser.js +8 -3
  162. package/dist/polyfills/buffer/buffer-polyfill.node.js +13 -8
  163. package/dist/polyfills/buffer/buffer.js +1747 -1367
  164. package/dist/polyfills/buffer/index.d.ts +3 -3
  165. package/dist/polyfills/buffer/index.d.ts.map +1 -1
  166. package/dist/polyfills/buffer/index.js +5 -1
  167. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
  168. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
  169. package/dist/polyfills/buffer/install-buffer-polyfill.js +2 -1
  170. package/dist/polyfills/util.js +4 -1
  171. package/dist/workers/parquet-worker.js +3 -1
  172. package/package.json +21 -19
  173. package/src/index.ts +1 -1
  174. package/src/lib/parsers/parse-parquet-to-columns.ts +1 -1
  175. package/src/lib/parsers/parse-parquet.ts +1 -1
  176. package/src/parquetjs/encoder/parquet-encoder.ts +2 -2
  177. package/src/parquetjs/parser/decoders.ts +2 -2
  178. package/src/parquetjs/parser/parquet-reader.ts +3 -2
  179. package/src/parquetjs/schema/schema.ts +1 -1
  180. package/src/parquetjs/schema/types.ts +1 -0
  181. package/src/parquetjs/utils/read-utils.ts +2 -2
  182. package/src/polyfills/buffer/buffer.ts +0 -3
  183. package/dist/index.js.map +0 -1
  184. package/dist/lib/arrow/convert-columns-to-row-group.js.map +0 -1
  185. package/dist/lib/arrow/convert-row-group-to-columns.js.map +0 -1
  186. package/dist/lib/arrow/convert-schema-from-parquet.js.map +0 -1
  187. package/dist/lib/arrow/convert-schema-to-parquet.js.map +0 -1
  188. package/dist/lib/constants.js.map +0 -1
  189. package/dist/lib/parsers/get-parquet-schema.js.map +0 -1
  190. package/dist/lib/parsers/parse-geoparquet.js.map +0 -1
  191. package/dist/lib/parsers/parse-parquet-to-columns.js.map +0 -1
  192. package/dist/lib/parsers/parse-parquet.js.map +0 -1
  193. package/dist/lib/wasm/encode-parquet-wasm.js.map +0 -1
  194. package/dist/lib/wasm/load-wasm.js.map +0 -1
  195. package/dist/lib/wasm/parse-parquet-wasm.js.map +0 -1
  196. package/dist/lib/wip/convert-schema-deep.java.disabled +0 -910
  197. package/dist/lib/wip/convert-schema-deep.rs.disabled +0 -976
  198. package/dist/parquet-loader.js.map +0 -1
  199. package/dist/parquet-wasm-loader.js.map +0 -1
  200. package/dist/parquet-wasm-writer.js.map +0 -1
  201. package/dist/parquet-writer.js.map +0 -1
  202. package/dist/parquetjs/LICENSE +0 -20
  203. package/dist/parquetjs/codecs/declare.js.map +0 -1
  204. package/dist/parquetjs/codecs/dictionary.js.map +0 -1
  205. package/dist/parquetjs/codecs/index.js.map +0 -1
  206. package/dist/parquetjs/codecs/plain.js.map +0 -1
  207. package/dist/parquetjs/codecs/rle.js.map +0 -1
  208. package/dist/parquetjs/compression.js.map +0 -1
  209. package/dist/parquetjs/encoder/parquet-encoder.js.map +0 -1
  210. package/dist/parquetjs/modules.d.ts +0 -21
  211. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  212. package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  213. package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  214. package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  215. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  216. package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  217. package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  218. package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  219. package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  220. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  221. package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
  222. package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  223. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  224. package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  225. package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  226. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  227. package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  228. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  229. package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
  230. package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  231. package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  232. package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
  233. package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  234. package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
  235. package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  236. package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  237. package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
  238. package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  239. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  240. package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  241. package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  242. package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
  243. package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  244. package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  245. package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  246. package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  247. package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
  248. package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  249. package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  250. package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  251. package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
  252. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  253. package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  254. package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
  255. package/dist/parquetjs/parser/decoders.js.map +0 -1
  256. package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
  257. package/dist/parquetjs/schema/declare.js.map +0 -1
  258. package/dist/parquetjs/schema/schema.js.map +0 -1
  259. package/dist/parquetjs/schema/shred.js.map +0 -1
  260. package/dist/parquetjs/schema/types.js.map +0 -1
  261. package/dist/parquetjs/utils/file-utils.js.map +0 -1
  262. package/dist/parquetjs/utils/read-utils.js.map +0 -1
  263. package/dist/polyfills/buffer/buffer-polyfill.browser.js.map +0 -1
  264. package/dist/polyfills/buffer/buffer-polyfill.node.js.map +0 -1
  265. package/dist/polyfills/buffer/buffer.js.map +0 -1
  266. package/dist/polyfills/buffer/index.js.map +0 -1
  267. package/dist/polyfills/buffer/install-buffer-polyfill.js.map +0 -1
  268. package/dist/polyfills/util.js.map +0 -1
  269. package/dist/workers/parquet-worker.js.map +0 -1
package/dist/parquetjs/encoder/parquet-encoder.js
@@ -5,318 +5,463 @@ import { ColumnChunk, ColumnMetaData, CompressionCodec, ConvertedType, DataPageH
 import { osopen, oswrite, osclose } from "../utils/file-utils.js";
 import { getBitWidth, serializeThrift } from "../utils/read-utils.js";
 import Int64 from 'node-int64';
+/**
+ * Parquet File Magic String
+ */
 const PARQUET_MAGIC = 'PAR1';
+/**
+ * Parquet File Format Version
+ */
 const PARQUET_VERSION = 1;
+/**
+ * Default Page and Row Group sizes
+ */
 const PARQUET_DEFAULT_PAGE_SIZE = 8192;
 const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
+/**
+ * Repetition and Definition Level Encoding
+ */
 const PARQUET_RDLVL_TYPE = 'INT32';
 const PARQUET_RDLVL_ENCODING = 'RLE';
+/**
+ * Write a parquet file to an output stream. The ParquetEncoder will perform
+ * buffering/batching for performance, so close() must be called after all rows
+ * are written.
+ */
+// eslint-disable-next-line @typescript-eslint/no-unused-vars
 export class ParquetEncoder {
-  static async openFile(schema, path, opts) {
-    const outputStream = await osopen(path, opts);
-    return ParquetEncoder.openStream(schema, outputStream, opts);
-  }
-  static async openStream(schema, outputStream) {
-    let opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
-    const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
-    return new ParquetEncoder(schema, envelopeWriter, opts);
-  }
-  constructor(schema, envelopeWriter, opts) {
-    this.schema = void 0;
-    this.envelopeWriter = void 0;
-    this.rowBuffer = void 0;
-    this.rowGroupSize = void 0;
-    this.closed = void 0;
-    this.userMetadata = void 0;
-    this.schema = schema;
-    this.envelopeWriter = envelopeWriter;
-    this.rowBuffer = {};
-    this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
-    this.closed = false;
-    this.userMetadata = {};
-    this.writeHeader();
-  }
-  async writeHeader() {
-    try {
-      await this.envelopeWriter.writeHeader();
-    } catch (err) {
-      await this.envelopeWriter.close();
-      throw err;
+  /**
+   * Convenience method to create a new buffered parquet writer that writes to
+   * the specified file
+   */
+  static async openFile(schema, path, opts) {
+    const outputStream = await osopen(path, opts);
+    return ParquetEncoder.openStream(schema, outputStream, opts);
   }
-  }
-  async appendRow(row) {
-    if (this.closed) {
-      throw new Error('writer was closed');
+  /**
+   * Convenience method to create a new buffered parquet writer that writes to
+   * the specified stream
+   */
+  static async openStream(schema, outputStream, opts = {}) {
+    const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
+    return new ParquetEncoder(schema, envelopeWriter, opts);
   }
-    Shred.shredRecord(this.schema, row, this.rowBuffer);
-    if (this.rowBuffer.rowCount >= this.rowGroupSize) {
-      this.rowBuffer = {};
+  schema;
+  envelopeWriter;
+  rowBuffer;
+  rowGroupSize;
+  closed;
+  userMetadata;
+  /**
+   * Create a new buffered parquet writer for a given envelope writer
+   */
+  constructor(schema, envelopeWriter, opts) {
+    this.schema = schema;
+    this.envelopeWriter = envelopeWriter;
+    // @ts-ignore Row buffer typings...
+    this.rowBuffer = {};
+    this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
+    this.closed = false;
+    this.userMetadata = {};
+    // eslint-disable-next-line @typescript-eslint/no-floating-promises
+    this.writeHeader();
   }
-  }
-  async close(callback) {
-    if (this.closed) {
-      throw new Error('writer was closed');
+  async writeHeader() {
+    // TODO - better not mess with promises in the constructor
+    try {
+      await this.envelopeWriter.writeHeader();
+    }
+    catch (err) {
+      await this.envelopeWriter.close();
+      throw err;
+    }
   }
-    this.closed = true;
-    if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
-      this.rowBuffer = {};
+  /**
+   * Append a single row to the parquet file. Rows are buffered in memory until
+   * rowGroupSize rows are in the buffer or close() is called
+   */
+  async appendRow(row) {
+    if (this.closed) {
+      throw new Error('writer was closed');
+    }
+    Shred.shredRecord(this.schema, row, this.rowBuffer);
+    if (this.rowBuffer.rowCount >= this.rowGroupSize) {
+      // @ts-ignore
+      this.rowBuffer = {};
+    }
   }
-    await this.envelopeWriter.writeFooter(this.userMetadata);
-    await this.envelopeWriter.close();
-    if (callback) {
-      callback();
+  /**
+   * Finish writing the parquet file and commit the footer to disk. This method
+   * MUST be called after you are finished adding rows. You must not call this
+   * method twice on the same object or add any rows after the close() method has
+   * been called
+   */
+  async close(callback) {
+    if (this.closed) {
+      throw new Error('writer was closed');
+    }
+    this.closed = true;
+    if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
+      // @ts-ignore
+      this.rowBuffer = {};
+    }
+    await this.envelopeWriter.writeFooter(this.userMetadata);
+    await this.envelopeWriter.close();
+    // this.envelopeWriter = null;
+    if (callback) {
+      callback();
+    }
+  }
+  /**
+   * Add key<>value metadata to the file
+   */
+  setMetadata(key, value) {
+    // TODO: value to be any, obj -> JSON
+    this.userMetadata[String(key)] = String(value);
+  }
+  /**
+   * Set the parquet row group size. This values controls the maximum number
+   * of rows that are buffered in memory at any given time as well as the number
+   * of rows that are co-located on disk. A higher value is generally better for
+   * read-time I/O performance at the tradeoff of write-time memory usage.
+   */
+  setRowGroupSize(cnt) {
+    this.rowGroupSize = cnt;
+  }
+  /**
+   * Set the parquet data page size. The data page size controls the maximum
+   * number of column values that are written to disk as a consecutive array
+   */
+  setPageSize(cnt) {
+    this.envelopeWriter.setPageSize(cnt);
   }
-  }
-  setMetadata(key, value) {
-    this.userMetadata[String(key)] = String(value);
-  }
-  setRowGroupSize(cnt) {
-    this.rowGroupSize = cnt;
-  }
-  setPageSize(cnt) {
-    this.envelopeWriter.setPageSize(cnt);
-  }
 }
+/**
+ * Create a parquet file from a schema and a number of row groups. This class
+ * performs direct, unbuffered writes to the underlying output stream and is
+ * intendend for advanced and internal users; the writeXXX methods must be
+ * called in the correct order to produce a valid file.
+ */
 export class ParquetEnvelopeWriter {
-  static async openStream(schema, outputStream, opts) {
-    const writeFn = oswrite.bind(undefined, outputStream);
-    const closeFn = osclose.bind(undefined, outputStream);
-    return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
-  }
-  constructor(schema, writeFn, closeFn, fileOffset, opts) {
-    this.schema = void 0;
-    this.write = void 0;
-    this.close = void 0;
-    this.offset = void 0;
-    this.rowCount = void 0;
-    this.rowGroups = void 0;
-    this.pageSize = void 0;
-    this.useDataPageV2 = void 0;
-    this.schema = schema;
-    this.write = writeFn;
-    this.close = closeFn;
-    this.offset = fileOffset;
-    this.rowCount = 0;
-    this.rowGroups = [];
-    this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
-    this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
-  }
-  writeSection(buf) {
-    this.offset += buf.length;
-    return this.write(buf);
-  }
-  writeHeader() {
-    return this.writeSection(Buffer.from(PARQUET_MAGIC));
-  }
-  async writeRowGroup(records) {
-    const rgroup = await encodeRowGroup(this.schema, records, {
-      baseOffset: this.offset,
-      pageSize: this.pageSize,
-      useDataPageV2: this.useDataPageV2
-    });
-    this.rowCount += records.rowCount;
-    this.rowGroups.push(rgroup.metadata);
-    return await this.writeSection(rgroup.body);
+  /**
+   * Create a new parquet envelope writer that writes to the specified stream
+   */
+  static async openStream(schema, outputStream, opts) {
+    const writeFn = oswrite.bind(undefined, outputStream);
+    const closeFn = osclose.bind(undefined, outputStream);
+    return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
+  }
+  schema;
+  write;
+  close;
+  offset;
+  rowCount;
+  rowGroups;
+  pageSize;
+  useDataPageV2;
+  constructor(schema, writeFn, closeFn, fileOffset, opts) {
+    this.schema = schema;
+    this.write = writeFn;
+    this.close = closeFn;
+    this.offset = fileOffset;
+    this.rowCount = 0;
+    this.rowGroups = [];
+    this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
+    this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
+  }
+  writeSection(buf) {
+    this.offset += buf.length;
+    return this.write(buf);
+  }
+  /**
+   * Encode the parquet file header
+   */
+  writeHeader() {
+    return this.writeSection(Buffer.from(PARQUET_MAGIC));
+  }
+  /**
+   * Encode a parquet row group. The records object should be created using the
+   * shredRecord method
+   */
+  async writeRowGroup(records) {
+    const rgroup = await encodeRowGroup(this.schema, records, {
+      baseOffset: this.offset,
+      pageSize: this.pageSize,
+      useDataPageV2: this.useDataPageV2
+    });
+    this.rowCount += records.rowCount;
+    this.rowGroups.push(rgroup.metadata);
+    return await this.writeSection(rgroup.body);
+  }
+  /**
+   * Write the parquet file footer
+   */
+  writeFooter(userMetadata) {
+    if (!userMetadata) {
+      // tslint:disable-next-line:no-parameter-reassignment
+      userMetadata = {};
+    }
+    return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
+  }
+  /**
+   * Set the parquet data page size. The data page size controls the maximum
+   * number of column values that are written to disk as a consecutive array
+   */
+  setPageSize(cnt) {
+    this.pageSize = cnt;
+  }
+}
+/**
+ * Create a parquet transform stream
+export class ParquetTransformer<T> extends stream.Transform {
+  public writer: ParquetEncoder<T>;
+
+  constructor(schema: ParquetSchema, opts: ParquetEncoderOptions = {}) {
+    super({objectMode: true});
+
+    const writeProxy = (function (t: ParquetTransformer<any>) {
+      return async function (b: any): Promise<void> {
+        t.push(b);
+      };
+    })(this);
+
+    this.writer = new ParquetEncoder(
+      schema,
+      new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),
+      opts
+    );
   }
-  writeFooter(userMetadata) {
-    if (!userMetadata) {
-      userMetadata = {};
+
+  // tslint:disable-next-line:function-name
+  _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {
+    if (row) {
+      return this.writer.appendRow(row).then(callback);
     }
-    return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
+    callback();
+    return Promise.resolve();
   }
-  setPageSize(cnt) {
-    this.pageSize = cnt;
+
+  // tslint:disable-next-line:function-name
+  async _flush(callback: (val?: any) => void) {
+    await this.writer.close(callback);
   }
 }
+ */
+/**
+ * Encode a consecutive array of data using one of the parquet encodings
+ */
 function encodeValues(type, encoding, values, opts) {
-  if (!(encoding in PARQUET_CODECS)) {
-    throw new Error(`invalid encoding: ${encoding}`);
-  }
-  return PARQUET_CODECS[encoding].encodeValues(type, values, opts);
+  if (!(encoding in PARQUET_CODECS)) {
+    throw new Error(`invalid encoding: ${encoding}`);
+  }
+  return PARQUET_CODECS[encoding].encodeValues(type, values, opts);
 }
+/**
+ * Encode a parquet data page
+ */
 async function encodeDataPage(column, data) {
-  let rLevelsBuf = Buffer.alloc(0);
-  if (column.rLevelMax > 0) {
-    rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
-      bitWidth: getBitWidth(column.rLevelMax)
+  /* encode repetition and definition levels */
+  let rLevelsBuf = Buffer.alloc(0);
+  if (column.rLevelMax > 0) {
+    rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+      bitWidth: getBitWidth(column.rLevelMax)
+      // disableEnvelope: false
+    });
+  }
+  let dLevelsBuf = Buffer.alloc(0);
+  if (column.dLevelMax > 0) {
+    dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+      bitWidth: getBitWidth(column.dLevelMax)
+      // disableEnvelope: false
+    });
+  }
+  /* encode values */
+  const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+    typeLength: column.typeLength,
+    bitWidth: column.typeLength
   });
-  }
-  let dLevelsBuf = Buffer.alloc(0);
-  if (column.dLevelMax > 0) {
-    dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
-      bitWidth: getBitWidth(column.dLevelMax)
+  const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
+  // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
+  const compressedBuf = await Compression.deflate(column.compression, dataBuf);
+  /* build page header */
+  const header = new PageHeader({
+    type: PageType.DATA_PAGE,
+    data_page_header: new DataPageHeader({
+      num_values: data.count,
+      encoding: Encoding[column.encoding],
+      definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],
+      repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]
+    }),
+    uncompressed_page_size: dataBuf.length,
+    compressed_page_size: compressedBuf.length
   });
-  }
-  const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
-    typeLength: column.typeLength,
-    bitWidth: column.typeLength
-  });
-  const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
-  const compressedBuf = await Compression.deflate(column.compression, dataBuf);
-  const header = new PageHeader({
-    type: PageType.DATA_PAGE,
-    data_page_header: new DataPageHeader({
-      num_values: data.count,
-      encoding: Encoding[column.encoding],
-      definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING],
-      repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING]
-    }),
-    uncompressed_page_size: dataBuf.length,
-    compressed_page_size: compressedBuf.length
-  });
-  const headerBuf = serializeThrift(header);
-  const page = Buffer.concat([headerBuf, compressedBuf]);
-  return {
-    header,
-    headerSize: headerBuf.length,
-    page
-  };
+  /* concat page header, repetition and definition levels and values */
+  const headerBuf = serializeThrift(header);
+  const page = Buffer.concat([headerBuf, compressedBuf]);
+  return { header, headerSize: headerBuf.length, page };
 }
+/**
+ * Encode a parquet data page (v2)
+ */
 async function encodeDataPageV2(column, data, rowCount) {
-  const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
-    typeLength: column.typeLength,
-    bitWidth: column.typeLength
-  });
-  const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
-  let rLevelsBuf = Buffer.alloc(0);
-  if (column.rLevelMax > 0) {
-    rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
-      bitWidth: getBitWidth(column.rLevelMax),
-      disableEnvelope: true
+  /* encode values */
+  const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
+    typeLength: column.typeLength,
+    bitWidth: column.typeLength
   });
-  }
-  let dLevelsBuf = Buffer.alloc(0);
-  if (column.dLevelMax > 0) {
-    dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
-      bitWidth: getBitWidth(column.dLevelMax),
-      disableEnvelope: true
+  // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
+  const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
+  /* encode repetition and definition levels */
+  let rLevelsBuf = Buffer.alloc(0);
+  if (column.rLevelMax > 0) {
+    rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
+      bitWidth: getBitWidth(column.rLevelMax),
+      disableEnvelope: true
+    });
+  }
+  let dLevelsBuf = Buffer.alloc(0);
+  if (column.dLevelMax > 0) {
+    dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
+      bitWidth: getBitWidth(column.dLevelMax),
+      disableEnvelope: true
+    });
+  }
+  /* build page header */
+  const header = new PageHeader({
+    type: PageType.DATA_PAGE_V2,
+    data_page_header_v2: new DataPageHeaderV2({
+      num_values: data.count,
+      num_nulls: data.count - data.values.length,
+      num_rows: rowCount,
+      encoding: Encoding[column.encoding],
+      definition_levels_byte_length: dLevelsBuf.length,
+      repetition_levels_byte_length: rLevelsBuf.length,
+      is_compressed: column.compression !== 'UNCOMPRESSED'
+    }),
+    uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
+    compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
   });
-  }
-  const header = new PageHeader({
-    type: PageType.DATA_PAGE_V2,
-    data_page_header_v2: new DataPageHeaderV2({
-      num_values: data.count,
-      num_nulls: data.count - data.values.length,
-      num_rows: rowCount,
-      encoding: Encoding[column.encoding],
-      definition_levels_byte_length: dLevelsBuf.length,
-      repetition_levels_byte_length: rLevelsBuf.length,
-      is_compressed: column.compression !== 'UNCOMPRESSED'
-    }),
-    uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
-    compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
-  });
-  const headerBuf = serializeThrift(header);
-  const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
-  return {
-    header,
-    headerSize: headerBuf.length,
-    page
-  };
+  /* concat page header, repetition and definition levels and values */
+  const headerBuf = serializeThrift(header);
+  const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
+  return { header, headerSize: headerBuf.length, page };
 }
+/**
+ * Encode an array of values into a parquet column chunk
+ */
 async function encodeColumnChunk(column, buffer, offset, opts) {
-  const data = buffer.columnData[column.path.join()];
-  const baseOffset = (opts.baseOffset || 0) + offset;
-  let pageBuf;
-  let total_uncompressed_size = 0;
-  let total_compressed_size = 0;
-  {
-    const result = opts.useDataPageV2 ? await encodeDataPageV2(column, data, buffer.rowCount) : await encodeDataPage(column, data);
-    pageBuf = result.page;
-    total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
-    total_compressed_size += result.header.compressed_page_size + result.headerSize;
-  }
-  const metadata = new ColumnMetaData({
-    path_in_schema: column.path,
-    num_values: data.count,
-    data_page_offset: baseOffset,
-    encodings: [],
-    total_uncompressed_size,
-    total_compressed_size,
-    type: Type[column.primitiveType],
-    codec: CompressionCodec[column.compression]
-  });
-  metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);
-  metadata.encodings.push(Encoding[column.encoding]);
-  const metadataOffset = baseOffset + pageBuf.length;
-  const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
-  return {
-    body,
-    metadata,
-    metadataOffset
-  };
+  const data = buffer.columnData[column.path.join()];
+  const baseOffset = (opts.baseOffset || 0) + offset;
+  /* encode data page(s) */
+  // const pages: Buffer[] = [];
+  let pageBuf;
+  // tslint:disable-next-line:variable-name
+  let total_uncompressed_size = 0;
+  // tslint:disable-next-line:variable-name
+  let total_compressed_size = 0;
+  {
+    const result = opts.useDataPageV2
+      ? await encodeDataPageV2(column, data, buffer.rowCount)
+      : await encodeDataPage(column, data);
+    // pages.push(result.page);
+    pageBuf = result.page;
+    total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
+    total_compressed_size += result.header.compressed_page_size + result.headerSize;
+  }
+  // const pagesBuf = Buffer.concat(pages);
+  // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;
+  /* prepare metadata header */
+  const metadata = new ColumnMetaData({
+    path_in_schema: column.path,
+    num_values: data.count,
+    data_page_offset: baseOffset,
+    encodings: [],
+    total_uncompressed_size, // : pagesBuf.length,
+    total_compressed_size,
+    type: Type[column.primitiveType],
+    codec: CompressionCodec[column.compression]
+  });
+  /* list encodings */
+  metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);
+  metadata.encodings.push(Encoding[column.encoding]);
+  /* concat metadata header and data pages */
+  const metadataOffset = baseOffset + pageBuf.length;
+  const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
+  return { body, metadata, metadataOffset };
 }
+/**
+ * Encode a list of column values into a parquet row group
+ */
 async function encodeRowGroup(schema, data, opts) {
-  const metadata = new RowGroup({
-    num_rows: data.rowCount,
-    columns: [],
-    total_byte_size: 0
-  });
-  let body = Buffer.alloc(0);
-  for (const field of schema.fieldList) {
-    if (field.isNested) {
-      continue;
-    }
-    const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
-    const cchunk = new ColumnChunk({
-      file_offset: cchunkData.metadataOffset,
-      meta_data: cchunkData.metadata
+  const metadata = new RowGroup({
+    num_rows: data.rowCount,
+    columns: [],
+    total_byte_size: 0
   });
-    metadata.columns.push(cchunk);
-    metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
-    body = Buffer.concat([body, cchunkData.body]);
-  }
-  return {
-    body,
-    metadata
-  };
+  let body = Buffer.alloc(0);
+  for (const field of schema.fieldList) {
+    if (field.isNested) {
+      continue; // eslint-disable-line no-continue
+    }
+    const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
+    const cchunk = new ColumnChunk({
+      file_offset: cchunkData.metadataOffset,
+      meta_data: cchunkData.metadata
+    });
+    metadata.columns.push(cchunk);
+    metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
+    body = Buffer.concat([body, cchunkData.body]);
+  }
+  return { body, metadata };
 }
+/**
+ * Encode a parquet file metadata footer
+ */
 function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
-  const metadata = new FileMetaData({
-    version: PARQUET_VERSION,
-    created_by: 'parquets',
-    num_rows: rowCount,
-    row_groups: rowGroups,
-    schema: [],
-    key_value_metadata: []
-  });
-  for (const key in userMetadata) {
-    var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
-    const kv = new KeyValue({
-      key,
-      value: userMetadata[key]
+  const metadata = new FileMetaData({
+    version: PARQUET_VERSION,
+    created_by: 'parquets',
+    num_rows: rowCount,
+    row_groups: rowGroups,
+    schema: [],
+    key_value_metadata: []
   });
-    (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
-  }
-  {
-    const schemaRoot = new SchemaElement({
-      name: 'root',
-      num_children: Object.keys(schema.fields).length
-    });
-    metadata.schema.push(schemaRoot);
-  }
-  for (const field of schema.fieldList) {
-    const relt = FieldRepetitionType[field.repetitionType];
-    const schemaElem = new SchemaElement({
-      name: field.name,
-      repetition_type: relt
-    });
-    if (field.isNested) {
-      schemaElem.num_children = field.fieldCount;
-    } else {
-      schemaElem.type = Type[field.primitiveType];
+  for (const key in userMetadata) {
+    const kv = new KeyValue({
+      key,
+      value: userMetadata[key]
+    });
+    metadata.key_value_metadata?.push?.(kv);
   }
-    if (field.originalType) {
-      schemaElem.converted_type = ConvertedType[field.originalType];
+  {
+    const schemaRoot = new SchemaElement({
+      name: 'root',
+      num_children: Object.keys(schema.fields).length
+    });
+    metadata.schema.push(schemaRoot);
   }
-    schemaElem.type_length = field.typeLength;
-    metadata.schema.push(schemaElem);
-  }
-  const metadataEncoded = serializeThrift(metadata);
-  const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
-  metadataEncoded.copy(footerEncoded);
-  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
-  footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
-  return footerEncoded;
+  for (const field of schema.fieldList) {
+    const relt = FieldRepetitionType[field.repetitionType];
+    const schemaElem = new SchemaElement({
+      name: field.name,
+      repetition_type: relt
+    });
+    if (field.isNested) {
+      schemaElem.num_children = field.fieldCount;
+    }
+    else {
+      schemaElem.type = Type[field.primitiveType];
+    }
+    if (field.originalType) {
+      schemaElem.converted_type = ConvertedType[field.originalType];
+    }
+    schemaElem.type_length = field.typeLength;
+    metadata.schema.push(schemaElem);
+  }
+  const metadataEncoded = serializeThrift(metadata);
+  const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
+  metadataEncoded.copy(footerEncoded);
+  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
+  footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
+  return footerEncoded;
 }
-//# sourceMappingURL=parquet-encoder.js.map
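
Note: the following is a minimal usage sketch, not part of the published diff. It shows how the buffered writer API recorded above (openStream, appendRow, setMetadata, close) is typically driven. The deep import paths, the two-column schema and the output file name are illustrative assumptions inferred from the dist layout listed in this diff; they are internal modules rather than a documented public entry point of @loaders.gl/parquet.

    // Hedged usage sketch for the ParquetEncoder surfaced in this diff.
    // The deep import paths below are assumptions based on the dist layout
    // above, not a documented public API.
    import {createWriteStream} from 'fs';
    import {ParquetSchema} from '@loaders.gl/parquet/dist/parquetjs/schema/schema.js';
    import {ParquetEncoder} from '@loaders.gl/parquet/dist/parquetjs/encoder/parquet-encoder.js';

    async function writeExample() {
      // Hypothetical two-column schema; field descriptors follow parquetjs conventions.
      const schema = new ParquetSchema({
        name: {type: 'UTF8'},
        count: {type: 'INT64'}
      });

      // openStream() writes the 'PAR1' magic immediately (see writeHeader above).
      const outputStream = createWriteStream('example.parquet');
      const writer = await ParquetEncoder.openStream(schema, outputStream, {rowGroupSize: 4096});

      writer.setMetadata('creator', 'usage-sketch'); // stored in key_value_metadata by encodeFooter
      await writer.appendRow({name: 'alice', count: 1}); // rows are shredded and buffered per rowGroupSize
      await writer.appendRow({name: 'bob', count: 2});

      // close() finalizes the file with the footer produced by encodeFooter:
      // thrift-encoded FileMetaData, a 4-byte little-endian metadata length,
      // then the trailing 'PAR1' magic.
      await writer.close();
    }

    await writeExample();

As the docstrings in the diff stress, close() must be called exactly once after the last appendRow(): the footer is the only place the schema and row-group metadata are committed, so a file without it is not a readable parquet file.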