@loaders.gl/parquet 3.1.0-alpha.5 → 3.1.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. package/dist/bundle.d.ts +1 -0
  2. package/dist/bundle.d.ts.map +1 -0
  3. package/dist/bundle.js +5 -0
  4. package/dist/constants.d.ts +1 -0
  5. package/dist/constants.d.ts.map +1 -0
  6. package/dist/constants.js +18 -0
  7. package/dist/dist.min.js +27 -13
  8. package/dist/dist.min.js.map +7 -1
  9. package/dist/es5/index.js +6 -26
  10. package/dist/es5/index.js.map +1 -1
  11. package/dist/es5/parquet-loader.js +1 -1
  12. package/dist/es5/parquet-loader.js.map +1 -1
  13. package/dist/es5/parquet-writer.js +1 -1
  14. package/dist/es5/parquet-writer.js.map +1 -1
  15. package/dist/es5/parquetjs/codecs/rle.js +1 -1
  16. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  17. package/dist/es5/parquetjs/compression.js +1 -12
  18. package/dist/es5/parquetjs/compression.js.map +1 -1
  19. package/dist/es5/parquetjs/parser/decoders.js +1 -1
  20. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  21. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -13
  22. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  23. package/dist/es5/parquetjs/parser/parquet-reader.js +0 -13
  24. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  25. package/dist/es5/parquetjs/utils/file-utils.js +0 -53
  26. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  27. package/dist/esm/index.js +2 -3
  28. package/dist/esm/index.js.map +1 -1
  29. package/dist/esm/parquet-loader.js +1 -1
  30. package/dist/esm/parquet-loader.js.map +1 -1
  31. package/dist/esm/parquet-writer.js +1 -1
  32. package/dist/esm/parquet-writer.js.map +1 -1
  33. package/dist/esm/parquetjs/codecs/rle.js +1 -1
  34. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  35. package/dist/esm/parquetjs/compression.js +1 -10
  36. package/dist/esm/parquetjs/compression.js.map +1 -1
  37. package/dist/esm/parquetjs/parser/decoders.js +1 -1
  38. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  39. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -9
  40. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  41. package/dist/esm/parquetjs/parser/parquet-reader.js +0 -13
  42. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  43. package/dist/esm/parquetjs/utils/file-utils.js +0 -45
  44. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  45. package/dist/index.d.ts +3 -3
  46. package/dist/index.d.ts.map +1 -0
  47. package/dist/index.js +30 -0
  48. package/dist/lib/convert-schema.d.ts +1 -0
  49. package/dist/lib/convert-schema.d.ts.map +1 -0
  50. package/dist/lib/convert-schema.js +70 -0
  51. package/dist/lib/parse-parquet.d.ts +1 -0
  52. package/dist/lib/parse-parquet.d.ts.map +1 -0
  53. package/dist/lib/parse-parquet.js +28 -0
  54. package/dist/lib/read-array-buffer.d.ts +1 -0
  55. package/dist/lib/read-array-buffer.d.ts.map +1 -0
  56. package/dist/lib/read-array-buffer.js +29 -0
  57. package/dist/parquet-loader.d.ts +1 -0
  58. package/dist/parquet-loader.d.ts.map +1 -0
  59. package/dist/parquet-loader.js +27 -0
  60. package/dist/parquet-worker.js +27 -13
  61. package/dist/parquet-worker.js.map +7 -1
  62. package/dist/parquet-writer.d.ts +1 -0
  63. package/dist/parquet-writer.d.ts.map +1 -0
  64. package/dist/parquet-writer.js +21 -0
  65. package/dist/parquetjs/codecs/declare.d.ts +1 -0
  66. package/dist/parquetjs/codecs/declare.d.ts.map +1 -0
  67. package/dist/parquetjs/codecs/declare.js +2 -0
  68. package/dist/parquetjs/codecs/dictionary.d.ts +1 -0
  69. package/dist/parquetjs/codecs/dictionary.d.ts.map +1 -0
  70. package/dist/parquetjs/codecs/dictionary.js +14 -0
  71. package/dist/parquetjs/codecs/index.d.ts +1 -0
  72. package/dist/parquetjs/codecs/index.d.ts.map +1 -0
  73. package/dist/parquetjs/codecs/index.js +51 -0
  74. package/dist/parquetjs/codecs/plain.d.ts +1 -0
  75. package/dist/parquetjs/codecs/plain.d.ts.map +1 -0
  76. package/dist/parquetjs/codecs/plain.js +211 -0
  77. package/dist/parquetjs/codecs/rle.d.ts +1 -0
  78. package/dist/parquetjs/codecs/rle.d.ts.map +1 -0
  79. package/dist/parquetjs/codecs/rle.js +145 -0
  80. package/dist/parquetjs/compression.d.ts +1 -0
  81. package/dist/parquetjs/compression.d.ts.map +1 -0
  82. package/dist/parquetjs/compression.js +168 -0
  83. package/dist/parquetjs/encoder/writer.d.ts +1 -0
  84. package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
  85. package/dist/parquetjs/encoder/writer.js +478 -0
  86. package/dist/parquetjs/file.d.ts +1 -0
  87. package/dist/parquetjs/file.d.ts.map +1 -0
  88. package/dist/parquetjs/file.js +99 -0
  89. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts +1 -0
  90. package/dist/parquetjs/parquet-thrift/BoundaryOrder.d.ts.map +1 -0
  91. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +15 -0
  92. package/dist/parquetjs/parquet-thrift/BsonType.d.ts +1 -0
  93. package/dist/parquetjs/parquet-thrift/BsonType.d.ts.map +1 -0
  94. package/dist/parquetjs/parquet-thrift/BsonType.js +58 -0
  95. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts +1 -0
  96. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -0
  97. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +207 -0
  98. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts +1 -0
  99. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -0
  100. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +213 -0
  101. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts +1 -0
  102. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -0
  103. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +398 -0
  104. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts +1 -0
  105. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -0
  106. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +104 -0
  107. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts +1 -0
  108. package/dist/parquetjs/parquet-thrift/CompressionCodec.d.ts.map +1 -0
  109. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +20 -0
  110. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts +1 -0
  111. package/dist/parquetjs/parquet-thrift/ConvertedType.d.ts.map +1 -0
  112. package/dist/parquetjs/parquet-thrift/ConvertedType.js +34 -0
  113. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts +1 -0
  114. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -0
  115. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +166 -0
  116. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts +1 -0
  117. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -0
  118. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +226 -0
  119. package/dist/parquetjs/parquet-thrift/DateType.d.ts +1 -0
  120. package/dist/parquetjs/parquet-thrift/DateType.d.ts.map +1 -0
  121. package/dist/parquetjs/parquet-thrift/DateType.js +58 -0
  122. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts +1 -0
  123. package/dist/parquetjs/parquet-thrift/DecimalType.d.ts.map +1 -0
  124. package/dist/parquetjs/parquet-thrift/DecimalType.js +105 -0
  125. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts +1 -0
  126. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -0
  127. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +122 -0
  128. package/dist/parquetjs/parquet-thrift/Encoding.d.ts +1 -0
  129. package/dist/parquetjs/parquet-thrift/Encoding.d.ts.map +1 -0
  130. package/dist/parquetjs/parquet-thrift/Encoding.js +20 -0
  131. package/dist/parquetjs/parquet-thrift/EnumType.d.ts +1 -0
  132. package/dist/parquetjs/parquet-thrift/EnumType.d.ts.map +1 -0
  133. package/dist/parquetjs/parquet-thrift/EnumType.js +58 -0
  134. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts +1 -0
  135. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.d.ts.map +1 -0
  136. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +15 -0
  137. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts +1 -0
  138. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -0
  139. package/dist/parquetjs/parquet-thrift/FileMetaData.js +256 -0
  140. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts +1 -0
  141. package/dist/parquetjs/parquet-thrift/IndexPageHeader.d.ts.map +1 -0
  142. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +58 -0
  143. package/dist/parquetjs/parquet-thrift/IntType.d.ts +1 -0
  144. package/dist/parquetjs/parquet-thrift/IntType.d.ts.map +1 -0
  145. package/dist/parquetjs/parquet-thrift/IntType.js +105 -0
  146. package/dist/parquetjs/parquet-thrift/JsonType.d.ts +1 -0
  147. package/dist/parquetjs/parquet-thrift/JsonType.d.ts.map +1 -0
  148. package/dist/parquetjs/parquet-thrift/JsonType.js +58 -0
  149. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts +1 -0
  150. package/dist/parquetjs/parquet-thrift/KeyValue.d.ts.map +1 -0
  151. package/dist/parquetjs/parquet-thrift/KeyValue.js +102 -0
  152. package/dist/parquetjs/parquet-thrift/ListType.d.ts +1 -0
  153. package/dist/parquetjs/parquet-thrift/ListType.d.ts.map +1 -0
  154. package/dist/parquetjs/parquet-thrift/ListType.js +58 -0
  155. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts +1 -0
  156. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -0
  157. package/dist/parquetjs/parquet-thrift/LogicalType.js +380 -0
  158. package/dist/parquetjs/parquet-thrift/MapType.d.ts +1 -0
  159. package/dist/parquetjs/parquet-thrift/MapType.d.ts.map +1 -0
  160. package/dist/parquetjs/parquet-thrift/MapType.js +58 -0
  161. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts +1 -0
  162. package/dist/parquetjs/parquet-thrift/MicroSeconds.d.ts.map +1 -0
  163. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +58 -0
  164. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts +1 -0
  165. package/dist/parquetjs/parquet-thrift/MilliSeconds.d.ts.map +1 -0
  166. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +58 -0
  167. package/dist/parquetjs/parquet-thrift/NullType.d.ts +1 -0
  168. package/dist/parquetjs/parquet-thrift/NullType.d.ts.map +1 -0
  169. package/dist/parquetjs/parquet-thrift/NullType.js +58 -0
  170. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts +1 -0
  171. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -0
  172. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +97 -0
  173. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts +1 -0
  174. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -0
  175. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +127 -0
  176. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts +1 -0
  177. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -0
  178. package/dist/parquetjs/parquet-thrift/PageHeader.js +216 -0
  179. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts +1 -0
  180. package/dist/parquetjs/parquet-thrift/PageLocation.d.ts.map +1 -0
  181. package/dist/parquetjs/parquet-thrift/PageLocation.js +141 -0
  182. package/dist/parquetjs/parquet-thrift/PageType.d.ts +1 -0
  183. package/dist/parquetjs/parquet-thrift/PageType.d.ts.map +1 -0
  184. package/dist/parquetjs/parquet-thrift/PageType.js +16 -0
  185. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts +1 -0
  186. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -0
  187. package/dist/parquetjs/parquet-thrift/RowGroup.js +182 -0
  188. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts +1 -0
  189. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -0
  190. package/dist/parquetjs/parquet-thrift/SchemaElement.js +239 -0
  191. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts +1 -0
  192. package/dist/parquetjs/parquet-thrift/SortingColumn.d.ts.map +1 -0
  193. package/dist/parquetjs/parquet-thrift/SortingColumn.js +127 -0
  194. package/dist/parquetjs/parquet-thrift/Statistics.d.ts +1 -0
  195. package/dist/parquetjs/parquet-thrift/Statistics.d.ts.map +1 -0
  196. package/dist/parquetjs/parquet-thrift/Statistics.js +176 -0
  197. package/dist/parquetjs/parquet-thrift/StringType.d.ts +1 -0
  198. package/dist/parquetjs/parquet-thrift/StringType.d.ts.map +1 -0
  199. package/dist/parquetjs/parquet-thrift/StringType.js +58 -0
  200. package/dist/parquetjs/parquet-thrift/TimeType.d.ts +1 -0
  201. package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -0
  202. package/dist/parquetjs/parquet-thrift/TimeType.js +106 -0
  203. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts +1 -0
  204. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -0
  205. package/dist/parquetjs/parquet-thrift/TimeUnit.js +127 -0
  206. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts +1 -0
  207. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -0
  208. package/dist/parquetjs/parquet-thrift/TimestampType.js +106 -0
  209. package/dist/parquetjs/parquet-thrift/Type.d.ts +1 -0
  210. package/dist/parquetjs/parquet-thrift/Type.d.ts.map +1 -0
  211. package/dist/parquetjs/parquet-thrift/Type.js +20 -0
  212. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts +1 -0
  213. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.d.ts.map +1 -0
  214. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +58 -0
  215. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts +1 -0
  216. package/dist/parquetjs/parquet-thrift/UUIDType.d.ts.map +1 -0
  217. package/dist/parquetjs/parquet-thrift/UUIDType.js +58 -0
  218. package/dist/parquetjs/parquet-thrift/index.d.ts +1 -0
  219. package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -0
  220. package/dist/parquetjs/parquet-thrift/index.js +61 -0
  221. package/dist/parquetjs/parser/decoders.d.ts +1 -0
  222. package/dist/parquetjs/parser/decoders.d.ts.map +1 -0
  223. package/dist/parquetjs/parser/decoders.js +318 -0
  224. package/dist/parquetjs/parser/parquet-cursor.d.ts +1 -0
  225. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
  226. package/dist/parquetjs/parser/parquet-cursor.js +74 -0
  227. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +1 -1
  228. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
  229. package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
  230. package/dist/parquetjs/parser/parquet-reader.d.ts +1 -5
  231. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -0
  232. package/dist/parquetjs/parser/parquet-reader.js +134 -0
  233. package/dist/parquetjs/schema/declare.d.ts +1 -0
  234. package/dist/parquetjs/schema/declare.d.ts.map +1 -0
  235. package/dist/parquetjs/schema/declare.js +10 -0
  236. package/dist/parquetjs/schema/schema.d.ts +1 -0
  237. package/dist/parquetjs/schema/schema.d.ts.map +1 -0
  238. package/dist/parquetjs/schema/schema.js +162 -0
  239. package/dist/parquetjs/schema/shred.d.ts +1 -0
  240. package/dist/parquetjs/schema/shred.d.ts.map +1 -0
  241. package/dist/parquetjs/schema/shred.js +225 -0
  242. package/dist/parquetjs/schema/types.d.ts +1 -0
  243. package/dist/parquetjs/schema/types.d.ts.map +1 -0
  244. package/dist/parquetjs/schema/types.js +418 -0
  245. package/dist/parquetjs/utils/buffer-utils.d.ts +1 -0
  246. package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
  247. package/dist/parquetjs/utils/buffer-utils.js +22 -0
  248. package/dist/parquetjs/utils/file-utils.d.ts +1 -4
  249. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -0
  250. package/dist/parquetjs/utils/file-utils.js +46 -0
  251. package/dist/parquetjs/utils/read-utils.d.ts +1 -0
  252. package/dist/parquetjs/utils/read-utils.d.ts.map +1 -0
  253. package/dist/parquetjs/utils/read-utils.js +109 -0
  254. package/dist/workers/parquet-worker.d.ts +1 -0
  255. package/dist/workers/parquet-worker.d.ts.map +1 -0
  256. package/dist/workers/parquet-worker.js +5 -0
  257. package/package.json +7 -8
  258. package/src/index.ts +3 -3
  259. package/src/parquetjs/codecs/rle.ts +1 -1
  260. package/src/parquetjs/compression.ts +10 -10
  261. package/src/parquetjs/parser/decoders.ts +1 -1
  262. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -11
  263. package/src/parquetjs/parser/parquet-reader.ts +0 -16
  264. package/src/parquetjs/utils/file-utils.ts +0 -49
@@ -0,0 +1,168 @@
1
+ "use strict";
2
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
3
+ /* eslint-disable camelcase */
4
+ // Forked from https://github.com/ironSource/parquetjs under MIT license
5
+ var __importDefault = (this && this.__importDefault) || function (mod) {
6
+ return (mod && mod.__esModule) ? mod : { "default": mod };
7
+ };
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.inflate = exports.decompress = exports.deflate = exports.preloadCompressions = exports.PARQUET_COMPRESSION_METHODS = void 0;
10
+ const compression_1 = require("@loaders.gl/compression");
11
+ const buffer_utils_1 = require("./utils/buffer-utils");
12
+ // TODO switch to worker compression to avoid bundling...
13
+ // import brotli from 'brotli'; - brotli has problems with decompress in browsers
14
+ // import brotliDecompress from 'brotli/decompress';
15
+ const lz4js_1 = __importDefault(require("lz4js"));
16
+ const lzo_1 = __importDefault(require("lzo"));
17
+ // import {ZstdCodec} from 'zstd-codec';
18
+ // Inject large dependencies through Compression constructor options
19
+ const modules = {
20
+ // brotli has problems with decompress in browsers
21
+ // brotli: {
22
+ // decompress: brotliDecompress,
23
+ // compress: () => {
24
+ // throw new Error('brotli compress');
25
+ // }
26
+ // },
27
+ lz4js: lz4js_1.default,
28
+ lzo: lzo_1.default
29
+ // 'zstd-codec': ZstdCodec
30
+ };
31
+ // See https://github.com/apache/parquet-format/blob/master/Compression.md
32
+ exports.PARQUET_COMPRESSION_METHODS = {
33
+ UNCOMPRESSED: new compression_1.NoCompression(),
34
+ GZIP: new compression_1.GZipCompression(),
35
+ SNAPPY: new compression_1.SnappyCompression(),
36
+ BROTLI: new compression_1.BrotliCompression({ modules }),
37
+ // TODO: Understand difference between LZ4 and LZ4_RAW
38
+ LZ4: new compression_1.LZ4Compression({ modules }),
39
+ LZ4_RAW: new compression_1.LZ4Compression({ modules }),
40
+ LZO: new compression_1.LZOCompression({ modules }),
41
+ ZSTD: new compression_1.ZstdCompression({ modules })
42
+ };
43
+ /**
44
+ * Register compressions that have big external libraries
45
+ * @param options.modules External library dependencies
46
+ */
47
+ async function preloadCompressions(options) {
48
+ const compressions = Object.values(exports.PARQUET_COMPRESSION_METHODS);
49
+ return await Promise.all(compressions.map((compression) => compression.preload()));
50
+ }
51
+ exports.preloadCompressions = preloadCompressions;
52
+ /**
53
+ * Deflate a value using compression method `method`
54
+ */
55
+ async function deflate(method, value) {
56
+ const compression = exports.PARQUET_COMPRESSION_METHODS[method];
57
+ if (!compression) {
58
+ throw new Error(`parquet: invalid compression method: ${method}`);
59
+ }
60
+ const inputArrayBuffer = (0, buffer_utils_1.toArrayBuffer)(value);
61
+ const compressedArrayBuffer = await compression.compress(inputArrayBuffer);
62
+ return (0, buffer_utils_1.toBuffer)(compressedArrayBuffer);
63
+ }
64
+ exports.deflate = deflate;
65
+ /**
66
+ * Inflate a value using compression method `method`
67
+ */
68
+ async function decompress(method, value, size) {
69
+ const compression = exports.PARQUET_COMPRESSION_METHODS[method];
70
+ if (!compression) {
71
+ throw new Error(`parquet: invalid compression method: ${method}`);
72
+ }
73
+ const inputArrayBuffer = (0, buffer_utils_1.toArrayBuffer)(value);
74
+ const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);
75
+ return (0, buffer_utils_1.toBuffer)(compressedArrayBuffer);
76
+ }
77
+ exports.decompress = decompress;
78
+ /*
79
+ * Inflate a value using compression method `method`
80
+ */
81
+ function inflate(method, value, size) {
82
+ if (!(method in exports.PARQUET_COMPRESSION_METHODS)) {
83
+ throw new Error(`invalid compression method: ${method}`);
84
+ }
85
+ // @ts-ignore
86
+ return exports.PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
87
+ }
88
+ exports.inflate = inflate;
89
+ /*
90
+ function deflate_identity(value: Buffer): Buffer {
91
+ return value;
92
+ }
93
+
94
+ function deflate_gzip(value: Buffer): Buffer {
95
+ return zlib.gzipSync(value);
96
+ }
97
+
98
+ function deflate_snappy(value: Buffer): Buffer {
99
+ return snappyjs.compress(value);
100
+ }
101
+
102
+ function deflate_lzo(value: Buffer): Buffer {
103
+ lzo = lzo || Util.load('lzo');
104
+ return lzo.compress(value);
105
+ }
106
+
107
+ function deflate_brotli(value: Buffer): Buffer {
108
+ brotli = brotli || Util.load('brotli');
109
+ const result = brotli.compress(value, {
110
+ mode: 0,
111
+ quality: 8,
112
+ lgwin: 22
113
+ });
114
+ return result ? Buffer.from(result) : Buffer.alloc(0);
115
+ }
116
+
117
+ function deflate_lz4(value: Buffer): Buffer {
118
+ lz4js = lz4js || Util.load('lz4js');
119
+ try {
120
+ // let result = Buffer.alloc(lz4js.encodeBound(value.length));
121
+ // const compressedSize = lz4.encodeBlock(value, result);
122
+ // // remove unnecessary bytes
123
+ // result = result.slice(0, compressedSize);
124
+ // return result;
125
+ return Buffer.from(lz4js.compress(value));
126
+ } catch (err) {
127
+ throw err;
128
+ }
129
+ }
130
+ function inflate_identity(value: Buffer): Buffer {
131
+ return value;
132
+ }
133
+
134
+ function inflate_gzip(value: Buffer): Buffer {
135
+ return zlib.gunzipSync(value);
136
+ }
137
+
138
+ function inflate_snappy(value: Buffer): Buffer {
139
+ return snappyjs.uncompress(value);
140
+ }
141
+
142
+ function inflate_lzo(value: Buffer, size: number): Buffer {
143
+ lzo = lzo || Util.load('lzo');
144
+ return lzo.decompress(value, size);
145
+ }
146
+
147
+ function inflate_lz4(value: Buffer, size: number): Buffer {
148
+ lz4js = lz4js || Util.load('lz4js');
149
+ try {
150
+ // let result = Buffer.alloc(size);
151
+ // const uncompressedSize = lz4js.decodeBlock(value, result);
152
+ // // remove unnecessary bytes
153
+ // result = result.slice(0, uncompressedSize);
154
+ // return result;
155
+ return Buffer.from(lz4js.decompress(value, size));
156
+ } catch (err) {
157
+ throw err;
158
+ }
159
+ }
160
+
161
+ function inflate_brotli(value: Buffer): Buffer {
162
+ brotli = brotli || Util.load('brotli');
163
+ if (!value.length) {
164
+ return Buffer.alloc(0);
165
+ }
166
+ return Buffer.from(brotli.decompress(value));
167
+ }
168
+ */
@@ -120,3 +120,4 @@ export declare class ParquetTransformer<T> extends Transform {
120
120
  _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void>;
121
121
  _flush(callback: (val?: any) => void): Promise<void>;
122
122
  }
123
+ //# sourceMappingURL=writer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/encoder/writer.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,SAAS,EAAE,QAAQ,EAAC,MAAM,QAAQ,CAAC;AAG3C,OAAO,EACL,aAAa,EAKd,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAaL,QAAQ,EAGT,MAAM,mBAAmB,CAAC;AA2B3B,MAAM,WAAW,oBAAoB;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AAEH,qBAAa,aAAa,CAAC,CAAC;IAC1B;;;OAGG;WACU,QAAQ,CAAC,CAAC,EACrB,MAAM,EAAE,aAAa,EACrB,IAAI,EAAE,MAAM,EACZ,IAAI,CAAC,EAAE,oBAAoB,GAC1B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAK5B;;;OAGG;WACU,UAAU,CAAC,CAAC,EACvB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,QAAQ,EACtB,IAAI,CAAC,EAAE,oBAAoB,GAC1B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAWrB,MAAM,EAAE,aAAa,CAAC;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,SAAS,EAAE,aAAa,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE5C;;OAEG;gBAED,MAAM,EAAE,aAAa,EACrB,cAAc,EAAE,qBAAqB,EACrC,IAAI,EAAE,oBAAoB;IActB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAUlC;;;OAGG;IACG,SAAS,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAWzC;;;;;OAKG;IACG,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBjD;;OAEG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAK7C;;;;;OAKG;IACH,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIlC;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B;AAED;;;;;GAKG;AACH,qBAAa,qBAAqB;IAChC;;OAEG;WACU,UAAU,CACrB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,QAAQ,EACtB,IAAI,EAAE,oBAAoB,GACzB,OAAO,CAAC,qBAAqB,CAAC;IAM1B,MAAM,EAAE,aAAa,CAAC;IACtB,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtC,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,QAAQ,EAAE,M
AAM,CAAC;IACjB,aAAa,EAAE,OAAO,CAAC;gBAG5B,MAAM,EAAE,aAAa,EACrB,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,EACvC,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,EAC5B,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,oBAAoB;IAY5B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKxC;;OAEG;IACH,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;;OAGG;IACG,aAAa,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1D;;OAEG;IACH,WAAW,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAWhE;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B;AAED;;GAEG;AACH,qBAAa,kBAAkB,CAAC,CAAC,CAAE,SAAQ,SAAS;IAC3C,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;gBAEpB,MAAM,EAAE,aAAa,EAAE,IAAI,GAAE,oBAAyB;IAiBlE,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAS9E,MAAM,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,KAAK,IAAI;CAG3C"}
@@ -0,0 +1,478 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
5
+ }) : (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ o[k2] = m[k];
8
+ }));
9
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
10
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
11
+ }) : function(o, v) {
12
+ o["default"] = v;
13
+ });
14
+ var __importStar = (this && this.__importStar) || function (mod) {
15
+ if (mod && mod.__esModule) return mod;
16
+ var result = {};
17
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
18
+ __setModuleDefault(result, mod);
19
+ return result;
20
+ };
21
+ var __importDefault = (this && this.__importDefault) || function (mod) {
22
+ return (mod && mod.__esModule) ? mod : { "default": mod };
23
+ };
24
+ Object.defineProperty(exports, "__esModule", { value: true });
25
+ exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
26
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
27
+ /* eslint-disable camelcase */
28
+ const stream_1 = require("stream");
29
+ const codecs_1 = require("../codecs");
30
+ const Compression = __importStar(require("../compression"));
31
+ const Shred = __importStar(require("../schema/shred"));
32
+ const parquet_thrift_1 = require("../parquet-thrift");
33
+ const file_utils_1 = require("../utils/file-utils");
34
+ const read_utils_1 = require("../utils/read-utils");
35
+ const node_int64_1 = __importDefault(require("node-int64"));
36
+ /**
37
+ * Parquet File Magic String
38
+ */
39
+ const PARQUET_MAGIC = 'PAR1';
40
+ /**
41
+ * Parquet File Format Version
42
+ */
43
+ const PARQUET_VERSION = 1;
44
+ /**
45
+ * Default Page and Row Group sizes
46
+ */
47
+ const PARQUET_DEFAULT_PAGE_SIZE = 8192;
48
+ const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
49
+ /**
50
+ * Repetition and Definition Level Encoding
51
+ */
52
+ const PARQUET_RDLVL_TYPE = 'INT32';
53
+ const PARQUET_RDLVL_ENCODING = 'RLE';
54
+ /**
55
+ * Write a parquet file to an output stream. The ParquetWriter will perform
56
+ * buffering/batching for performance, so close() must be called after all rows
57
+ * are written.
58
+ */
59
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
60
+ class ParquetWriter {
61
+ /**
62
+ * Create a new buffered parquet writer for a given envelope writer
63
+ */
64
+ constructor(schema, envelopeWriter, opts) {
65
+ this.schema = schema;
66
+ this.envelopeWriter = envelopeWriter;
67
+ // @ts-ignore Row buffer typings...
68
+ this.rowBuffer = {};
69
+ this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;
70
+ this.closed = false;
71
+ this.userMetadata = {};
72
+ // eslint-disable-next-line @typescript-eslint/no-floating-promises
73
+ this.writeHeader();
74
+ }
75
+ /**
76
+ * Convenience method to create a new buffered parquet writer that writes to
77
+ * the specified file
78
+ */
79
+ static async openFile(schema, path, opts) {
80
+ const outputStream = await (0, file_utils_1.osopen)(path, opts);
81
+ return ParquetWriter.openStream(schema, outputStream, opts);
82
+ }
83
+ /**
84
+ * Convenience method to create a new buffered parquet writer that writes to
85
+ * the specified stream
86
+ */
87
+ static async openStream(schema, outputStream, opts) {
88
+ if (!opts) {
89
+ // tslint:disable-next-line:no-parameter-reassignment
90
+ opts = {};
91
+ }
92
+ const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
93
+ return new ParquetWriter(schema, envelopeWriter, opts);
94
+ }
95
+ async writeHeader() {
96
+ // TODO - better not mess with promises in the constructor
97
+ try {
98
+ await this.envelopeWriter.writeHeader();
99
+ }
100
+ catch (err) {
101
+ await this.envelopeWriter.close();
102
+ throw err;
103
+ }
104
+ }
105
+ /**
106
+ * Append a single row to the parquet file. Rows are buffered in memory until
107
+ * rowGroupSize rows are in the buffer or close() is called
108
+ */
109
+ async appendRow(row) {
110
+ if (this.closed) {
111
+ throw new Error('writer was closed');
112
+ }
113
+ Shred.shredRecord(this.schema, row, this.rowBuffer);
114
+ if (this.rowBuffer.rowCount >= this.rowGroupSize) {
115
+ // @ts-ignore
116
+ this.rowBuffer = {};
117
+ }
118
+ }
119
+ /**
120
+ * Finish writing the parquet file and commit the footer to disk. This method
121
+ * MUST be called after you are finished adding rows. You must not call this
122
+ * method twice on the same object or add any rows after the close() method has
123
+ * been called
124
+ */
125
+ async close(callback) {
126
+ if (this.closed) {
127
+ throw new Error('writer was closed');
128
+ }
129
+ this.closed = true;
130
+ if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
131
+ // @ts-ignore
132
+ this.rowBuffer = {};
133
+ }
134
+ await this.envelopeWriter.writeFooter(this.userMetadata);
135
+ await this.envelopeWriter.close();
136
+ // this.envelopeWriter = null;
137
+ if (callback) {
138
+ callback();
139
+ }
140
+ }
141
+ /**
142
+ * Add key<>value metadata to the file
143
+ */
144
+ setMetadata(key, value) {
145
+ // TODO: value to be any, obj -> JSON
146
+ this.userMetadata[String(key)] = String(value);
147
+ }
148
+ /**
149
+ * Set the parquet row group size. This values controls the maximum number
150
+ * of rows that are buffered in memory at any given time as well as the number
151
+ * of rows that are co-located on disk. A higher value is generally better for
152
+ * read-time I/O performance at the tradeoff of write-time memory usage.
153
+ */
154
+ setRowGroupSize(cnt) {
155
+ this.rowGroupSize = cnt;
156
+ }
157
+ /**
158
+ * Set the parquet data page size. The data page size controls the maximum
159
+ * number of column values that are written to disk as a consecutive array
160
+ */
161
+ setPageSize(cnt) {
162
+ this.envelopeWriter.setPageSize(cnt);
163
+ }
164
+ }
165
+ exports.ParquetWriter = ParquetWriter;
166
/**
 * Create a parquet file from a schema and a number of row groups. This class
 * performs direct, unbuffered writes to the underlying output stream and is
 * intended for advanced and internal users; the writeXXX methods must be
 * called in the correct order to produce a valid file.
 */
class ParquetEnvelopeWriter {
    /**
     * Create a new parquet envelope writer that writes to the specified stream
     */
    static async openStream(schema, outputStream, opts) {
        const writeFn = file_utils_1.oswrite.bind(undefined, outputStream);
        const closeFn = file_utils_1.osclose.bind(undefined, outputStream);
        return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
    }
    constructor(schema, writeFn, closeFn, fileOffset, opts) {
        this.schema = schema;
        this.write = writeFn;
        this.close = closeFn;
        this.offset = fileOffset;
        this.rowCount = 0;
        this.rowGroups = [];
        this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
        this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
    }
    /** Write a raw buffer to the output and advance the running file offset. */
    writeSection(buf) {
        this.offset += buf.length;
        return this.write(buf);
    }
    /** Encode the parquet file header (magic bytes). */
    writeHeader() {
        return this.writeSection(Buffer.from(PARQUET_MAGIC));
    }
    /**
     * Encode a parquet row group. The records object should be created using the
     * shredRecord method
     */
    async writeRowGroup(records) {
        const rgroup = await encodeRowGroup(this.schema, records, {
            baseOffset: this.offset,
            pageSize: this.pageSize,
            useDataPageV2: this.useDataPageV2
        });
        this.rowCount += records.rowCount;
        this.rowGroups.push(rgroup.metadata);
        return await this.writeSection(rgroup.body);
    }
    /** Write the parquet file footer (schema, row groups, user metadata). */
    writeFooter(userMetadata) {
        const footer = encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata || {});
        return this.writeSection(footer);
    }
    /**
     * Set the parquet data page size — the maximum number of column values
     * written to disk as one consecutive array.
     */
    setPageSize(cnt) {
        this.pageSize = cnt;
    }
}
exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
234
/**
 * Create a parquet transform stream: accepts row objects in object mode and
 * emits encoded parquet buffers produced by an internal ParquetWriter.
 */
class ParquetTransformer extends stream_1.Transform {
    constructor(schema, opts = {}) {
        super({ objectMode: true });
        // Proxy that pushes every buffer the writer emits into this stream.
        const writeProxy = (function (t) {
            return async function (b) {
                t.push(b);
            };
        })(this);
        this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => { }, 0, opts), opts);
    }
    // tslint:disable-next-line:function-name
    _transform(row, encoding, callback) {
        if (row) {
            // FIX: report append failures through the stream callback. The old
            // `.then(callback)` never invoked the callback on rejection, which
            // stalled the stream and left an unhandled promise rejection.
            return this.writer.appendRow(row).then(() => callback(), (err) => callback(err));
        }
        callback();
        return Promise.resolve();
    }
    // tslint:disable-next-line:function-name
    async _flush(callback) {
        // FIX: surface footer/close failures to the stream instead of leaving
        // the flush callback uncalled. close() invokes callback() itself on
        // success, so the catch path only runs when callback was not called.
        try {
            await this.writer.close(callback);
        }
        catch (err) {
            callback(err);
        }
    }
}
exports.ParquetTransformer = ParquetTransformer;
261
/**
 * Encode a consecutive array of data using one of the parquet encodings
 */
function encodeValues(type, encoding, values, opts) {
    const codec = codecs_1.PARQUET_CODECS[encoding];
    if (!codec) {
        throw new Error(`invalid encoding: ${encoding}`);
    }
    return codec.encodeValues(type, values, opts);
}
270
/**
 * Encode a parquet data page (v1): RLE-enveloped repetition/definition levels
 * followed by the encoded values, compressed as one unit.
 */
async function encodeDataPage(column, data) {
    // Levels are only materialized when the column can actually repeat/be null;
    // a max level of 0 produces an empty buffer.
    const encodeLevels = (levels, maxLevel) => maxLevel > 0
        ? encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, levels, {
            bitWidth: (0, read_utils_1.getBitWidth)(maxLevel)
            // disableEnvelope: false
        })
        : Buffer.alloc(0);
    const rLevelsBuf = encodeLevels(data.rlevels, column.rLevelMax);
    const dLevelsBuf = encodeLevels(data.dlevels, column.dLevelMax);
    // Encode the column values themselves.
    const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
        typeLength: column.typeLength,
        bitWidth: column.typeLength
    });
    // V1 pages compress levels and values together.
    const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
    // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
    const compressedBuf = await Compression.deflate(column.compression, dataBuf);
    // Build the thrift page header.
    const header = new parquet_thrift_1.PageHeader({
        type: parquet_thrift_1.PageType.DATA_PAGE,
        data_page_header: new parquet_thrift_1.DataPageHeader({
            num_values: data.count,
            encoding: parquet_thrift_1.Encoding[column.encoding],
            definition_level_encoding: parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
            repetition_level_encoding: parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]
        }),
        uncompressed_page_size: dataBuf.length,
        compressed_page_size: compressedBuf.length
    });
    // Page = serialized header followed by the compressed body.
    const headerBuf = (0, read_utils_1.serializeThrift)(header);
    return {
        header,
        headerSize: headerBuf.length,
        page: Buffer.concat([headerBuf, compressedBuf])
    };
}
314
/**
 * Encode a parquet data page (v2): levels are stored un-enveloped and outside
 * the compressed region; only the values buffer is compressed.
 */
async function encodeDataPageV2(column, data, rowCount) {
    // Encode and compress the column values first.
    const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
        typeLength: column.typeLength,
        bitWidth: column.typeLength
    });
    // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
    const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
    // Levels are only materialized when the column can actually repeat/be null;
    // V2 stores them without the length envelope.
    const encodeLevels = (levels, maxLevel) => maxLevel > 0
        ? encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, levels, {
            bitWidth: (0, read_utils_1.getBitWidth)(maxLevel),
            disableEnvelope: true
        })
        : Buffer.alloc(0);
    const rLevelsBuf = encodeLevels(data.rlevels, column.rLevelMax);
    const dLevelsBuf = encodeLevels(data.dlevels, column.dLevelMax);
    // Build the thrift page header.
    const header = new parquet_thrift_1.PageHeader({
        type: parquet_thrift_1.PageType.DATA_PAGE_V2,
        data_page_header_v2: new parquet_thrift_1.DataPageHeaderV2({
            num_values: data.count,
            num_nulls: data.count - data.values.length,
            num_rows: rowCount,
            encoding: parquet_thrift_1.Encoding[column.encoding],
            definition_levels_byte_length: dLevelsBuf.length,
            repetition_levels_byte_length: rLevelsBuf.length,
            is_compressed: column.compression !== 'UNCOMPRESSED'
        }),
        uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
        compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
    });
    // Page layout: header, raw levels, then the compressed values.
    const headerBuf = (0, read_utils_1.serializeThrift)(header);
    return {
        header,
        headerSize: headerBuf.length,
        page: Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf])
    };
}
360
/**
 * Encode an array of values into a parquet column chunk: one data page
 * followed by the serialized ColumnMetaData.
 */
async function encodeColumnChunk(column, buffer, offset, opts) {
    const data = buffer.columnData[column.path.join()];
    const baseOffset = (opts.baseOffset || 0) + offset;
    // Encode a single data page (v1 or v2 depending on options).
    const result = opts.useDataPageV2
        ? await encodeDataPageV2(column, data, buffer.rowCount)
        : await encodeDataPage(column, data);
    const pageBuf = result.page;
    // tslint:disable-next-line:variable-name
    const total_uncompressed_size = result.header.uncompressed_page_size + result.headerSize;
    // tslint:disable-next-line:variable-name
    const total_compressed_size = result.header.compressed_page_size + result.headerSize;
    // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;
    // Column chunk metadata header, including the encodings actually used
    // (level encoding plus the value encoding).
    const metadata = new parquet_thrift_1.ColumnMetaData({
        path_in_schema: column.path,
        num_values: data.count,
        data_page_offset: baseOffset,
        encodings: [
            parquet_thrift_1.Encoding[PARQUET_RDLVL_ENCODING],
            parquet_thrift_1.Encoding[column.encoding]
        ],
        total_uncompressed_size,
        total_compressed_size,
        type: parquet_thrift_1.Type[column.primitiveType],
        codec: parquet_thrift_1.CompressionCodec[column.compression]
    });
    // Chunk body = data page followed by the serialized metadata.
    const metadataOffset = baseOffset + pageBuf.length;
    const body = Buffer.concat([pageBuf, (0, read_utils_1.serializeThrift)(metadata)]);
    return { body, metadata, metadataOffset };
}
403
/**
 * Encode a list of column values into a parquet row group: one column chunk
 * per non-nested field, concatenated, with the accompanying RowGroup metadata.
 */
async function encodeRowGroup(schema, data, opts) {
    const metadata = new parquet_thrift_1.RowGroup({
        num_rows: data.rowCount,
        columns: [],
        total_byte_size: 0
    });
    let body = Buffer.alloc(0);
    for (const field of schema.fieldList) {
        // Nested (group) nodes carry no data of their own.
        if (field.isNested) {
            continue; // eslint-disable-line no-continue
        }
        const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
        metadata.columns.push(new parquet_thrift_1.ColumnChunk({
            file_offset: cchunkData.metadataOffset,
            meta_data: cchunkData.metadata
        }));
        metadata.total_byte_size = new node_int64_1.default(Number(metadata.total_byte_size) + cchunkData.body.length);
        body = Buffer.concat([body, cchunkData.body]);
    }
    return { body, metadata };
}
428
/**
 * Encode a parquet file metadata footer: serialized FileMetaData, then a
 * 4-byte little-endian length, then the magic bytes.
 */
function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
    const metadata = new parquet_thrift_1.FileMetaData({
        version: PARQUET_VERSION,
        created_by: 'parquets',
        num_rows: rowCount,
        row_groups: rowGroups,
        schema: [],
        key_value_metadata: []
    });
    // Attach user-supplied key/value metadata entries.
    for (const key in userMetadata) {
        const kv = new parquet_thrift_1.KeyValue({ key, value: userMetadata[key] });
        metadata.key_value_metadata?.push?.(kv);
    }
    // Flattened schema: a synthetic root element followed by every field.
    metadata.schema.push(new parquet_thrift_1.SchemaElement({
        name: 'root',
        num_children: Object.keys(schema.fields).length
    }));
    for (const field of schema.fieldList) {
        const schemaElem = new parquet_thrift_1.SchemaElement({
            name: field.name,
            repetition_type: parquet_thrift_1.FieldRepetitionType[field.repetitionType]
        });
        if (field.isNested) {
            schemaElem.num_children = field.fieldCount;
        }
        else {
            schemaElem.type = parquet_thrift_1.Type[field.primitiveType];
        }
        if (field.originalType) {
            schemaElem.converted_type = parquet_thrift_1.ConvertedType[field.originalType];
        }
        schemaElem.type_length = field.typeLength;
        metadata.schema.push(schemaElem);
    }
    // Assemble the footer buffer: metadata | length (LE uint32) | magic.
    const metadataEncoded = (0, read_utils_1.serializeThrift)(metadata);
    const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
    metadataEncoded.copy(footerEncoded);
    footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
    footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
    return footerEncoded;
}
@@ -7,3 +7,4 @@ export declare function fclose(fd: any): Promise<unknown>;
7
7
  export declare function oswrite(os: any, buf: any): Promise<void>;
8
8
  export declare function osclose(os: any): Promise<void>;
9
9
  export declare function osopen(path: any, opts: any): Promise<unknown>;
10
+ //# sourceMappingURL=file.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"file.d.ts","sourceRoot":"","sources":["../../src/parquetjs/file.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,wBAAgB,KAAK,CAAC,QAAQ,KAAA,oBAU7B;AAED,wBAAgB,KAAK,CAAC,QAAQ,KAAA,qBAU7B;AAED,wBAAgB,KAAK,CAAC,EAAE,KAAA,EAAE,QAAQ,KAAA,EAAE,MAAM,KAAA,oBAYzC;AAED,wBAAgB,MAAM,CAAC,EAAE,KAAA,oBAUxB;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,EAAE,GAAG,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAU9C;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAUzC;AAED,wBAAgB,MAAM,CAAC,IAAI,KAAA,EAAE,IAAI,KAAA,oBAYhC"}