@loaders.gl/parquet 4.2.0-alpha.4 → 4.2.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/dist/index.cjs +385 -598
  2. package/dist/index.cjs.map +7 -0
  3. package/dist/index.d.ts +12 -12
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +6 -1
  6. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -2
  7. package/dist/lib/arrow/convert-row-group-to-columns.js +8 -6
  8. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +3 -3
  9. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
  10. package/dist/lib/arrow/convert-schema-from-parquet.js +71 -82
  11. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
  12. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
  13. package/dist/lib/arrow/convert-schema-to-parquet.js +65 -35
  14. package/dist/lib/constants.js +15 -2
  15. package/dist/lib/parsers/get-parquet-schema.d.ts +1 -1
  16. package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
  17. package/dist/lib/parsers/get-parquet-schema.js +9 -7
  18. package/dist/lib/parsers/parse-geoparquet.d.ts +1 -1
  19. package/dist/lib/parsers/parse-geoparquet.d.ts.map +1 -1
  20. package/dist/lib/parsers/parse-geoparquet.js +47 -45
  21. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +1 -1
  22. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  23. package/dist/lib/parsers/parse-parquet-to-columns.js +36 -25
  24. package/dist/lib/parsers/parse-parquet.d.ts +1 -1
  25. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
  26. package/dist/lib/parsers/parse-parquet.js +62 -50
  27. package/dist/lib/wasm/encode-parquet-wasm.d.ts +1 -1
  28. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -1
  29. package/dist/lib/wasm/encode-parquet-wasm.js +49 -12
  30. package/dist/lib/wasm/load-wasm.js +13 -10
  31. package/dist/lib/wasm/parse-parquet-wasm.d.ts +1 -1
  32. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
  33. package/dist/lib/wasm/parse-parquet-wasm.js +19 -15
  34. package/dist/parquet-loader.d.ts +1 -1
  35. package/dist/parquet-loader.d.ts.map +1 -1
  36. package/dist/parquet-loader.js +78 -65
  37. package/dist/parquet-wasm-loader.js +25 -28
  38. package/dist/parquet-wasm-writer.js +19 -23
  39. package/dist/parquet-writer.js +17 -13
  40. package/dist/parquetjs/codecs/declare.d.ts +1 -1
  41. package/dist/parquetjs/codecs/declare.d.ts.map +1 -1
  42. package/dist/parquetjs/codecs/declare.js +0 -1
  43. package/dist/parquetjs/codecs/dictionary.js +4 -8
  44. package/dist/parquetjs/codecs/index.d.ts +3 -3
  45. package/dist/parquetjs/codecs/index.d.ts.map +1 -1
  46. package/dist/parquetjs/codecs/index.js +20 -17
  47. package/dist/parquetjs/codecs/plain.d.ts +2 -2
  48. package/dist/parquetjs/codecs/plain.d.ts.map +1 -1
  49. package/dist/parquetjs/codecs/plain.js +166 -162
  50. package/dist/parquetjs/codecs/rle.d.ts +2 -2
  51. package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
  52. package/dist/parquetjs/codecs/rle.js +124 -105
  53. package/dist/parquetjs/compression.d.ts +1 -1
  54. package/dist/parquetjs/compression.d.ts.map +1 -1
  55. package/dist/parquetjs/compression.js +157 -43
  56. package/dist/parquetjs/encoder/parquet-encoder.d.ts +3 -3
  57. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
  58. package/dist/parquetjs/encoder/parquet-encoder.js +420 -275
  59. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +12 -7
  60. package/dist/parquetjs/parquet-thrift/BsonType.js +31 -27
  61. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts +1 -1
  62. package/dist/parquetjs/parquet-thrift/ColumnChunk.d.ts.map +1 -1
  63. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +182 -166
  64. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts +1 -1
  65. package/dist/parquetjs/parquet-thrift/ColumnIndex.d.ts.map +1 -1
  66. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +183 -166
  67. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts +6 -6
  68. package/dist/parquetjs/parquet-thrift/ColumnMetaData.d.ts.map +1 -1
  69. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +372 -333
  70. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts +1 -1
  71. package/dist/parquetjs/parquet-thrift/ColumnOrder.d.ts.map +1 -1
  72. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +74 -68
  73. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +17 -12
  74. package/dist/parquetjs/parquet-thrift/ConvertedType.js +31 -26
  75. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts +2 -2
  76. package/dist/parquetjs/parquet-thrift/DataPageHeader.d.ts.map +1 -1
  77. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +141 -124
  78. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts +2 -2
  79. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.d.ts.map +1 -1
  80. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +205 -181
  81. package/dist/parquetjs/parquet-thrift/DateType.js +31 -27
  82. package/dist/parquetjs/parquet-thrift/DecimalType.js +78 -69
  83. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts +1 -1
  84. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.d.ts.map +1 -1
  85. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +96 -86
  86. package/dist/parquetjs/parquet-thrift/Encoding.js +17 -12
  87. package/dist/parquetjs/parquet-thrift/EnumType.js +31 -27
  88. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +12 -7
  89. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts +4 -4
  90. package/dist/parquetjs/parquet-thrift/FileMetaData.d.ts.map +1 -1
  91. package/dist/parquetjs/parquet-thrift/FileMetaData.js +224 -205
  92. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +31 -27
  93. package/dist/parquetjs/parquet-thrift/IntType.js +78 -69
  94. package/dist/parquetjs/parquet-thrift/JsonType.js +31 -27
  95. package/dist/parquetjs/parquet-thrift/KeyValue.js +75 -67
  96. package/dist/parquetjs/parquet-thrift/ListType.js +31 -27
  97. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts +13 -13
  98. package/dist/parquetjs/parquet-thrift/LogicalType.d.ts.map +1 -1
  99. package/dist/parquetjs/parquet-thrift/LogicalType.js +338 -344
  100. package/dist/parquetjs/parquet-thrift/MapType.js +31 -27
  101. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +31 -27
  102. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +31 -27
  103. package/dist/parquetjs/parquet-thrift/NullType.js +31 -27
  104. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts +1 -1
  105. package/dist/parquetjs/parquet-thrift/OffsetIndex.d.ts.map +1 -1
  106. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +67 -60
  107. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts +2 -2
  108. package/dist/parquetjs/parquet-thrift/PageEncodingStats.d.ts.map +1 -1
  109. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +101 -88
  110. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts +5 -5
  111. package/dist/parquetjs/parquet-thrift/PageHeader.d.ts.map +1 -1
  112. package/dist/parquetjs/parquet-thrift/PageHeader.js +193 -175
  113. package/dist/parquetjs/parquet-thrift/PageLocation.js +111 -96
  114. package/dist/parquetjs/parquet-thrift/PageType.js +13 -8
  115. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts +2 -2
  116. package/dist/parquetjs/parquet-thrift/RowGroup.d.ts.map +1 -1
  117. package/dist/parquetjs/parquet-thrift/RowGroup.js +149 -133
  118. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts +4 -4
  119. package/dist/parquetjs/parquet-thrift/SchemaElement.d.ts.map +1 -1
  120. package/dist/parquetjs/parquet-thrift/SchemaElement.js +221 -205
  121. package/dist/parquetjs/parquet-thrift/SortingColumn.js +101 -88
  122. package/dist/parquetjs/parquet-thrift/Statistics.js +149 -137
  123. package/dist/parquetjs/parquet-thrift/StringType.js +31 -27
  124. package/dist/parquetjs/parquet-thrift/TimeType.d.ts +1 -1
  125. package/dist/parquetjs/parquet-thrift/TimeType.d.ts.map +1 -1
  126. package/dist/parquetjs/parquet-thrift/TimeType.js +78 -69
  127. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts +2 -2
  128. package/dist/parquetjs/parquet-thrift/TimeUnit.d.ts.map +1 -1
  129. package/dist/parquetjs/parquet-thrift/TimeUnit.js +96 -91
  130. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts +1 -1
  131. package/dist/parquetjs/parquet-thrift/TimestampType.d.ts.map +1 -1
  132. package/dist/parquetjs/parquet-thrift/TimestampType.js +78 -69
  133. package/dist/parquetjs/parquet-thrift/Type.js +17 -12
  134. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +31 -27
  135. package/dist/parquetjs/parquet-thrift/UUIDType.js +31 -27
  136. package/dist/parquetjs/parquet-thrift/index.d.ts +44 -44
  137. package/dist/parquetjs/parquet-thrift/index.d.ts.map +1 -1
  138. package/dist/parquetjs/parquet-thrift/index.js +6 -1
  139. package/dist/parquetjs/parser/decoders.d.ts +3 -3
  140. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  141. package/dist/parquetjs/parser/decoders.js +306 -242
  142. package/dist/parquetjs/parser/parquet-reader.d.ts +3 -3
  143. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  144. package/dist/parquetjs/parser/parquet-reader.js +183 -158
  145. package/dist/parquetjs/schema/declare.d.ts +1 -1
  146. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  147. package/dist/parquetjs/schema/declare.js +10 -9
  148. package/dist/parquetjs/schema/schema.d.ts +1 -1
  149. package/dist/parquetjs/schema/schema.d.ts.map +1 -1
  150. package/dist/parquetjs/schema/schema.js +142 -120
  151. package/dist/parquetjs/schema/shred.d.ts +2 -2
  152. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  153. package/dist/parquetjs/schema/shred.js +301 -204
  154. package/dist/parquetjs/schema/types.d.ts +1 -1
  155. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  156. package/dist/parquetjs/schema/types.js +329 -314
  157. package/dist/parquetjs/utils/file-utils.js +24 -23
  158. package/dist/parquetjs/utils/read-utils.d.ts +1 -1
  159. package/dist/parquetjs/utils/read-utils.d.ts.map +1 -1
  160. package/dist/parquetjs/utils/read-utils.js +76 -69
  161. package/dist/polyfills/buffer/buffer-polyfill.browser.js +8 -3
  162. package/dist/polyfills/buffer/buffer-polyfill.node.js +13 -8
  163. package/dist/polyfills/buffer/buffer.js +1747 -1367
  164. package/dist/polyfills/buffer/index.d.ts +3 -3
  165. package/dist/polyfills/buffer/index.d.ts.map +1 -1
  166. package/dist/polyfills/buffer/index.js +5 -1
  167. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
  168. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
  169. package/dist/polyfills/buffer/install-buffer-polyfill.js +2 -1
  170. package/dist/polyfills/util.js +4 -1
  171. package/dist/workers/parquet-worker.js +3 -1
  172. package/package.json +21 -19
  173. package/src/index.ts +1 -1
  174. package/src/lib/parsers/parse-parquet-to-columns.ts +1 -1
  175. package/src/lib/parsers/parse-parquet.ts +1 -1
  176. package/src/parquetjs/encoder/parquet-encoder.ts +2 -2
  177. package/src/parquetjs/parser/decoders.ts +2 -2
  178. package/src/parquetjs/parser/parquet-reader.ts +3 -2
  179. package/src/parquetjs/schema/schema.ts +1 -1
  180. package/src/parquetjs/schema/types.ts +1 -0
  181. package/src/parquetjs/utils/read-utils.ts +2 -2
  182. package/src/polyfills/buffer/buffer.ts +0 -3
  183. package/dist/index.js.map +0 -1
  184. package/dist/lib/arrow/convert-columns-to-row-group.js.map +0 -1
  185. package/dist/lib/arrow/convert-row-group-to-columns.js.map +0 -1
  186. package/dist/lib/arrow/convert-schema-from-parquet.js.map +0 -1
  187. package/dist/lib/arrow/convert-schema-to-parquet.js.map +0 -1
  188. package/dist/lib/constants.js.map +0 -1
  189. package/dist/lib/parsers/get-parquet-schema.js.map +0 -1
  190. package/dist/lib/parsers/parse-geoparquet.js.map +0 -1
  191. package/dist/lib/parsers/parse-parquet-to-columns.js.map +0 -1
  192. package/dist/lib/parsers/parse-parquet.js.map +0 -1
  193. package/dist/lib/wasm/encode-parquet-wasm.js.map +0 -1
  194. package/dist/lib/wasm/load-wasm.js.map +0 -1
  195. package/dist/lib/wasm/parse-parquet-wasm.js.map +0 -1
  196. package/dist/lib/wip/convert-schema-deep.java.disabled +0 -910
  197. package/dist/lib/wip/convert-schema-deep.rs.disabled +0 -976
  198. package/dist/parquet-loader.js.map +0 -1
  199. package/dist/parquet-wasm-loader.js.map +0 -1
  200. package/dist/parquet-wasm-writer.js.map +0 -1
  201. package/dist/parquet-writer.js.map +0 -1
  202. package/dist/parquetjs/LICENSE +0 -20
  203. package/dist/parquetjs/codecs/declare.js.map +0 -1
  204. package/dist/parquetjs/codecs/dictionary.js.map +0 -1
  205. package/dist/parquetjs/codecs/index.js.map +0 -1
  206. package/dist/parquetjs/codecs/plain.js.map +0 -1
  207. package/dist/parquetjs/codecs/rle.js.map +0 -1
  208. package/dist/parquetjs/compression.js.map +0 -1
  209. package/dist/parquetjs/encoder/parquet-encoder.js.map +0 -1
  210. package/dist/parquetjs/modules.d.ts +0 -21
  211. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +0 -1
  212. package/dist/parquetjs/parquet-thrift/BsonType.js.map +0 -1
  213. package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +0 -1
  214. package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +0 -1
  215. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +0 -1
  216. package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +0 -1
  217. package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +0 -1
  218. package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +0 -1
  219. package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +0 -1
  220. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +0 -1
  221. package/dist/parquetjs/parquet-thrift/DateType.js.map +0 -1
  222. package/dist/parquetjs/parquet-thrift/DecimalType.js.map +0 -1
  223. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +0 -1
  224. package/dist/parquetjs/parquet-thrift/Encoding.js.map +0 -1
  225. package/dist/parquetjs/parquet-thrift/EnumType.js.map +0 -1
  226. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +0 -1
  227. package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +0 -1
  228. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +0 -1
  229. package/dist/parquetjs/parquet-thrift/IntType.js.map +0 -1
  230. package/dist/parquetjs/parquet-thrift/JsonType.js.map +0 -1
  231. package/dist/parquetjs/parquet-thrift/KeyValue.js.map +0 -1
  232. package/dist/parquetjs/parquet-thrift/ListType.js.map +0 -1
  233. package/dist/parquetjs/parquet-thrift/LogicalType.js.map +0 -1
  234. package/dist/parquetjs/parquet-thrift/MapType.js.map +0 -1
  235. package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +0 -1
  236. package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +0 -1
  237. package/dist/parquetjs/parquet-thrift/NullType.js.map +0 -1
  238. package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +0 -1
  239. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +0 -1
  240. package/dist/parquetjs/parquet-thrift/PageHeader.js.map +0 -1
  241. package/dist/parquetjs/parquet-thrift/PageLocation.js.map +0 -1
  242. package/dist/parquetjs/parquet-thrift/PageType.js.map +0 -1
  243. package/dist/parquetjs/parquet-thrift/RowGroup.js.map +0 -1
  244. package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +0 -1
  245. package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +0 -1
  246. package/dist/parquetjs/parquet-thrift/Statistics.js.map +0 -1
  247. package/dist/parquetjs/parquet-thrift/StringType.js.map +0 -1
  248. package/dist/parquetjs/parquet-thrift/TimeType.js.map +0 -1
  249. package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +0 -1
  250. package/dist/parquetjs/parquet-thrift/TimestampType.js.map +0 -1
  251. package/dist/parquetjs/parquet-thrift/Type.js.map +0 -1
  252. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +0 -1
  253. package/dist/parquetjs/parquet-thrift/UUIDType.js.map +0 -1
  254. package/dist/parquetjs/parquet-thrift/index.js.map +0 -1
  255. package/dist/parquetjs/parser/decoders.js.map +0 -1
  256. package/dist/parquetjs/parser/parquet-reader.js.map +0 -1
  257. package/dist/parquetjs/schema/declare.js.map +0 -1
  258. package/dist/parquetjs/schema/schema.js.map +0 -1
  259. package/dist/parquetjs/schema/shred.js.map +0 -1
  260. package/dist/parquetjs/schema/types.js.map +0 -1
  261. package/dist/parquetjs/utils/file-utils.js.map +0 -1
  262. package/dist/parquetjs/utils/read-utils.js.map +0 -1
  263. package/dist/polyfills/buffer/buffer-polyfill.browser.js.map +0 -1
  264. package/dist/polyfills/buffer/buffer-polyfill.node.js.map +0 -1
  265. package/dist/polyfills/buffer/buffer.js.map +0 -1
  266. package/dist/polyfills/buffer/index.js.map +0 -1
  267. package/dist/polyfills/buffer/install-buffer-polyfill.js.map +0 -1
  268. package/dist/polyfills/util.js.map +0 -1
  269. package/dist/workers/parquet-worker.js.map +0 -1
@@ -1,228 +1,325 @@
1
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
1
2
  import { ParquetRowGroup } from "./declare.js";
2
3
  import * as Types from "./types.js";
3
4
  export { ParquetRowGroup };
4
5
  export function shredBuffer(schema) {
5
- const columnData = {};
6
- for (const field of schema.fieldList) {
7
- columnData[field.key] = {
8
- dlevels: [],
9
- rlevels: [],
10
- values: [],
11
- pageHeaders: [],
12
- count: 0
13
- };
14
- }
15
- return {
16
- rowCount: 0,
17
- columnData
18
- };
6
+ const columnData = {};
7
+ for (const field of schema.fieldList) {
8
+ columnData[field.key] = {
9
+ dlevels: [],
10
+ rlevels: [],
11
+ values: [],
12
+ pageHeaders: [],
13
+ count: 0
14
+ };
15
+ }
16
+ return { rowCount: 0, columnData };
19
17
  }
18
+ /**
19
+ * 'Shred' a record into a list of <value, repetition_level, definition_level>
20
+ * tuples per column using the Google Dremel Algorithm.
21
+ *
22
+ * The rowGroup argument must point to an object into which the shredded record
23
+ * will be returned. You may re-use the rowGroup for repeated calls to this function
24
+ * to append to an existing rowGroup, as long as the schema is unchanged.
25
+ *
26
+ * The format in which the shredded records will be stored in the rowGroup is as
27
+ * follows:
28
+ *
29
+ * rowGroup = {
30
+ * columnData: [
31
+ * 'my_col': {
32
+ * dlevels: [d1, d2, .. dN],
33
+ * rlevels: [r1, r2, .. rN],
34
+ * values: [v1, v2, .. vN],
35
+ * }, ...
36
+ * ],
37
+ * rowCount: X,
38
+ * }
39
+ */
20
40
  export function shredRecord(schema, record, rowGroup) {
21
- const data = shredBuffer(schema).columnData;
22
- shredRecordFields(schema.fields, record, data, 0, 0);
23
- if (rowGroup.rowCount === 0) {
24
- rowGroup.rowCount = 1;
25
- rowGroup.columnData = data;
26
- return;
27
- }
28
- rowGroup.rowCount += 1;
29
- for (const field of schema.fieldList) {
30
- Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);
31
- Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);
32
- Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);
33
- rowGroup.columnData[field.key].count += data[field.key].count;
34
- }
35
- }
36
- function shredRecordFields(fields, record, data, rLevel, dLevel) {
37
- for (const name in fields) {
38
- const field = fields[name];
39
- let values = [];
40
- if (record && field.name in record && record[field.name] !== undefined && record[field.name] !== null) {
41
- if (record[field.name].constructor === Array) {
42
- values = record[field.name];
43
- } else {
44
- values.push(record[field.name]);
45
- }
46
- }
47
- if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {
48
- throw new Error(`missing required field: ${field.name}`);
41
+ /* shred the record, this may raise an exception */
42
+ const data = shredBuffer(schema).columnData;
43
+ shredRecordFields(schema.fields, record, data, 0, 0);
44
+ /* if no error during shredding, add the shredded record to the rowGroup */
45
+ if (rowGroup.rowCount === 0) {
46
+ rowGroup.rowCount = 1;
47
+ rowGroup.columnData = data;
48
+ return;
49
49
  }
50
- if (values.length > 1 && field.repetitionType !== 'REPEATED') {
51
- throw new Error(`too many values for field: ${field.name}`);
50
+ rowGroup.rowCount += 1;
51
+ for (const field of schema.fieldList) {
52
+ Array.prototype.push.apply(rowGroup.columnData[field.key].rlevels, data[field.key].rlevels);
53
+ Array.prototype.push.apply(rowGroup.columnData[field.key].dlevels, data[field.key].dlevels);
54
+ Array.prototype.push.apply(rowGroup.columnData[field.key].values, data[field.key].values);
55
+ rowGroup.columnData[field.key].count += data[field.key].count;
52
56
  }
53
- if (values.length === 0) {
54
- if (field.isNested) {
55
- shredRecordFields(field.fields, null, data, rLevel, dLevel);
56
- } else {
57
- data[field.key].count += 1;
58
- data[field.key].rlevels.push(rLevel);
59
- data[field.key].dlevels.push(dLevel);
60
- }
61
- continue;
62
- }
63
- for (let i = 0; i < values.length; i++) {
64
- const rlvl = i === 0 ? rLevel : field.rLevelMax;
65
- if (field.isNested) {
66
- shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
67
- } else {
68
- data[field.key].count += 1;
69
- data[field.key].rlevels.push(rlvl);
70
- data[field.key].dlevels.push(field.dLevelMax);
71
- data[field.key].values.push(Types.toPrimitive(field.originalType || field.primitiveType, values[i]));
72
- }
57
+ }
58
+ // eslint-disable-next-line max-statements, complexity
59
+ function shredRecordFields(fields, record, data, rLevel, dLevel) {
60
+ for (const name in fields) {
61
+ const field = fields[name];
62
+ // fetch values
63
+ let values = [];
64
+ if (record &&
65
+ field.name in record &&
66
+ record[field.name] !== undefined &&
67
+ record[field.name] !== null) {
68
+ if (record[field.name].constructor === Array) {
69
+ values = record[field.name];
70
+ }
71
+ else {
72
+ values.push(record[field.name]);
73
+ }
74
+ }
75
+ // check values
76
+ if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {
77
+ throw new Error(`missing required field: ${field.name}`);
78
+ }
79
+ if (values.length > 1 && field.repetitionType !== 'REPEATED') {
80
+ throw new Error(`too many values for field: ${field.name}`);
81
+ }
82
+ // push null
83
+ if (values.length === 0) {
84
+ if (field.isNested) {
85
+ shredRecordFields(field.fields, null, data, rLevel, dLevel);
86
+ }
87
+ else {
88
+ data[field.key].count += 1;
89
+ data[field.key].rlevels.push(rLevel);
90
+ data[field.key].dlevels.push(dLevel);
91
+ }
92
+ continue; // eslint-disable-line no-continue
93
+ }
94
+ // push values
95
+ for (let i = 0; i < values.length; i++) {
96
+ const rlvl = i === 0 ? rLevel : field.rLevelMax;
97
+ if (field.isNested) {
98
+ shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
99
+ }
100
+ else {
101
+ data[field.key].count += 1;
102
+ data[field.key].rlevels.push(rlvl);
103
+ data[field.key].dlevels.push(field.dLevelMax);
104
+ data[field.key].values.push(Types.toPrimitive((field.originalType || field.primitiveType), values[i]));
105
+ }
106
+ }
73
107
  }
74
- }
75
108
  }
109
+ /**
110
+ * 'Materialize' a list of <value, repetition_level, definition_level>
111
+ * tuples back to nested records (objects/arrays) using the Google Dremel
112
+ * Algorithm.
113
+ *
114
+ * The rowGroup argument must point to an object with the following structure (i.e.
115
+ * the same structure that is populated by shredRecord):
116
+ *
117
+ * rowGroup = {
118
+ * columnData: [
119
+ * 'my_col': {
120
+ * dlevels: [d1, d2, .. dN],
121
+ * rlevels: [r1, r2, .. rN],
122
+ * values: [v1, v2, .. vN],
123
+ * }, ...
124
+ * ],
125
+ * rowCount: X,
126
+ * }
127
+ */
76
128
  export function materializeRows(schema, rowGroup) {
77
- const rows = [];
78
- for (let i = 0; i < rowGroup.rowCount; i++) {
79
- rows.push({});
80
- }
81
- for (const key in rowGroup.columnData) {
82
- const columnData = rowGroup.columnData[key];
83
- if (columnData.count) {
84
- materializeColumnAsRows(schema, columnData, key, rows);
129
+ const rows = [];
130
+ // Note: `new Array(rowGroup.rowCount).fill({})` is not used because every row would share one object
131
+ for (let i = 0; i < rowGroup.rowCount; i++) {
132
+ rows.push({});
85
133
  }
86
- }
87
- return rows;
134
+ for (const key in rowGroup.columnData) {
135
+ const columnData = rowGroup.columnData[key];
136
+ if (columnData.count) {
137
+ materializeColumnAsRows(schema, columnData, key, rows);
138
+ }
139
+ }
140
+ return rows;
88
141
  }
142
+ /** Populate record fields for one column */
143
+ // eslint-disable-next-line max-statements, complexity
89
144
  function materializeColumnAsRows(schema, columnData, key, rows) {
90
- const field = schema.findField(key);
91
- const branch = schema.findFieldBranch(key);
92
- const rLevels = new Array(field.rLevelMax + 1).fill(0);
93
- let vIndex = 0;
94
- for (let i = 0; i < columnData.count; i++) {
95
- const dLevel = columnData.dlevels[i];
96
- const rLevel = columnData.rlevels[i];
97
- rLevels[rLevel]++;
98
- rLevels.fill(0, rLevel + 1);
99
- let rIndex = 0;
100
- let record = rows[rLevels[rIndex++] - 1];
101
- for (const step of branch) {
102
- if (step === field || dLevel < step.dLevelMax) {
103
- break;
104
- }
105
- switch (step.repetitionType) {
106
- case 'REPEATED':
107
- if (!(step.name in record)) {
108
- record[step.name] = [];
109
- }
110
- const ix = rLevels[rIndex++];
111
- while (record[step.name].length <= ix) {
112
- record[step.name].push({});
113
- }
114
- record = record[step.name][ix];
115
- break;
116
- default:
117
- record[step.name] = record[step.name] || {};
118
- record = record[step.name];
119
- }
120
- }
121
- if (dLevel === field.dLevelMax) {
122
- const value = Types.fromPrimitive(field.originalType || field.primitiveType, columnData.values[vIndex], field);
123
- vIndex++;
124
- switch (field.repetitionType) {
125
- case 'REPEATED':
126
- if (!(field.name in record)) {
127
- record[field.name] = [];
128
- }
129
- const ix = rLevels[rIndex];
130
- while (record[field.name].length <= ix) {
131
- record[field.name].push(null);
132
- }
133
- record[field.name][ix] = value;
134
- break;
135
- default:
136
- record[field.name] = value;
137
- }
145
+ const field = schema.findField(key);
146
+ const branch = schema.findFieldBranch(key);
147
+ // tslint:disable-next-line:prefer-array-literal
148
+ const rLevels = new Array(field.rLevelMax + 1).fill(0);
149
+ let vIndex = 0;
150
+ for (let i = 0; i < columnData.count; i++) {
151
+ const dLevel = columnData.dlevels[i];
152
+ const rLevel = columnData.rlevels[i];
153
+ rLevels[rLevel]++;
154
+ rLevels.fill(0, rLevel + 1);
155
+ let rIndex = 0;
156
+ let record = rows[rLevels[rIndex++] - 1];
157
+ // Internal nodes - Build a nested row object
158
+ for (const step of branch) {
159
+ if (step === field || dLevel < step.dLevelMax) {
160
+ break;
161
+ }
162
+ switch (step.repetitionType) {
163
+ case 'REPEATED':
164
+ if (!(step.name in record)) {
165
+ // eslint-disable max-depth
166
+ record[step.name] = [];
167
+ }
168
+ const ix = rLevels[rIndex++];
169
+ while (record[step.name].length <= ix) {
170
+ // eslint-disable max-depth
171
+ record[step.name].push({});
172
+ }
173
+ record = record[step.name][ix];
174
+ break;
175
+ default:
176
+ record[step.name] = record[step.name] || {};
177
+ record = record[step.name];
178
+ }
179
+ }
180
+ // Leaf node - Add the value
181
+ if (dLevel === field.dLevelMax) {
182
+ const value = Types.fromPrimitive(
183
+ // @ts-ignore
184
+ field.originalType || field.primitiveType, columnData.values[vIndex], field);
185
+ vIndex++;
186
+ switch (field.repetitionType) {
187
+ case 'REPEATED':
188
+ if (!(field.name in record)) {
189
+ // eslint-disable max-depth
190
+ record[field.name] = [];
191
+ }
192
+ const ix = rLevels[rIndex];
193
+ while (record[field.name].length <= ix) {
194
+ // eslint-disable max-depth
195
+ record[field.name].push(null);
196
+ }
197
+ record[field.name][ix] = value;
198
+ break;
199
+ default:
200
+ record[field.name] = value;
201
+ }
202
+ }
138
203
  }
139
- }
140
204
  }
205
+ // Columnar export
206
+ /**
207
+ * 'Materialize' a list of <value, repetition_level, definition_level>
208
+ * tuples back to per-column arrays of (possibly nested) values using the Google Dremel
209
+ * Algorithm.
210
+ *
211
+ * The rowGroup argument must point to an object with the following structure (i.e.
212
+ * the same structure that is populated by shredRecord):
213
+ *
214
+ * rowGroup = {
215
+ * columnData: [
216
+ * 'my_col': {
217
+ * dlevels: [d1, d2, .. dN],
218
+ * rlevels: [r1, r2, .. rN],
219
+ * values: [v1, v2, .. vN],
220
+ * }, ...
221
+ * ],
222
+ * rowCount: X,
223
+ * }
224
+ */
141
225
  export function materializeColumns(schema, rowGroup) {
142
- const columns = {};
143
- for (const key in rowGroup.columnData) {
144
- const columnData = rowGroup.columnData[key];
145
- if (columnData.count) {
146
- materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);
226
+ const columns = {};
227
+ for (const key in rowGroup.columnData) {
228
+ const columnData = rowGroup.columnData[key];
229
+ if (columnData.count) {
230
+ materializeColumnAsColumnarArray(schema, columnData, rowGroup.rowCount, key, columns);
231
+ }
147
232
  }
148
- }
149
- return columns;
233
+ return columns;
150
234
  }
235
+ // eslint-disable-next-line max-statements, complexity
151
236
  function materializeColumnAsColumnarArray(schema, columnData, rowCount, key, columns) {
152
- if (columnData.count <= 0) {
153
- return;
154
- }
155
- const field = schema.findField(key);
156
- const branch = schema.findFieldBranch(key);
157
- const columnName = branch[0].name;
158
- let column;
159
- const {
160
- values
161
- } = columnData;
162
- if (values.length === rowCount && branch[0].primitiveType) {
163
- column = values;
164
- }
165
- if (column) {
166
- columns[columnName] = column;
167
- return;
168
- }
169
- column = new Array(rowCount);
170
- for (let i = 0; i < rowCount; i++) {
171
- column[i] = {};
172
- }
173
- columns[columnName] = column;
174
- const rLevels = new Array(field.rLevelMax + 1).fill(0);
175
- let vIndex = 0;
176
- for (let i = 0; i < columnData.count; i++) {
177
- const dLevel = columnData.dlevels[i];
178
- const rLevel = columnData.rlevels[i];
179
- rLevels[rLevel]++;
180
- rLevels.fill(0, rLevel + 1);
181
- let rIndex = 0;
182
- let record = column[rLevels[rIndex++] - 1];
183
- for (const step of branch) {
184
- if (step === field || dLevel < step.dLevelMax) {
185
- break;
186
- }
187
- switch (step.repetitionType) {
188
- case 'REPEATED':
189
- if (!(step.name in record)) {
190
- record[step.name] = [];
191
- }
192
- const ix = rLevels[rIndex++];
193
- while (record[step.name].length <= ix) {
194
- record[step.name].push({});
195
- }
196
- record = record[step.name][ix];
197
- break;
198
- default:
199
- record[step.name] = record[step.name] || {};
200
- record = record[step.name];
201
- }
237
+ if (columnData.count <= 0) {
238
+ return;
239
+ }
240
+ const field = schema.findField(key);
241
+ const branch = schema.findFieldBranch(key);
242
+ const columnName = branch[0].name;
243
+ let column;
244
+ const { values } = columnData;
245
+ if (values.length === rowCount && branch[0].primitiveType) {
246
+ // if (branch[0].repetitionType === `REQUIRED`) {
247
+ // switch (branch[0].primitiveType) {
248
+ // case 'INT32': return values instanceof Int32Array ? values : new Int32Array(values);
249
+ // }
250
+ // }
251
+ column = values;
202
252
  }
203
- if (dLevel === field.dLevelMax) {
204
- const value = Types.fromPrimitive(field.originalType || field.primitiveType, columnData.values[vIndex], field);
205
- vIndex++;
206
- switch (field.repetitionType) {
207
- case 'REPEATED':
208
- if (!(field.name in record)) {
209
- record[field.name] = [];
210
- }
211
- const ix = rLevels[rIndex];
212
- while (record[field.name].length <= ix) {
213
- record[field.name].push(null);
214
- }
215
- record[field.name][ix] = value;
216
- break;
217
- default:
218
- record[field.name] = value;
219
- }
253
+ if (column) {
254
+ columns[columnName] = column;
255
+ return;
256
+ }
257
+ column = new Array(rowCount);
258
+ for (let i = 0; i < rowCount; i++) {
259
+ column[i] = {};
260
+ }
261
+ columns[columnName] = column;
262
+ // tslint:disable-next-line:prefer-array-literal
263
+ const rLevels = new Array(field.rLevelMax + 1).fill(0);
264
+ let vIndex = 0;
265
+ for (let i = 0; i < columnData.count; i++) {
266
+ const dLevel = columnData.dlevels[i];
267
+ const rLevel = columnData.rlevels[i];
268
+ rLevels[rLevel]++;
269
+ rLevels.fill(0, rLevel + 1);
270
+ let rIndex = 0;
271
+ let record = column[rLevels[rIndex++] - 1];
272
+ // Internal nodes - Build a nested row object
273
+ for (const step of branch) {
274
+ if (step === field || dLevel < step.dLevelMax) {
275
+ break;
276
+ }
277
+ switch (step.repetitionType) {
278
+ case 'REPEATED':
279
+ if (!(step.name in record)) {
280
+ // eslint-disable max-depth
281
+ record[step.name] = [];
282
+ }
283
+ const ix = rLevels[rIndex++];
284
+ while (record[step.name].length <= ix) {
285
+ // eslint-disable max-depth
286
+ record[step.name].push({});
287
+ }
288
+ record = record[step.name][ix];
289
+ break;
290
+ default:
291
+ record[step.name] = record[step.name] || {};
292
+ record = record[step.name];
293
+ }
294
+ }
295
+ // Leaf node - Add the value
296
+ if (dLevel === field.dLevelMax) {
297
+ const value = Types.fromPrimitive(
298
+ // @ts-ignore
299
+ field.originalType || field.primitiveType, columnData.values[vIndex], field);
300
+ vIndex++;
301
+ switch (field.repetitionType) {
302
+ case 'REPEATED':
303
+ if (!(field.name in record)) {
304
+ // eslint-disable max-depth
305
+ record[field.name] = [];
306
+ }
307
+ const ix = rLevels[rIndex];
308
+ while (record[field.name].length <= ix) {
309
+ // eslint-disable max-depth
310
+ record[field.name].push(null);
311
+ }
312
+ record[field.name][ix] = value;
313
+ break;
314
+ default:
315
+ record[field.name] = value;
316
+ }
317
+ }
220
318
  }
221
- }
222
- for (let i = 0; i < rowCount; ++i) {
223
- if (columnName in column[i]) {
224
- column[i] = column[i][columnName];
319
+ // Remove one level of nesting
320
+ for (let i = 0; i < rowCount; ++i) {
321
+ if (columnName in column[i]) {
322
+ column[i] = column[i][columnName];
323
+ }
225
324
  }
226
- }
227
325
  }
228
- //# sourceMappingURL=shred.js.map
@@ -1,4 +1,4 @@
1
- import { OriginalType, ParquetField, ParquetType, PrimitiveType } from './declare';
1
+ import { OriginalType, ParquetField, ParquetType, PrimitiveType } from "./declare.js";
2
2
  export interface ParquetTypeKit {
3
3
  primitiveType: PrimitiveType;
4
4
  originalType?: OriginalType;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,GAAG,OAAO,CAM5F;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,OAUpF"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,qBAAkB;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,GAAG,OAAO,CAM5F;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,OAUpF"}