@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (203)
  1. package/dist/dist.min.js +19 -19
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +49 -8
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  6. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  7. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
  8. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  10. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  11. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
  12. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  13. package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
  14. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  15. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  16. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  17. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  18. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  19. package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
  20. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  21. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  22. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  23. package/dist/es5/parquet-loader.js +4 -2
  24. package/dist/es5/parquet-loader.js.map +1 -1
  25. package/dist/es5/parquet-wasm-loader.js +1 -1
  26. package/dist/es5/parquet-wasm-writer.js +1 -1
  27. package/dist/es5/parquet-writer.js +1 -1
  28. package/dist/es5/parquetjs/compression.js +15 -5
  29. package/dist/es5/parquetjs/compression.js.map +1 -1
  30. package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
  31. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  32. package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
  33. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  34. package/dist/es5/parquetjs/schema/declare.js +3 -1
  35. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  36. package/dist/es5/parquetjs/schema/shred.js +39 -33
  37. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  38. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  39. package/dist/es5/parquetjs/utils/file-utils.js +2 -3
  40. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  41. package/dist/esm/index.js +13 -3
  42. package/dist/esm/index.js.map +1 -1
  43. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  44. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  45. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
  46. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  47. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
  48. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  49. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
  50. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  51. package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
  52. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  53. package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
  54. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  55. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
  56. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  57. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
  58. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  59. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  60. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  61. package/dist/esm/parquet-loader.js +4 -2
  62. package/dist/esm/parquet-loader.js.map +1 -1
  63. package/dist/esm/parquet-wasm-loader.js +1 -1
  64. package/dist/esm/parquet-wasm-writer.js +1 -1
  65. package/dist/esm/parquet-writer.js +1 -1
  66. package/dist/esm/parquetjs/compression.js +10 -1
  67. package/dist/esm/parquetjs/compression.js.map +1 -1
  68. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
  69. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  70. package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
  71. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  72. package/dist/esm/parquetjs/schema/declare.js +1 -0
  73. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  74. package/dist/esm/parquetjs/schema/shred.js +42 -34
  75. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  76. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  77. package/dist/esm/parquetjs/utils/file-utils.js +1 -1
  78. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  79. package/dist/index.d.ts +24 -4
  80. package/dist/index.d.ts.map +1 -1
  81. package/dist/index.js +26 -9
  82. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  83. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  84. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  85. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  86. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  87. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  88. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  89. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  90. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  91. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  92. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  93. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  94. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  95. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  96. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  97. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  98. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  99. package/dist/lib/geo/geoparquet-schema.js +69 -0
  100. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  101. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  102. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  103. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  104. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  105. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  106. package/dist/parquet-loader.d.ts +2 -0
  107. package/dist/parquet-loader.d.ts.map +1 -1
  108. package/dist/parquet-loader.js +3 -1
  109. package/dist/parquet-worker.js +20 -20
  110. package/dist/parquet-worker.js.map +3 -3
  111. package/dist/parquetjs/compression.d.ts.map +1 -1
  112. package/dist/parquetjs/compression.js +16 -5
  113. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  114. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  115. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  116. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  117. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  118. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  119. package/dist/parquetjs/schema/declare.d.ts +14 -7
  120. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  121. package/dist/parquetjs/schema/declare.js +2 -0
  122. package/dist/parquetjs/schema/shred.d.ts +115 -0
  123. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  124. package/dist/parquetjs/schema/shred.js +161 -43
  125. package/dist/parquetjs/schema/types.d.ts +2 -2
  126. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  127. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  128. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  129. package/dist/parquetjs/utils/file-utils.js +2 -5
  130. package/package.json +7 -5
  131. package/src/index.ts +24 -4
  132. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  133. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  134. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  135. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  136. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  137. package/src/lib/geo/geoparquet-schema.ts +69 -0
  138. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  139. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  140. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  141. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  142. package/src/parquet-loader.ts +5 -1
  143. package/src/parquetjs/compression.ts +14 -1
  144. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  145. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  146. package/src/parquetjs/schema/declare.ts +17 -9
  147. package/src/parquetjs/schema/shred.ts +157 -28
  148. package/src/parquetjs/schema/types.ts +21 -27
  149. package/src/parquetjs/utils/file-utils.ts +3 -4
  150. package/dist/es5/lib/convert-schema.js.map +0 -1
  151. package/dist/es5/lib/parse-parquet.js.map +0 -1
  152. package/dist/es5/lib/read-array-buffer.js +0 -43
  153. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  154. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  155. package/dist/es5/parquetjs/file.js +0 -94
  156. package/dist/es5/parquetjs/file.js.map +0 -1
  157. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  158. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  159. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  160. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  161. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  162. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  163. package/dist/esm/lib/convert-schema.js.map +0 -1
  164. package/dist/esm/lib/parse-parquet.js +0 -25
  165. package/dist/esm/lib/parse-parquet.js.map +0 -1
  166. package/dist/esm/lib/read-array-buffer.js +0 -10
  167. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  168. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  169. package/dist/esm/parquetjs/file.js +0 -81
  170. package/dist/esm/parquetjs/file.js.map +0 -1
  171. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  172. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  173. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  174. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  175. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  176. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  177. package/dist/lib/convert-schema.d.ts +0 -8
  178. package/dist/lib/convert-schema.d.ts.map +0 -1
  179. package/dist/lib/parse-parquet.d.ts +0 -4
  180. package/dist/lib/parse-parquet.d.ts.map +0 -1
  181. package/dist/lib/parse-parquet.js +0 -28
  182. package/dist/lib/read-array-buffer.d.ts +0 -19
  183. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  184. package/dist/lib/read-array-buffer.js +0 -29
  185. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  186. package/dist/parquetjs/file.d.ts +0 -10
  187. package/dist/parquetjs/file.d.ts.map +0 -1
  188. package/dist/parquetjs/file.js +0 -99
  189. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  190. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  191. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  192. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  193. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  194. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  195. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  196. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  197. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  198. package/src/lib/parse-parquet.ts +0 -27
  199. package/src/lib/read-array-buffer.ts +0 -31
  200. package/src/parquetjs/file.ts +0 -90
  201. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  202. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  203. package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -155,75 +155,193 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
  */
  function materializeRecords(schema, buffer) {
  const records = [];
- for (let i = 0; i < buffer.rowCount; i++)
+ for (let i = 0; i < buffer.rowCount; i++) {
  records.push({});
+ }
  for (const key in buffer.columnData) {
- materializeColumn(schema, buffer, key, records);
+ const columnData = buffer.columnData[key];
+ if (columnData.count) {
+ materializeColumn(schema, columnData, key, records);
+ }
  }
  return records;
  }
  exports.materializeRecords = materializeRecords;
  // eslint-disable-next-line max-statements, complexity
- function materializeColumn(schema, buffer, key, records) {
- const data = buffer.columnData[key];
- if (!data.count)
- return;
+ function materializeColumn(schema, columnData, key, records) {
  const field = schema.findField(key);
  const branch = schema.findFieldBranch(key);
  // tslint:disable-next-line:prefer-array-literal
  const rLevels = new Array(field.rLevelMax + 1).fill(0);
  let vIndex = 0;
- for (let i = 0; i < data.count; i++) {
- const dLevel = data.dlevels[i];
- const rLevel = data.rlevels[i];
+ for (let i = 0; i < columnData.count; i++) {
+ const dLevel = columnData.dlevels[i];
+ const rLevel = columnData.rlevels[i];
  rLevels[rLevel]++;
  rLevels.fill(0, rLevel + 1);
  let rIndex = 0;
  let record = records[rLevels[rIndex++] - 1];
- // Internal nodes
+ // Internal nodes - Build a nested row object
  for (const step of branch) {
- if (step === field)
+ if (step === field || dLevel < step.dLevelMax) {
  break;
- if (dLevel < step.dLevelMax)
- break;
- if (step.repetitionType === 'REPEATED') {
- if (!(step.name in record)) {
- // eslint-disable max-depth
- record[step.name] = [];
- }
- const ix = rLevels[rIndex++];
- while (record[step.name].length <= ix) {
- // eslint-disable max-depth
- record[step.name].push({});
- }
- record = record[step.name][ix];
  }
- else {
- record[step.name] = record[step.name] || {};
- record = record[step.name];
+ switch (step.repetitionType) {
+ case 'REPEATED':
+ if (!(step.name in record)) {
+ // eslint-disable max-depth
+ record[step.name] = [];
+ }
+ const ix = rLevels[rIndex++];
+ while (record[step.name].length <= ix) {
+ // eslint-disable max-depth
+ record[step.name].push({});
+ }
+ record = record[step.name][ix];
+ break;
+ default:
+ record[step.name] = record[step.name] || {};
+ record = record[step.name];
  }
  }
- // Leaf node
+ // Leaf node - Add the value
  if (dLevel === field.dLevelMax) {
  const value = Types.fromPrimitive(
  // @ts-ignore
- field.originalType || field.primitiveType, data.values[vIndex], field);
+ field.originalType || field.primitiveType, columnData.values[vIndex], field);
  vIndex++;
- if (field.repetitionType === 'REPEATED') {
- if (!(field.name in record)) {
- // eslint-disable max-depth
- record[field.name] = [];
- }
- const ix = rLevels[rIndex];
- while (record[field.name].length <= ix) {
- // eslint-disable max-depth
- record[field.name].push(null);
- }
- record[field.name][ix] = value;
- }
- else {
- record[field.name] = value;
+ switch (field.repetitionType) {
+ case 'REPEATED':
+ if (!(field.name in record)) {
+ // eslint-disable max-depth
+ record[field.name] = [];
+ }
+ const ix = rLevels[rIndex];
+ while (record[field.name].length <= ix) {
+ // eslint-disable max-depth
+ record[field.name].push(null);
+ }
+ record[field.name][ix] = value;
+ break;
+ default:
+ record[field.name] = value;
  }
  }
  }
  }
+ // Columnar export
+ /**
+ * 'Materialize' a list of <value, repetition_level, definition_level>
+ * tuples back to nested records (objects/arrays) using the Google Dremel
+ * Algorithm..
+ *
+ * The buffer argument must point to an object with the following structure (i.e.
+ * the same structure that is returned by shredRecords):
+ *
+ * buffer = {
+ * columnData: [
+ * 'my_col': {
+ * dlevels: [d1, d2, .. dN],
+ * rlevels: [r1, r2, .. rN],
+ * values: [v1, v2, .. vN],
+ * }, ...
+ * ],
+ * rowCount: X,
+ * }
+ *
+ export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
+ const columns: ParquetRecord = {};
+ for (const key in buffer.columnData) {
+ const columnData = buffer.columnData[key];
+ if (columnData.count) {
+ extractColumn(schema, columnData, key, columns);
+ }
+ }
+ return columns;
+ }
+
+ // eslint-disable-next-line max-statements, complexity
+ function extractColumn(
+ schema: ParquetSchema,
+ columnData: ParquetData,
+ key: string,
+ columns: Record<string, unknown>
+ ) {
+ if (columnData.count <= 0) {
+ return;
+ }
+
+ const record = columns;
+
+ const field = schema.findField(key);
+ const branch = schema.findFieldBranch(key);
+
+ // tslint:disable-next-line:prefer-array-literal
+ const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
+ let vIndex = 0;
+
+ let i = 0;
+ const dLevel = columnData.dlevels[i];
+ const rLevel = columnData.rlevels[i];
+ rLevels[rLevel]++;
+ rLevels.fill(0, rLevel + 1);
+
+ let rIndex = 0;
+ let record = records[rLevels[rIndex++] - 1];
+
+ // Internal nodes
+ for (const step of branch) {
+ if (step === field || dLevel < step.dLevelMax) {
+ break;
+ }
+
+ switch (step.repetitionType) {
+ case 'REPEATED':
+ if (!(step.name in record)) {
+ // eslint-disable max-depth
+ record[step.name] = [];
+ }
+ const ix = rLevels[rIndex++];
+ while (record[step.name].length <= ix) {
+ // eslint-disable max-depth
+ record[step.name].push({});
+ }
+ record = record[step.name][ix];
+ break;
+
+ default:
+ record[step.name] = record[step.name] || {};
+ record = record[step.name];
+ }
+ }
+
+ // Leaf node
+ if (dLevel === field.dLevelMax) {
+ const value = Types.fromPrimitive(
+ // @ts-ignore
+ field.originalType || field.primitiveType,
+ columnData.values[vIndex],
+ field
+ );
+ vIndex++;
+
+ switch (field.repetitionType) {
+ case 'REPEATED':
+ if (!(field.name in record)) {
+ // eslint-disable max-depth
+ record[field.name] = [];
+ }
+ const ix = rLevels[rIndex];
+ while (record[field.name].length <= ix) {
+ // eslint-disable max-depth
+ record[field.name].push(null);
+ }
+ record[field.name][ix] = value;
+ break;
+
+ default:
+ record[field.name] = value;
+ }
+ }
+ }
+ */
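The added block comment above documents the shredded-buffer shape that materializeRecords consumes. As a minimal sketch of that contract (not from the diff; the deep import path and the INT32 field are illustrative), a hand-built buffer for a required flat column round-trips like this:

  import {ParquetSchema} from '@loaders.gl/parquet';
  // Hypothetical deep import; the diff only shows the CommonJS export above.
  import {materializeRecords} from '@loaders.gl/parquet/parquetjs/schema/shred';

  const schema = new ParquetSchema({id: {type: 'INT32'}});
  // For a required flat column, rLevelMax and dLevelMax are 0, so every
  // (value, rlevel=0, dlevel=0) triple starts a new top-level record.
  const buffer = {
    rowCount: 2,
    columnData: {
      id: {dlevels: [0, 0], rlevels: [0, 0], values: [7, 8], count: 2}
    }
  };
  materializeRecords(schema, buffer); // => [{id: 7}, {id: 8}]

Nonzero rlevels would instead descend into REPEATED steps of the field branch, which is what the new switch statements handle.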
@@ -11,10 +11,10 @@ export declare const PARQUET_LOGICAL_TYPES: Record<ParquetType, ParquetTypeKit>;
  * Convert a value from it's native representation to the internal/underlying
  * primitive type
  */
- export declare function toPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
+ export declare function toPrimitive(type: ParquetType, value: unknown, field?: ParquetField): unknown;
  /**
  * Convert a value from it's internal/underlying primitive representation to
  * the native representation
  */
- export declare function fromPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
+ export declare function fromPrimitive(type: ParquetType, value: unknown, field?: ParquetField): any;
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAM9E;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAUhF"}
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,GAAG,OAAO,CAM5F;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,OAUpF"}
@@ -1,8 +1,7 @@
  /// <reference types="node" />
  /// <reference types="node" />
  /// <reference types="node" />
- import fs from 'fs';
- import { Writable } from 'stream';
+ import { fs, stream } from '@loaders.gl/loader-utils';
  export declare function load(name: string): any;
  export interface WriteStreamOptions {
  flags?: string;
@@ -12,7 +11,7 @@ export interface WriteStreamOptions {
  autoClose?: boolean;
  start?: number;
  }
- export declare function oswrite(os: Writable, buf: Buffer): Promise<void>;
- export declare function osclose(os: Writable): Promise<void>;
+ export declare function oswrite(os: stream.Writable, buf: Buffer): Promise<void>;
+ export declare function osclose(os: stream.Writable): Promise<void>;
  export declare function osopen(path: string, opts?: WriteStreamOptions): Promise<fs.WriteStream>;
  //# sourceMappingURL=file-utils.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,QAAQ,EAAC,MAAM,QAAQ,CAAC;AAEhC,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAEtC;AACD,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUhE;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAUnD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,CAAC,CAMvF"}
+ {"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,EAAC,EAAE,EAAE,MAAM,EAAC,MAAM,0BAA0B,CAAC;AAEpD,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAEtC;AACD,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUvE;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAU1D;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,CAAC,CAMvF"}
@@ -1,11 +1,8 @@
  "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.osopen = exports.osclose = exports.oswrite = exports.load = void 0;
  // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
- const fs_1 = __importDefault(require("fs"));
+ const loader_utils_1 = require("@loaders.gl/loader-utils");
  function load(name) {
  return (module || global).require(name);
  }
@@ -38,7 +35,7 @@ function osclose(os) {
  exports.osclose = osclose;
  function osopen(path, opts) {
  return new Promise((resolve, reject) => {
- const outputStream = fs_1.default.createWriteStream(path, opts);
+ const outputStream = loader_utils_1.fs.createWriteStream(path, opts);
  outputStream.once('open', (fd) => resolve(outputStream));
  outputStream.once('error', (err) => reject(err));
  });
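Both the declarations and the compiled output above now obtain fs and stream through @loaders.gl/loader-utils instead of importing the Node built-ins directly, so browser bundles can stub them out (see the package.json "browser" entries below). A sketch of the promisified-stream pattern the oswrite declaration describes (assumed shape, mirroring the signature; not copied from the package source):

  import {stream} from '@loaders.gl/loader-utils';

  function oswrite(os: stream.Writable, buf: Buffer): Promise<void> {
    return new Promise((resolve, reject) => {
      // Resolve once the chunk is handed to the stream, reject on a write error
      os.write(buf, (err) => (err ? reject(err) : resolve()));
    });
  }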
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@loaders.gl/parquet",
- "version": "3.4.0-alpha.1",
+ "version": "3.4.0-alpha.2",
  "description": "Framework-independent loader for Apache Parquet files",
  "license": "MIT",
  "publishConfig": {
@@ -37,12 +37,14 @@
  "net": false,
  "tls": false,
  "lzo": false,
+ "stream": false,
+ "fs": false,
  "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
  },
  "dependencies": {
- "@loaders.gl/compression": "3.4.0-alpha.1",
- "@loaders.gl/loader-utils": "3.4.0-alpha.1",
- "@loaders.gl/schema": "3.4.0-alpha.1",
+ "@loaders.gl/compression": "3.4.0-alpha.2",
+ "@loaders.gl/loader-utils": "3.4.0-alpha.2",
+ "@loaders.gl/schema": "3.4.0-alpha.2",
  "async-mutex": "^0.2.2",
  "brotli": "^1.3.2",
  "bson": "^1.0.4",
@@ -68,5 +70,5 @@
  "@types/varint": "^5.0.0",
  "apache-arrow": "^4.0.0"
  },
- "gitHead": "4085b0323050e4361614471319a1fb4729547bbf"
+ "gitHead": "f1c00c124d8d0c41a138ff40afb0d1a00711bf2e"
  }
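For context (not part of the diff): mapping a module to false in the package.json "browser" field is the standard bundler convention for replacing a Node built-in with an empty stub, so the two new entries

  "browser": {
    "stream": false,
    "fs": false
  }

are what let the fs/stream indirection in file-utils above resolve cleanly in browser builds.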
package/src/index.ts CHANGED
@@ -4,7 +4,11 @@ import type {LoaderWithParser} from '@loaders.gl/loader-utils';

  import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
  import {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';
- import {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';
+ import {parseParquet, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';
+ import {
+ parseParquetInColumns,
+ parseParquetFileInColumnarBatches
+ } from './lib/parsers/parse-parquet-to-columns';
  import {parseParquet as parseParquetWasm} from './lib/wasm/parse-parquet-wasm';

  export {ParquetWorkerLoader, ParquetWasmWorkerLoader};
@@ -16,6 +20,13 @@ export const ParquetLoader = {
  parseFileInBatches: parseParquetFileInBatches
  };

+ /** ParquetJS table loader */
+ export const ParquetColumnarLoader = {
+ ...ParquetWorkerLoader,
+ parse: parseParquetInColumns,
+ parseFileInBatches: parseParquetFileInColumnarBatches
+ };
+
  export const ParquetWasmLoader = {
  ...ParquetWasmWorkerLoader,
  parse: parseParquetWasm
@@ -32,9 +43,18 @@ export {preloadCompressions} from './parquetjs/compression';

  export {ParquetSchema} from './parquetjs/schema/schema';
  export {ParquetReader} from './parquetjs/parser/parquet-reader';
- export {ParquetEnvelopeReader} from './parquetjs/parser/parquet-envelope-reader';
- // export {ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer} from './parquetjs/encoder/writer';
- export {convertParquetToArrowSchema} from './lib/convert-schema';
+ export {ParquetEncoder} from './parquetjs/encoder/parquet-encoder';
+
+ export {
+ convertSchemaFromParquet,
+ convertSchemaFromParquet as convertParquetToArrowSchema
+ } from './lib/arrow/convert-schema-from-parquet';

  // TESTS
  export const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;
+
+ // Geo Metadata
+ export {default as geoJSONSchema} from './lib/geo/geoparquet-schema';
+
+ export type {GeoMetadata} from './lib/geo/decode-geo-metadata';
+ export {getGeoMetadata, setGeoMetadata, unpackGeoMetadata} from './lib/geo/decode-geo-metadata';
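A usage sketch for the new columnar loader (assumes @loaders.gl/core's load(); the file name is illustrative):

  import {load} from '@loaders.gl/core';
  import {ParquetColumnarLoader} from '@loaders.gl/parquet';

  async function main() {
    // Same worker-loader options as ParquetLoader, but parse/parseFileInBatches
    // return columnar data rather than row objects.
    const table = await load('example.parquet', ParquetColumnarLoader);
    console.log(table);
  }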
package/src/lib/arrow/convert-columns-to-row-group.ts — File without changes (+0 -0)
@@ -0,0 +1,15 @@
+ // loaders.gl, MIT license
+
+ import {Schema} from '@loaders.gl/schema';
+ import {ParquetBuffer} from '@loaders.gl/parquet/parquetjs/schema/declare';
+
+ export function convertParquetRowGroupToColumns(
+ schema: Schema,
+ rowGroup: ParquetBuffer
+ ): Record<string, any[]> {
+ const columns: Record<string, any[]> = {};
+ for (const [columnName, data] of Object.entries(rowGroup.columnData)) {
+ columns[columnName] = columns[columnName] || data.values;
+ }
+ return columns;
+ }
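Note that the new helper only lifts each column's decoded values array out of the row group; repetition/definition levels are not reassembled here (nested data still goes through the row-oriented shred path). A sketch of the shape transform (hypothetical deep import, illustrative values):

  import {Schema} from '@loaders.gl/schema';
  import {convertParquetRowGroupToColumns} from '@loaders.gl/parquet/lib/arrow/convert-row-group-to-columns';

  const schema = new Schema([]); // the schema argument is not consulted by the code above
  const rowGroup = {
    rowCount: 3,
    columnData: {
      id: {dlevels: [0, 0, 0], rlevels: [0, 0, 0], values: [1, 2, 3], count: 3}
    }
  } as any; // ParquetBuffer shape, as documented in the shred.js comment above
  convertParquetRowGroupToColumns(schema, rowGroup); // => {id: [1, 2, 3]}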
@@ -1,5 +1,8 @@
- import type {ParquetSchema} from '../parquetjs/schema/schema';
- import type {FieldDefinition, ParquetField, ParquetType} from '../parquetjs/schema/declare';
+ // loaders.gl, MIT license
+
+ import type {ParquetSchema} from '../../parquetjs/schema/schema';
+ import type {FieldDefinition, ParquetField, ParquetType} from '../../parquetjs/schema/declare';
+ import {FileMetaData} from '@loaders.gl/parquet/parquetjs/parquet-thrift';

  import {
  Schema,
@@ -45,7 +48,7 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
  INT_64: Int64,
  JSON: Binary,
  BSON: Binary,
- // TODO check interal type
+ // TODO check interval type
  INTERVAL: Binary,
  DECIMAL_INT32: Float32,
  DECIMAL_INT64: Float64,
@@ -53,24 +56,13 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
  DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
  };

- export function convertParquetToArrowSchema(parquetSchema: ParquetSchema): Schema {
+ export function convertSchemaFromParquet(
+ parquetSchema: ParquetSchema,
+ parquetMetadata?: FileMetaData
+ ): Schema {
  const fields = getFields(parquetSchema.schema);
-
- // TODO add metadata if needed.
- return new Schema(fields);
- }
-
- function getFieldMetadata(field: ParquetField): Map<string, string> {
- const metadata = new Map();
-
- for (const key in field) {
- if (key !== 'name') {
- const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
- metadata.set(key, value);
- }
- }
-
- return metadata;
+ const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
+ return new Schema(fields, metadata);
  }

  function getFields(schema: FieldDefinition): Field[] {
@@ -80,8 +72,8 @@ function getFields(schema: FieldDefinition): Field[] {
  const field = schema[name];

  if (field.fields) {
- const childField = getFields(field.fields);
- const nestedField = new Field(name, new Struct(childField), field.optional);
+ const childFields = getFields(field.fields);
+ const nestedField = new Field(name, new Struct(childFields), field.optional);
  fields.push(nestedField);
  } else {
  const FieldType = PARQUET_TYPE_MAPPING[field.type];
@@ -93,3 +85,30 @@ function getFields(schema: FieldDefinition): Field[] {

  return fields;
  }
+
+ function getFieldMetadata(field: ParquetField): Map<string, string> {
+ const metadata = new Map();
+
+ for (const key in field) {
+ if (key !== 'name') {
+ let value = field[key] || '';
+ value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
+ metadata.set(key, value);
+ }
+ }
+
+ return metadata;
+ }
+
+ function getSchemaMetadata(parquetMetadata: FileMetaData): Map<string, string> {
+ const metadata = new Map();
+
+ const keyValueList = parquetMetadata.key_value_metadata || [];
+ for (const {key, value} of keyValueList) {
+ if (typeof value === 'string') {
+ metadata.set(key, value);
+ }
+ }
+
+ return metadata;
+ }
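The practical effect of the new optional parquetMetadata argument: file-level key/value metadata now lands on the converted schema, which is what the new geo modules rely on to find GeoParquet metadata. A sketch (reader accessors elided; only the names shown in this diff are real):

  import {convertSchemaFromParquet} from '@loaders.gl/parquet';

  declare const parquetSchema: any; // a ParquetSchema from a ParquetReader
  declare const fileMetadata: any;  // thrift FileMetaData with key_value_metadata

  const schema = convertSchemaFromParquet(parquetSchema, fileMetadata);
  // getSchemaMetadata copies each string-valued pair into the schema's
  // metadata map, e.g. the GeoParquet 'geo' JSON string:
  schema.metadata.get('geo');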
@@ -0,0 +1,102 @@
+ // loaders.gl, MIT license
+
+ // import type {ParquetSchema} from '../../parquetjs/schema/schema';
+ import type {
+ // FieldDefinition, ParquetField,
+ ParquetType
+ } from '../../parquetjs/schema/declare';
+
+ import {
+ Schema,
+ // Struct,
+ // Field,
+ DataType,
+ Bool,
+ Float64,
+ Int32,
+ Float32,
+ Binary,
+ Utf8,
+ Int64,
+ Uint16,
+ Uint32,
+ Uint64,
+ Int8,
+ Int16
+ } from '@loaders.gl/schema';
+
+ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
+ BOOLEAN: Bool,
+ INT32: Int32,
+ INT64: Float64,
+ INT96: Float64,
+ FLOAT: Float32,
+ DOUBLE: Float64,
+ BYTE_ARRAY: Binary,
+ FIXED_LEN_BYTE_ARRAY: Binary,
+ UTF8: Utf8,
+ DATE: Int32,
+ TIME_MILLIS: Int64,
+ TIME_MICROS: Int64,
+ TIMESTAMP_MILLIS: Int64,
+ TIMESTAMP_MICROS: Int64,
+ UINT_8: Int32,
+ UINT_16: Uint16,
+ UINT_32: Uint32,
+ UINT_64: Uint64,
+ INT_8: Int8,
+ INT_16: Int16,
+ INT_32: Int32,
+ INT_64: Int64,
+ JSON: Binary,
+ BSON: Binary,
+ // TODO check interval type
+ INTERVAL: Binary,
+ DECIMAL_INT32: Float32,
+ DECIMAL_INT64: Float64,
+ DECIMAL_BYTE_ARRAY: Float64,
+ DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
+ };
+
+ export function convertToParquetSchema(schema: Schema): Schema {
+ const fields = []; // getFields(schema.fields);
+
+ // TODO add metadata if needed.
+ return new Schema(fields);
+ }
+
+ // function getFields(schema: Field[]): Definition[] {
+ // const fields: Field[] = [];
+
+ // for (const name in schema) {
+ // const field = schema[name];
+
+ // // @ts-ignore
+ // const children = field.children as DataType[];
+ // if (children) {
+ // const childField = getFields(field.fields);
+ // const nestedField = new Field(name, new Struct(childField), field.optional);
+ // fields.push(nestedField);
+ // } else {
+ // const FieldType = PARQUET_TYPE_MAPPING[field.type];
+ // const metadata = getFieldMetadata(field);
+ // const arrowField = new Field(name, new FieldType(), field.optional, metadata);
+ // fields.push(arrowField);
+ // }
+ // }
+
+ // return fields;
+ // }
+
+ // function getFieldMetadata(field: ParquetField): Map<string, string> {
+ // const metadata = new Map();
+
+ // for (const key in field) {
+ // if (key !== 'name') {
+ // const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
+ // metadata.set(key, value);
+ // }
+ // }
+
+ // return metadata;
+ // }