@loaders.gl/parquet 3.3.0 → 3.4.0-alpha.1

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (131)
  1. package/dist/dist.min.js +26 -17
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +3 -3
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/parse-parquet.js +25 -49
  6. package/dist/es5/lib/parse-parquet.js.map +1 -1
  7. package/dist/es5/parquet-loader.js +2 -3
  8. package/dist/es5/parquet-loader.js.map +1 -1
  9. package/dist/es5/parquet-wasm-loader.js +1 -1
  10. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  11. package/dist/es5/parquet-wasm-writer.js +1 -1
  12. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  13. package/dist/es5/parquet-writer.js +1 -1
  14. package/dist/es5/parquet-writer.js.map +1 -1
  15. package/dist/es5/parquetjs/compression.js +5 -15
  16. package/dist/es5/parquetjs/compression.js.map +1 -1
  17. package/dist/es5/parquetjs/encoder/{parquet-encoder.js → writer.js} +158 -70
  18. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  19. package/dist/es5/parquetjs/file.js +94 -0
  20. package/dist/es5/parquetjs/file.js.map +1 -0
  21. package/dist/es5/parquetjs/parser/parquet-cursor.js +183 -0
  22. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  23. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +327 -0
  24. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  25. package/dist/es5/parquetjs/parser/parquet-reader.js +222 -553
  26. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  27. package/dist/es5/parquetjs/schema/declare.js +1 -3
  28. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  29. package/dist/es5/parquetjs/schema/shred.js +33 -39
  30. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  31. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  32. package/dist/es5/parquetjs/utils/buffer-utils.js +19 -0
  33. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  34. package/dist/es5/parquetjs/utils/file-utils.js +3 -2
  35. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  36. package/dist/esm/index.js +1 -1
  37. package/dist/esm/index.js.map +1 -1
  38. package/dist/esm/lib/parse-parquet.js +12 -6
  39. package/dist/esm/lib/parse-parquet.js.map +1 -1
  40. package/dist/esm/parquet-loader.js +2 -3
  41. package/dist/esm/parquet-loader.js.map +1 -1
  42. package/dist/esm/parquet-wasm-loader.js +1 -1
  43. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  44. package/dist/esm/parquet-wasm-writer.js +1 -1
  45. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  46. package/dist/esm/parquet-writer.js +1 -1
  47. package/dist/esm/parquet-writer.js.map +1 -1
  48. package/dist/esm/parquetjs/compression.js +1 -10
  49. package/dist/esm/parquetjs/compression.js.map +1 -1
  50. package/dist/esm/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -7
  51. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  52. package/dist/esm/parquetjs/file.js +81 -0
  53. package/dist/esm/parquetjs/file.js.map +1 -0
  54. package/dist/esm/parquetjs/parser/parquet-cursor.js +78 -0
  55. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  56. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +129 -0
  57. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  58. package/dist/esm/parquetjs/parser/parquet-reader.js +72 -158
  59. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  60. package/dist/esm/parquetjs/schema/declare.js +0 -1
  61. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  62. package/dist/esm/parquetjs/schema/shred.js +34 -42
  63. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  64. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  65. package/dist/esm/parquetjs/utils/buffer-utils.js +13 -0
  66. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  67. package/dist/esm/parquetjs/utils/file-utils.js +1 -1
  68. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  69. package/dist/index.d.ts +1 -1
  70. package/dist/index.d.ts.map +1 -1
  71. package/dist/index.js +4 -3
  72. package/dist/lib/parse-parquet.d.ts +2 -2
  73. package/dist/lib/parse-parquet.d.ts.map +1 -1
  74. package/dist/lib/parse-parquet.js +12 -24
  75. package/dist/parquet-loader.d.ts +0 -1
  76. package/dist/parquet-loader.d.ts.map +1 -1
  77. package/dist/parquet-loader.js +1 -2
  78. package/dist/parquet-worker.js +24 -15
  79. package/dist/parquet-worker.js.map +3 -3
  80. package/dist/parquetjs/compression.d.ts.map +1 -1
  81. package/dist/parquetjs/compression.js +5 -16
  82. package/dist/parquetjs/encoder/{parquet-encoder.d.ts → writer.d.ts} +19 -10
  83. package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
  84. package/dist/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -39
  85. package/dist/parquetjs/file.d.ts +10 -0
  86. package/dist/parquetjs/file.d.ts.map +1 -0
  87. package/dist/parquetjs/file.js +99 -0
  88. package/dist/parquetjs/parser/parquet-cursor.d.ts +36 -0
  89. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
  90. package/dist/parquetjs/parser/parquet-cursor.js +74 -0
  91. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +40 -0
  92. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
  93. package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
  94. package/dist/parquetjs/parser/parquet-reader.d.ts +57 -47
  95. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  96. package/dist/parquetjs/parser/parquet-reader.js +102 -168
  97. package/dist/parquetjs/schema/declare.d.ts +7 -14
  98. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  99. package/dist/parquetjs/schema/declare.js +0 -2
  100. package/dist/parquetjs/schema/shred.d.ts +0 -115
  101. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  102. package/dist/parquetjs/schema/shred.js +43 -161
  103. package/dist/parquetjs/schema/types.d.ts +2 -2
  104. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  105. package/dist/parquetjs/utils/buffer-utils.d.ts +10 -0
  106. package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
  107. package/dist/parquetjs/utils/buffer-utils.js +22 -0
  108. package/dist/parquetjs/utils/file-utils.d.ts +4 -3
  109. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  110. package/dist/parquetjs/utils/file-utils.js +5 -2
  111. package/package.json +5 -7
  112. package/src/index.ts +2 -2
  113. package/src/lib/parse-parquet.ts +12 -25
  114. package/src/parquet-loader.ts +1 -3
  115. package/src/parquetjs/compression.ts +1 -14
  116. package/src/parquetjs/encoder/{parquet-encoder.ts → writer.ts} +28 -22
  117. package/src/parquetjs/file.ts +90 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +199 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +122 -239
  121. package/src/parquetjs/schema/declare.ts +9 -17
  122. package/src/parquetjs/schema/shred.ts +28 -157
  123. package/src/parquetjs/schema/types.ts +27 -21
  124. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  125. package/src/parquetjs/utils/file-utils.ts +4 -3
  126. package/dist/es5/lib/convert-schema-deep.ts.disabled +0 -910
  127. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +0 -1
  128. package/dist/esm/lib/convert-schema-deep.ts.disabled +0 -910
  129. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +0 -1
  130. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +0 -1
  131. package/src/lib/convert-schema-deep.ts.disabled +0 -910
package/dist/parquetjs/schema/shred.d.ts CHANGED
@@ -45,119 +45,4 @@ export declare function shredRecord(schema: ParquetSchema, record: any, buffer:
  * }
  */
 export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];
-/**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The buffer argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * buffer = {
- *   columnData: [
- *     'my_col': {
- *       dlevels: [d1, d2, .. dN],
- *       rlevels: [r1, r2, .. rN],
- *       values: [v1, v2, .. vN],
- *     }, ...
- *   ],
- *   rowCount: X,
- * }
- *
-export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
-  const columns: ParquetRecord = {};
-  for (const key in buffer.columnData) {
-    const columnData = buffer.columnData[key];
-    if (columnData.count) {
-      extractColumn(schema, columnData, key, columns);
-    }
-  }
-  return columns;
-}
-
-// eslint-disable-next-line max-statements, complexity
-function extractColumn(
-  schema: ParquetSchema,
-  columnData: ParquetData,
-  key: string,
-  columns: Record<string, unknown>
-) {
-  if (columnData.count <= 0) {
-    return;
-  }
-
-  const record = columns;
-
-  const field = schema.findField(key);
-  const branch = schema.findFieldBranch(key);
-
-  // tslint:disable-next-line:prefer-array-literal
-  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
-  let vIndex = 0;
-
-  let i = 0;
-  const dLevel = columnData.dlevels[i];
-  const rLevel = columnData.rlevels[i];
-  rLevels[rLevel]++;
-  rLevels.fill(0, rLevel + 1);
-
-  let rIndex = 0;
-  let record = records[rLevels[rIndex++] - 1];
-
-  // Internal nodes
-  for (const step of branch) {
-    if (step === field || dLevel < step.dLevelMax) {
-      break;
-    }
-
-    switch (step.repetitionType) {
-      case 'REPEATED':
-        if (!(step.name in record)) {
-          // eslint-disable max-depth
-          record[step.name] = [];
-        }
-        const ix = rLevels[rIndex++];
-        while (record[step.name].length <= ix) {
-          // eslint-disable max-depth
-          record[step.name].push({});
-        }
-        record = record[step.name][ix];
-        break;
-
-      default:
-        record[step.name] = record[step.name] || {};
-        record = record[step.name];
-    }
-  }
-
-  // Leaf node
-  if (dLevel === field.dLevelMax) {
-    const value = Types.fromPrimitive(
-      // @ts-ignore
-      field.originalType || field.primitiveType,
-      columnData.values[vIndex],
-      field
-    );
-    vIndex++;
-
-    switch (field.repetitionType) {
-      case 'REPEATED':
-        if (!(field.name in record)) {
-          // eslint-disable max-depth
-          record[field.name] = [];
-        }
-        const ix = rLevels[rIndex];
-        while (record[field.name].length <= ix) {
-          // eslint-disable max-depth
-          record[field.name].push(null);
-        }
-        record[field.name][ix] = value;
-        break;
-
-      default:
-        record[field.name] = value;
-    }
-  }
-}
- */
 //# sourceMappingURL=shred.d.ts.map
package/dist/parquetjs/schema/shred.d.ts.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,aAAa,EAA6B,aAAa,EAAC,MAAM,WAAW,CAAC;AAClF,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AAGvC,OAAO,EAAC,aAAa,EAAC,CAAC;AAEvB,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,aAAa,CAYhE;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,aAAa,GAAG,IAAI,CAmB3F;AAgED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,aAAa,GAAG,aAAa,EAAE,CAYhG;AAmFD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkHE"}
+{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,aAAa,EAA6B,aAAa,EAAC,MAAM,WAAW,CAAC;AAClF,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AAGvC,OAAO,EAAC,aAAa,EAAC,CAAC;AAEvB,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,aAAa,CAYhE;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,aAAa,GAAG,IAAI,CAmB3F;AAgED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,aAAa,GAAG,aAAa,EAAE,CAOhG"}
package/dist/parquetjs/schema/shred.js CHANGED
@@ -155,193 +155,75 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
  */
 function materializeRecords(schema, buffer) {
     const records = [];
-    for (let i = 0; i < buffer.rowCount; i++) {
+    for (let i = 0; i < buffer.rowCount; i++)
         records.push({});
-    }
     for (const key in buffer.columnData) {
-        const columnData = buffer.columnData[key];
-        if (columnData.count) {
-            materializeColumn(schema, columnData, key, records);
-        }
+        materializeColumn(schema, buffer, key, records);
     }
     return records;
 }
 exports.materializeRecords = materializeRecords;
 // eslint-disable-next-line max-statements, complexity
-function materializeColumn(schema, columnData, key, records) {
+function materializeColumn(schema, buffer, key, records) {
+    const data = buffer.columnData[key];
+    if (!data.count)
+        return;
     const field = schema.findField(key);
     const branch = schema.findFieldBranch(key);
     // tslint:disable-next-line:prefer-array-literal
     const rLevels = new Array(field.rLevelMax + 1).fill(0);
     let vIndex = 0;
-    for (let i = 0; i < columnData.count; i++) {
-        const dLevel = columnData.dlevels[i];
-        const rLevel = columnData.rlevels[i];
+    for (let i = 0; i < data.count; i++) {
+        const dLevel = data.dlevels[i];
+        const rLevel = data.rlevels[i];
         rLevels[rLevel]++;
         rLevels.fill(0, rLevel + 1);
         let rIndex = 0;
        let record = records[rLevels[rIndex++] - 1];
-        // Internal nodes - Build a nested row object
+        // Internal nodes
        for (const step of branch) {
-            if (step === field || dLevel < step.dLevelMax) {
+            if (step === field)
                 break;
+            if (dLevel < step.dLevelMax)
+                break;
+            if (step.repetitionType === 'REPEATED') {
+                if (!(step.name in record)) {
+                    // eslint-disable max-depth
+                    record[step.name] = [];
+                }
+                const ix = rLevels[rIndex++];
+                while (record[step.name].length <= ix) {
+                    // eslint-disable max-depth
+                    record[step.name].push({});
+                }
+                record = record[step.name][ix];
            }
-            switch (step.repetitionType) {
-                case 'REPEATED':
-                    if (!(step.name in record)) {
-                        // eslint-disable max-depth
-                        record[step.name] = [];
-                    }
-                    const ix = rLevels[rIndex++];
-                    while (record[step.name].length <= ix) {
-                        // eslint-disable max-depth
-                        record[step.name].push({});
-                    }
-                    record = record[step.name][ix];
-                    break;
-                default:
-                    record[step.name] = record[step.name] || {};
-                    record = record[step.name];
+            else {
+                record[step.name] = record[step.name] || {};
+                record = record[step.name];
            }
        }
-        // Leaf node - Add the value
+        // Leaf node
        if (dLevel === field.dLevelMax) {
            const value = Types.fromPrimitive(
            // @ts-ignore
-            field.originalType || field.primitiveType, columnData.values[vIndex], field);
+            field.originalType || field.primitiveType, data.values[vIndex], field);
            vIndex++;
-            switch (field.repetitionType) {
-                case 'REPEATED':
-                    if (!(field.name in record)) {
-                        // eslint-disable max-depth
-                        record[field.name] = [];
-                    }
-                    const ix = rLevels[rIndex];
-                    while (record[field.name].length <= ix) {
-                        // eslint-disable max-depth
-                        record[field.name].push(null);
-                    }
-                    record[field.name][ix] = value;
-                    break;
-                default:
-                    record[field.name] = value;
+            if (field.repetitionType === 'REPEATED') {
+                if (!(field.name in record)) {
+                    // eslint-disable max-depth
+                    record[field.name] = [];
+                }
+                const ix = rLevels[rIndex];
+                while (record[field.name].length <= ix) {
+                    // eslint-disable max-depth
+                    record[field.name].push(null);
+                }
+                record[field.name][ix] = value;
+            }
+            else {
+                record[field.name] = value;
            }
        }
    }
 }
-// Columnar export
-/**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The buffer argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * buffer = {
- *   columnData: [
- *     'my_col': {
- *       dlevels: [d1, d2, .. dN],
- *       rlevels: [r1, r2, .. rN],
- *       values: [v1, v2, .. vN],
- *     }, ...
- *   ],
- *   rowCount: X,
- * }
- *
-export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
-  const columns: ParquetRecord = {};
-  for (const key in buffer.columnData) {
-    const columnData = buffer.columnData[key];
-    if (columnData.count) {
-      extractColumn(schema, columnData, key, columns);
-    }
-  }
-  return columns;
-}
-
-// eslint-disable-next-line max-statements, complexity
-function extractColumn(
-  schema: ParquetSchema,
-  columnData: ParquetData,
-  key: string,
-  columns: Record<string, unknown>
-) {
-  if (columnData.count <= 0) {
-    return;
-  }
-
-  const record = columns;
-
-  const field = schema.findField(key);
-  const branch = schema.findFieldBranch(key);
-
-  // tslint:disable-next-line:prefer-array-literal
-  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
-  let vIndex = 0;
-
-  let i = 0;
-  const dLevel = columnData.dlevels[i];
-  const rLevel = columnData.rlevels[i];
-  rLevels[rLevel]++;
-  rLevels.fill(0, rLevel + 1);
-
-  let rIndex = 0;
-  let record = records[rLevels[rIndex++] - 1];
-
-  // Internal nodes
-  for (const step of branch) {
-    if (step === field || dLevel < step.dLevelMax) {
-      break;
-    }
-
-    switch (step.repetitionType) {
-      case 'REPEATED':
-        if (!(step.name in record)) {
-          // eslint-disable max-depth
-          record[step.name] = [];
-        }
-        const ix = rLevels[rIndex++];
-        while (record[step.name].length <= ix) {
-          // eslint-disable max-depth
-          record[step.name].push({});
-        }
-        record = record[step.name][ix];
-        break;
-
-      default:
-        record[step.name] = record[step.name] || {};
-        record = record[step.name];
-    }
-  }
-
-  // Leaf node
-  if (dLevel === field.dLevelMax) {
-    const value = Types.fromPrimitive(
-      // @ts-ignore
-      field.originalType || field.primitiveType,
-      columnData.values[vIndex],
-      field
-    );
-    vIndex++;
-
-    switch (field.repetitionType) {
-      case 'REPEATED':
-        if (!(field.name in record)) {
-          // eslint-disable max-depth
-          record[field.name] = [];
-        }
-        const ix = rLevels[rIndex];
-        while (record[field.name].length <= ix) {
-          // eslint-disable max-depth
-          record[field.name].push(null);
-        }
-        record[field.name][ix] = value;
-        break;
-
-      default:
-        record[field.name] = value;
-    }
-  }
-}
- */
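Note: materializeRecords above is the record-assembly half of the Dremel algorithm. It consumes the <value, repetition_level, definition_level> triples that shredRecord produces and rebuilds nested row objects, so the 3.4.0 refactor (passing the whole buffer and early-returning on empty columns) does not change the round-trip contract described in the doc comment. A minimal round-trip sketch in TypeScript; the deep import path and the shredBuffer name are assumptions based on this fork's layout, not a documented API:

  import {ParquetSchema} from '@loaders.gl/parquet';
  // Hypothetical deep import; the shred module is not re-exported from the package root
  import {shredBuffer, shredRecord, materializeRecords} from '@loaders.gl/parquet/parquetjs/schema/shred';

  const schema = new ParquetSchema({name: {type: 'UTF8'}});
  const buffer = shredBuffer(schema); // empty {rowCount: 0, columnData: {...}} buffer (assumed helper name)
  shredRecord(schema, {name: 'alice'}, buffer); // appends one <value, rlevel, dlevel> triple per column
  shredRecord(schema, {name: 'bob'}, buffer);
  const rows = materializeRecords(schema, buffer); // [{name: 'alice'}, {name: 'bob'}]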
package/dist/parquetjs/schema/types.d.ts CHANGED
@@ -11,10 +11,10 @@ export declare const PARQUET_LOGICAL_TYPES: Record<ParquetType, ParquetTypeKit>;
  * Convert a value from it's native representation to the internal/underlying
  * primitive type
  */
-export declare function toPrimitive(type: ParquetType, value: unknown, field?: ParquetField): unknown;
+export declare function toPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
 /**
  * Convert a value from it's internal/underlying primitive representation to
  * the native representation
  */
-export declare function fromPrimitive(type: ParquetType, value: unknown, field?: ParquetField): any;
+export declare function fromPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
 //# sourceMappingURL=types.d.ts.map
package/dist/parquetjs/schema/types.d.ts.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,GAAG,OAAO,CAM5F;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,YAAY,OAUpF"}
+{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAM9E;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAUhF"}
package/dist/parquetjs/utils/buffer-utils.d.ts ADDED
@@ -0,0 +1,10 @@
+/// <reference types="node" />
+/**
+ * Convert Buffer to ArrayBuffer
+ */
+export declare function toArrayBuffer(buffer: Buffer): ArrayBuffer;
+/**
+ * Convert (copy) ArrayBuffer to Buffer
+ */
+export declare function toBuffer(arrayBuffer: ArrayBuffer): Buffer;
+//# sourceMappingURL=buffer-utils.d.ts.map
package/dist/parquetjs/utils/buffer-utils.d.ts.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"buffer-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/buffer-utils.ts"],"names":[],"mappings":";AAAA;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,CAOzD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAEzD"}
package/dist/parquetjs/utils/buffer-utils.js ADDED
@@ -0,0 +1,22 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.toBuffer = exports.toArrayBuffer = void 0;
+/**
+ * Convert Buffer to ArrayBuffer
+ */
+function toArrayBuffer(buffer) {
+    // TODO - per docs we should just be able to call buffer.buffer, but there are issues
+    if (Buffer.isBuffer(buffer)) {
+        const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
+        return typedArray.slice().buffer;
+    }
+    return buffer;
+}
+exports.toArrayBuffer = toArrayBuffer;
+/**
+ * Convert (copy) ArrayBuffer to Buffer
+ */
+function toBuffer(arrayBuffer) {
+    return Buffer.from(arrayBuffer);
+}
+exports.toBuffer = toBuffer;
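The new buffer-utils module centralizes the Buffer/ArrayBuffer conversions that compression.ts previously defined inline (see the src/parquetjs/compression.ts hunk at the end of this diff). One detail worth noting: toArrayBuffer copies via typedArray.slice(), so the returned ArrayBuffer does not alias Node's shared Buffer pool. A small usage sketch; the relative import path is illustrative:

  import {toArrayBuffer, toBuffer} from './buffer-utils';

  const buf = Buffer.from([1, 2, 3]);
  const ab = toArrayBuffer(buf); // standalone copy, safe to transfer or slice freely
  const back = toBuffer(ab);     // Buffer view over the ArrayBuffer's bytes
  console.log(back.equals(buf)); // true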
package/dist/parquetjs/utils/file-utils.d.ts CHANGED
@@ -1,7 +1,8 @@
 /// <reference types="node" />
 /// <reference types="node" />
 /// <reference types="node" />
-import { fs, stream } from '@loaders.gl/loader-utils';
+import fs from 'fs';
+import { Writable } from 'stream';
 export declare function load(name: string): any;
 export interface WriteStreamOptions {
     flags?: string;
@@ -11,7 +12,7 @@ export interface WriteStreamOptions {
     autoClose?: boolean;
     start?: number;
 }
-export declare function oswrite(os: stream.Writable, buf: Buffer): Promise<void>;
-export declare function osclose(os: stream.Writable): Promise<void>;
+export declare function oswrite(os: Writable, buf: Buffer): Promise<void>;
+export declare function osclose(os: Writable): Promise<void>;
 export declare function osopen(path: string, opts?: WriteStreamOptions): Promise<fs.WriteStream>;
 //# sourceMappingURL=file-utils.d.ts.map
package/dist/parquetjs/utils/file-utils.d.ts.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,EAAC,EAAE,EAAE,MAAM,EAAC,MAAM,0BAA0B,CAAC;AAEpD,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAEtC;AACD,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUvE;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAU1D;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,CAAC,CAMvF"}
+{"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,QAAQ,EAAC,MAAM,QAAQ,CAAC;AAEhC,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAEtC;AACD,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUhE;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAUnD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,CAAC,CAMvF"}
package/dist/parquetjs/utils/file-utils.js CHANGED
@@ -1,8 +1,11 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.osopen = exports.osclose = exports.oswrite = exports.load = void 0;
 // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
-const loader_utils_1 = require("@loaders.gl/loader-utils");
+const fs_1 = __importDefault(require("fs"));
 function load(name) {
     return (module || global).require(name);
 }
@@ -35,7 +38,7 @@ function osclose(os) {
 exports.osclose = osclose;
 function osopen(path, opts) {
     return new Promise((resolve, reject) => {
-        const outputStream = loader_utils_1.fs.createWriteStream(path, opts);
+        const outputStream = fs_1.default.createWriteStream(path, opts);
         outputStream.once('open', (fd) => resolve(outputStream));
         outputStream.once('error', (err) => reject(err));
     });
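Together with the .d.ts change above, file-utils now talks to Node's fs and stream modules directly instead of going through @loaders.gl/loader-utils re-exports. The helpers are thin promise wrappers over stream callbacks; a typical write sequence based on the declared signatures (the writeBufferToFile wrapper itself is illustrative):

  import {osopen, oswrite, osclose} from './file-utils';

  // Open a write stream, write one Buffer, and close it, surfacing stream errors as rejections
  async function writeBufferToFile(path: string, data: Buffer): Promise<void> {
    const os = await osopen(path, {flags: 'w'});
    await oswrite(os, data);
    await osclose(os);
  }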
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@loaders.gl/parquet",
-  "version": "3.3.0",
+  "version": "3.4.0-alpha.1",
   "description": "Framework-independent loader for Apache Parquet files",
   "license": "MIT",
   "publishConfig": {
@@ -37,14 +37,12 @@
     "net": false,
     "tls": false,
     "lzo": false,
-    "stream": false,
-    "fs": false,
     "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
   },
   "dependencies": {
-    "@loaders.gl/compression": "3.3.0",
-    "@loaders.gl/loader-utils": "3.3.0",
-    "@loaders.gl/schema": "3.3.0",
+    "@loaders.gl/compression": "3.4.0-alpha.1",
+    "@loaders.gl/loader-utils": "3.4.0-alpha.1",
+    "@loaders.gl/schema": "3.4.0-alpha.1",
     "async-mutex": "^0.2.2",
     "brotli": "^1.3.2",
     "bson": "^1.0.4",
@@ -70,5 +68,5 @@
     "@types/varint": "^5.0.0",
     "apache-arrow": "^4.0.0"
   },
-  "gitHead": "566d194f175a03e042d3e588cdd514048f428cc1"
+  "gitHead": "4085b0323050e4361614471319a1fb4729547bbf"
 }
package/src/index.ts CHANGED
@@ -32,8 +32,8 @@ export {preloadCompressions} from './parquetjs/compression';

 export {ParquetSchema} from './parquetjs/schema/schema';
 export {ParquetReader} from './parquetjs/parser/parquet-reader';
-export {ParquetEncoder} from './parquetjs/encoder/parquet-encoder';
-
+export {ParquetEnvelopeReader} from './parquetjs/parser/parquet-envelope-reader';
+// export {ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer} from './parquetjs/encoder/writer';
 export {convertParquetToArrowSchema} from './lib/convert-schema';

 // TESTS
package/src/lib/parse-parquet.ts CHANGED
@@ -1,7 +1,6 @@
 // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
-// import {ColumnarTableBatch} from '@loaders.gl/schema';
-import {makeReadableFile} from '@loaders.gl/loader-utils';
 import type {ParquetLoaderOptions} from '../parquet-loader';
+
 import {ParquetReader} from '../parquetjs/parser/parquet-reader';

 export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
@@ -13,28 +12,16 @@ export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLo
 }

 export async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {
-  const file = makeReadableFile(blob);
-  const reader = new ParquetReader(file);
-  const rowBatches = reader.rowBatchIterator(options?.parquet);
-  for await (const rows of rowBatches) {
-    yield rows;
+  const reader = await ParquetReader.openBlob(blob);
+  const rows: any[][] = [];
+  try {
+    const cursor = reader.getCursor();
+    let record: any[] | null;
+    while ((record = await cursor.next())) {
+      rows.push(record);
+    }
+  } finally {
+    await reader.close();
   }
+  yield rows;
 }
-
-// export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
-//   const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
-//   try {
-//     for await (const rowGroup of rowGroupReader) {
-//       yield convertRowGroupToTableBatch(rowGroup);
-//     }
-//   } finally {
-//     await rowGroupReader.close();
-//   }
-// }
-
-// function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
-//   // @ts-expect-error
-//   return {
-//     data: rowGroup
-//   };
-// }
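The rewritten parseParquetFileInBatches drops the rowBatchIterator API in favor of the classic parquetjs reader/cursor protocol: open a reader, pull rows with cursor.next() until it returns null, and close the reader in a finally block (note that, as written, it buffers every row and yields a single batch). The same pattern should work outside the loader, sketched here; ParquetReader.openBlob is taken from the diff above, everything else is illustrative:

  import {ParquetReader} from '@loaders.gl/parquet';

  async function readAllRows(blob: Blob): Promise<any[]> {
    const reader = await ParquetReader.openBlob(blob); // openBlob as used in parseParquetFileInBatches
    try {
      const cursor = reader.getCursor();
      const rows: any[] = [];
      let record: any | null;
      while ((record = await cursor.next())) {
        rows.push(record); // cursor.next() resolves null after the last row
      }
      return rows;
    } finally {
      await reader.close();
    }
  }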
package/src/parquet-loader.ts CHANGED
@@ -8,15 +8,13 @@ export type ParquetLoaderOptions = LoaderOptions & {
   parquet?: {
     type?: 'object-row-table';
     url?: string;
-    columnList?: string[] | string[][];
   };
 };

 const DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {
   parquet: {
     type: 'object-row-table',
-    url: undefined,
-    columnList: []
+    url: undefined
   }
 };

package/src/parquetjs/compression.ts CHANGED
@@ -14,20 +14,7 @@ import {
 } from '@loaders.gl/compression';

 import {ParquetCompression} from './schema/declare';
-
-/** We can't use loaders-util buffer handling since we are dependent on buffers even in the browser */
-function toBuffer(arrayBuffer: ArrayBuffer): Buffer {
-  return Buffer.from(arrayBuffer);
-}
-
-function toArrayBuffer(buffer: Buffer): ArrayBuffer {
-  // TODO - per docs we should just be able to call buffer.buffer, but there are issues
-  if (Buffer.isBuffer(buffer)) {
-    const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
-    return typedArray.slice().buffer;
-  }
-  return buffer;
-}
+import {toArrayBuffer, toBuffer} from './utils/buffer-utils';

 // TODO switch to worker compression to avoid bundling...