@loaders.gl/parquet 3.3.0 → 3.4.0-alpha.1
This diff compares the contents of publicly available package versions as published to their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in those registries.
- package/dist/dist.min.js +26 -17
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +3 -3
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/parse-parquet.js +25 -49
- package/dist/es5/lib/parse-parquet.js.map +1 -1
- package/dist/es5/parquet-loader.js +2 -3
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +5 -15
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/{parquet-encoder.js → writer.js} +158 -70
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
- package/dist/es5/parquetjs/file.js +94 -0
- package/dist/es5/parquetjs/file.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js +183 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +327 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +222 -553
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +1 -3
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +33 -39
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +19 -0
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +3 -2
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/esm/index.js +1 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/parse-parquet.js +12 -6
- package/dist/esm/lib/parse-parquet.js.map +1 -1
- package/dist/esm/parquet-loader.js +2 -3
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +1 -10
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -7
- package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
- package/dist/esm/parquetjs/file.js +81 -0
- package/dist/esm/parquetjs/file.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js +78 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +129 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +72 -158
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +0 -1
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +34 -42
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +13 -0
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -3
- package/dist/lib/parse-parquet.d.ts +2 -2
- package/dist/lib/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parse-parquet.js +12 -24
- package/dist/parquet-loader.d.ts +0 -1
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +1 -2
- package/dist/parquet-worker.js +24 -15
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +5 -16
- package/dist/parquetjs/encoder/{parquet-encoder.d.ts → writer.d.ts} +19 -10
- package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
- package/dist/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -39
- package/dist/parquetjs/file.d.ts +10 -0
- package/dist/parquetjs/file.d.ts.map +1 -0
- package/dist/parquetjs/file.js +99 -0
- package/dist/parquetjs/parser/parquet-cursor.d.ts +36 -0
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
- package/dist/parquetjs/parser/parquet-cursor.js +74 -0
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +40 -0
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
- package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
- package/dist/parquetjs/parser/parquet-reader.d.ts +57 -47
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +102 -168
- package/dist/parquetjs/schema/declare.d.ts +7 -14
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +0 -2
- package/dist/parquetjs/schema/shred.d.ts +0 -115
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +43 -161
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/utils/buffer-utils.d.ts +10 -0
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
- package/dist/parquetjs/utils/buffer-utils.js +22 -0
- package/dist/parquetjs/utils/file-utils.d.ts +4 -3
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +5 -2
- package/package.json +5 -7
- package/src/index.ts +2 -2
- package/src/lib/parse-parquet.ts +12 -25
- package/src/parquet-loader.ts +1 -3
- package/src/parquetjs/compression.ts +1 -14
- package/src/parquetjs/encoder/{parquet-encoder.ts → writer.ts} +28 -22
- package/src/parquetjs/file.ts +90 -0
- package/src/parquetjs/parser/parquet-cursor.ts +94 -0
- package/src/parquetjs/parser/parquet-envelope-reader.ts +199 -0
- package/src/parquetjs/parser/parquet-reader.ts +122 -239
- package/src/parquetjs/schema/declare.ts +9 -17
- package/src/parquetjs/schema/shred.ts +28 -157
- package/src/parquetjs/schema/types.ts +27 -21
- package/src/parquetjs/utils/buffer-utils.ts +18 -0
- package/src/parquetjs/utils/file-utils.ts +4 -3
- package/dist/es5/lib/convert-schema-deep.ts.disabled +0 -910
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +0 -1
- package/dist/esm/lib/convert-schema-deep.ts.disabled +0 -910
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +0 -1
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +0 -1
- package/src/lib/convert-schema-deep.ts.disabled +0 -910
@@ -45,119 +45,4 @@ export declare function shredRecord(schema: ParquetSchema, record: any, buffer:
  * }
  */
 export declare function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[];
-/**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The buffer argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * buffer = {
- *   columnData: [
- *     'my_col': {
- *        dlevels: [d1, d2, .. dN],
- *        rlevels: [r1, r2, .. rN],
- *        values: [v1, v2, .. vN],
- *      }, ...
- *   ],
- *   rowCount: X,
- * }
- *
-export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
-  const columns: ParquetRecord = {};
-  for (const key in buffer.columnData) {
-    const columnData = buffer.columnData[key];
-    if (columnData.count) {
-      extractColumn(schema, columnData, key, columns);
-    }
-  }
-  return columns;
-}
-
-// eslint-disable-next-line max-statements, complexity
-function extractColumn(
-  schema: ParquetSchema,
-  columnData: ParquetData,
-  key: string,
-  columns: Record<string, unknown>
-) {
-  if (columnData.count <= 0) {
-    return;
-  }
-
-  const record = columns;
-
-  const field = schema.findField(key);
-  const branch = schema.findFieldBranch(key);
-
-  // tslint:disable-next-line:prefer-array-literal
-  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
-  let vIndex = 0;
-
-  let i = 0;
-  const dLevel = columnData.dlevels[i];
-  const rLevel = columnData.rlevels[i];
-  rLevels[rLevel]++;
-  rLevels.fill(0, rLevel + 1);
-
-  let rIndex = 0;
-  let record = records[rLevels[rIndex++] - 1];
-
-  // Internal nodes
-  for (const step of branch) {
-    if (step === field || dLevel < step.dLevelMax) {
-      break;
-    }
-
-    switch (step.repetitionType) {
-      case 'REPEATED':
-        if (!(step.name in record)) {
-          // eslint-disable max-depth
-          record[step.name] = [];
-        }
-        const ix = rLevels[rIndex++];
-        while (record[step.name].length <= ix) {
-          // eslint-disable max-depth
-          record[step.name].push({});
-        }
-        record = record[step.name][ix];
-        break;
-
-      default:
-        record[step.name] = record[step.name] || {};
-        record = record[step.name];
-    }
-  }
-
-  // Leaf node
-  if (dLevel === field.dLevelMax) {
-    const value = Types.fromPrimitive(
-      // @ts-ignore
-      field.originalType || field.primitiveType,
-      columnData.values[vIndex],
-      field
-    );
-    vIndex++;
-
-    switch (field.repetitionType) {
-      case 'REPEATED':
-        if (!(field.name in record)) {
-          // eslint-disable max-depth
-          record[field.name] = [];
-        }
-        const ix = rLevels[rIndex];
-        while (record[field.name].length <= ix) {
-          // eslint-disable max-depth
-          record[field.name].push(null);
-        }
-        record[field.name][ix] = value;
-        break;
-
-      default:
-        record[field.name] = value;
-    }
-  }
-}
-*/
 //# sourceMappingURL=shred.d.ts.map
@@ -1 +1 @@
-{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,aAAa,EAA6B,aAAa,EAAC,MAAM,WAAW,CAAC;AAClF,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AAGvC,OAAO,EAAC,aAAa,EAAC,CAAC;AAEvB,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,aAAa,CAYhE;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,aAAa,GAAG,IAAI,CAmB3F;AAgED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,aAAa,GAAG,aAAa,EAAE,
+{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,aAAa,EAA6B,aAAa,EAAC,MAAM,WAAW,CAAC;AAClF,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AAGvC,OAAO,EAAC,aAAa,EAAC,CAAC;AAEvB,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,aAAa,CAYhE;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,aAAa,GAAG,IAAI,CAmB3F;AAgED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,aAAa,GAAG,aAAa,EAAE,CAOhG"}
@@ -155,193 +155,75 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
  */
 function materializeRecords(schema, buffer) {
     const records = [];
-    for (let i = 0; i < buffer.rowCount; i++) {
+    for (let i = 0; i < buffer.rowCount; i++)
         records.push({});
-    }
     for (const key in buffer.columnData) {
-        const columnData = buffer.columnData[key];
-        if (columnData.count) {
-            materializeColumn(schema, columnData, key, records);
-        }
+        materializeColumn(schema, buffer, key, records);
     }
     return records;
 }
 exports.materializeRecords = materializeRecords;
 // eslint-disable-next-line max-statements, complexity
-function materializeColumn(schema, columnData, key, records) {
+function materializeColumn(schema, buffer, key, records) {
+    const data = buffer.columnData[key];
+    if (!data.count)
+        return;
     const field = schema.findField(key);
     const branch = schema.findFieldBranch(key);
     // tslint:disable-next-line:prefer-array-literal
     const rLevels = new Array(field.rLevelMax + 1).fill(0);
     let vIndex = 0;
-    for (let i = 0; i < columnData.count; i++) {
-        const dLevel = columnData.dlevels[i];
-        const rLevel = columnData.rlevels[i];
+    for (let i = 0; i < data.count; i++) {
+        const dLevel = data.dlevels[i];
+        const rLevel = data.rlevels[i];
         rLevels[rLevel]++;
         rLevels.fill(0, rLevel + 1);
         let rIndex = 0;
         let record = records[rLevels[rIndex++] - 1];
-        // Internal nodes
+        // Internal nodes
         for (const step of branch) {
-            if (step === field || dLevel < step.dLevelMax) {
+            if (step === field)
                 break;
+            if (dLevel < step.dLevelMax)
+                break;
+            if (step.repetitionType === 'REPEATED') {
+                if (!(step.name in record)) {
+                    // eslint-disable max-depth
+                    record[step.name] = [];
+                }
+                const ix = rLevels[rIndex++];
+                while (record[step.name].length <= ix) {
+                    // eslint-disable max-depth
+                    record[step.name].push({});
+                }
+                record = record[step.name][ix];
             }
-            switch (step.repetitionType) {
-                case 'REPEATED':
-                    if (!(step.name in record)) {
-                        // eslint-disable max-depth
-                        record[step.name] = [];
-                    }
-                    const ix = rLevels[rIndex++];
-                    while (record[step.name].length <= ix) {
-                        // eslint-disable max-depth
-                        record[step.name].push({});
-                    }
-                    record = record[step.name][ix];
-                    break;
-                default:
-                    record[step.name] = record[step.name] || {};
-                    record = record[step.name];
+            else {
+                record[step.name] = record[step.name] || {};
+                record = record[step.name];
             }
         }
-        // Leaf node
+        // Leaf node
        if (dLevel === field.dLevelMax) {
             const value = Types.fromPrimitive(
             // @ts-ignore
-            field.originalType || field.primitiveType, columnData.values[vIndex], field);
+            field.originalType || field.primitiveType, data.values[vIndex], field);
             vIndex++;
-            switch (field.repetitionType) {
-                case 'REPEATED':
-                    if (!(field.name in record)) {
-                        // eslint-disable max-depth
-                        record[field.name] = [];
-                    }
-                    const ix = rLevels[rIndex];
-                    while (record[field.name].length <= ix) {
-                        // eslint-disable max-depth
-                        record[field.name].push(null);
-                    }
-                    record[field.name][ix] = value;
-                    break;
-                default:
-                    record[field.name] = value;
+            if (field.repetitionType === 'REPEATED') {
+                if (!(field.name in record)) {
+                    // eslint-disable max-depth
+                    record[field.name] = [];
+                }
+                const ix = rLevels[rIndex];
+                while (record[field.name].length <= ix) {
+                    // eslint-disable max-depth
+                    record[field.name].push(null);
+                }
+                record[field.name][ix] = value;
+            }
+            else {
+                record[field.name] = value;
             }
         }
     }
 }
-// Columnar export
-/**
- * 'Materialize' a list of <value, repetition_level, definition_level>
- * tuples back to nested records (objects/arrays) using the Google Dremel
- * Algorithm..
- *
- * The buffer argument must point to an object with the following structure (i.e.
- * the same structure that is returned by shredRecords):
- *
- * buffer = {
- *   columnData: [
- *     'my_col': {
- *        dlevels: [d1, d2, .. dN],
- *        rlevels: [r1, r2, .. rN],
- *        values: [v1, v2, .. vN],
- *      }, ...
- *   ],
- *   rowCount: X,
- * }
- *
-export function extractColumns(schema: ParquetSchema, buffer: ParquetBuffer): Record<string, unknown> {
-  const columns: ParquetRecord = {};
-  for (const key in buffer.columnData) {
-    const columnData = buffer.columnData[key];
-    if (columnData.count) {
-      extractColumn(schema, columnData, key, columns);
-    }
-  }
-  return columns;
-}
-
-// eslint-disable-next-line max-statements, complexity
-function extractColumn(
-  schema: ParquetSchema,
-  columnData: ParquetData,
-  key: string,
-  columns: Record<string, unknown>
-) {
-  if (columnData.count <= 0) {
-    return;
-  }
-
-  const record = columns;
-
-  const field = schema.findField(key);
-  const branch = schema.findFieldBranch(key);
-
-  // tslint:disable-next-line:prefer-array-literal
-  const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
-  let vIndex = 0;
-
-  let i = 0;
-  const dLevel = columnData.dlevels[i];
-  const rLevel = columnData.rlevels[i];
-  rLevels[rLevel]++;
-  rLevels.fill(0, rLevel + 1);
-
-  let rIndex = 0;
-  let record = records[rLevels[rIndex++] - 1];
-
-  // Internal nodes
-  for (const step of branch) {
-    if (step === field || dLevel < step.dLevelMax) {
-      break;
-    }
-
-    switch (step.repetitionType) {
-      case 'REPEATED':
-        if (!(step.name in record)) {
-          // eslint-disable max-depth
-          record[step.name] = [];
-        }
-        const ix = rLevels[rIndex++];
-        while (record[step.name].length <= ix) {
-          // eslint-disable max-depth
-          record[step.name].push({});
-        }
-        record = record[step.name][ix];
-        break;
-
-      default:
-        record[step.name] = record[step.name] || {};
-        record = record[step.name];
-    }
-  }
-
-  // Leaf node
-  if (dLevel === field.dLevelMax) {
-    const value = Types.fromPrimitive(
-      // @ts-ignore
-      field.originalType || field.primitiveType,
-      columnData.values[vIndex],
-      field
-    );
-    vIndex++;
-
-    switch (field.repetitionType) {
-      case 'REPEATED':
-        if (!(field.name in record)) {
-          // eslint-disable max-depth
-          record[field.name] = [];
-        }
-        const ix = rLevels[rIndex];
-        while (record[field.name].length <= ix) {
-          // eslint-disable max-depth
-          record[field.name].push(null);
-        }
-        record[field.name][ix] = value;
-        break;
-
-      default:
-        record[field.name] = value;
-    }
-  }
-}
-*/
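For orientation, this is the column-buffer shape that materializeRecords consumes, reconstructed from the removed doc comment and the dlevels/rlevels/values/count accesses in the diff above. The interface names below are illustrative only; the package's own declarations use ParquetBuffer and ParquetData from schema/declare.

// Sketch only: ShreddedColumn/ShreddedBuffer are invented names for illustration;
// the shape follows the doc comment and field accesses shown in materializeColumn above.
interface ShreddedColumn {
  dlevels: number[]; // definition level for each slot
  rlevels: number[]; // repetition level for each slot
  values: unknown[]; // primitive values, consumed only when dlevel === field.dLevelMax
  count: number;     // number of <value, rlevel, dlevel> triples in this column
}

interface ShreddedBuffer {
  rowCount: number;                           // number of top-level records
  columnData: Record<string, ShreddedColumn>; // keyed by column path, e.g. 'my_col'
}

materializeRecords walks each column's triples in order: the repetition level selects which record (and which array slot) a value belongs to, and a value is emitted only when its definition level equals the field's dLevelMax, i.e. when the leaf is actually present.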
@@ -11,10 +11,10 @@ export declare const PARQUET_LOGICAL_TYPES: Record<ParquetType, ParquetTypeKit>;
  * Convert a value from it's native representation to the internal/underlying
  * primitive type
  */
-export declare function toPrimitive(type: ParquetType, value:
+export declare function toPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
 /**
  * Convert a value from it's internal/underlying primitive representation to
  * the native representation
  */
-export declare function fromPrimitive(type: ParquetType, value:
+export declare function fromPrimitive(type: ParquetType, value: any, field?: ParquetField): any;
 //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
-{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,
+{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/types.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAC,MAAM,WAAW,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,aAAa,CAAC;IAC7B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,CAAC;IACtB,aAAa,CAAC,EAAE,QAAQ,CAAC;CAC1B;AAED,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CAuJrE,CAAC;AAEF;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAM9E;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,YAAY,OAUhF"}
@@ -0,0 +1,10 @@
+/// <reference types="node" />
+/**
+ * Convert Buffer to ArrayBuffer
+ */
+export declare function toArrayBuffer(buffer: Buffer): ArrayBuffer;
+/**
+ * Convert (copy) ArrayBuffer to Buffer
+ */
+export declare function toBuffer(arrayBuffer: ArrayBuffer): Buffer;
+//# sourceMappingURL=buffer-utils.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"buffer-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/buffer-utils.ts"],"names":[],"mappings":";AAAA;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,CAOzD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAEzD"}
@@ -0,0 +1,22 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.toBuffer = exports.toArrayBuffer = void 0;
+/**
+ * Convert Buffer to ArrayBuffer
+ */
+function toArrayBuffer(buffer) {
+    // TODO - per docs we should just be able to call buffer.buffer, but there are issues
+    if (Buffer.isBuffer(buffer)) {
+        const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
+        return typedArray.slice().buffer;
+    }
+    return buffer;
+}
+exports.toArrayBuffer = toArrayBuffer;
+/**
+ * Convert (copy) ArrayBuffer to Buffer
+ */
+function toBuffer(arrayBuffer) {
+    return Buffer.from(arrayBuffer);
+}
+exports.toBuffer = toBuffer;
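A minimal usage sketch of the two helpers added above (Node.js only, since they rely on Buffer); the relative import path is illustrative.

import {toArrayBuffer, toBuffer} from './buffer-utils'; // illustrative path

const buf = Buffer.from('PAR1');
const arrayBuffer = toArrayBuffer(buf);     // copies the bytes into a standalone ArrayBuffer
const roundTripped = toBuffer(arrayBuffer); // wraps the same bytes in a Buffer again (Buffer.from(ArrayBuffer) shares memory)

console.log(roundTripped.equals(buf)); // true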
@@ -1,7 +1,8 @@
 /// <reference types="node" />
 /// <reference types="node" />
 /// <reference types="node" />
-import
+import fs from 'fs';
+import { Writable } from 'stream';
 export declare function load(name: string): any;
 export interface WriteStreamOptions {
     flags?: string;
@@ -11,7 +12,7 @@ export interface WriteStreamOptions {
     autoClose?: boolean;
     start?: number;
 }
-export declare function oswrite(os:
-export declare function osclose(os:
+export declare function oswrite(os: Writable, buf: Buffer): Promise<void>;
+export declare function osclose(os: Writable): Promise<void>;
 export declare function osopen(path: string, opts?: WriteStreamOptions): Promise<fs.WriteStream>;
 //# sourceMappingURL=file-utils.d.ts.map
@@ -1 +1 @@
-{"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,
+{"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/utils/file-utils.ts"],"names":[],"mappings":";;;AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,QAAQ,EAAC,MAAM,QAAQ,CAAC;AAEhC,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAEtC;AACD,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUhE;AAED,wBAAgB,OAAO,CAAC,EAAE,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAUnD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,CAAC,CAMvF"}
@@ -1,8 +1,11 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.osopen = exports.osclose = exports.oswrite = exports.load = void 0;
 // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
-const
+const fs_1 = __importDefault(require("fs"));
 function load(name) {
     return (module || global).require(name);
 }
@@ -35,7 +38,7 @@ function osclose(os) {
 exports.osclose = osclose;
 function osopen(path, opts) {
     return new Promise((resolve, reject) => {
-        const outputStream =
+        const outputStream = fs_1.default.createWriteStream(path, opts);
         outputStream.once('open', (fd) => resolve(outputStream));
         outputStream.once('error', (err) => reject(err));
     });
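For context, a hedged sketch of how the now fully typed file helpers compose (signatures per file-utils.d.ts above); the relative import path and the output path are illustrative.

import {osopen, oswrite, osclose} from './file-utils'; // illustrative path

async function writeMagicBytes(path: string): Promise<void> {
  const stream = await osopen(path, {flags: 'w'}); // resolves with an fs.WriteStream once the file is open
  await oswrite(stream, Buffer.from('PAR1'));      // (os: Writable, buf: Buffer) => Promise<void>
  await osclose(stream);                           // ends the stream
}

writeMagicBytes('/tmp/example.parquet').catch(console.error);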
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@loaders.gl/parquet",
-  "version": "3.3.0",
+  "version": "3.4.0-alpha.1",
   "description": "Framework-independent loader for Apache Parquet files",
   "license": "MIT",
   "publishConfig": {
@@ -37,14 +37,12 @@
     "net": false,
     "tls": false,
     "lzo": false,
-    "stream": false,
-    "fs": false,
     "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
   },
   "dependencies": {
-    "@loaders.gl/compression": "3.
-    "@loaders.gl/loader-utils": "3.
-    "@loaders.gl/schema": "3.
+    "@loaders.gl/compression": "3.4.0-alpha.1",
+    "@loaders.gl/loader-utils": "3.4.0-alpha.1",
+    "@loaders.gl/schema": "3.4.0-alpha.1",
     "async-mutex": "^0.2.2",
     "brotli": "^1.3.2",
     "bson": "^1.0.4",
@@ -70,5 +68,5 @@
     "@types/varint": "^5.0.0",
     "apache-arrow": "^4.0.0"
   },
-  "gitHead": "
+  "gitHead": "4085b0323050e4361614471319a1fb4729547bbf"
 }
package/src/index.ts
CHANGED
@@ -32,8 +32,8 @@ export {preloadCompressions} from './parquetjs/compression';
 
 export {ParquetSchema} from './parquetjs/schema/schema';
 export {ParquetReader} from './parquetjs/parser/parquet-reader';
-export {
-
+export {ParquetEnvelopeReader} from './parquetjs/parser/parquet-envelope-reader';
+// export {ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer} from './parquetjs/encoder/writer';
 export {convertParquetToArrowSchema} from './lib/convert-schema';
 
 // TESTS
package/src/lib/parse-parquet.ts
CHANGED
@@ -1,7 +1,6 @@
 // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
-// import {ColumnarTableBatch} from '@loaders.gl/schema';
-import {makeReadableFile} from '@loaders.gl/loader-utils';
 import type {ParquetLoaderOptions} from '../parquet-loader';
+
 import {ParquetReader} from '../parquetjs/parser/parquet-reader';
 
 export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
@@ -13,28 +12,16 @@ export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLo
 }
 
 export async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {
-  const
-  const
-
-
-
+  const reader = await ParquetReader.openBlob(blob);
+  const rows: any[][] = [];
+  try {
+    const cursor = reader.getCursor();
+    let record: any[] | null;
+    while ((record = await cursor.next())) {
+      rows.push(record);
+    }
+  } finally {
+    await reader.close();
   }
+  yield rows;
 }
-
-// export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
-//   const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
-//   try {
-//     for await (const rowGroup of rowGroupReader) {
-//       yield convertRowGroupToTableBatch(rowGroup);
-//     }
-//   } finally {
-//     await rowGroupReader.close();
-//   }
-// }
-
-// function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
-//   // @ts-expect-error
-//   return {
-//     data: rowGroup
-//   };
-// }
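The rewritten parseParquetFileInBatches reads every row through the ParquetReader cursor and yields them as a single batch. A consumption sketch (the Blob source and the relative import path are illustrative):

import {parseParquetFileInBatches} from './parse-parquet'; // illustrative path

async function logRows(blob: Blob): Promise<void> {
  for await (const rows of parseParquetFileInBatches(blob)) {
    console.log(`batch of ${rows.length} rows`, rows); // currently a single batch containing all rows
  }
}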
package/src/parquet-loader.ts
CHANGED
@@ -8,15 +8,13 @@ export type ParquetLoaderOptions = LoaderOptions & {
   parquet?: {
     type?: 'object-row-table';
     url?: string;
-    columnList?: string[] | string[][];
   };
 };
 
 const DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {
   parquet: {
     type: 'object-row-table',
-    url: undefined,
-    columnList: []
+    url: undefined
   }
 };
 
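With columnList gone, the only remaining parquet options are type and url. A hedged sketch of passing them through @loaders.gl/core's load(); the ParquetLoader export name and the URL follow loaders.gl conventions and are assumptions here.

import {load} from '@loaders.gl/core';
import {ParquetLoader} from '@loaders.gl/parquet'; // assumed export name

async function loadTable() {
  // columnList is no longer a recognized option as of 3.4.0-alpha.1
  return await load('https://example.com/data.parquet', ParquetLoader, {
    parquet: {type: 'object-row-table'}
  });
}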
package/src/parquetjs/compression.ts
CHANGED
@@ -14,20 +14,7 @@ import {
 } from '@loaders.gl/compression';
 
 import {ParquetCompression} from './schema/declare';
-
-/** We can't use loaders-util buffer handling since we are dependent on buffers even in the browser */
-function toBuffer(arrayBuffer: ArrayBuffer): Buffer {
-  return Buffer.from(arrayBuffer);
-}
-
-function toArrayBuffer(buffer: Buffer): ArrayBuffer {
-  // TODO - per docs we should just be able to call buffer.buffer, but there are issues
-  if (Buffer.isBuffer(buffer)) {
-    const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
-    return typedArray.slice().buffer;
-  }
-  return buffer;
-}
+import {toArrayBuffer, toBuffer} from './utils/buffer-utils';
 
 // TODO switch to worker compression to avoid bundling...