@loaders.gl/parquet 4.3.0-alpha.1 → 4.3.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +100 -24
- package/dist/index.cjs.map +4 -4
- package/dist/lib/constants.d.ts +1 -1
- package/dist/lib/constants.d.ts.map +1 -1
- package/dist/lib/constants.js +2 -2
- package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
- package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
- package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
- package/dist/lib/utils/load-wasm.d.ts +3 -0
- package/dist/lib/utils/load-wasm.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
- package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
- package/dist/lib/utils/make-stream-iterator.js +67 -0
- package/dist/parquet-loader.js +1 -1
- package/dist/parquet-wasm-loader.d.ts +17 -5
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +19 -4
- package/dist/parquet-wasm-writer.js +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquet_wasm_bg.wasm +0 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +1 -0
- package/package.json +12 -12
- package/src/lib/constants.ts +2 -1
- package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
- package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
- package/src/lib/utils/make-stream-iterator.ts +87 -0
- package/src/parquet-wasm-loader.ts +36 -9
- package/src/parquet-wasm-writer.ts +1 -1
- package/src/parquetjs/codecs/rle.ts +3 -1
- package/dist/arrow1_bg.wasm +0 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/load-wasm.d.ts +0 -3
- package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
- package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
- package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
- package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
- /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
- /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
- /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
|
@@ -1,910 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
/*
|
|
3
|
-
/*
|
|
4
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
5
|
-
* or more contributor license agreements. See the NOTICE file
|
|
6
|
-
* distributed with this work for additional information
|
|
7
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
8
|
-
* to you under the Apache License, Version 2.0 (the
|
|
9
|
-
* "License"); you may not use this file except in compliance
|
|
10
|
-
* with the License. You may obtain a copy of the License at
|
|
11
|
-
*
|
|
12
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
-
*
|
|
14
|
-
* Unless required by applicable law or agreed to in writing,
|
|
15
|
-
* software distributed under the License is distributed on an
|
|
16
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
17
|
-
* KIND, either express or implied. See the License for the
|
|
18
|
-
* specific language governing permissions and limitations
|
|
19
|
-
* under the License.
|
|
20
|
-
*/
|
|
21
|
-
package org.apache.parquet.arrow.schema;
|
|
22
|
-
|
|
23
|
-
import static java.util.Arrays.asList;
|
|
24
|
-
import static java.util.Optional.empty;
|
|
25
|
-
import static java.util.Optional.of;
|
|
26
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
|
|
27
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
|
|
28
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
|
|
29
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
|
|
30
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
|
|
31
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
|
|
32
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
|
|
33
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
|
|
34
|
-
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
|
|
35
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
|
|
36
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
|
|
37
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
|
|
38
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
|
|
39
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
|
|
40
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
|
|
41
|
-
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
|
|
42
|
-
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
|
|
43
|
-
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
|
|
44
|
-
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
|
|
45
|
-
|
|
46
|
-
import java.util.ArrayList;
|
|
47
|
-
import java.util.List;
|
|
48
|
-
import java.util.Optional;
|
|
49
|
-
|
|
50
|
-
import org.apache.arrow.vector.types.DateUnit;
|
|
51
|
-
import org.apache.arrow.vector.types.FloatingPointPrecision;
|
|
52
|
-
import org.apache.arrow.vector.types.TimeUnit;
|
|
53
|
-
import org.apache.arrow.vector.types.pojo.ArrowType;
|
|
54
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
|
|
55
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
|
|
56
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
|
|
57
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Date;
|
|
58
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
|
|
59
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
|
|
60
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
|
|
61
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
|
|
62
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
|
|
63
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
|
|
64
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
|
|
65
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
|
|
66
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
|
|
67
|
-
import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
|
|
68
|
-
import org.apache.arrow.vector.types.pojo.Field;
|
|
69
|
-
import org.apache.arrow.vector.types.pojo.Schema;
|
|
70
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.ListTypeMapping;
|
|
71
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.PrimitiveTypeMapping;
|
|
72
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.RepeatedTypeMapping;
|
|
73
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping;
|
|
74
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping;
|
|
75
|
-
import org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping;
|
|
76
|
-
import org.apache.parquet.schema.GroupType;
|
|
77
|
-
import org.apache.parquet.schema.LogicalTypeAnnotation;
|
|
78
|
-
import org.apache.parquet.schema.MessageType;
|
|
79
|
-
import org.apache.parquet.schema.PrimitiveType;
|
|
80
|
-
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
|
|
81
|
-
import org.apache.parquet.schema.Type;
|
|
82
|
-
import org.apache.parquet.schema.Type.Repetition;
|
|
83
|
-
import org.apache.parquet.schema.Types;
|
|
84
|
-
import org.apache.parquet.schema.Types.GroupBuilder;
|
|
85
|
-
|
|
86
|
-
/**
|
|
87
|
-
* Logic to convert Parquet and Arrow Schemas back and forth and maintain the mapping
|
|
88
|
-
*/
|
|
89
|
-
public class SchemaConverter {
|
|
90
|
-
|
|
91
|
-
// Indicates if Int96 should be converted to Arrow Timestamp
|
|
92
|
-
private final boolean convertInt96ToArrowTimestamp;
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* For when we'll need this to be configurable
|
|
96
|
-
*/
|
|
97
|
-
public SchemaConverter() {
  // Delegate to the configurable constructor with INT96-to-timestamp conversion switched off.
  this(false);
}
|
|
100
|
-
|
|
101
|
-
// TODO(PARQUET-1511): pass the parameters in a configuration object
|
|
102
|
-
public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
  // Stored for convertINT96, which picks between an Arrow Timestamp and an Arrow Binary based on this flag.
  this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
}
|
|
105
|
-
|
|
106
|
-
/**
|
|
107
|
-
* Creates a Parquet Schema from an Arrow one and returns the mapping
|
|
108
|
-
* @param arrowSchema the provided Arrow Schema
|
|
109
|
-
* @return the mapping between the 2
|
|
110
|
-
*/
|
|
111
|
-
public SchemaMapping fromArrow(Schema arrowSchema) {
|
|
112
|
-
List<Field> fields = arrowSchema.getFields();
|
|
113
|
-
List<TypeMapping> parquetFields = fromArrow(fields);
|
|
114
|
-
MessageType parquetType = addToBuilder(parquetFields, Types.buildMessage()).named("root");
|
|
115
|
-
return new SchemaMapping(arrowSchema, parquetType, parquetFields);
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
private <T> GroupBuilder<T> addToBuilder(List<TypeMapping> parquetFields, GroupBuilder<T> builder) {
|
|
119
|
-
for (TypeMapping type : parquetFields) {
|
|
120
|
-
builder = builder.addField(type.getParquetType());
|
|
121
|
-
}
|
|
122
|
-
return builder;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
private List<TypeMapping> fromArrow(List<Field> fields) {
|
|
126
|
-
List<TypeMapping> result = new ArrayList<>(fields.size());
|
|
127
|
-
for (Field field : fields) {
|
|
128
|
-
result.add(fromArrow(field));
|
|
129
|
-
}
|
|
130
|
-
return result;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
private TypeMapping fromArrow(final Field field) {
|
|
134
|
-
return fromArrow(field, field.getName());
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
/**
|
|
138
|
-
* @param field arrow field
|
|
139
|
-
* @param fieldName overrides field.getName()
|
|
140
|
-
* @return mapping
|
|
141
|
-
*/
|
|
142
|
-
private TypeMapping fromArrow(final Field field, final String fieldName) {
|
|
143
|
-
final List<Field> children = field.getChildren();
|
|
144
|
-
return field.getType().accept(new ArrowTypeVisitor<TypeMapping>() {
|
|
145
|
-
|
|
146
|
-
@Override
|
|
147
|
-
public TypeMapping visit(Null type) {
|
|
148
|
-
// TODO(PARQUET-757): null original type
|
|
149
|
-
return primitive(BINARY);
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
@Override
|
|
153
|
-
public TypeMapping visit(Struct type) {
|
|
154
|
-
List<TypeMapping> parquetTypes = fromArrow(children);
|
|
155
|
-
return new StructTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
@Override
|
|
159
|
-
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
|
|
160
|
-
return createListTypeMapping();
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
@Override
|
|
164
|
-
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
|
|
165
|
-
return createListTypeMapping();
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
private ListTypeMapping createListTypeMapping() {
|
|
169
|
-
if (children.size() != 1) {
|
|
170
|
-
throw new IllegalArgumentException("list fields must have exactly one child: " + field);
|
|
171
|
-
}
|
|
172
|
-
TypeMapping parquetChild = fromArrow(children.get(0), "element");
|
|
173
|
-
GroupType list = Types.optionalList().element(parquetChild.getParquetType()).named(fieldName);
|
|
174
|
-
return new ListTypeMapping(field, new List3Levels(list), parquetChild);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
@Override
|
|
178
|
-
public TypeMapping visit(Union type) {
|
|
179
|
-
// TODO(PARQUET-756): add Union OriginalType
|
|
180
|
-
List<TypeMapping> parquetTypes = fromArrow(children);
|
|
181
|
-
return new UnionTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
@Override
|
|
185
|
-
public TypeMapping visit(Int type) {
|
|
186
|
-
boolean signed = type.getIsSigned();
|
|
187
|
-
switch (type.getBitWidth()) {
|
|
188
|
-
case 8:
|
|
189
|
-
case 16:
|
|
190
|
-
case 32:
|
|
191
|
-
return primitive(INT32, intType(type.getBitWidth(), signed));
|
|
192
|
-
case 64:
|
|
193
|
-
return primitive(INT64, intType(64, signed));
|
|
194
|
-
default:
|
|
195
|
-
throw new IllegalArgumentException("Illegal int type: " + field);
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
@Override
|
|
200
|
-
public TypeMapping visit(FloatingPoint type) {
|
|
201
|
-
switch (type.getPrecision()) {
|
|
202
|
-
case HALF:
|
|
203
|
-
// TODO(PARQUET-757): original type HalfFloat
|
|
204
|
-
return primitive(FLOAT);
|
|
205
|
-
case SINGLE:
|
|
206
|
-
return primitive(FLOAT);
|
|
207
|
-
case DOUBLE:
|
|
208
|
-
return primitive(DOUBLE);
|
|
209
|
-
default:
|
|
210
|
-
throw new IllegalArgumentException("Illegal float type: " + field);
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
@Override
|
|
215
|
-
public TypeMapping visit(Utf8 type) {
|
|
216
|
-
return primitive(BINARY, stringType());
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
@Override
|
|
220
|
-
public TypeMapping visit(Binary type) {
|
|
221
|
-
return primitive(BINARY);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
@Override
|
|
225
|
-
public TypeMapping visit(Bool type) {
|
|
226
|
-
return primitive(BOOLEAN);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
/**
|
|
230
|
-
* See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
|
|
231
|
-
* @param type an arrow decimal type
|
|
232
|
-
* @return a mapping from the arrow decimal to the Parquet type
|
|
233
|
-
*/
|
|
234
|
-
@Override
|
|
235
|
-
public TypeMapping visit(Decimal type) {
|
|
236
|
-
int precision = type.getPrecision();
|
|
237
|
-
int scale = type.getScale();
|
|
238
|
-
if (1 <= precision && precision <= 9) {
|
|
239
|
-
return decimal(INT32, precision, scale);
|
|
240
|
-
} else if (1 <= precision && precision <= 18) {
|
|
241
|
-
return decimal(INT64, precision, scale);
|
|
242
|
-
} else {
|
|
243
|
-
// Better: FIXED_LENGTH_BYTE_ARRAY with length
|
|
244
|
-
return decimal(BINARY, precision, scale);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
@Override
|
|
249
|
-
public TypeMapping visit(Date type) {
|
|
250
|
-
return primitive(INT32, dateType());
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
@Override
|
|
254
|
-
public TypeMapping visit(Time type) {
|
|
255
|
-
int bitWidth = type.getBitWidth();
|
|
256
|
-
TimeUnit timeUnit = type.getUnit();
|
|
257
|
-
if (bitWidth == 32 && timeUnit == TimeUnit.MILLISECOND) {
|
|
258
|
-
return primitive(INT32, timeType(false, MILLIS));
|
|
259
|
-
} else if (bitWidth == 64 && timeUnit == TimeUnit.MICROSECOND) {
|
|
260
|
-
return primitive(INT64, timeType(false, MICROS));
|
|
261
|
-
} else if (bitWidth == 64 && timeUnit == TimeUnit.NANOSECOND) {
|
|
262
|
-
return primitive(INT64, timeType(false, NANOS));
|
|
263
|
-
}
|
|
264
|
-
throw new UnsupportedOperationException("Unsupported type " + type);
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
@Override
|
|
268
|
-
public TypeMapping visit(Timestamp type) {
|
|
269
|
-
TimeUnit timeUnit = type.getUnit();
|
|
270
|
-
if (timeUnit == TimeUnit.MILLISECOND) {
|
|
271
|
-
return primitive(INT64, timestampType(isUtcNormalized(type), MILLIS));
|
|
272
|
-
} else if (timeUnit == TimeUnit.MICROSECOND) {
|
|
273
|
-
return primitive(INT64, timestampType(isUtcNormalized(type), MICROS));
|
|
274
|
-
} else if (timeUnit == TimeUnit.NANOSECOND) {
|
|
275
|
-
return primitive(INT64, timestampType(isUtcNormalized(type), NANOS));
|
|
276
|
-
}
|
|
277
|
-
throw new UnsupportedOperationException("Unsupported type " + type);
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
private boolean isUtcNormalized(Timestamp timestamp) {
|
|
281
|
-
String timeZone = timestamp.getTimezone();
|
|
282
|
-
return timeZone != null && !timeZone.isEmpty();
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
/**
|
|
286
|
-
* See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval
|
|
287
|
-
*/
|
|
288
|
-
@Override
|
|
289
|
-
public TypeMapping visit(Interval type) {
|
|
290
|
-
// TODO(PARQUET-675): fix interval original types
|
|
291
|
-
return primitiveFLBA(12, LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance());
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
@Override
|
|
295
|
-
public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
|
|
296
|
-
return primitive(BINARY);
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
private TypeMapping mapping(PrimitiveType parquetType) {
|
|
300
|
-
return new PrimitiveTypeMapping(field, parquetType);
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
private TypeMapping decimal(PrimitiveTypeName type, int precision, int scale) {
|
|
304
|
-
return mapping(Types.optional(type).as(decimalType(scale, precision)).named(fieldName));
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
private TypeMapping primitive(PrimitiveTypeName type) {
|
|
308
|
-
return mapping(Types.optional(type).named(fieldName));
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
private TypeMapping primitive(PrimitiveTypeName type, LogicalTypeAnnotation otype) {
|
|
312
|
-
return mapping(Types.optional(type).as(otype).named(fieldName));
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
private TypeMapping primitiveFLBA(int length, LogicalTypeAnnotation otype) {
|
|
316
|
-
return mapping(Types.optional(FIXED_LEN_BYTE_ARRAY).length(length).as(otype).named(fieldName));
|
|
317
|
-
}
|
|
318
|
-
});
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
/**
|
|
322
|
-
* Creates an Arrow Schema from a Parquet one and returns the mapping
|
|
323
|
-
* @param parquetSchema the provided Parquet Schema
|
|
324
|
-
* @return the mapping between the 2
|
|
325
|
-
*/
|
|
326
|
-
public SchemaMapping fromParquet(MessageType parquetSchema) {
|
|
327
|
-
List<Type> fields = parquetSchema.getFields();
|
|
328
|
-
List<TypeMapping> mappings = fromParquet(fields);
|
|
329
|
-
List<Field> arrowFields = fields(mappings);
|
|
330
|
-
return new SchemaMapping(new Schema(arrowFields), parquetSchema, mappings);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
private List<Field> fields(List<TypeMapping> mappings) {
|
|
334
|
-
List<Field> result = new ArrayList<>(mappings.size());
|
|
335
|
-
for (TypeMapping typeMapping : mappings) {
|
|
336
|
-
result.add(typeMapping.getArrowField());
|
|
337
|
-
}
|
|
338
|
-
return result;
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
private List<TypeMapping> fromParquet(List<Type> fields) {
|
|
342
|
-
List<TypeMapping> result = new ArrayList<>(fields.size());
|
|
343
|
-
for (Type type : fields) {
|
|
344
|
-
result.add(fromParquet(type));
|
|
345
|
-
}
|
|
346
|
-
return result;
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
private TypeMapping fromParquet(Type type) {
|
|
350
|
-
return fromParquet(type, type.getName(), type.getRepetition());
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
/**
|
|
354
|
-
* @param type parquet type
|
|
355
|
-
* @param name overrides parquet.getName()
|
|
356
|
-
* @param repetition overrides parquet.getRepetition()
|
|
357
|
-
* @return a type mapping from the Parquet type to an Arrow type
|
|
358
|
-
*/
|
|
359
|
-
private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
|
|
360
|
-
if (repetition == REPEATED) {
|
|
361
|
-
// case where we have a repeated field that is not in a List/Map
|
|
362
|
-
TypeMapping child = fromParquet(type, null, REQUIRED);
|
|
363
|
-
Field arrowField = new Field(name, false, new ArrowType.List(), asList(child.getArrowField()));
|
|
364
|
-
return new RepeatedTypeMapping(arrowField, type, child);
|
|
365
|
-
}
|
|
366
|
-
if (type.isPrimitive()) {
|
|
367
|
-
return fromParquetPrimitive(type.asPrimitiveType(), name);
|
|
368
|
-
} else {
|
|
369
|
-
return fromParquetGroup(type.asGroupType(), name);
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
/**
|
|
374
|
-
* @param type parquet types
|
|
375
|
-
* @param name overrides parquet.getName()
|
|
376
|
-
* @return the mapping
|
|
377
|
-
*/
|
|
378
|
-
private TypeMapping fromParquetGroup(GroupType type, String name) {
|
|
379
|
-
LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
|
|
380
|
-
if (logicalType == null) {
|
|
381
|
-
List<TypeMapping> typeMappings = fromParquet(type.getFields());
|
|
382
|
-
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
|
|
383
|
-
return new StructTypeMapping(arrowField, type, typeMappings);
|
|
384
|
-
} else {
|
|
385
|
-
return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
|
|
386
|
-
@Override
|
|
387
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
|
|
388
|
-
List3Levels list3Levels = new List3Levels(type);
|
|
389
|
-
TypeMapping child = fromParquet(list3Levels.getElement(), null, list3Levels.getElement().getRepetition());
|
|
390
|
-
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new ArrowType.List(), asList(child.getArrowField()));
|
|
391
|
-
return of(new ListTypeMapping(arrowField, list3Levels, child));
|
|
392
|
-
}
|
|
393
|
-
}).orElseThrow(() -> new UnsupportedOperationException("Unsupported type " + type));
|
|
394
|
-
}
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
/**
|
|
398
|
-
* @param type parquet types
|
|
399
|
-
* @param name overrides parquet.getName()
|
|
400
|
-
* @return the mapping
|
|
401
|
-
*/
|
|
402
|
-
private TypeMapping fromParquetPrimitive(final PrimitiveType type, final String name) {
|
|
403
|
-
return type.getPrimitiveTypeName().convert(new PrimitiveType.PrimitiveTypeNameConverter<TypeMapping, RuntimeException>() {
|
|
404
|
-
|
|
405
|
-
private TypeMapping field(ArrowType arrowType) {
|
|
406
|
-
Field field = new Field(name, type.isRepetition(OPTIONAL), arrowType, null);
|
|
407
|
-
return new PrimitiveTypeMapping(field, type);
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
@Override
|
|
411
|
-
public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
412
|
-
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
@Override
|
|
416
|
-
public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
417
|
-
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
|
|
418
|
-
}
|
|
419
|
-
|
|
420
|
-
@Override
|
|
421
|
-
public TypeMapping convertINT32(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
422
|
-
LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
|
|
423
|
-
if (logicalTypeAnnotation == null) {
|
|
424
|
-
return integer(32, true);
|
|
425
|
-
}
|
|
426
|
-
return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
|
|
427
|
-
@Override
|
|
428
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
|
|
429
|
-
return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
@Override
|
|
433
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
|
|
434
|
-
return of(field(new ArrowType.Date(DateUnit.DAY)));
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
@Override
|
|
438
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
|
|
439
|
-
return timeLogicalType.getUnit() == MILLIS ? of(field(new ArrowType.Time(TimeUnit.MILLISECOND, 32))) : empty();
|
|
440
|
-
}
|
|
441
|
-
|
|
442
|
-
@Override
|
|
443
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
|
|
444
|
-
if (intLogicalType.getBitWidth() == 64) {
|
|
445
|
-
return empty();
|
|
446
|
-
}
|
|
447
|
-
return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
|
|
448
|
-
}
|
|
449
|
-
}).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
@Override
|
|
453
|
-
public TypeMapping convertINT64(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
454
|
-
LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
|
|
455
|
-
if (logicalTypeAnnotation == null) {
|
|
456
|
-
return integer(64, true);
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
|
|
460
|
-
@Override
|
|
461
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
|
|
462
|
-
return of(field(new ArrowType.Date(DateUnit.DAY)));
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
@Override
|
|
466
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
|
|
467
|
-
return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
@Override
|
|
471
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
|
|
472
|
-
return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
@Override
|
|
476
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
|
|
477
|
-
if (timeLogicalType.getUnit() == MICROS) {
|
|
478
|
-
return of(field(new ArrowType.Time(TimeUnit.MICROSECOND, 64)));
|
|
479
|
-
} else if (timeLogicalType.getUnit() == NANOS) {
|
|
480
|
-
return of(field(new ArrowType.Time(TimeUnit.NANOSECOND, 64)));
|
|
481
|
-
}
|
|
482
|
-
return empty();
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
@Override
|
|
486
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
|
|
487
|
-
switch (timestampLogicalType.getUnit()) {
|
|
488
|
-
case MICROS:
|
|
489
|
-
return of(field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, getTimeZone(timestampLogicalType))));
|
|
490
|
-
case MILLIS:
|
|
491
|
-
return of(field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, getTimeZone(timestampLogicalType))));
|
|
492
|
-
case NANOS:
|
|
493
|
-
return of(field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, getTimeZone(timestampLogicalType))));
|
|
494
|
-
}
|
|
495
|
-
return empty();
|
|
496
|
-
}
|
|
497
|
-
|
|
498
|
-
private String getTimeZone(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
|
|
499
|
-
return timestampLogicalType.isAdjustedToUTC() ? "UTC" : null;
|
|
500
|
-
}
|
|
501
|
-
}).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
@Override
|
|
505
|
-
public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
506
|
-
if (convertInt96ToArrowTimestamp) {
|
|
507
|
-
return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
|
|
508
|
-
} else {
|
|
509
|
-
return field(new ArrowType.Binary());
|
|
510
|
-
}
|
|
511
|
-
}
|
|
512
|
-
|
|
513
|
-
@Override
|
|
514
|
-
public TypeMapping convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
515
|
-
LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
|
|
516
|
-
if (logicalTypeAnnotation == null) {
|
|
517
|
-
return field(new ArrowType.Binary());
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
|
|
521
|
-
@Override
|
|
522
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
|
|
523
|
-
return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
|
|
524
|
-
}
|
|
525
|
-
}).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
|
|
526
|
-
}
|
|
527
|
-
|
|
528
|
-
@Override
|
|
529
|
-
public TypeMapping convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
530
|
-
return field(new ArrowType.Bool());
|
|
531
|
-
}
|
|
532
|
-
|
|
533
|
-
@Override
|
|
534
|
-
public TypeMapping convertBINARY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
|
|
535
|
-
LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
|
|
536
|
-
if (logicalTypeAnnotation == null) {
|
|
537
|
-
return field(new ArrowType.Binary());
|
|
538
|
-
}
|
|
539
|
-
return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
|
|
540
|
-
@Override
|
|
541
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
|
|
542
|
-
return of(field(new ArrowType.Utf8()));
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
@Override
|
|
546
|
-
public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
|
|
547
|
-
return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
|
|
548
|
-
}
|
|
549
|
-
}).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
private TypeMapping decimal(int precision, int scale) {
|
|
553
|
-
return field(new ArrowType.Decimal(precision, scale));
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
private TypeMapping integer(int width, boolean signed) {
|
|
557
|
-
return field(new ArrowType.Int(width, signed));
|
|
558
|
-
}
|
|
559
|
-
});
|
|
560
|
-
}
|
|
561
|
-
|
|
562
|
-
/**
|
|
563
|
-
* Maps a Parquet and Arrow Schema
|
|
564
|
-
* For now does not validate primitive type compatibility
|
|
565
|
-
* @param arrowSchema an Arrow schema
|
|
566
|
-
* @param parquetSchema a Parquet message type
|
|
567
|
-
* @return the mapping between the 2
|
|
568
|
-
*/
|
|
569
|
-
public SchemaMapping map(Schema arrowSchema, MessageType parquetSchema) {
|
|
570
|
-
List<TypeMapping> children = map(arrowSchema.getFields(), parquetSchema.getFields());
|
|
571
|
-
return new SchemaMapping(arrowSchema, parquetSchema, children);
|
|
572
|
-
}
|
|
573
|
-
|
|
574
|
-
private List<TypeMapping> map(List<Field> arrowFields, List<Type> parquetFields) {
|
|
575
|
-
if (arrowFields.size() != parquetFields.size()) {
|
|
576
|
-
throw new IllegalArgumentException("Can not map schemas as sizes differ: " + arrowFields + " != " + parquetFields);
|
|
577
|
-
}
|
|
578
|
-
List<TypeMapping> result = new ArrayList<>(arrowFields.size());
|
|
579
|
-
for (int i = 0; i < arrowFields.size(); i++) {
|
|
580
|
-
Field arrowField = arrowFields.get(i);
|
|
581
|
-
Type parquetField = parquetFields.get(i);
|
|
582
|
-
result.add(map(arrowField, parquetField));
|
|
583
|
-
}
|
|
584
|
-
return result;
|
|
585
|
-
}
|
|
586
|
-
|
|
587
|
-
private TypeMapping map(final Field arrowField, final Type parquetField) {
|
|
588
|
-
return arrowField.getType().accept(new ArrowTypeVisitor<TypeMapping>() {
|
|
589
|
-
|
|
590
|
-
@Override
|
|
591
|
-
public TypeMapping visit(Null type) {
|
|
592
|
-
if (!parquetField.isRepetition(OPTIONAL)) {
|
|
593
|
-
throw new IllegalArgumentException("Parquet type can't be null: " + parquetField);
|
|
594
|
-
}
|
|
595
|
-
return primitive();
|
|
596
|
-
}
|
|
597
|
-
|
|
598
|
-
@Override
|
|
599
|
-
public TypeMapping visit(Struct type) {
|
|
600
|
-
if (parquetField.isPrimitive()) {
|
|
601
|
-
throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
|
|
602
|
-
}
|
|
603
|
-
GroupType groupType = parquetField.asGroupType();
|
|
604
|
-
return new StructTypeMapping(arrowField, groupType, map(arrowField.getChildren(), groupType.getFields()));
|
|
605
|
-
}
|
|
606
|
-
|
|
607
|
-
@Override
|
|
608
|
-
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
|
|
609
|
-
return createListTypeMapping(type);
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
@Override
|
|
613
|
-
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
|
|
614
|
-
return createListTypeMapping(type);
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
|
|
618
|
-
if (arrowField.getChildren().size() != 1) {
|
|
619
|
-
throw new IllegalArgumentException("Invalid list type: " + type);
|
|
620
|
-
}
|
|
621
|
-
Field arrowChild = arrowField.getChildren().get(0);
|
|
622
|
-
if (parquetField.isRepetition(REPEATED)) {
|
|
623
|
-
return new RepeatedTypeMapping(arrowField, parquetField, map(arrowChild, parquetField));
|
|
624
|
-
}
|
|
625
|
-
if (parquetField.isPrimitive()) {
|
|
626
|
-
throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
|
|
627
|
-
}
|
|
628
|
-
List3Levels list3Levels = new List3Levels(parquetField.asGroupType());
|
|
629
|
-
if (arrowField.getChildren().size() != 1) {
|
|
630
|
-
throw new IllegalArgumentException("invalid arrow list: " + arrowField);
|
|
631
|
-
}
|
|
632
|
-
return new ListTypeMapping(arrowField, list3Levels, map(arrowChild, list3Levels.getElement()));
|
|
633
|
-
}
|
|
634
|
-
|
|
635
|
-
@Override
|
|
636
|
-
public TypeMapping visit(Union type) {
|
|
637
|
-
if (parquetField.isPrimitive()) {
|
|
638
|
-
throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
|
|
639
|
-
}
|
|
640
|
-
GroupType groupType = parquetField.asGroupType();
|
|
641
|
-
return new UnionTypeMapping(arrowField, groupType, map(arrowField.getChildren(), groupType.getFields()));
|
|
642
|
-
}
|
|
643
|
-
|
|
644
|
-
@Override
|
|
645
|
-
public TypeMapping visit(Int type) {
|
|
646
|
-
return primitive();
|
|
647
|
-
}
|
|
648
|
-
|
|
649
|
-
@Override
|
|
650
|
-
public TypeMapping visit(FloatingPoint type) {
|
|
651
|
-
return primitive();
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
@Override
|
|
655
|
-
public TypeMapping visit(Utf8 type) {
|
|
656
|
-
return primitive();
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
@Override
|
|
660
|
-
public TypeMapping visit(Binary type) {
|
|
661
|
-
return primitive();
|
|
662
|
-
}
|
|
663
|
-
|
|
664
|
-
@Override
|
|
665
|
-
public TypeMapping visit(Bool type) {
|
|
666
|
-
return primitive();
|
|
667
|
-
}
|
|
668
|
-
|
|
669
|
-
@Override
|
|
670
|
-
public TypeMapping visit(Decimal type) {
|
|
671
|
-
return primitive();
|
|
672
|
-
}
|
|
673
|
-
|
|
674
|
-
@Override
|
|
675
|
-
public TypeMapping visit(Date type) {
|
|
676
|
-
return primitive();
|
|
677
|
-
}
|
|
678
|
-
|
|
679
|
-
@Override
|
|
680
|
-
public TypeMapping visit(Time type) {
|
|
681
|
-
return primitive();
|
|
682
|
-
}
|
|
683
|
-
|
|
684
|
-
@Override
|
|
685
|
-
public TypeMapping visit(Timestamp type) {
|
|
686
|
-
return primitive();
|
|
687
|
-
}
|
|
688
|
-
|
|
689
|
-
@Override
|
|
690
|
-
public TypeMapping visit(Interval type) {
|
|
691
|
-
return primitive();
|
|
692
|
-
}
|
|
693
|
-
|
|
694
|
-
@Override
|
|
695
|
-
public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
|
|
696
|
-
return primitive();
|
|
697
|
-
}
|
|
698
|
-
|
|
699
|
-
private TypeMapping primitive() {
|
|
700
|
-
if (!parquetField.isPrimitive()) {
|
|
701
|
-
throw new IllegalArgumentException("Can not map schemas as one is primitive and the other is not: " + arrowField + " != " + parquetField);
|
|
702
|
-
}
|
|
703
|
-
return new PrimitiveTypeMapping(arrowField, parquetField.asPrimitiveType());
|
|
704
|
-
}
|
|
705
|
-
});
|
|
706
|
-
}
|
|
707
|
-
}
|
|
708
|
-
|
|
709
|
-
/*
|
|
710
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
711
|
-
* or more contributor license agreements. See the NOTICE file
|
|
712
|
-
* distributed with this work for additional information
|
|
713
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
714
|
-
* to you under the Apache License, Version 2.0 (the
|
|
715
|
-
* "License"); you may not use this file except in compliance
|
|
716
|
-
* with the License. You may obtain a copy of the License at
|
|
717
|
-
*
|
|
718
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
719
|
-
*
|
|
720
|
-
* Unless required by applicable law or agreed to in writing,
|
|
721
|
-
* software distributed under the License is distributed on an
|
|
722
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
723
|
-
* KIND, either express or implied. See the License for the
|
|
724
|
-
* specific language governing permissions and limitations
|
|
725
|
-
* under the License.
|
|
726
|
-
*/
|
|
727
|
-
package org.apache.parquet.arrow.schema;
|
|
728
|
-
|
|
729
|
-
import static java.util.Arrays.asList;
|
|
730
|
-
|
|
731
|
-
import java.util.Collections;
|
|
732
|
-
import java.util.List;
|
|
733
|
-
|
|
734
|
-
import org.apache.arrow.vector.types.pojo.Field;
|
|
735
|
-
import org.apache.arrow.vector.types.pojo.Schema;
|
|
736
|
-
import org.apache.parquet.schema.GroupType;
|
|
737
|
-
import org.apache.parquet.schema.MessageType;
|
|
738
|
-
import org.apache.parquet.schema.PrimitiveType;
|
|
739
|
-
import org.apache.parquet.schema.Type;
|
|
740
|
-
|
|
741
|
-
/**
|
|
742
|
-
* The mapping between an Arrow and a Parquet schema
|
|
743
|
-
* @see SchemaConverter
|
|
744
|
-
*/
|
|
745
|
-
public class SchemaMapping {
|
|
746
|
-
|
|
747
|
-
private final Schema arrowSchema;
|
|
748
|
-
private final MessageType parquetSchema;
|
|
749
|
-
private final List<TypeMapping> children;
|
|
750
|
-
|
|
751
|
-
SchemaMapping(Schema arrowSchema, MessageType parquetSchema, List<TypeMapping> children) {
|
|
752
|
-
super();
|
|
753
|
-
this.arrowSchema = arrowSchema;
|
|
754
|
-
this.parquetSchema = parquetSchema;
|
|
755
|
-
this.children = Collections.unmodifiableList(children);
|
|
756
|
-
}
|
|
757
|
-
|
|
758
|
-
public Schema getArrowSchema() {
|
|
759
|
-
return arrowSchema;
|
|
760
|
-
}
|
|
761
|
-
|
|
762
|
-
public MessageType getParquetSchema() {
|
|
763
|
-
return parquetSchema;
|
|
764
|
-
}
|
|
765
|
-
|
|
766
|
-
/**
|
|
767
|
-
* @return mapping between individual fields of each of the 2 schemas (should be the same width)
|
|
768
|
-
*/
|
|
769
|
-
public List<TypeMapping> getChildren() {
|
|
770
|
-
return children;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
/**
|
|
774
|
-
* To traverse a schema mapping
|
|
775
|
-
* @param <T> the Java return type of the visitor
|
|
776
|
-
*/
|
|
777
|
-
public interface TypeMappingVisitor<T> {
|
|
778
|
-
T visit(PrimitiveTypeMapping primitiveTypeMapping);
|
|
779
|
-
T visit(StructTypeMapping structTypeMapping);
|
|
780
|
-
T visit(UnionTypeMapping unionTypeMapping);
|
|
781
|
-
T visit(ListTypeMapping listTypeMapping);
|
|
782
|
-
T visit(RepeatedTypeMapping repeatedTypeMapping);
|
|
783
|
-
}
|
|
784
|
-
|
|
785
|
-
/**
|
|
786
|
-
* Mapping between an Arrow and a Parquet types
|
|
787
|
-
*/
|
|
788
|
-
public abstract static class TypeMapping {
|
|
789
|
-
|
|
790
|
-
private final Field arrowField;
|
|
791
|
-
private final Type parquetType;
|
|
792
|
-
private List<TypeMapping> children;
|
|
793
|
-
|
|
794
|
-
TypeMapping(Field arrowField, Type parquetType, List<TypeMapping> children) {
|
|
795
|
-
super();
|
|
796
|
-
this.arrowField = arrowField;
|
|
797
|
-
this.parquetType = parquetType;
|
|
798
|
-
this.children = children;
|
|
799
|
-
}
|
|
800
|
-
|
|
801
|
-
public Field getArrowField() {
|
|
802
|
-
return arrowField;
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
public Type getParquetType() {
|
|
806
|
-
return parquetType;
|
|
807
|
-
}
|
|
808
|
-
|
|
809
|
-
public List<TypeMapping> getChildren() {
|
|
810
|
-
return children;
|
|
811
|
-
}
|
|
812
|
-
|
|
813
|
-
public abstract <T> T accept(TypeMappingVisitor<T> visitor);
|
|
814
|
-
|
|
815
|
-
}
|
|
816
|
-
|
|
817
|
-
/**
|
|
818
|
-
* mapping between two primitive types
|
|
819
|
-
*/
|
|
820
|
-
public static class PrimitiveTypeMapping extends TypeMapping {
|
|
821
|
-
public PrimitiveTypeMapping(Field arrowField, PrimitiveType parquetType) {
|
|
822
|
-
super(arrowField, parquetType, Collections.<TypeMapping>emptyList());
|
|
823
|
-
}
|
|
824
|
-
|
|
825
|
-
@Override
|
|
826
|
-
public <T> T accept(TypeMappingVisitor<T> visitor) {
|
|
827
|
-
return visitor.visit(this);
|
|
828
|
-
}
|
|
829
|
-
}
|
|
830
|
-
|
|
831
|
-
/**
|
|
832
|
-
* mapping of a struct type
|
|
833
|
-
*/
|
|
834
|
-
public static class StructTypeMapping extends TypeMapping {
|
|
835
|
-
public StructTypeMapping(Field arrowField, GroupType parquetType, List<TypeMapping> children) {
|
|
836
|
-
super(arrowField, parquetType, children);
|
|
837
|
-
}
|
|
838
|
-
|
|
839
|
-
@Override
|
|
840
|
-
public <T> T accept(TypeMappingVisitor<T> visitor) {
|
|
841
|
-
return visitor.visit(this);
|
|
842
|
-
}
|
|
843
|
-
}
|
|
844
|
-
|
|
845
|
-
/**
|
|
846
|
-
* mapping of a union type
|
|
847
|
-
*/
|
|
848
|
-
public static class UnionTypeMapping extends TypeMapping {
|
|
849
|
-
public UnionTypeMapping(Field arrowField, GroupType parquetType, List<TypeMapping> children) {
|
|
850
|
-
super(arrowField, parquetType, children);
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
@Override
|
|
854
|
-
public <T> T accept(TypeMappingVisitor<T> visitor) {
|
|
855
|
-
return visitor.visit(this);
|
|
856
|
-
}
|
|
857
|
-
}
|
|
858
|
-
|
|
859
|
-
/**
|
|
860
|
-
* mapping of a List type and standard 3-level List annotated Parquet type
|
|
861
|
-
*/
|
|
862
|
-
public static class ListTypeMapping extends TypeMapping {
|
|
863
|
-
private final List3Levels list3Levels;
|
|
864
|
-
private final TypeMapping child;
|
|
865
|
-
|
|
866
|
-
public ListTypeMapping(Field arrowField, List3Levels list3Levels, TypeMapping child) {
|
|
867
|
-
super(arrowField, list3Levels.getList(), asList(child));
|
|
868
|
-
this.list3Levels = list3Levels;
|
|
869
|
-
this.child = child;
|
|
870
|
-
if (list3Levels.getElement() != child.getParquetType()) {
|
|
871
|
-
throw new IllegalArgumentException(list3Levels + " <=> " + child);
|
|
872
|
-
}
|
|
873
|
-
}
|
|
874
|
-
|
|
875
|
-
public List3Levels getList3Levels() {
|
|
876
|
-
return list3Levels;
|
|
877
|
-
}
|
|
878
|
-
|
|
879
|
-
public TypeMapping getChild() {
|
|
880
|
-
return child;
|
|
881
|
-
}
|
|
882
|
-
|
|
883
|
-
@Override
|
|
884
|
-
public <T> T accept(TypeMappingVisitor<T> visitor) {
|
|
885
|
-
return visitor.visit(this);
|
|
886
|
-
}
|
|
887
|
-
}
|
|
888
|
-
|
|
889
|
-
/**
|
|
890
|
-
* mapping of a List type and repeated Parquet field (non-list annotated)
|
|
891
|
-
*/
|
|
892
|
-
public static class RepeatedTypeMapping extends TypeMapping {
|
|
893
|
-
private final TypeMapping child;
|
|
894
|
-
|
|
895
|
-
public RepeatedTypeMapping(Field arrowField, Type parquetType, TypeMapping child) {
|
|
896
|
-
super(arrowField, parquetType, asList(child));
|
|
897
|
-
this.child = child;
|
|
898
|
-
}
|
|
899
|
-
|
|
900
|
-
public TypeMapping getChild() {
|
|
901
|
-
return child;
|
|
902
|
-
}
|
|
903
|
-
|
|
904
|
-
@Override
|
|
905
|
-
public <T> T accept(TypeMappingVisitor<T> visitor) {
|
|
906
|
-
return visitor.visit(this);
|
|
907
|
-
}
|
|
908
|
-
}
|
|
909
|
-
}
|
|
910
|
-
*/
|