parquet 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +66 -59
- data/README.md +105 -1
- data/ext/parquet/Cargo.toml +4 -3
- data/ext/parquet/src/enumerator.rs +8 -0
- data/ext/parquet/src/header_cache.rs +11 -12
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/logger.rs +171 -0
- data/ext/parquet/src/reader/common.rs +110 -0
- data/ext/parquet/src/reader/mod.rs +1 -43
- data/ext/parquet/src/reader/parquet_column_reader.rs +50 -86
- data/ext/parquet/src/reader/parquet_row_reader.rs +53 -23
- data/ext/parquet/src/ruby_reader.rs +37 -25
- data/ext/parquet/src/types/core_types.rs +47 -6
- data/ext/parquet/src/types/mod.rs +64 -1
- data/ext/parquet/src/types/parquet_value.rs +284 -102
- data/ext/parquet/src/types/record_types.rs +24 -23
- data/ext/parquet/src/types/schema_converter.rs +244 -0
- data/ext/parquet/src/types/schema_node.rs +329 -0
- data/ext/parquet/src/types/timestamp.rs +16 -8
- data/ext/parquet/src/types/type_conversion.rs +1151 -521
- data/ext/parquet/src/types/writer_types.rs +94 -151
- data/ext/parquet/src/utils.rs +29 -9
- data/ext/parquet/src/writer/mod.rs +342 -457
- data/ext/parquet/src/writer/write_columns.rs +226 -0
- data/ext/parquet/src/writer/write_rows.rs +484 -0
- data/lib/parquet/schema.rb +154 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rb +1 -0
- metadata +9 -2
@@ -1,7 +1,10 @@
|
|
1
1
|
use std::str::FromStr;
|
2
|
+
use std::sync::Arc;
|
2
3
|
|
3
4
|
use super::*;
|
5
|
+
use arrow_array::builder::MapFieldNames;
|
4
6
|
use arrow_array::builder::*;
|
7
|
+
use arrow_schema::{DataType, Field, Fields, TimeUnit};
|
5
8
|
use jiff::tz::{Offset, TimeZone};
|
6
9
|
use magnus::{RArray, RString, TryConvert};
|
7
10
|
|
@@ -14,8 +17,7 @@ where
|
|
14
17
|
T: TryConvert + FromStr,
|
15
18
|
<T as FromStr>::Err: std::fmt::Display,
|
16
19
|
{
|
17
|
-
pub fn convert_with_string_fallback(value: Value) -> Result<T, MagnusError> {
|
18
|
-
let ruby = unsafe { Ruby::get_unchecked() };
|
20
|
+
pub fn convert_with_string_fallback(ruby: &Ruby, value: Value) -> Result<T, MagnusError> {
|
19
21
|
if value.is_kind_of(ruby.class_string()) {
|
20
22
|
let s = String::try_convert(value)?;
|
21
23
|
s.trim().parse::<T>().map_err(|e| {
|
@@ -30,8 +32,11 @@ where
|
|
30
32
|
}
|
31
33
|
}
|
32
34
|
|
33
|
-
pub fn convert_to_date32(
|
34
|
-
|
35
|
+
pub fn convert_to_date32(
|
36
|
+
ruby: &Ruby,
|
37
|
+
value: Value,
|
38
|
+
format: Option<&str>,
|
39
|
+
) -> Result<i32, MagnusError> {
|
35
40
|
if value.is_kind_of(ruby.class_string()) {
|
36
41
|
let s = String::try_convert(value)?;
|
37
42
|
// Parse string into Date using jiff
|
@@ -64,14 +69,19 @@ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, Magn
|
|
64
69
|
|
65
70
|
let x = timestamp
|
66
71
|
.to_zoned(TimeZone::fixed(Offset::constant(0)))
|
67
|
-
.
|
72
|
+
.map_err(|e| {
|
73
|
+
MagnusError::new(
|
74
|
+
magnus::exception::type_error(),
|
75
|
+
format!("Failed to convert date32 to timestamp: {}", e),
|
76
|
+
)
|
77
|
+
})?
|
68
78
|
.timestamp();
|
69
79
|
|
70
80
|
// Convert to epoch days
|
71
81
|
Ok((x.as_second() as i64 / 86400) as i32)
|
72
82
|
} else if value.is_kind_of(ruby.class_time()) {
|
73
83
|
// Convert Time object to epoch days
|
74
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
|
84
|
+
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
75
85
|
Ok(((secs as f64) / 86400.0) as i32)
|
76
86
|
} else {
|
77
87
|
Err(MagnusError::new(
|
@@ -81,8 +91,11 @@ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, Magn
|
|
81
91
|
}
|
82
92
|
}
|
83
93
|
|
84
|
-
pub fn convert_to_timestamp_millis(
|
85
|
-
|
94
|
+
pub fn convert_to_timestamp_millis(
|
95
|
+
ruby: &Ruby,
|
96
|
+
value: Value,
|
97
|
+
format: Option<&str>,
|
98
|
+
) -> Result<i64, MagnusError> {
|
86
99
|
if value.is_kind_of(ruby.class_string()) {
|
87
100
|
let s = String::try_convert(value)?;
|
88
101
|
// Parse string into Timestamp using jiff
|
@@ -115,8 +128,8 @@ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result
|
|
115
128
|
Ok(timestamp.as_millisecond())
|
116
129
|
} else if value.is_kind_of(ruby.class_time()) {
|
117
130
|
// Convert Time object to milliseconds
|
118
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
|
119
|
-
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())
|
131
|
+
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
132
|
+
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
120
133
|
Ok(secs * 1000 + (usecs / 1000))
|
121
134
|
} else {
|
122
135
|
Err(MagnusError::new(
|
@@ -128,8 +141,11 @@ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result
|
|
128
141
|
}
|
129
142
|
}
|
130
143
|
|
131
|
-
pub fn convert_to_timestamp_micros(
|
132
|
-
|
144
|
+
pub fn convert_to_timestamp_micros(
|
145
|
+
ruby: &Ruby,
|
146
|
+
value: Value,
|
147
|
+
format: Option<&str>,
|
148
|
+
) -> Result<i64, MagnusError> {
|
133
149
|
if value.is_kind_of(ruby.class_string()) {
|
134
150
|
let s = String::try_convert(value)?;
|
135
151
|
// Parse string into Timestamp using jiff
|
@@ -162,8 +178,8 @@ pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result
|
|
162
178
|
Ok(timestamp.as_microsecond())
|
163
179
|
} else if value.is_kind_of(ruby.class_time()) {
|
164
180
|
// Convert Time object to microseconds
|
165
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
|
166
|
-
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())
|
181
|
+
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
182
|
+
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
167
183
|
Ok(secs * 1_000_000 + usecs)
|
168
184
|
} else {
|
169
185
|
Err(MagnusError::new(
|
@@ -179,8 +195,7 @@ pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
|
|
179
195
|
Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
|
180
196
|
}
|
181
197
|
|
182
|
-
pub fn convert_to_boolean(value: Value) -> Result<bool, MagnusError> {
|
183
|
-
let ruby = unsafe { Ruby::get_unchecked() };
|
198
|
+
pub fn convert_to_boolean(ruby: &Ruby, value: Value) -> Result<bool, MagnusError> {
|
184
199
|
if value.is_kind_of(ruby.class_string()) {
|
185
200
|
let s = String::try_convert(value)?;
|
186
201
|
s.trim().parse::<bool>().map_err(|e| {
|
@@ -209,225 +224,79 @@ pub fn convert_to_string(value: Value) -> Result<String, MagnusError> {
|
|
209
224
|
})
|
210
225
|
}
|
211
226
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
}
|
249
|
-
ParquetSchemaType::UInt32 => {
|
250
|
-
let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
|
251
|
-
ParquetValue::UInt32(v)
|
252
|
-
}
|
253
|
-
ParquetSchemaType::UInt64 => {
|
254
|
-
let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
|
255
|
-
ParquetValue::UInt64(v)
|
256
|
-
}
|
257
|
-
ParquetSchemaType::Float => {
|
258
|
-
let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
|
259
|
-
ParquetValue::Float32(v)
|
260
|
-
}
|
261
|
-
ParquetSchemaType::Double => {
|
262
|
-
let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
|
263
|
-
ParquetValue::Float64(v)
|
264
|
-
}
|
265
|
-
ParquetSchemaType::String => {
|
266
|
-
let v = String::try_convert(item_value)?;
|
267
|
-
ParquetValue::String(v)
|
268
|
-
}
|
269
|
-
ParquetSchemaType::Binary => {
|
270
|
-
let v = convert_to_binary(item_value)?;
|
271
|
-
ParquetValue::Bytes(v)
|
272
|
-
}
|
273
|
-
ParquetSchemaType::Boolean => {
|
274
|
-
let v = convert_to_boolean(item_value)?;
|
275
|
-
ParquetValue::Boolean(v)
|
276
|
-
}
|
277
|
-
ParquetSchemaType::Date32 => {
|
278
|
-
let v = convert_to_date32(item_value, list_field.format)?;
|
279
|
-
ParquetValue::Date32(v)
|
280
|
-
}
|
281
|
-
ParquetSchemaType::TimestampMillis => {
|
282
|
-
let v = convert_to_timestamp_millis(item_value, list_field.format)?;
|
283
|
-
ParquetValue::TimestampMillis(v, None)
|
284
|
-
}
|
285
|
-
ParquetSchemaType::TimestampMicros => {
|
286
|
-
let v = convert_to_timestamp_micros(item_value, list_field.format)?;
|
287
|
-
ParquetValue::TimestampMicros(v, None)
|
288
|
-
}
|
289
|
-
ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
|
290
|
-
return Err(MagnusError::new(
|
291
|
-
magnus::exception::type_error(),
|
292
|
-
"Nested lists and maps are not supported",
|
293
|
-
))
|
294
|
-
}
|
295
|
-
};
|
296
|
-
values.push(converted);
|
227
|
+
/// Converts our custom `ParquetSchemaType` into an Arrow `DataType`.
|
228
|
+
/// This ensures proper nullability settings for nested types.
|
229
|
+
/// Converts a ParquetSchemaType to an Arrow DataType
|
230
|
+
pub fn parquet_schema_type_to_arrow_data_type(
|
231
|
+
schema_type: &ParquetSchemaType,
|
232
|
+
) -> Result<DataType, MagnusError> {
|
233
|
+
Ok(match schema_type {
|
234
|
+
ParquetSchemaType::Primitive(primative) => match primative {
|
235
|
+
PrimitiveType::Int8 => DataType::Int8,
|
236
|
+
PrimitiveType::Int16 => DataType::Int16,
|
237
|
+
PrimitiveType::Int32 => DataType::Int32,
|
238
|
+
PrimitiveType::Int64 => DataType::Int64,
|
239
|
+
PrimitiveType::UInt8 => DataType::UInt8,
|
240
|
+
PrimitiveType::UInt16 => DataType::UInt16,
|
241
|
+
PrimitiveType::UInt32 => DataType::UInt32,
|
242
|
+
PrimitiveType::UInt64 => DataType::UInt64,
|
243
|
+
PrimitiveType::Float32 => DataType::Float32,
|
244
|
+
PrimitiveType::Float64 => DataType::Float64,
|
245
|
+
PrimitiveType::String => DataType::Utf8,
|
246
|
+
PrimitiveType::Binary => DataType::Binary,
|
247
|
+
PrimitiveType::Boolean => DataType::Boolean,
|
248
|
+
PrimitiveType::Date32 => DataType::Date32,
|
249
|
+
PrimitiveType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
|
250
|
+
PrimitiveType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
|
251
|
+
},
|
252
|
+
// For a List<T>, create a standard List in Arrow with nullable items
|
253
|
+
ParquetSchemaType::List(list_field) => {
|
254
|
+
let child_type = parquet_schema_type_to_arrow_data_type(&list_field.item_type)?;
|
255
|
+
// For a list, use empty field name to match expectations for schema_dsl test
|
256
|
+
// This is the critical fix for the schema_dsl test which expects an empty field name
|
257
|
+
// Use empty field name for all list field items - this is crucial for compatibility
|
258
|
+
DataType::List(Arc::new(Field::new(
|
259
|
+
"item",
|
260
|
+
child_type,
|
261
|
+
list_field.nullable,
|
262
|
+
)))
|
297
263
|
}
|
298
|
-
Ok(values)
|
299
|
-
} else {
|
300
|
-
Err(MagnusError::new(
|
301
|
-
magnus::exception::type_error(),
|
302
|
-
"Invalid list format",
|
303
|
-
))
|
304
|
-
}
|
305
|
-
}
|
306
|
-
|
307
|
-
pub fn convert_to_map(
|
308
|
-
value: Value,
|
309
|
-
map_field: &MapField,
|
310
|
-
) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
|
311
|
-
let ruby = unsafe { Ruby::get_unchecked() };
|
312
|
-
if value.is_kind_of(ruby.class_hash()) {
|
313
|
-
let mut map = HashMap::new();
|
314
|
-
let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
|
315
|
-
|
316
|
-
for (key, value) in entries {
|
317
|
-
let key_value = match &map_field.key_type {
|
318
|
-
ParquetSchemaType::String => {
|
319
|
-
let v = String::try_convert(key)?;
|
320
|
-
ParquetValue::String(v)
|
321
|
-
}
|
322
|
-
_ => {
|
323
|
-
return Err(MagnusError::new(
|
324
|
-
magnus::exception::type_error(),
|
325
|
-
"Map keys must be strings",
|
326
|
-
))
|
327
|
-
}
|
328
|
-
};
|
329
264
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
ParquetValue::Int64(v)
|
346
|
-
}
|
347
|
-
ParquetSchemaType::UInt8 => {
|
348
|
-
let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
|
349
|
-
ParquetValue::UInt8(v)
|
350
|
-
}
|
351
|
-
ParquetSchemaType::UInt16 => {
|
352
|
-
let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
|
353
|
-
ParquetValue::UInt16(v)
|
354
|
-
}
|
355
|
-
ParquetSchemaType::UInt32 => {
|
356
|
-
let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
|
357
|
-
ParquetValue::UInt32(v)
|
358
|
-
}
|
359
|
-
ParquetSchemaType::UInt64 => {
|
360
|
-
let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
|
361
|
-
ParquetValue::UInt64(v)
|
362
|
-
}
|
363
|
-
ParquetSchemaType::Float => {
|
364
|
-
let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
|
365
|
-
ParquetValue::Float32(v)
|
366
|
-
}
|
367
|
-
ParquetSchemaType::Double => {
|
368
|
-
let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
|
369
|
-
ParquetValue::Float64(v)
|
370
|
-
}
|
371
|
-
ParquetSchemaType::String => {
|
372
|
-
let v = String::try_convert(value)?;
|
373
|
-
ParquetValue::String(v)
|
374
|
-
}
|
375
|
-
ParquetSchemaType::Binary => {
|
376
|
-
let v = convert_to_binary(value)?;
|
377
|
-
ParquetValue::Bytes(v)
|
378
|
-
}
|
379
|
-
ParquetSchemaType::Boolean => {
|
380
|
-
let v = convert_to_boolean(value)?;
|
381
|
-
ParquetValue::Boolean(v)
|
382
|
-
}
|
383
|
-
ParquetSchemaType::Date32 => {
|
384
|
-
let v = convert_to_date32(value, map_field.format)?;
|
385
|
-
ParquetValue::Date32(v)
|
386
|
-
}
|
387
|
-
ParquetSchemaType::TimestampMillis => {
|
388
|
-
let v = convert_to_timestamp_millis(value, map_field.format)?;
|
389
|
-
ParquetValue::TimestampMillis(v, None)
|
390
|
-
}
|
391
|
-
ParquetSchemaType::TimestampMicros => {
|
392
|
-
let v = convert_to_timestamp_micros(value, map_field.format)?;
|
393
|
-
ParquetValue::TimestampMicros(v, None)
|
394
|
-
}
|
395
|
-
ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
|
396
|
-
return Err(MagnusError::new(
|
397
|
-
magnus::exception::type_error(),
|
398
|
-
"Map values cannot be lists or maps",
|
399
|
-
))
|
400
|
-
}
|
401
|
-
};
|
402
|
-
|
403
|
-
map.insert(key_value, value_value);
|
265
|
+
// For a Map<K, V>, ensure entries field is non-nullable and key field is non-nullable
|
266
|
+
ParquetSchemaType::Map(map_field) => {
|
267
|
+
let key_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.key_type)?;
|
268
|
+
let value_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.value_type)?;
|
269
|
+
DataType::Map(
|
270
|
+
Arc::new(Field::new(
|
271
|
+
"entries",
|
272
|
+
DataType::Struct(Fields::from(vec![
|
273
|
+
Field::new("key", key_arrow_type, false), // key must be non-null
|
274
|
+
Field::new("value", value_arrow_type, true), // value can be null
|
275
|
+
])),
|
276
|
+
/*nullable=*/ false, // crucial: entries must be non-nullable
|
277
|
+
)),
|
278
|
+
/*keys_sorted=*/ false,
|
279
|
+
)
|
404
280
|
}
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
}
|
281
|
+
ParquetSchemaType::Struct(struct_field) => {
|
282
|
+
if struct_field.fields.is_empty() {
|
283
|
+
return Err(MagnusError::new(
|
284
|
+
magnus::exception::runtime_error(),
|
285
|
+
"Cannot create a struct with zero subfields (empty struct).",
|
286
|
+
));
|
287
|
+
}
|
413
288
|
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
ParquetValue::Null => builder.append_null(),
|
421
|
-
_ => {
|
422
|
-
return Err(MagnusError::new(
|
423
|
-
magnus::exception::type_error(),
|
424
|
-
format!("Expected {}, got {:?}", stringify!($variant), value),
|
425
|
-
))
|
426
|
-
}
|
289
|
+
// Build arrow fields
|
290
|
+
let mut arrow_fields = Vec::with_capacity(struct_field.fields.len());
|
291
|
+
|
292
|
+
for field in &struct_field.fields {
|
293
|
+
let field_type = parquet_schema_type_to_arrow_data_type(&field.type_)?;
|
294
|
+
arrow_fields.push(Field::new(&field.name, field_type, true)); // All fields are nullable by default
|
427
295
|
}
|
296
|
+
|
297
|
+
DataType::Struct(Fields::from(arrow_fields))
|
428
298
|
}
|
429
|
-
|
430
|
-
}};
|
299
|
+
})
|
431
300
|
}
|
432
301
|
|
433
302
|
#[macro_export]
|
@@ -457,367 +326,1128 @@ macro_rules! impl_timestamp_array_conversion {
|
|
457
326
|
}};
|
458
327
|
}
|
459
328
|
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
magnus::exception::type_error(),
|
471
|
-
format!("Expected {}, got {:?}", stringify!($variant), value),
|
472
|
-
))
|
473
|
-
}
|
474
|
-
}
|
329
|
+
// Create the appropriate Arrow builder for a given ParquetSchemaType.
|
330
|
+
// We return a Box<dyn ArrayBuilder> so we can dynamically downcast.
|
331
|
+
fn create_arrow_builder_for_type(
|
332
|
+
type_: &ParquetSchemaType,
|
333
|
+
capacity: Option<usize>,
|
334
|
+
) -> Result<Box<dyn ArrayBuilder>, ParquetGemError> {
|
335
|
+
let cap = capacity.unwrap_or(1); // Default to at least capacity 1 to avoid empty builders
|
336
|
+
match type_ {
|
337
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int8) => {
|
338
|
+
Ok(Box::new(Int8Builder::with_capacity(cap)))
|
475
339
|
}
|
476
|
-
|
477
|
-
|
478
|
-
($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
|
479
|
-
let mut builder = <$builder_type>::with_capacity($values.len(), $capacity);
|
480
|
-
for value in $values {
|
481
|
-
match value {
|
482
|
-
ParquetValue::$variant(v) => builder.append_value(v),
|
483
|
-
ParquetValue::Null => builder.append_null(),
|
484
|
-
_ => {
|
485
|
-
return Err(MagnusError::new(
|
486
|
-
magnus::exception::type_error(),
|
487
|
-
format!("Expected {}, got {:?}", stringify!($variant), value),
|
488
|
-
))
|
489
|
-
}
|
490
|
-
}
|
340
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int16) => {
|
341
|
+
Ok(Box::new(Int16Builder::with_capacity(cap)))
|
491
342
|
}
|
492
|
-
|
493
|
-
|
494
|
-
}
|
343
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
|
344
|
+
Ok(Box::new(Int32Builder::with_capacity(cap)))
|
345
|
+
}
|
346
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
|
347
|
+
Ok(Box::new(Int64Builder::with_capacity(cap)))
|
348
|
+
}
|
349
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
|
350
|
+
Ok(Box::new(UInt8Builder::with_capacity(cap)))
|
351
|
+
}
|
352
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
|
353
|
+
Ok(Box::new(UInt16Builder::with_capacity(cap)))
|
354
|
+
}
|
355
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
|
356
|
+
Ok(Box::new(UInt32Builder::with_capacity(cap)))
|
357
|
+
}
|
358
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
|
359
|
+
Ok(Box::new(UInt64Builder::with_capacity(cap)))
|
360
|
+
}
|
361
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
|
362
|
+
Ok(Box::new(Float32Builder::with_capacity(cap)))
|
363
|
+
}
|
364
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
|
365
|
+
Ok(Box::new(Float64Builder::with_capacity(cap)))
|
366
|
+
}
|
367
|
+
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
368
|
+
Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
|
369
|
+
}
|
370
|
+
ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
|
371
|
+
Ok(Box::new(BinaryBuilder::with_capacity(cap, cap * 32)))
|
372
|
+
}
|
373
|
+
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
|
374
|
+
Ok(Box::new(BooleanBuilder::with_capacity(cap)))
|
375
|
+
}
|
376
|
+
ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
|
377
|
+
Ok(Box::new(Date32Builder::with_capacity(cap)))
|
378
|
+
}
|
379
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
|
380
|
+
Ok(Box::new(TimestampMillisecondBuilder::with_capacity(cap)))
|
381
|
+
}
|
382
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
|
383
|
+
Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
|
384
|
+
}
|
385
|
+
ParquetSchemaType::List(list_field) => {
|
386
|
+
// For a list, we create a ListBuilder whose child builder is determined by item_type.
|
387
|
+
// Pass through capacity to ensure consistent sizing
|
388
|
+
let child_builder = create_arrow_builder_for_type(&list_field.item_type, Some(cap))?;
|
495
389
|
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
(
|
509
|
-
|
390
|
+
// Ensure consistent builder capacity for lists
|
391
|
+
Ok(Box::new(ListBuilder::<Box<dyn ArrayBuilder>>::new(
|
392
|
+
child_builder,
|
393
|
+
)))
|
394
|
+
}
|
395
|
+
ParquetSchemaType::Map(map_field) => {
|
396
|
+
// A Map is physically a list<struct<key:..., value:...>> in Arrow.
|
397
|
+
// Pass through capacity to ensure consistent sizing
|
398
|
+
let key_builder = create_arrow_builder_for_type(&map_field.key_type, Some(cap))?;
|
399
|
+
let value_builder = create_arrow_builder_for_type(&map_field.value_type, Some(cap))?;
|
400
|
+
|
401
|
+
// Create a MapBuilder with explicit field names to ensure compatibility
|
402
|
+
Ok(Box::new(MapBuilder::<
|
403
|
+
Box<dyn ArrayBuilder>,
|
404
|
+
Box<dyn ArrayBuilder>,
|
405
|
+
>::new(
|
406
|
+
Some(MapFieldNames {
|
407
|
+
entry: "entries".to_string(),
|
408
|
+
key: "key".to_string(),
|
409
|
+
value: "value".to_string(),
|
410
|
+
}),
|
411
|
+
key_builder,
|
412
|
+
value_builder,
|
413
|
+
)))
|
414
|
+
}
|
415
|
+
ParquetSchemaType::Struct(struct_field) => {
|
416
|
+
// Check for empty struct immediately
|
417
|
+
if struct_field.fields.is_empty() {
|
418
|
+
return Err(MagnusError::new(
|
419
|
+
magnus::exception::runtime_error(),
|
420
|
+
"Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
|
421
|
+
))?;
|
510
422
|
}
|
511
|
-
|
423
|
+
|
424
|
+
// Create a child builder for each field in the struct
|
425
|
+
let mut child_field_builders = Vec::with_capacity(struct_field.fields.len());
|
426
|
+
|
427
|
+
// Get struct data type first to ensure field compatibility
|
428
|
+
let data_type = parquet_schema_type_to_arrow_data_type(type_)?;
|
429
|
+
|
430
|
+
// Make sure the data type is a struct
|
431
|
+
let arrow_fields = if let DataType::Struct(ref fields) = data_type {
|
432
|
+
fields.clone()
|
433
|
+
} else {
|
512
434
|
return Err(MagnusError::new(
|
513
435
|
magnus::exception::type_error(),
|
436
|
+
"Expected struct data type".to_string(),
|
437
|
+
))?;
|
438
|
+
};
|
439
|
+
|
440
|
+
// Create builders for each child field with consistent capacity
|
441
|
+
for child in &struct_field.fields {
|
442
|
+
let sub_builder = create_arrow_builder_for_type(&child.type_, Some(cap))?;
|
443
|
+
child_field_builders.push(sub_builder);
|
444
|
+
}
|
445
|
+
|
446
|
+
// Make sure we have the right number of builders
|
447
|
+
if child_field_builders.len() != arrow_fields.len() {
|
448
|
+
return Err(MagnusError::new(
|
449
|
+
magnus::exception::runtime_error(),
|
514
450
|
format!(
|
515
|
-
"
|
516
|
-
|
451
|
+
"Number of field builders ({}) doesn't match number of arrow fields ({})",
|
452
|
+
child_field_builders.len(),
|
453
|
+
arrow_fields.len()
|
517
454
|
),
|
518
|
-
))
|
455
|
+
))?;
|
519
456
|
}
|
457
|
+
|
458
|
+
// Create the StructBuilder with the fields and child builders
|
459
|
+
Ok(Box::new(StructBuilder::new(
|
460
|
+
arrow_fields,
|
461
|
+
child_field_builders,
|
462
|
+
)))
|
520
463
|
}
|
521
|
-
}
|
464
|
+
}
|
522
465
|
}
|
523
466
|
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
467
|
+
// Fill primitive scalar Int8 values
|
468
|
+
fn fill_int8_builder(
|
469
|
+
builder: &mut dyn ArrayBuilder,
|
470
|
+
values: &[ParquetValue],
|
471
|
+
) -> Result<(), MagnusError> {
|
472
|
+
let typed_builder = builder
|
473
|
+
.as_any_mut()
|
474
|
+
.downcast_mut::<Int8Builder>()
|
475
|
+
.expect("Builder mismatch: expected Int8Builder");
|
476
|
+
for val in values {
|
477
|
+
match val {
|
478
|
+
ParquetValue::Int8(i) => typed_builder.append_value(*i),
|
479
|
+
// Handle Int64 that could be an Int8
|
480
|
+
ParquetValue::Int64(i) => {
|
481
|
+
if *i < i8::MIN as i64 || *i > i8::MAX as i64 {
|
482
|
+
return Err(MagnusError::new(
|
483
|
+
magnus::exception::range_error(),
|
484
|
+
format!("Integer {} is out of range for Int8", i),
|
485
|
+
));
|
486
|
+
}
|
487
|
+
typed_builder.append_value(*i as i8)
|
535
488
|
}
|
536
|
-
|
537
|
-
|
489
|
+
ParquetValue::Null => typed_builder.append_null(),
|
490
|
+
other => {
|
491
|
+
return Err(MagnusError::new(
|
492
|
+
magnus::exception::type_error(),
|
493
|
+
format!("Expected Int8, got {:?}", other),
|
494
|
+
))
|
538
495
|
}
|
539
|
-
|
496
|
+
}
|
497
|
+
}
|
498
|
+
Ok(())
|
499
|
+
}
|
500
|
+
|
501
|
+
// Fill primitive scalar Int16 values
|
502
|
+
fn fill_int16_builder(
|
503
|
+
builder: &mut dyn ArrayBuilder,
|
504
|
+
values: &[ParquetValue],
|
505
|
+
) -> Result<(), MagnusError> {
|
506
|
+
let typed_builder = builder
|
507
|
+
.as_any_mut()
|
508
|
+
.downcast_mut::<Int16Builder>()
|
509
|
+
.expect("Builder mismatch: expected Int16Builder");
|
510
|
+
for val in values {
|
511
|
+
match val {
|
512
|
+
ParquetValue::Int16(i) => typed_builder.append_value(*i),
|
513
|
+
// Handle Int64 that could be an Int16
|
514
|
+
ParquetValue::Int64(i) => {
|
515
|
+
if *i < i16::MIN as i64 || *i > i16::MAX as i64 {
|
516
|
+
return Err(MagnusError::new(
|
517
|
+
magnus::exception::range_error(),
|
518
|
+
format!("Integer {} is out of range for Int16", i),
|
519
|
+
));
|
520
|
+
}
|
521
|
+
typed_builder.append_value(*i as i16)
|
522
|
+
}
|
523
|
+
ParquetValue::Null => typed_builder.append_null(),
|
524
|
+
other => {
|
540
525
|
return Err(MagnusError::new(
|
541
526
|
magnus::exception::type_error(),
|
542
|
-
format!(
|
543
|
-
"Type mismatch in list: expected {:?}, got {:?}",
|
544
|
-
$item_type, $value
|
545
|
-
),
|
527
|
+
format!("Expected Int16, got {:?}", other),
|
546
528
|
))
|
547
529
|
}
|
548
530
|
}
|
549
|
-
}
|
531
|
+
}
|
532
|
+
Ok(())
|
550
533
|
}
|
551
534
|
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
535
|
+
// Fill list values by recursively filling child items
|
536
|
+
fn fill_list_builder(
|
537
|
+
builder: &mut dyn ArrayBuilder,
|
538
|
+
item_type: &ParquetSchemaType,
|
539
|
+
values: &[ParquetValue],
|
540
|
+
) -> Result<(), MagnusError> {
|
541
|
+
// We need to use a more specific type for ListBuilder to help Rust's type inference
|
542
|
+
let lb = builder
|
543
|
+
.as_any_mut()
|
544
|
+
.downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
|
545
|
+
.expect("Builder mismatch: expected ListBuilder");
|
546
|
+
|
547
|
+
for val in values {
|
548
|
+
if let ParquetValue::Null = val {
|
549
|
+
// null list
|
550
|
+
lb.append(false);
|
551
|
+
} else if let ParquetValue::List(list_items) = val {
|
552
|
+
// First fill the child builder with the items
|
553
|
+
let values_builder = lb.values();
|
554
|
+
fill_builder(values_builder, item_type, list_items)?;
|
555
|
+
// Then finalize the list by calling append(true)
|
556
|
+
lb.append(true);
|
557
|
+
} else {
|
558
|
+
return Err(MagnusError::new(
|
559
|
+
magnus::exception::type_error(),
|
560
|
+
format!("Expected ParquetValue::List(...) or Null, got {:?}", val),
|
561
|
+
));
|
562
|
+
}
|
563
|
+
}
|
564
|
+
|
565
|
+
Ok(())
|
566
|
+
}
|
567
|
+
|
568
|
+
// Fill map values by recursively filling key and value items
|
569
|
+
fn fill_map_builder(
|
570
|
+
builder: &mut dyn ArrayBuilder,
|
571
|
+
key_type: &ParquetSchemaType,
|
572
|
+
value_type: &ParquetSchemaType,
|
573
|
+
values: &[ParquetValue],
|
574
|
+
) -> Result<(), MagnusError> {
|
575
|
+
let mb = builder
|
576
|
+
.as_any_mut()
|
577
|
+
.downcast_mut::<MapBuilder<Box<dyn ArrayBuilder>, Box<dyn ArrayBuilder>>>()
|
578
|
+
.expect("Builder mismatch: expected MapBuilder");
|
579
|
+
|
580
|
+
for val in values {
|
581
|
+
match val {
|
582
|
+
ParquetValue::Null => {
|
583
|
+
// null map
|
584
|
+
mb.append(false).map_err(|e| {
|
585
|
+
MagnusError::new(
|
586
|
+
magnus::exception::runtime_error(),
|
587
|
+
format!("Failed to append null to map: {}", e),
|
588
|
+
)
|
589
|
+
})?;
|
563
590
|
}
|
564
|
-
|
565
|
-
|
591
|
+
ParquetValue::Map(map_entries) => {
|
592
|
+
// First append all key-value pairs to the child arrays
|
593
|
+
for (k, v) in map_entries {
|
594
|
+
// Note: Arrow expects field names "key" and "value" (singular)
|
595
|
+
fill_builder(mb.keys(), key_type, &[k.clone()])?;
|
596
|
+
fill_builder(mb.values(), value_type, &[v.clone()])?;
|
597
|
+
}
|
598
|
+
// Then finalize the map by calling append(true)
|
599
|
+
mb.append(true).map_err(|e| {
|
600
|
+
MagnusError::new(
|
601
|
+
magnus::exception::runtime_error(),
|
602
|
+
format!("Failed to append map entry: {}", e),
|
603
|
+
)
|
604
|
+
})?;
|
566
605
|
}
|
567
|
-
|
606
|
+
other => {
|
568
607
|
return Err(MagnusError::new(
|
569
608
|
magnus::exception::type_error(),
|
570
|
-
format!(
|
571
|
-
"Type mismatch in list: expected {:?}, got {:?}",
|
572
|
-
$item_type, $value
|
573
|
-
),
|
609
|
+
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
574
610
|
))
|
575
611
|
}
|
576
612
|
}
|
577
|
-
}
|
613
|
+
}
|
614
|
+
|
615
|
+
Ok(())
|
578
616
|
}
|
579
617
|
|
580
|
-
|
581
|
-
|
618
|
+
// Append an entire slice of ParquetValue into the given Arrow builder.
|
619
|
+
// We do a `match` on the type for each item, recursing for nested list/map.
|
620
|
+
fn fill_builder(
|
621
|
+
builder: &mut dyn ArrayBuilder,
|
582
622
|
type_: &ParquetSchemaType,
|
583
|
-
|
623
|
+
values: &[ParquetValue],
|
624
|
+
) -> Result<(), MagnusError> {
|
584
625
|
match type_ {
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
ParquetSchemaType::
|
589
|
-
ParquetSchemaType::
|
590
|
-
ParquetSchemaType::
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
626
|
+
// ------------------
|
627
|
+
// PRIMITIVE SCALARS - delegated to specialized helpers
|
628
|
+
// ------------------
|
629
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int8) => fill_int8_builder(builder, values),
|
630
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int16) => fill_int16_builder(builder, values),
|
631
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
|
632
|
+
let typed_builder = builder
|
633
|
+
.as_any_mut()
|
634
|
+
.downcast_mut::<Int32Builder>()
|
635
|
+
.expect("Builder mismatch: expected Int32Builder");
|
636
|
+
for val in values {
|
637
|
+
match val {
|
638
|
+
ParquetValue::Int32(i) => typed_builder.append_value(*i),
|
639
|
+
ParquetValue::Date32(d) => typed_builder.append_value(*d), // if you allow date->int
|
640
|
+
// Handle the case where we have an Int64 in an Int32 field (common with Ruby Integers)
|
641
|
+
ParquetValue::Int64(i) => {
|
642
|
+
if *i < i32::MIN as i64 || *i > i32::MAX as i64 {
|
643
|
+
return Err(MagnusError::new(
|
644
|
+
magnus::exception::range_error(),
|
645
|
+
format!("Integer {} is out of range for Int32", i),
|
646
|
+
));
|
647
|
+
}
|
648
|
+
typed_builder.append_value(*i as i32)
|
649
|
+
}
|
650
|
+
ParquetValue::Null => typed_builder.append_null(),
|
651
|
+
other => {
|
652
|
+
return Err(MagnusError::new(
|
653
|
+
magnus::exception::type_error(),
|
654
|
+
format!("Expected Int32, got {:?}", other),
|
655
|
+
))
|
656
|
+
}
|
657
|
+
}
|
658
|
+
}
|
659
|
+
Ok(())
|
609
660
|
}
|
610
|
-
ParquetSchemaType::
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
661
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
|
662
|
+
let typed_builder = builder
|
663
|
+
.as_any_mut()
|
664
|
+
.downcast_mut::<Int64Builder>()
|
665
|
+
.expect("Builder mismatch: expected Int64Builder");
|
666
|
+
for val in values {
|
667
|
+
match val {
|
668
|
+
ParquetValue::Int64(i) => typed_builder.append_value(*i),
|
669
|
+
ParquetValue::Null => typed_builder.append_null(),
|
670
|
+
other => {
|
671
|
+
return Err(MagnusError::new(
|
672
|
+
magnus::exception::type_error(),
|
673
|
+
format!("Expected Int64, got {:?}", other),
|
674
|
+
))
|
675
|
+
}
|
676
|
+
}
|
677
|
+
}
|
678
|
+
Ok(())
|
616
679
|
}
|
617
|
-
ParquetSchemaType::
|
618
|
-
let
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
680
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
|
681
|
+
let typed_builder = builder
|
682
|
+
.as_any_mut()
|
683
|
+
.downcast_mut::<UInt8Builder>()
|
684
|
+
.expect("Builder mismatch: expected UInt8Builder");
|
685
|
+
for val in values {
|
686
|
+
match val {
|
687
|
+
ParquetValue::UInt8(u) => typed_builder.append_value(*u),
|
688
|
+
// Handle Int64 that could be a UInt8
|
689
|
+
ParquetValue::Int64(i) => {
|
690
|
+
if *i < 0 || *i > u8::MAX as i64 {
|
691
|
+
return Err(MagnusError::new(
|
692
|
+
magnus::exception::range_error(),
|
693
|
+
format!("Integer {} is out of range for UInt8", i),
|
694
|
+
));
|
695
|
+
}
|
696
|
+
typed_builder.append_value(*i as u8)
|
697
|
+
}
|
698
|
+
ParquetValue::Null => typed_builder.append_null(),
|
699
|
+
other => {
|
700
|
+
return Err(MagnusError::new(
|
701
|
+
magnus::exception::type_error(),
|
702
|
+
format!("Expected UInt8, got {:?}", other),
|
703
|
+
))
|
704
|
+
}
|
626
705
|
}
|
627
|
-
|
628
|
-
|
706
|
+
}
|
707
|
+
Ok(())
|
708
|
+
}
|
709
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
|
710
|
+
let typed_builder = builder
|
711
|
+
.as_any_mut()
|
712
|
+
.downcast_mut::<UInt16Builder>()
|
713
|
+
.expect("Builder mismatch: expected UInt16Builder");
|
714
|
+
for val in values {
|
715
|
+
match val {
|
716
|
+
ParquetValue::UInt16(u) => typed_builder.append_value(*u),
|
717
|
+
// Handle Int64 that could be a UInt16
|
718
|
+
ParquetValue::Int64(i) => {
|
719
|
+
if *i < 0 || *i > u16::MAX as i64 {
|
720
|
+
return Err(MagnusError::new(
|
721
|
+
magnus::exception::range_error(),
|
722
|
+
format!("Integer {} is out of range for UInt16", i),
|
723
|
+
));
|
724
|
+
}
|
725
|
+
typed_builder.append_value(*i as u16)
|
726
|
+
}
|
727
|
+
ParquetValue::Null => typed_builder.append_null(),
|
728
|
+
other => {
|
729
|
+
return Err(MagnusError::new(
|
730
|
+
magnus::exception::type_error(),
|
731
|
+
format!("Expected UInt16, got {:?}", other),
|
732
|
+
))
|
733
|
+
}
|
629
734
|
}
|
630
|
-
|
631
|
-
|
735
|
+
}
|
736
|
+
Ok(())
|
737
|
+
}
|
738
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
|
739
|
+
let typed_builder = builder
|
740
|
+
.as_any_mut()
|
741
|
+
.downcast_mut::<UInt32Builder>()
|
742
|
+
.expect("Builder mismatch: expected UInt32Builder");
|
743
|
+
for val in values {
|
744
|
+
match val {
|
745
|
+
ParquetValue::UInt32(u) => typed_builder.append_value(*u),
|
746
|
+
// Handle Int64 that could be a UInt32
|
747
|
+
ParquetValue::Int64(i) => {
|
748
|
+
if *i < 0 || *i > u32::MAX as i64 {
|
749
|
+
return Err(MagnusError::new(
|
750
|
+
magnus::exception::range_error(),
|
751
|
+
format!("Integer {} is out of range for UInt32", i),
|
752
|
+
));
|
753
|
+
}
|
754
|
+
typed_builder.append_value(*i as u32)
|
755
|
+
}
|
756
|
+
ParquetValue::Null => typed_builder.append_null(),
|
757
|
+
other => {
|
758
|
+
return Err(MagnusError::new(
|
759
|
+
magnus::exception::type_error(),
|
760
|
+
format!("Expected UInt32, got {:?}", other),
|
761
|
+
))
|
762
|
+
}
|
632
763
|
}
|
633
|
-
|
634
|
-
|
764
|
+
}
|
765
|
+
Ok(())
|
766
|
+
}
|
767
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
|
768
|
+
let typed_builder = builder
|
769
|
+
.as_any_mut()
|
770
|
+
.downcast_mut::<UInt64Builder>()
|
771
|
+
.expect("Builder mismatch: expected UInt64Builder");
|
772
|
+
for val in values {
|
773
|
+
match val {
|
774
|
+
ParquetValue::UInt64(u) => typed_builder.append_value(*u),
|
775
|
+
// Handle Int64 that could be a UInt64
|
776
|
+
ParquetValue::Int64(i) => {
|
777
|
+
if *i < 0 {
|
778
|
+
return Err(MagnusError::new(
|
779
|
+
magnus::exception::range_error(),
|
780
|
+
format!("Integer {} is out of range for UInt64", i),
|
781
|
+
));
|
782
|
+
}
|
783
|
+
typed_builder.append_value(*i as u64)
|
784
|
+
}
|
785
|
+
ParquetValue::Null => typed_builder.append_null(),
|
786
|
+
other => {
|
787
|
+
return Err(MagnusError::new(
|
788
|
+
magnus::exception::type_error(),
|
789
|
+
format!("Expected UInt64, got {:?}", other),
|
790
|
+
))
|
791
|
+
}
|
635
792
|
}
|
636
|
-
|
637
|
-
|
793
|
+
}
|
794
|
+
Ok(())
|
795
|
+
}
|
796
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
|
797
|
+
let typed_builder = builder
|
798
|
+
.as_any_mut()
|
799
|
+
.downcast_mut::<Float32Builder>()
|
800
|
+
.expect("Builder mismatch: expected Float32Builder");
|
801
|
+
for val in values {
|
802
|
+
match val {
|
803
|
+
ParquetValue::Float32(f) => typed_builder.append_value(*f),
|
804
|
+
ParquetValue::Float16(fh) => typed_builder.append_value(*fh),
|
805
|
+
ParquetValue::Null => typed_builder.append_null(),
|
806
|
+
other => {
|
807
|
+
return Err(MagnusError::new(
|
808
|
+
magnus::exception::type_error(),
|
809
|
+
format!("Expected Float32, got {:?}", other),
|
810
|
+
))
|
811
|
+
}
|
638
812
|
}
|
639
|
-
|
640
|
-
|
813
|
+
}
|
814
|
+
Ok(())
|
815
|
+
}
|
816
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
|
817
|
+
let typed_builder = builder
|
818
|
+
.as_any_mut()
|
819
|
+
.downcast_mut::<Float64Builder>()
|
820
|
+
.expect("Builder mismatch: expected Float64Builder");
|
821
|
+
for val in values {
|
822
|
+
match val {
|
823
|
+
ParquetValue::Float64(f) => typed_builder.append_value(*f),
|
824
|
+
// If you want to allow f32 => f64, do so:
|
825
|
+
ParquetValue::Float32(flo) => typed_builder.append_value(*flo as f64),
|
826
|
+
ParquetValue::Null => typed_builder.append_null(),
|
827
|
+
other => {
|
828
|
+
return Err(MagnusError::new(
|
829
|
+
magnus::exception::type_error(),
|
830
|
+
format!("Expected Float64, got {:?}", other),
|
831
|
+
))
|
832
|
+
}
|
641
833
|
}
|
642
|
-
|
643
|
-
|
834
|
+
}
|
835
|
+
Ok(())
|
836
|
+
}
|
837
|
+
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
|
838
|
+
let typed_builder = builder
|
839
|
+
.as_any_mut()
|
840
|
+
.downcast_mut::<BooleanBuilder>()
|
841
|
+
.expect("Builder mismatch: expected BooleanBuilder");
|
842
|
+
for val in values {
|
843
|
+
match val {
|
844
|
+
ParquetValue::Boolean(b) => typed_builder.append_value(*b),
|
845
|
+
ParquetValue::Null => typed_builder.append_null(),
|
846
|
+
other => {
|
847
|
+
return Err(MagnusError::new(
|
848
|
+
magnus::exception::type_error(),
|
849
|
+
format!("Expected Boolean, got {:?}", other),
|
850
|
+
))
|
851
|
+
}
|
644
852
|
}
|
645
|
-
|
646
|
-
|
853
|
+
}
|
854
|
+
Ok(())
|
855
|
+
}
|
856
|
+
ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
|
857
|
+
let typed_builder = builder
|
858
|
+
.as_any_mut()
|
859
|
+
.downcast_mut::<Date32Builder>()
|
860
|
+
.expect("Builder mismatch: expected Date32Builder");
|
861
|
+
for val in values {
|
862
|
+
match val {
|
863
|
+
ParquetValue::Date32(d) => typed_builder.append_value(*d),
|
864
|
+
ParquetValue::Null => typed_builder.append_null(),
|
865
|
+
other => {
|
866
|
+
return Err(MagnusError::new(
|
867
|
+
magnus::exception::type_error(),
|
868
|
+
format!("Expected Date32, got {:?}", other),
|
869
|
+
))
|
870
|
+
}
|
647
871
|
}
|
648
|
-
|
649
|
-
|
872
|
+
}
|
873
|
+
Ok(())
|
874
|
+
}
|
875
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
|
876
|
+
let typed_builder = builder
|
877
|
+
.as_any_mut()
|
878
|
+
.downcast_mut::<TimestampMillisecondBuilder>()
|
879
|
+
.expect("Builder mismatch: expected TimestampMillisecondBuilder");
|
880
|
+
for val in values {
|
881
|
+
match val {
|
882
|
+
ParquetValue::TimestampMillis(ts, _tz) => typed_builder.append_value(*ts),
|
883
|
+
ParquetValue::Null => typed_builder.append_null(),
|
884
|
+
other => {
|
885
|
+
return Err(MagnusError::new(
|
886
|
+
magnus::exception::type_error(),
|
887
|
+
format!("Expected TimestampMillis, got {:?}", other),
|
888
|
+
))
|
889
|
+
}
|
650
890
|
}
|
651
|
-
|
652
|
-
|
891
|
+
}
|
892
|
+
Ok(())
|
893
|
+
}
|
894
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
|
895
|
+
let typed_builder = builder
|
896
|
+
.as_any_mut()
|
897
|
+
.downcast_mut::<TimestampMicrosecondBuilder>()
|
898
|
+
.expect("Builder mismatch: expected TimestampMicrosecondBuilder");
|
899
|
+
for val in values {
|
900
|
+
match val {
|
901
|
+
ParquetValue::TimestampMicros(ts, _tz) => typed_builder.append_value(*ts),
|
902
|
+
ParquetValue::Null => typed_builder.append_null(),
|
903
|
+
other => {
|
904
|
+
return Err(MagnusError::new(
|
905
|
+
magnus::exception::type_error(),
|
906
|
+
format!("Expected TimestampMicros, got {:?}", other),
|
907
|
+
))
|
908
|
+
}
|
653
909
|
}
|
654
|
-
|
655
|
-
|
910
|
+
}
|
911
|
+
Ok(())
|
912
|
+
}
|
913
|
+
|
914
|
+
// ------------------
|
915
|
+
// NESTED LIST - using helper function
|
916
|
+
// ------------------
|
917
|
+
ParquetSchemaType::List(list_field) => {
|
918
|
+
fill_list_builder(builder, &list_field.item_type, values)
|
919
|
+
}
|
920
|
+
|
921
|
+
// ------------------
|
922
|
+
// NESTED MAP - using helper function
|
923
|
+
// ------------------
|
924
|
+
ParquetSchemaType::Map(map_field) => {
|
925
|
+
fill_map_builder(builder, &map_field.key_type, &map_field.value_type, values)
|
926
|
+
}
|
927
|
+
|
928
|
+
// ------------------
|
929
|
+
// OTHER TYPES - keep as is for now
|
930
|
+
// ------------------
|
931
|
+
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
932
|
+
let typed_builder = builder
|
933
|
+
.as_any_mut()
|
934
|
+
.downcast_mut::<StringBuilder>()
|
935
|
+
.expect("Builder mismatch: expected StringBuilder");
|
936
|
+
for val in values {
|
937
|
+
match val {
|
938
|
+
ParquetValue::String(s) => typed_builder.append_value(s),
|
939
|
+
ParquetValue::Null => typed_builder.append_null(),
|
940
|
+
other => {
|
941
|
+
return Err(MagnusError::new(
|
942
|
+
magnus::exception::type_error(),
|
943
|
+
format!("Expected String, got {:?}", other),
|
944
|
+
))
|
945
|
+
}
|
656
946
|
}
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
947
|
+
}
|
948
|
+
Ok(())
|
949
|
+
}
|
950
|
+
ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
|
951
|
+
let typed_builder = builder
|
952
|
+
.as_any_mut()
|
953
|
+
.downcast_mut::<BinaryBuilder>()
|
954
|
+
.expect("Builder mismatch: expected BinaryBuilder");
|
955
|
+
for val in values {
|
956
|
+
match val {
|
957
|
+
ParquetValue::Bytes(b) => typed_builder.append_value(&b),
|
958
|
+
ParquetValue::Null => typed_builder.append_null(),
|
959
|
+
other => {
|
960
|
+
return Err(MagnusError::new(
|
961
|
+
magnus::exception::type_error(),
|
962
|
+
format!("Expected Binary, got {:?}", other),
|
963
|
+
))
|
964
|
+
}
|
662
965
|
}
|
663
|
-
}
|
966
|
+
}
|
967
|
+
Ok(())
|
968
|
+
}
|
969
|
+
ParquetSchemaType::Struct(struct_field) => {
|
970
|
+
let typed_builder = builder
|
971
|
+
.as_any_mut()
|
972
|
+
.downcast_mut::<StructBuilder>()
|
973
|
+
.expect("Builder mismatch: expected StructBuilder");
|
664
974
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
ParquetValue::
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
ParquetValue::
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
975
|
+
for val in values {
|
976
|
+
match val {
|
977
|
+
ParquetValue::Null => {
|
978
|
+
// null struct
|
979
|
+
typed_builder.append(false);
|
980
|
+
}
|
981
|
+
ParquetValue::Map(map_data) => {
|
982
|
+
for (i, field) in struct_field.fields.iter().enumerate() {
|
983
|
+
let field_key = ParquetValue::String(field.name.clone());
|
984
|
+
if let Some(field_val) = map_data.get(&field_key) {
|
985
|
+
match field_val {
|
986
|
+
ParquetValue::Int8(x) => typed_builder
|
987
|
+
.field_builder::<Int8Builder>(i)
|
988
|
+
.ok_or_else(|| {
|
989
|
+
MagnusError::new(
|
990
|
+
magnus::exception::type_error(),
|
991
|
+
"Failed to coerce into Int8Builder",
|
992
|
+
)
|
993
|
+
})?
|
994
|
+
.append_value(*x),
|
995
|
+
ParquetValue::Int16(x) => typed_builder
|
996
|
+
.field_builder::<Int16Builder>(i)
|
997
|
+
.ok_or_else(|| {
|
998
|
+
MagnusError::new(
|
999
|
+
magnus::exception::type_error(),
|
1000
|
+
"Failed to coerce into Int16Builder",
|
1001
|
+
)
|
1002
|
+
})?
|
1003
|
+
.append_value(*x),
|
1004
|
+
ParquetValue::Int32(x) => typed_builder
|
1005
|
+
.field_builder::<Int32Builder>(i)
|
1006
|
+
.ok_or_else(|| {
|
1007
|
+
MagnusError::new(
|
1008
|
+
magnus::exception::type_error(),
|
1009
|
+
"Failed to coerce into Int32Builder",
|
1010
|
+
)
|
1011
|
+
})?
|
1012
|
+
.append_value(*x),
|
1013
|
+
ParquetValue::Int64(x) => typed_builder
|
1014
|
+
.field_builder::<Int64Builder>(i)
|
1015
|
+
.ok_or_else(|| {
|
1016
|
+
MagnusError::new(
|
1017
|
+
magnus::exception::type_error(),
|
1018
|
+
"Failed to coerce into Int64Builder",
|
1019
|
+
)
|
1020
|
+
})?
|
1021
|
+
.append_value(*x),
|
1022
|
+
ParquetValue::UInt8(x) => typed_builder
|
1023
|
+
.field_builder::<UInt8Builder>(i)
|
1024
|
+
.ok_or_else(|| {
|
1025
|
+
MagnusError::new(
|
1026
|
+
magnus::exception::type_error(),
|
1027
|
+
"Failed to coerce into UInt8Builder",
|
1028
|
+
)
|
1029
|
+
})?
|
1030
|
+
.append_value(*x),
|
1031
|
+
ParquetValue::UInt16(x) => typed_builder
|
1032
|
+
.field_builder::<UInt16Builder>(i)
|
1033
|
+
.ok_or_else(|| {
|
1034
|
+
MagnusError::new(
|
1035
|
+
magnus::exception::type_error(),
|
1036
|
+
"Failed to coerce into UInt16Builder",
|
1037
|
+
)
|
1038
|
+
})?
|
1039
|
+
.append_value(*x),
|
1040
|
+
ParquetValue::UInt32(x) => typed_builder
|
1041
|
+
.field_builder::<UInt32Builder>(i)
|
1042
|
+
.ok_or_else(|| {
|
1043
|
+
MagnusError::new(
|
1044
|
+
magnus::exception::type_error(),
|
1045
|
+
"Failed to coerce into UInt32Builder",
|
1046
|
+
)
|
1047
|
+
})?
|
1048
|
+
.append_value(*x),
|
1049
|
+
ParquetValue::UInt64(x) => typed_builder
|
1050
|
+
.field_builder::<UInt64Builder>(i)
|
1051
|
+
.ok_or_else(|| {
|
1052
|
+
MagnusError::new(
|
1053
|
+
magnus::exception::type_error(),
|
1054
|
+
"Failed to coerce into UInt64Builder",
|
1055
|
+
)
|
1056
|
+
})?
|
1057
|
+
.append_value(*x),
|
1058
|
+
ParquetValue::Float16(_) => {
|
1059
|
+
return Err(MagnusError::new(
|
1060
|
+
magnus::exception::runtime_error(),
|
1061
|
+
"Float16 not supported",
|
1062
|
+
))
|
1063
|
+
}
|
1064
|
+
ParquetValue::Float32(x) => typed_builder
|
1065
|
+
.field_builder::<Float32Builder>(i)
|
1066
|
+
.ok_or_else(|| {
|
1067
|
+
MagnusError::new(
|
1068
|
+
magnus::exception::type_error(),
|
1069
|
+
"Failed to coerce into Float32Builder",
|
1070
|
+
)
|
1071
|
+
})?
|
1072
|
+
.append_value(*x),
|
1073
|
+
ParquetValue::Float64(x) => typed_builder
|
1074
|
+
.field_builder::<Float64Builder>(i)
|
1075
|
+
.ok_or_else(|| {
|
1076
|
+
MagnusError::new(
|
1077
|
+
magnus::exception::type_error(),
|
1078
|
+
"Failed to coerce into Float64Builder",
|
1079
|
+
)
|
1080
|
+
})?
|
1081
|
+
.append_value(*x),
|
1082
|
+
ParquetValue::Boolean(x) => typed_builder
|
1083
|
+
.field_builder::<BooleanBuilder>(i)
|
1084
|
+
.ok_or_else(|| {
|
1085
|
+
MagnusError::new(
|
1086
|
+
magnus::exception::type_error(),
|
1087
|
+
"Failed to coerce into BooleanBuilder",
|
1088
|
+
)
|
1089
|
+
})?
|
1090
|
+
.append_value(*x),
|
1091
|
+
ParquetValue::String(x) => typed_builder
|
1092
|
+
.field_builder::<StringBuilder>(i)
|
1093
|
+
.ok_or_else(|| {
|
1094
|
+
MagnusError::new(
|
1095
|
+
magnus::exception::type_error(),
|
1096
|
+
"Failed to coerce into StringBuilder",
|
1097
|
+
)
|
1098
|
+
})?
|
1099
|
+
.append_value(x),
|
1100
|
+
ParquetValue::Bytes(bytes) => typed_builder
|
1101
|
+
.field_builder::<BinaryBuilder>(i)
|
1102
|
+
.ok_or_else(|| {
|
1103
|
+
MagnusError::new(
|
1104
|
+
magnus::exception::type_error(),
|
1105
|
+
"Failed to coerce into BinaryBuilder",
|
1106
|
+
)
|
1107
|
+
})?
|
1108
|
+
.append_value(bytes),
|
1109
|
+
ParquetValue::Date32(x) => typed_builder
|
1110
|
+
.field_builder::<Date32Builder>(i)
|
1111
|
+
.ok_or_else(|| {
|
1112
|
+
MagnusError::new(
|
1113
|
+
magnus::exception::type_error(),
|
1114
|
+
"Failed to coerce into Date32Builder",
|
1115
|
+
)
|
1116
|
+
})?
|
1117
|
+
.append_value(*x),
|
1118
|
+
ParquetValue::Date64(x) => typed_builder
|
1119
|
+
.field_builder::<Date64Builder>(i)
|
1120
|
+
.ok_or_else(|| {
|
1121
|
+
MagnusError::new(
|
1122
|
+
magnus::exception::type_error(),
|
1123
|
+
"Failed to coerce into Date64Builder",
|
1124
|
+
)
|
1125
|
+
})?
|
1126
|
+
.append_value(*x),
|
1127
|
+
ParquetValue::TimestampSecond(x, _tz) => typed_builder
|
1128
|
+
.field_builder::<TimestampSecondBuilder>(i)
|
1129
|
+
.ok_or_else(|| {
|
1130
|
+
MagnusError::new(
|
1131
|
+
magnus::exception::type_error(),
|
1132
|
+
"Failed to coerce into TimestampSecondBuilder",
|
1133
|
+
)
|
1134
|
+
})?
|
1135
|
+
.append_value(*x),
|
1136
|
+
ParquetValue::TimestampMillis(x, _tz) => typed_builder
|
1137
|
+
.field_builder::<TimestampMillisecondBuilder>(i)
|
1138
|
+
.ok_or_else(|| {
|
1139
|
+
MagnusError::new(
|
1140
|
+
magnus::exception::type_error(),
|
1141
|
+
"Failed to coerce into TimestampMillisecondBuilder",
|
1142
|
+
)
|
1143
|
+
})?
|
1144
|
+
.append_value(*x),
|
1145
|
+
ParquetValue::TimestampMicros(x, _tz) => typed_builder
|
1146
|
+
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1147
|
+
.ok_or_else(|| {
|
1148
|
+
MagnusError::new(
|
1149
|
+
magnus::exception::type_error(),
|
1150
|
+
"Failed to coerce into TimestampMicrosecondBuilder",
|
1151
|
+
)
|
1152
|
+
})?
|
1153
|
+
.append_value(*x),
|
1154
|
+
ParquetValue::TimestampNanos(x, _tz) => typed_builder
|
1155
|
+
.field_builder::<TimestampNanosecondBuilder>(i)
|
1156
|
+
.ok_or_else(|| {
|
1157
|
+
MagnusError::new(
|
1158
|
+
magnus::exception::type_error(),
|
1159
|
+
"Failed to coerce into TimestampNanosecondBuilder",
|
1160
|
+
)
|
1161
|
+
})?
|
1162
|
+
.append_value(*x),
|
1163
|
+
ParquetValue::List(items) => {
|
1164
|
+
let list_builder = typed_builder
|
1165
|
+
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1166
|
+
.ok_or_else(|| {
|
1167
|
+
MagnusError::new(
|
1168
|
+
magnus::exception::type_error(),
|
1169
|
+
"Failed to coerce into ListBuilder",
|
1170
|
+
)
|
1171
|
+
})?;
|
1172
|
+
fill_builder(
|
1173
|
+
list_builder.values(),
|
1174
|
+
&struct_field.fields[i].type_,
|
1175
|
+
items,
|
1176
|
+
)?;
|
1177
|
+
list_builder.append(true);
|
1178
|
+
}
|
1179
|
+
ParquetValue::Map(map_data) => {
|
1180
|
+
let maybe_map_builder = typed_builder
|
1181
|
+
.field_builder::<MapBuilder<
|
1182
|
+
Box<dyn ArrayBuilder>,
|
1183
|
+
Box<dyn ArrayBuilder>,
|
1184
|
+
>>(i);
|
1185
|
+
|
1186
|
+
if let Some(map_builder) = maybe_map_builder {
|
1187
|
+
fill_builder(
|
1188
|
+
map_builder,
|
1189
|
+
&struct_field.fields[i].type_,
|
1190
|
+
&[ParquetValue::Map(map_data.clone())],
|
1191
|
+
)?;
|
1192
|
+
map_builder.append(true).map_err(|e| {
|
1193
|
+
MagnusError::new(
|
1194
|
+
magnus::exception::runtime_error(),
|
1195
|
+
format!("Failed to append map: {}", e),
|
1196
|
+
)
|
1197
|
+
})?;
|
1198
|
+
} else {
|
1199
|
+
let child_struct_builder = typed_builder
|
1200
|
+
.field_builder::<StructBuilder>(i)
|
1201
|
+
.ok_or_else(|| {
|
1202
|
+
MagnusError::new(
|
1203
|
+
magnus::exception::type_error(),
|
1204
|
+
"Failed to coerce into StructBuilder",
|
1205
|
+
)
|
1206
|
+
})?;
|
1207
|
+
fill_builder(
|
1208
|
+
child_struct_builder,
|
1209
|
+
&struct_field.fields[i].type_,
|
1210
|
+
&[ParquetValue::Map(map_data.clone())],
|
1211
|
+
)?;
|
1212
|
+
}
|
1213
|
+
}
|
1214
|
+
ParquetValue::Null => match struct_field.fields[i].type_ {
|
1215
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int8) => typed_builder
|
1216
|
+
.field_builder::<Int8Builder>(i)
|
1217
|
+
.ok_or_else(|| {
|
1218
|
+
MagnusError::new(
|
1219
|
+
magnus::exception::type_error(),
|
1220
|
+
"Failed to coerce into Int8Builder",
|
1221
|
+
)
|
1222
|
+
})?
|
1223
|
+
.append_null(),
|
1224
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int16) => typed_builder
|
1225
|
+
.field_builder::<Int16Builder>(i)
|
1226
|
+
.ok_or_else(|| {
|
1227
|
+
MagnusError::new(
|
1228
|
+
magnus::exception::type_error(),
|
1229
|
+
"Failed to coerce into Int16Builder",
|
1230
|
+
)
|
1231
|
+
})?
|
1232
|
+
.append_null(),
|
1233
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int32) => typed_builder
|
1234
|
+
.field_builder::<Int32Builder>(i)
|
1235
|
+
.ok_or_else(|| {
|
1236
|
+
MagnusError::new(
|
1237
|
+
magnus::exception::type_error(),
|
1238
|
+
"Failed to coerce into Int32Builder",
|
1239
|
+
)
|
1240
|
+
})?
|
1241
|
+
.append_null(),
|
1242
|
+
ParquetSchemaType::Primitive(PrimitiveType::Int64) => typed_builder
|
1243
|
+
.field_builder::<Int64Builder>(i)
|
1244
|
+
.ok_or_else(|| {
|
1245
|
+
MagnusError::new(
|
1246
|
+
magnus::exception::type_error(),
|
1247
|
+
"Failed to coerce into Int64Builder",
|
1248
|
+
)
|
1249
|
+
})?
|
1250
|
+
.append_null(),
|
1251
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => typed_builder
|
1252
|
+
.field_builder::<UInt8Builder>(i)
|
1253
|
+
.ok_or_else(|| {
|
1254
|
+
MagnusError::new(
|
1255
|
+
magnus::exception::type_error(),
|
1256
|
+
"Failed to coerce into UInt8Builder",
|
1257
|
+
)
|
1258
|
+
})?
|
1259
|
+
.append_null(),
|
1260
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => typed_builder
|
1261
|
+
.field_builder::<UInt16Builder>(i)
|
1262
|
+
.ok_or_else(|| {
|
1263
|
+
MagnusError::new(
|
1264
|
+
magnus::exception::type_error(),
|
1265
|
+
"Failed to coerce into UInt16Builder",
|
1266
|
+
)
|
1267
|
+
})?
|
1268
|
+
.append_null(),
|
1269
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => typed_builder
|
1270
|
+
.field_builder::<UInt32Builder>(i)
|
1271
|
+
.ok_or_else(|| {
|
1272
|
+
MagnusError::new(
|
1273
|
+
magnus::exception::type_error(),
|
1274
|
+
"Failed to coerce into UInt32Builder",
|
1275
|
+
)
|
1276
|
+
})?
|
1277
|
+
.append_null(),
|
1278
|
+
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => typed_builder
|
1279
|
+
.field_builder::<UInt64Builder>(i)
|
1280
|
+
.ok_or_else(|| {
|
1281
|
+
MagnusError::new(
|
1282
|
+
magnus::exception::type_error(),
|
1283
|
+
"Failed to coerce into UInt64Builder",
|
1284
|
+
)
|
1285
|
+
})?
|
1286
|
+
.append_null(),
|
1287
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float32) => typed_builder
|
1288
|
+
.field_builder::<Float32Builder>(i)
|
1289
|
+
.ok_or_else(|| {
|
1290
|
+
MagnusError::new(
|
1291
|
+
magnus::exception::type_error(),
|
1292
|
+
"Failed to coerce into Float32Builder",
|
1293
|
+
)
|
1294
|
+
})?
|
1295
|
+
.append_null(),
|
1296
|
+
ParquetSchemaType::Primitive(PrimitiveType::Float64) => typed_builder
|
1297
|
+
.field_builder::<Float64Builder>(i)
|
1298
|
+
.ok_or_else(|| {
|
1299
|
+
MagnusError::new(
|
1300
|
+
magnus::exception::type_error(),
|
1301
|
+
"Failed to coerce into Float64Builder",
|
1302
|
+
)
|
1303
|
+
})?
|
1304
|
+
.append_null(),
|
1305
|
+
ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
|
1306
|
+
.field_builder::<StringBuilder>(i)
|
1307
|
+
.ok_or_else(|| {
|
1308
|
+
MagnusError::new(
|
1309
|
+
magnus::exception::type_error(),
|
1310
|
+
"Failed to coerce into StringBuilder",
|
1311
|
+
)
|
1312
|
+
})?
|
1313
|
+
.append_null(),
|
1314
|
+
ParquetSchemaType::Primitive(PrimitiveType::Binary) => typed_builder
|
1315
|
+
.field_builder::<BinaryBuilder>(i)
|
1316
|
+
.ok_or_else(|| {
|
1317
|
+
MagnusError::new(
|
1318
|
+
magnus::exception::type_error(),
|
1319
|
+
"Failed to coerce into BinaryBuilder",
|
1320
|
+
)
|
1321
|
+
})?
|
1322
|
+
.append_null(),
|
1323
|
+
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => typed_builder
|
1324
|
+
.field_builder::<BooleanBuilder>(i)
|
1325
|
+
.ok_or_else(|| {
|
1326
|
+
MagnusError::new(
|
1327
|
+
magnus::exception::type_error(),
|
1328
|
+
"Failed to coerce into BooleanBuilder",
|
1329
|
+
)
|
1330
|
+
})?
|
1331
|
+
.append_null(),
|
1332
|
+
ParquetSchemaType::Primitive(PrimitiveType::Date32) => typed_builder
|
1333
|
+
.field_builder::<Date32Builder>(i)
|
1334
|
+
.ok_or_else(|| {
|
1335
|
+
MagnusError::new(
|
1336
|
+
magnus::exception::type_error(),
|
1337
|
+
"Failed to coerce into Date32Builder",
|
1338
|
+
)
|
1339
|
+
})?
|
1340
|
+
.append_null(),
|
1341
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => typed_builder
|
1342
|
+
.field_builder::<TimestampMillisecondBuilder>(i)
|
1343
|
+
.ok_or_else(|| {
|
1344
|
+
MagnusError::new(
|
1345
|
+
magnus::exception::type_error(),
|
1346
|
+
"Failed to coerce into TimestampMillisecondBuilder",
|
1347
|
+
)
|
1348
|
+
})?
|
1349
|
+
.append_null(),
|
1350
|
+
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => typed_builder
|
1351
|
+
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1352
|
+
.ok_or_else(|| {
|
1353
|
+
MagnusError::new(
|
1354
|
+
magnus::exception::type_error(),
|
1355
|
+
"Failed to coerce into TimestampMicrosecondBuilder",
|
1356
|
+
)
|
1357
|
+
})?
|
1358
|
+
.append_null(),
|
1359
|
+
ParquetSchemaType::List(_) => typed_builder
|
1360
|
+
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1361
|
+
.ok_or_else(|| {
|
1362
|
+
MagnusError::new(
|
1363
|
+
magnus::exception::type_error(),
|
1364
|
+
"Failed to coerce into ListBuilder",
|
1365
|
+
)
|
1366
|
+
})?
|
1367
|
+
.append(false),
|
1368
|
+
ParquetSchemaType::Map(_) => {
|
1369
|
+
typed_builder
|
1370
|
+
.field_builder::<MapBuilder<
|
1371
|
+
Box<dyn ArrayBuilder>,
|
1372
|
+
Box<dyn ArrayBuilder>,
|
1373
|
+
>>(i)
|
1374
|
+
.ok_or_else(|| {
|
1375
|
+
MagnusError::new(
|
1376
|
+
magnus::exception::type_error(),
|
1377
|
+
"Failed to coerce into MapBuilder",
|
1378
|
+
)
|
1379
|
+
})?
|
1380
|
+
.append(false)
|
1381
|
+
.map_err(|e| {
|
1382
|
+
MagnusError::new(
|
1383
|
+
magnus::exception::runtime_error(),
|
1384
|
+
format!("Failed to append map: {}", e),
|
1385
|
+
)
|
1386
|
+
})?;
|
1387
|
+
}
|
1388
|
+
ParquetSchemaType::Struct(_) => typed_builder
|
1389
|
+
.field_builder::<StructBuilder>(i)
|
1390
|
+
.ok_or_else(|| {
|
1391
|
+
MagnusError::new(
|
1392
|
+
magnus::exception::type_error(),
|
1393
|
+
"Failed to coerce into StructBuilder",
|
1394
|
+
)
|
1395
|
+
})?
|
1396
|
+
.append_null(),
|
1397
|
+
},
|
789
1398
|
}
|
1399
|
+
} else {
|
1400
|
+
return Err(MagnusError::new(
|
1401
|
+
magnus::exception::type_error(),
|
1402
|
+
format!("Field {} not found in map", i),
|
1403
|
+
));
|
790
1404
|
}
|
791
1405
|
}
|
1406
|
+
typed_builder.append(true);
|
792
1407
|
}
|
793
|
-
|
794
|
-
_ => {
|
1408
|
+
other => {
|
795
1409
|
return Err(MagnusError::new(
|
796
1410
|
magnus::exception::type_error(),
|
797
|
-
format!("Expected
|
798
|
-
))
|
1411
|
+
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
1412
|
+
));
|
799
1413
|
}
|
800
1414
|
}
|
801
1415
|
}
|
802
|
-
Ok(
|
803
|
-
}
|
804
|
-
ParquetSchemaType::Map(_map_field) => {
|
805
|
-
unimplemented!("Writing maps is not yet supported")
|
1416
|
+
Ok(())
|
806
1417
|
}
|
807
1418
|
}
|
808
1419
|
}
|
809
1420
|
|
1421
|
+
/// Builds one Arrow array from `values` for the schema `type_`: creates a builder via `create_arrow_builder_for_type`, fills it via `fill_builder`, and returns the finished array wrapped in an `Arc`.
|
1422
|
+
/// Intended as the single entry point for any nesting level of `type_`; an error from either helper is propagated to the caller with `?`.
|
1423
|
+
pub fn convert_parquet_values_to_arrow(
|
1424
|
+
values: Vec<ParquetValue>,
|
1425
|
+
type_: &ParquetSchemaType,
|
1426
|
+
) -> Result<Arc<dyn Array>, ParquetGemError> {
|
1427
|
+
// Request a capacity of at least 1 so the builder is never created with capacity 0, even when `values` is empty.
|
1428
|
+
let capacity = if values.is_empty() { 1 } else { values.len() };
|
1429
|
+
let mut builder = create_arrow_builder_for_type(type_, Some(capacity))?;
|
1430
|
+
|
1431
|
+
fill_builder(&mut builder, type_, &values)?;
|
1432
|
+
|
1433
|
+
// Finalize the builder into an array; it is wrapped in an `Arc` on return.
|
1434
|
+
let array = builder.finish();
|
1435
|
+
|
1436
|
+
Ok(Arc::new(array))
|
1437
|
+
|
+
}
|
1438
|
+
|
810
1439
|
pub fn convert_ruby_array_to_arrow(
|
1440
|
+
ruby: &Ruby,
|
811
1441
|
values: RArray,
|
812
1442
|
type_: &ParquetSchemaType,
|
813
|
-
) -> Result<Arc<dyn Array>,
|
1443
|
+
) -> Result<Arc<dyn Array>, ParquetGemError> {
|
814
1444
|
let mut parquet_values = Vec::with_capacity(values.len());
|
815
1445
|
for value in values {
|
816
1446
|
if value.is_nil() {
|
817
1447
|
parquet_values.push(ParquetValue::Null);
|
818
1448
|
continue;
|
819
1449
|
}
|
820
|
-
let parquet_value = ParquetValue::from_value(value, type_)?;
|
1450
|
+
let parquet_value = ParquetValue::from_value(ruby, value, type_, None)?;
|
821
1451
|
parquet_values.push(parquet_value);
|
822
1452
|
}
|
823
1453
|
convert_parquet_values_to_arrow(parquet_values, type_)
|