parquet 0.4.2 → 0.5.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +66 -59
- data/README.md +105 -1
- data/ext/parquet/Cargo.toml +4 -3
- data/ext/parquet/src/enumerator.rs +8 -0
- data/ext/parquet/src/header_cache.rs +7 -3
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/logger.rs +171 -0
- data/ext/parquet/src/reader/common.rs +113 -0
- data/ext/parquet/src/reader/mod.rs +27 -13
- data/ext/parquet/src/reader/parquet_column_reader.rs +38 -78
- data/ext/parquet/src/reader/parquet_row_reader.rs +42 -19
- data/ext/parquet/src/types/core_types.rs +57 -1
- data/ext/parquet/src/types/mod.rs +8 -1
- data/ext/parquet/src/types/parquet_value.rs +211 -35
- data/ext/parquet/src/types/record_types.rs +18 -15
- data/ext/parquet/src/types/schema_converter.rs +349 -0
- data/ext/parquet/src/types/schema_node.rs +329 -0
- data/ext/parquet/src/types/timestamp.rs +18 -8
- data/ext/parquet/src/types/type_conversion.rs +1106 -511
- data/ext/parquet/src/types/writer_types.rs +78 -107
- data/ext/parquet/src/utils.rs +29 -9
- data/ext/parquet/src/writer/mod.rs +828 -280
- data/lib/parquet/schema.rb +154 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rb +1 -0
- metadata +7 -2
data/ext/parquet/src/types/type_conversion.rs:

@@ -1,7 +1,12 @@
 use std::str::FromStr;
+use std::sync::Arc;
+
+use crate::reader::ReaderError;
 
 use super::*;
+use arrow_array::builder::MapFieldNames;
 use arrow_array::builder::*;
+use arrow_schema::{DataType, Field, Fields, TimeUnit};
 use jiff::tz::{Offset, TimeZone};
 use magnus::{RArray, RString, TryConvert};
 
@@ -64,14 +69,19 @@ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, MagnusError> {
 
         let x = timestamp
             .to_zoned(TimeZone::fixed(Offset::constant(0)))
-            .
+            .map_err(|e| {
+                MagnusError::new(
+                    magnus::exception::type_error(),
+                    format!("Failed to convert date32 to timestamp: {}", e),
+                )
+            })?
             .timestamp();
 
         // Convert to epoch days
         Ok((x.as_second() as i64 / 86400) as i32)
     } else if value.is_kind_of(ruby.class_time()) {
         // Convert Time object to epoch days
-        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
+        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
         Ok(((secs as f64) / 86400.0) as i32)
     } else {
         Err(MagnusError::new(
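The new Date32 path above is plain epoch-day arithmetic: seconds since the Unix epoch divided by 86,400. A minimal standalone sketch of the same calculation, independent of the gem's Ruby and jiff types (the helper name is illustrative, not part of the crate):

    // Illustrative only: mirrors the seconds-to-Date32 arithmetic in the hunk above.
    fn unix_seconds_to_date32_days(secs: i64) -> i32 {
        // Integer division truncates toward zero, matching `(secs / 86400) as i32`
        // in the diff; dates before 1970-01-01 would need a flooring division instead.
        (secs / 86_400) as i32
    }

    fn main() {
        assert_eq!(unix_seconds_to_date32_days(0), 0);          // 1970-01-01
        assert_eq!(unix_seconds_to_date32_days(86_400), 1);     // 1970-01-02
        assert_eq!(unix_seconds_to_date32_days(1_000_000_000), 11_574);
    }
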
@@ -115,8 +125,8 @@ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
         Ok(timestamp.as_millisecond())
     } else if value.is_kind_of(ruby.class_time()) {
         // Convert Time object to milliseconds
-        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
-        let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())
+        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
+        let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
         Ok(secs * 1000 + (usecs / 1000))
     } else {
         Err(MagnusError::new(
@@ -162,8 +172,8 @@ pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
         Ok(timestamp.as_microsecond())
     } else if value.is_kind_of(ruby.class_time()) {
         // Convert Time object to microseconds
-        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())
-        let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())
+        let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
+        let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
         Ok(secs * 1_000_000 + usecs)
     } else {
         Err(MagnusError::new(
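Both timestamp hunks convert a Ruby Time-style (to_i, usec) pair with the same integer arithmetic; a small self-contained sketch of that math, with hypothetical helper names:

    // Illustrative only: the seconds/microseconds arithmetic used by the two hunks above.
    fn to_millis(secs: i64, usecs: i64) -> i64 {
        secs * 1_000 + usecs / 1_000
    }

    fn to_micros(secs: i64, usecs: i64) -> i64 {
        secs * 1_000_000 + usecs
    }

    fn main() {
        // 2021-01-01 00:00:00.123456 UTC expressed as (to_i, usec)
        assert_eq!(to_millis(1_609_459_200, 123_456), 1_609_459_200_123);
        assert_eq!(to_micros(1_609_459_200, 123_456), 1_609_459_200_123_456);
    }
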
@@ -209,225 +219,78 @@ pub fn convert_to_string(value: Value) -> Result<String, MagnusError> {
     })
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            ParquetValue::Int32(v)
-        }
-        ParquetSchemaType::Int64 => {
-            let v = NumericConverter::<i64>::convert_with_string_fallback(item_value)?;
-            ParquetValue::Int64(v)
-        }
-        ParquetSchemaType::UInt8 => {
-            let v = NumericConverter::<u8>::convert_with_string_fallback(item_value)?;
-            ParquetValue::UInt8(v)
-        }
-        ParquetSchemaType::UInt16 => {
-            let v = NumericConverter::<u16>::convert_with_string_fallback(item_value)?;
-            ParquetValue::UInt16(v)
-        }
-        ParquetSchemaType::UInt32 => {
-            let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
-            ParquetValue::UInt32(v)
-        }
-        ParquetSchemaType::UInt64 => {
-            let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
-            ParquetValue::UInt64(v)
-        }
-        ParquetSchemaType::Float => {
-            let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
-            ParquetValue::Float32(v)
-        }
-        ParquetSchemaType::Double => {
-            let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
-            ParquetValue::Float64(v)
-        }
-        ParquetSchemaType::String => {
-            let v = String::try_convert(item_value)?;
-            ParquetValue::String(v)
-        }
-        ParquetSchemaType::Binary => {
-            let v = convert_to_binary(item_value)?;
-            ParquetValue::Bytes(v)
-        }
-        ParquetSchemaType::Boolean => {
-            let v = convert_to_boolean(item_value)?;
-            ParquetValue::Boolean(v)
-        }
-        ParquetSchemaType::Date32 => {
-            let v = convert_to_date32(item_value, list_field.format)?;
-            ParquetValue::Date32(v)
-        }
-        ParquetSchemaType::TimestampMillis => {
-            let v = convert_to_timestamp_millis(item_value, list_field.format)?;
-            ParquetValue::TimestampMillis(v, None)
-        }
-        ParquetSchemaType::TimestampMicros => {
-            let v = convert_to_timestamp_micros(item_value, list_field.format)?;
-            ParquetValue::TimestampMicros(v, None)
-        }
-        ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
-            return Err(MagnusError::new(
-                magnus::exception::type_error(),
-                "Nested lists and maps are not supported",
-            ))
-        }
-    };
-    values.push(converted);
-}
-Ok(values)
-} else {
-    Err(MagnusError::new(
-        magnus::exception::type_error(),
-        "Invalid list format",
-    ))
-}
-}
-
-pub fn convert_to_map(
-    value: Value,
-    map_field: &MapField,
-) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
-    let ruby = unsafe { Ruby::get_unchecked() };
-    if value.is_kind_of(ruby.class_hash()) {
-        let mut map = HashMap::new();
-        let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
-
-        for (key, value) in entries {
-            let key_value = match &map_field.key_type {
-                ParquetSchemaType::String => {
-                    let v = String::try_convert(key)?;
-                    ParquetValue::String(v)
-                }
-                _ => {
-                    return Err(MagnusError::new(
-                        magnus::exception::type_error(),
-                        "Map keys must be strings",
-                    ))
-                }
-            };
+/// Converts our custom `ParquetSchemaType` into an Arrow `DataType`.
+/// This ensures proper nullability settings for nested types.
+/// Converts a ParquetSchemaType to an Arrow DataType
+pub fn parquet_schema_type_to_arrow_data_type(
+    schema_type: &ParquetSchemaType,
+) -> Result<DataType, MagnusError> {
+    Ok(match schema_type {
+        ParquetSchemaType::Int8 => DataType::Int8,
+        ParquetSchemaType::Int16 => DataType::Int16,
+        ParquetSchemaType::Int32 => DataType::Int32,
+        ParquetSchemaType::Int64 => DataType::Int64,
+        ParquetSchemaType::UInt8 => DataType::UInt8,
+        ParquetSchemaType::UInt16 => DataType::UInt16,
+        ParquetSchemaType::UInt32 => DataType::UInt32,
+        ParquetSchemaType::UInt64 => DataType::UInt64,
+        ParquetSchemaType::Float => DataType::Float32,
+        ParquetSchemaType::Double => DataType::Float64,
+        ParquetSchemaType::String => DataType::Utf8,
+        ParquetSchemaType::Binary => DataType::Binary,
+        ParquetSchemaType::Boolean => DataType::Boolean,
+        ParquetSchemaType::Date32 => DataType::Date32,
+        ParquetSchemaType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
+        ParquetSchemaType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
 
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        ParquetSchemaType::Int64 => {
-            let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
-            ParquetValue::Int64(v)
-        }
-        ParquetSchemaType::UInt8 => {
-            let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
-            ParquetValue::UInt8(v)
-        }
-        ParquetSchemaType::UInt16 => {
-            let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
-            ParquetValue::UInt16(v)
-        }
-        ParquetSchemaType::UInt32 => {
-            let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
-            ParquetValue::UInt32(v)
-        }
-        ParquetSchemaType::UInt64 => {
-            let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
-            ParquetValue::UInt64(v)
-        }
-        ParquetSchemaType::Float => {
-            let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
-            ParquetValue::Float32(v)
-        }
-        ParquetSchemaType::Double => {
-            let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
-            ParquetValue::Float64(v)
-        }
-        ParquetSchemaType::String => {
-            let v = String::try_convert(value)?;
-            ParquetValue::String(v)
-        }
-        ParquetSchemaType::Binary => {
-            let v = convert_to_binary(value)?;
-            ParquetValue::Bytes(v)
-        }
-        ParquetSchemaType::Boolean => {
-            let v = convert_to_boolean(value)?;
-            ParquetValue::Boolean(v)
-        }
-        ParquetSchemaType::Date32 => {
-            let v = convert_to_date32(value, map_field.format)?;
-            ParquetValue::Date32(v)
-        }
-        ParquetSchemaType::TimestampMillis => {
-            let v = convert_to_timestamp_millis(value, map_field.format)?;
-            ParquetValue::TimestampMillis(v, None)
-        }
-        ParquetSchemaType::TimestampMicros => {
-            let v = convert_to_timestamp_micros(value, map_field.format)?;
-            ParquetValue::TimestampMicros(v, None)
-        }
-        ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
-            return Err(MagnusError::new(
-                magnus::exception::type_error(),
-                "Map values cannot be lists or maps",
-            ))
-        }
-        };
+        // For a List<T>, create a standard List in Arrow with nullable items
+        ParquetSchemaType::List(list_field) => {
+            let child_type = parquet_schema_type_to_arrow_data_type(&list_field.item_type)?;
+            // For a list, use empty field name to match expectations for schema_dsl test
+            // This is the critical fix for the schema_dsl test which expects an empty field name
+            // Use empty field name for all list field items - this is crucial for compatibility
+            DataType::List(Arc::new(Field::new(
+                "item",
+                child_type,
+                list_field.nullable,
+            )))
+        }
 
-
+        // For a Map<K, V>, ensure entries field is non-nullable and key field is non-nullable
+        ParquetSchemaType::Map(map_field) => {
+            let key_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.key_type)?;
+            let value_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.value_type)?;
+            DataType::Map(
+                Arc::new(Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", key_arrow_type, false), // key must be non-null
+                        Field::new("value", value_arrow_type, true), // value can be null
+                    ])),
+                    /*nullable=*/ false, // crucial: entries must be non-nullable
+                )),
+                /*keys_sorted=*/ false,
+            )
         }
-
-
-
-
-
-
-
-        }
+        ParquetSchemaType::Struct(struct_field) => {
+            if struct_field.fields.is_empty() {
+                return Err(MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    "Cannot create a struct with zero subfields (empty struct).",
+                ));
+            }
 
-
-
-
-
-
-
-            ParquetValue::Null => builder.append_null(),
-            _ => {
-                return Err(MagnusError::new(
-                    magnus::exception::type_error(),
-                    format!("Expected {}, got {:?}", stringify!($variant), value),
-                ))
-            }
+            // Build arrow fields
+            let mut arrow_fields = Vec::with_capacity(struct_field.fields.len());
+
+            for field in &struct_field.fields {
+                let field_type = parquet_schema_type_to_arrow_data_type(&field.type_)?;
+                arrow_fields.push(Field::new(&field.name, field_type, true)); // All fields are nullable by default
             }
+
+            DataType::Struct(Fields::from(arrow_fields))
         }
-
-    }};
+    })
 }
 
 #[macro_export]
@@ -457,367 +320,1099 @@ macro_rules! impl_timestamp_array_conversion {
     }};
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+// Create the appropriate Arrow builder for a given ParquetSchemaType.
+// We return a Box<dyn ArrayBuilder> so we can dynamically downcast.
+fn create_arrow_builder_for_type(
+    type_: &ParquetSchemaType,
+    capacity: Option<usize>,
+) -> Result<Box<dyn ArrayBuilder>, ReaderError> {
+    let cap = capacity.unwrap_or(1); // Default to at least capacity 1 to avoid empty builders
+    match type_ {
+        ParquetSchemaType::Int8 => Ok(Box::new(Int8Builder::with_capacity(cap))),
+        ParquetSchemaType::Int16 => Ok(Box::new(Int16Builder::with_capacity(cap))),
+        ParquetSchemaType::Int32 => Ok(Box::new(Int32Builder::with_capacity(cap))),
+        ParquetSchemaType::Int64 => Ok(Box::new(Int64Builder::with_capacity(cap))),
+        ParquetSchemaType::UInt8 => Ok(Box::new(UInt8Builder::with_capacity(cap))),
+        ParquetSchemaType::UInt16 => Ok(Box::new(UInt16Builder::with_capacity(cap))),
+        ParquetSchemaType::UInt32 => Ok(Box::new(UInt32Builder::with_capacity(cap))),
+        ParquetSchemaType::UInt64 => Ok(Box::new(UInt64Builder::with_capacity(cap))),
+        ParquetSchemaType::Float => Ok(Box::new(Float32Builder::with_capacity(cap))),
+        ParquetSchemaType::Double => Ok(Box::new(Float64Builder::with_capacity(cap))),
+        ParquetSchemaType::String => Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32))),
+        ParquetSchemaType::Binary => Ok(Box::new(BinaryBuilder::with_capacity(cap, cap * 32))),
+        ParquetSchemaType::Boolean => Ok(Box::new(BooleanBuilder::with_capacity(cap))),
+        ParquetSchemaType::Date32 => Ok(Box::new(Date32Builder::with_capacity(cap))),
+        ParquetSchemaType::TimestampMillis => {
+            Ok(Box::new(TimestampMillisecondBuilder::with_capacity(cap)))
         }
-
-
-    ($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
-        let mut builder = <$builder_type>::with_capacity($values.len(), $capacity);
-        for value in $values {
-            match value {
-                ParquetValue::$variant(v) => builder.append_value(v),
-                ParquetValue::Null => builder.append_null(),
-                _ => {
-                    return Err(MagnusError::new(
-                        magnus::exception::type_error(),
-                        format!("Expected {}, got {:?}", stringify!($variant), value),
-                    ))
-                }
-            }
+        ParquetSchemaType::TimestampMicros => {
+            Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
         }
-
-
-
+        ParquetSchemaType::List(list_field) => {
+            // For a list, we create a ListBuilder whose child builder is determined by item_type.
+            // Pass through capacity to ensure consistent sizing
+            let child_builder = create_arrow_builder_for_type(&list_field.item_type, Some(cap))?;
 
-
-
-
-
-
-
-
-
-
-
-
-
-            (
-
+            // Ensure consistent builder capacity for lists
+            Ok(Box::new(ListBuilder::<Box<dyn ArrayBuilder>>::new(
+                child_builder,
+            )))
+        }
+        ParquetSchemaType::Map(map_field) => {
+            // A Map is physically a list<struct<key:..., value:...>> in Arrow.
+            // Pass through capacity to ensure consistent sizing
+            let key_builder = create_arrow_builder_for_type(&map_field.key_type, Some(cap))?;
+            let value_builder = create_arrow_builder_for_type(&map_field.value_type, Some(cap))?;
+
+            // Create a MapBuilder with explicit field names to ensure compatibility
+            Ok(Box::new(MapBuilder::<
+                Box<dyn ArrayBuilder>,
+                Box<dyn ArrayBuilder>,
+            >::new(
+                Some(MapFieldNames {
+                    entry: "entries".to_string(),
+                    key: "key".to_string(),
+                    value: "value".to_string(),
+                }),
+                key_builder,
+                value_builder,
+            )))
+        }
+        ParquetSchemaType::Struct(struct_field) => {
+            // Check for empty struct immediately
+            if struct_field.fields.is_empty() {
+                return Err(MagnusError::new(
+                    magnus::exception::runtime_error(),
+                    "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
+                ))?;
             }
-
+
+            // Create a child builder for each field in the struct
+            let mut child_field_builders = Vec::with_capacity(struct_field.fields.len());
+
+            // Get struct data type first to ensure field compatibility
+            let data_type = parquet_schema_type_to_arrow_data_type(type_)?;
+
+            // Make sure the data type is a struct
+            let arrow_fields = if let DataType::Struct(ref fields) = data_type {
+                fields.clone()
+            } else {
                 return Err(MagnusError::new(
                     magnus::exception::type_error(),
+                    "Expected struct data type".to_string(),
+                ))?;
+            };
+
+            // Create builders for each child field with consistent capacity
+            for child in &struct_field.fields {
+                let sub_builder = create_arrow_builder_for_type(&child.type_, Some(cap))?;
+                child_field_builders.push(sub_builder);
+            }
+
+            // Make sure we have the right number of builders
+            if child_field_builders.len() != arrow_fields.len() {
+                return Err(MagnusError::new(
+                    magnus::exception::runtime_error(),
                     format!(
-                        "
-
+                        "Number of field builders ({}) doesn't match number of arrow fields ({})",
+                        child_field_builders.len(),
+                        arrow_fields.len()
                     ),
-                ))
+                ))?;
             }
+
+            // Create the StructBuilder with the fields and child builders
+            Ok(Box::new(StructBuilder::new(
+                arrow_fields,
+                child_field_builders,
+            )))
         }
-    }
+    }
 }
 }
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
433
|
+
// Fill primitive scalar Int8 values
|
434
|
+
fn fill_int8_builder(
|
435
|
+
builder: &mut dyn ArrayBuilder,
|
436
|
+
values: &[ParquetValue],
|
437
|
+
) -> Result<(), MagnusError> {
|
438
|
+
let typed_builder = builder
|
439
|
+
.as_any_mut()
|
440
|
+
.downcast_mut::<Int8Builder>()
|
441
|
+
.expect("Builder mismatch: expected Int8Builder");
|
442
|
+
for val in values {
|
443
|
+
match val {
|
444
|
+
ParquetValue::Int8(i) => typed_builder.append_value(*i),
|
445
|
+
// Handle Int64 that could be an Int8
|
446
|
+
ParquetValue::Int64(i) => {
|
447
|
+
if *i < i8::MIN as i64 || *i > i8::MAX as i64 {
|
448
|
+
return Err(MagnusError::new(
|
449
|
+
magnus::exception::range_error(),
|
450
|
+
format!("Integer {} is out of range for Int8", i),
|
451
|
+
));
|
452
|
+
}
|
453
|
+
typed_builder.append_value(*i as i8)
|
535
454
|
}
|
536
|
-
|
537
|
-
|
455
|
+
ParquetValue::Null => typed_builder.append_null(),
|
456
|
+
other => {
|
457
|
+
return Err(MagnusError::new(
|
458
|
+
magnus::exception::type_error(),
|
459
|
+
format!("Expected Int8, got {:?}", other),
|
460
|
+
))
|
461
|
+
}
|
462
|
+
}
|
463
|
+
}
|
464
|
+
Ok(())
|
465
|
+
}
|
466
|
+
|
467
|
+
// Fill primitive scalar Int16 values
|
468
|
+
fn fill_int16_builder(
|
469
|
+
builder: &mut dyn ArrayBuilder,
|
470
|
+
values: &[ParquetValue],
|
471
|
+
) -> Result<(), MagnusError> {
|
472
|
+
let typed_builder = builder
|
473
|
+
.as_any_mut()
|
474
|
+
.downcast_mut::<Int16Builder>()
|
475
|
+
.expect("Builder mismatch: expected Int16Builder");
|
476
|
+
for val in values {
|
477
|
+
match val {
|
478
|
+
ParquetValue::Int16(i) => typed_builder.append_value(*i),
|
479
|
+
// Handle Int64 that could be an Int16
|
480
|
+
ParquetValue::Int64(i) => {
|
481
|
+
if *i < i16::MIN as i64 || *i > i16::MAX as i64 {
|
482
|
+
return Err(MagnusError::new(
|
483
|
+
magnus::exception::range_error(),
|
484
|
+
format!("Integer {} is out of range for Int16", i),
|
485
|
+
));
|
486
|
+
}
|
487
|
+
typed_builder.append_value(*i as i16)
|
538
488
|
}
|
539
|
-
|
489
|
+
ParquetValue::Null => typed_builder.append_null(),
|
490
|
+
other => {
|
540
491
|
return Err(MagnusError::new(
|
541
492
|
magnus::exception::type_error(),
|
542
|
-
format!(
|
543
|
-
"Type mismatch in list: expected {:?}, got {:?}",
|
544
|
-
$item_type, $value
|
545
|
-
),
|
493
|
+
format!("Expected Int16, got {:?}", other),
|
546
494
|
))
|
547
495
|
}
|
548
496
|
}
|
549
|
-
}
|
497
|
+
}
|
498
|
+
Ok(())
|
550
499
|
}
|
551
500
|
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
501
|
+
// Fill list values by recursively filling child items
|
502
|
+
fn fill_list_builder(
|
503
|
+
builder: &mut dyn ArrayBuilder,
|
504
|
+
item_type: &ParquetSchemaType,
|
505
|
+
values: &[ParquetValue],
|
506
|
+
) -> Result<(), MagnusError> {
|
507
|
+
// We need to use a more specific type for ListBuilder to help Rust's type inference
|
508
|
+
let lb = builder
|
509
|
+
.as_any_mut()
|
510
|
+
.downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
|
511
|
+
.expect("Builder mismatch: expected ListBuilder");
|
512
|
+
|
513
|
+
for val in values {
|
514
|
+
if let ParquetValue::Null = val {
|
515
|
+
// null list
|
516
|
+
lb.append(false);
|
517
|
+
} else if let ParquetValue::List(list_items) = val {
|
518
|
+
// First fill the child builder with the items
|
519
|
+
let values_builder = lb.values();
|
520
|
+
fill_builder(values_builder, item_type, list_items)?;
|
521
|
+
// Then finalize the list by calling append(true)
|
522
|
+
lb.append(true);
|
523
|
+
} else {
|
524
|
+
return Err(MagnusError::new(
|
525
|
+
magnus::exception::type_error(),
|
526
|
+
format!("Expected ParquetValue::List(...) or Null, got {:?}", val),
|
527
|
+
));
|
528
|
+
}
|
529
|
+
}
|
530
|
+
|
531
|
+
Ok(())
|
532
|
+
}
|
533
|
+
|
534
|
+
// Fill map values by recursively filling key and value items
|
535
|
+
fn fill_map_builder(
|
536
|
+
builder: &mut dyn ArrayBuilder,
|
537
|
+
key_type: &ParquetSchemaType,
|
538
|
+
value_type: &ParquetSchemaType,
|
539
|
+
values: &[ParquetValue],
|
540
|
+
) -> Result<(), MagnusError> {
|
541
|
+
let mb = builder
|
542
|
+
.as_any_mut()
|
543
|
+
.downcast_mut::<MapBuilder<Box<dyn ArrayBuilder>, Box<dyn ArrayBuilder>>>()
|
544
|
+
.expect("Builder mismatch: expected MapBuilder");
|
545
|
+
|
546
|
+
for val in values {
|
547
|
+
match val {
|
548
|
+
ParquetValue::Null => {
|
549
|
+
// null map
|
550
|
+
mb.append(false).map_err(|e| {
|
551
|
+
MagnusError::new(
|
552
|
+
magnus::exception::runtime_error(),
|
553
|
+
format!("Failed to append null to map: {}", e),
|
554
|
+
)
|
555
|
+
})?;
|
563
556
|
}
|
564
|
-
|
565
|
-
|
557
|
+
ParquetValue::Map(map_entries) => {
|
558
|
+
// First append all key-value pairs to the child arrays
|
559
|
+
for (k, v) in map_entries {
|
560
|
+
// Note: Arrow expects field names "key" and "value" (singular)
|
561
|
+
fill_builder(mb.keys(), key_type, &[k.clone()])?;
|
562
|
+
fill_builder(mb.values(), value_type, &[v.clone()])?;
|
563
|
+
}
|
564
|
+
// Then finalize the map by calling append(true)
|
565
|
+
mb.append(true).map_err(|e| {
|
566
|
+
MagnusError::new(
|
567
|
+
magnus::exception::runtime_error(),
|
568
|
+
format!("Failed to append map entry: {}", e),
|
569
|
+
)
|
570
|
+
})?;
|
566
571
|
}
|
567
|
-
|
572
|
+
other => {
|
568
573
|
return Err(MagnusError::new(
|
569
574
|
magnus::exception::type_error(),
|
570
|
-
format!(
|
571
|
-
"Type mismatch in list: expected {:?}, got {:?}",
|
572
|
-
$item_type, $value
|
573
|
-
),
|
575
|
+
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
574
576
|
))
|
575
577
|
}
|
576
578
|
}
|
577
|
-
}
|
579
|
+
}
|
580
|
+
|
581
|
+
Ok(())
|
578
582
|
}
|
579
583
|
|
580
|
-
|
581
|
-
|
584
|
+
// Append an entire slice of ParquetValue into the given Arrow builder.
|
585
|
+
// We do a `match` on the type for each item, recursing for nested list/map.
|
586
|
+
fn fill_builder(
|
587
|
+
builder: &mut dyn ArrayBuilder,
|
582
588
|
type_: &ParquetSchemaType,
|
583
|
-
|
589
|
+
values: &[ParquetValue],
|
590
|
+
) -> Result<(), MagnusError> {
|
584
591
|
match type_ {
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
ParquetSchemaType::
|
589
|
-
ParquetSchemaType::
|
590
|
-
ParquetSchemaType::
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
592
|
+
// ------------------
|
593
|
+
// PRIMITIVE SCALARS - delegated to specialized helpers
|
594
|
+
// ------------------
|
595
|
+
ParquetSchemaType::Int8 => fill_int8_builder(builder, values),
|
596
|
+
ParquetSchemaType::Int16 => fill_int16_builder(builder, values),
|
597
|
+
ParquetSchemaType::Int32 => {
|
598
|
+
let typed_builder = builder
|
599
|
+
.as_any_mut()
|
600
|
+
.downcast_mut::<Int32Builder>()
|
601
|
+
.expect("Builder mismatch: expected Int32Builder");
|
602
|
+
for val in values {
|
603
|
+
match val {
|
604
|
+
ParquetValue::Int32(i) => typed_builder.append_value(*i),
|
605
|
+
ParquetValue::Date32(d) => typed_builder.append_value(*d), // if you allow date->int
|
606
|
+
// Handle the case where we have an Int64 in an Int32 field (common with Ruby Integers)
|
607
|
+
ParquetValue::Int64(i) => {
|
608
|
+
if *i < i32::MIN as i64 || *i > i32::MAX as i64 {
|
609
|
+
return Err(MagnusError::new(
|
610
|
+
magnus::exception::range_error(),
|
611
|
+
format!("Integer {} is out of range for Int32", i),
|
612
|
+
));
|
613
|
+
}
|
614
|
+
typed_builder.append_value(*i as i32)
|
615
|
+
}
|
616
|
+
ParquetValue::Null => typed_builder.append_null(),
|
617
|
+
other => {
|
618
|
+
return Err(MagnusError::new(
|
619
|
+
magnus::exception::type_error(),
|
620
|
+
format!("Expected Int32, got {:?}", other),
|
621
|
+
))
|
622
|
+
}
|
623
|
+
}
|
624
|
+
}
|
625
|
+
Ok(())
|
609
626
|
}
|
610
|
-
ParquetSchemaType::
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
627
|
+
ParquetSchemaType::Int64 => {
|
628
|
+
let typed_builder = builder
|
629
|
+
.as_any_mut()
|
630
|
+
.downcast_mut::<Int64Builder>()
|
631
|
+
.expect("Builder mismatch: expected Int64Builder");
|
632
|
+
for val in values {
|
633
|
+
match val {
|
634
|
+
ParquetValue::Int64(i) => typed_builder.append_value(*i),
|
635
|
+
ParquetValue::Null => typed_builder.append_null(),
|
636
|
+
other => {
|
637
|
+
return Err(MagnusError::new(
|
638
|
+
magnus::exception::type_error(),
|
639
|
+
format!("Expected Int64, got {:?}", other),
|
640
|
+
))
|
641
|
+
}
|
642
|
+
}
|
643
|
+
}
|
644
|
+
Ok(())
|
616
645
|
}
|
617
|
-
ParquetSchemaType::
|
618
|
-
let
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
646
|
+
ParquetSchemaType::UInt8 => {
|
647
|
+
let typed_builder = builder
|
648
|
+
.as_any_mut()
|
649
|
+
.downcast_mut::<UInt8Builder>()
|
650
|
+
.expect("Builder mismatch: expected UInt8Builder");
|
651
|
+
for val in values {
|
652
|
+
match val {
|
653
|
+
ParquetValue::UInt8(u) => typed_builder.append_value(*u),
|
654
|
+
// Handle Int64 that could be a UInt8
|
655
|
+
ParquetValue::Int64(i) => {
|
656
|
+
if *i < 0 || *i > u8::MAX as i64 {
|
657
|
+
return Err(MagnusError::new(
|
658
|
+
magnus::exception::range_error(),
|
659
|
+
format!("Integer {} is out of range for UInt8", i),
|
660
|
+
));
|
661
|
+
}
|
662
|
+
typed_builder.append_value(*i as u8)
|
663
|
+
}
|
664
|
+
ParquetValue::Null => typed_builder.append_null(),
|
665
|
+
other => {
|
666
|
+
return Err(MagnusError::new(
|
667
|
+
magnus::exception::type_error(),
|
668
|
+
format!("Expected UInt8, got {:?}", other),
|
669
|
+
))
|
670
|
+
}
|
626
671
|
}
|
627
|
-
|
628
|
-
|
672
|
+
}
|
673
|
+
Ok(())
|
674
|
+
}
|
675
|
+
ParquetSchemaType::UInt16 => {
|
676
|
+
let typed_builder = builder
|
677
|
+
.as_any_mut()
|
678
|
+
.downcast_mut::<UInt16Builder>()
|
679
|
+
.expect("Builder mismatch: expected UInt16Builder");
|
680
|
+
for val in values {
|
681
|
+
match val {
|
682
|
+
ParquetValue::UInt16(u) => typed_builder.append_value(*u),
|
683
|
+
// Handle Int64 that could be a UInt16
|
684
|
+
ParquetValue::Int64(i) => {
|
685
|
+
if *i < 0 || *i > u16::MAX as i64 {
|
686
|
+
return Err(MagnusError::new(
|
687
|
+
magnus::exception::range_error(),
|
688
|
+
format!("Integer {} is out of range for UInt16", i),
|
689
|
+
));
|
690
|
+
}
|
691
|
+
typed_builder.append_value(*i as u16)
|
692
|
+
}
|
693
|
+
ParquetValue::Null => typed_builder.append_null(),
|
694
|
+
other => {
|
695
|
+
return Err(MagnusError::new(
|
696
|
+
magnus::exception::type_error(),
|
697
|
+
format!("Expected UInt16, got {:?}", other),
|
698
|
+
))
|
699
|
+
}
|
629
700
|
}
|
630
|
-
|
631
|
-
|
701
|
+
}
|
702
|
+
Ok(())
|
703
|
+
}
|
704
|
+
ParquetSchemaType::UInt32 => {
|
705
|
+
let typed_builder = builder
|
706
|
+
.as_any_mut()
|
707
|
+
.downcast_mut::<UInt32Builder>()
|
708
|
+
.expect("Builder mismatch: expected UInt32Builder");
|
709
|
+
for val in values {
|
710
|
+
match val {
|
711
|
+
ParquetValue::UInt32(u) => typed_builder.append_value(*u),
|
712
|
+
// Handle Int64 that could be a UInt32
|
713
|
+
ParquetValue::Int64(i) => {
|
714
|
+
if *i < 0 || *i > u32::MAX as i64 {
|
715
|
+
return Err(MagnusError::new(
|
716
|
+
magnus::exception::range_error(),
|
717
|
+
format!("Integer {} is out of range for UInt32", i),
|
718
|
+
));
|
719
|
+
}
|
720
|
+
typed_builder.append_value(*i as u32)
|
721
|
+
}
|
722
|
+
ParquetValue::Null => typed_builder.append_null(),
|
723
|
+
other => {
|
724
|
+
return Err(MagnusError::new(
|
725
|
+
magnus::exception::type_error(),
|
726
|
+
format!("Expected UInt32, got {:?}", other),
|
727
|
+
))
|
728
|
+
}
|
632
729
|
}
|
633
|
-
|
634
|
-
|
730
|
+
}
|
731
|
+
Ok(())
|
732
|
+
}
|
733
|
+
ParquetSchemaType::UInt64 => {
|
734
|
+
let typed_builder = builder
|
735
|
+
.as_any_mut()
|
736
|
+
.downcast_mut::<UInt64Builder>()
|
737
|
+
.expect("Builder mismatch: expected UInt64Builder");
|
738
|
+
for val in values {
|
739
|
+
match val {
|
740
|
+
ParquetValue::UInt64(u) => typed_builder.append_value(*u),
|
741
|
+
// Handle Int64 that could be a UInt64
|
742
|
+
ParquetValue::Int64(i) => {
|
743
|
+
if *i < 0 {
|
744
|
+
return Err(MagnusError::new(
|
745
|
+
magnus::exception::range_error(),
|
746
|
+
format!("Integer {} is out of range for UInt64", i),
|
747
|
+
));
|
748
|
+
}
|
749
|
+
typed_builder.append_value(*i as u64)
|
750
|
+
}
|
751
|
+
ParquetValue::Null => typed_builder.append_null(),
|
752
|
+
other => {
|
753
|
+
return Err(MagnusError::new(
|
754
|
+
magnus::exception::type_error(),
|
755
|
+
format!("Expected UInt64, got {:?}", other),
|
756
|
+
))
|
757
|
+
}
|
635
758
|
}
|
636
|
-
|
637
|
-
|
759
|
+
}
|
760
|
+
Ok(())
|
761
|
+
}
|
762
|
+
ParquetSchemaType::Float => {
|
763
|
+
let typed_builder = builder
|
764
|
+
.as_any_mut()
|
765
|
+
.downcast_mut::<Float32Builder>()
|
766
|
+
.expect("Builder mismatch: expected Float32Builder");
|
767
|
+
for val in values {
|
768
|
+
match val {
|
769
|
+
ParquetValue::Float32(f) => typed_builder.append_value(*f),
|
770
|
+
ParquetValue::Float16(fh) => typed_builder.append_value(*fh),
|
771
|
+
ParquetValue::Null => typed_builder.append_null(),
|
772
|
+
other => {
|
773
|
+
return Err(MagnusError::new(
|
774
|
+
magnus::exception::type_error(),
|
775
|
+
format!("Expected Float32, got {:?}", other),
|
776
|
+
))
|
777
|
+
}
|
638
778
|
}
|
639
|
-
|
640
|
-
|
779
|
+
}
|
780
|
+
Ok(())
|
781
|
+
}
|
782
|
+
ParquetSchemaType::Double => {
|
783
|
+
let typed_builder = builder
|
784
|
+
.as_any_mut()
|
785
|
+
.downcast_mut::<Float64Builder>()
|
786
|
+
.expect("Builder mismatch: expected Float64Builder");
|
787
|
+
for val in values {
|
788
|
+
match val {
|
789
|
+
ParquetValue::Float64(f) => typed_builder.append_value(*f),
|
790
|
+
// If you want to allow f32 => f64, do so:
|
791
|
+
ParquetValue::Float32(flo) => typed_builder.append_value(*flo as f64),
|
792
|
+
ParquetValue::Null => typed_builder.append_null(),
|
793
|
+
other => {
|
794
|
+
return Err(MagnusError::new(
|
795
|
+
magnus::exception::type_error(),
|
796
|
+
format!("Expected Float64, got {:?}", other),
|
797
|
+
))
|
798
|
+
}
|
641
799
|
}
|
642
|
-
|
643
|
-
|
800
|
+
}
|
801
|
+
Ok(())
|
802
|
+
}
|
803
|
+
ParquetSchemaType::Boolean => {
|
804
|
+
let typed_builder = builder
|
805
|
+
.as_any_mut()
|
806
|
+
.downcast_mut::<BooleanBuilder>()
|
807
|
+
.expect("Builder mismatch: expected BooleanBuilder");
|
808
|
+
for val in values {
|
809
|
+
match val {
|
810
|
+
ParquetValue::Boolean(b) => typed_builder.append_value(*b),
|
811
|
+
ParquetValue::Null => typed_builder.append_null(),
|
812
|
+
other => {
|
813
|
+
return Err(MagnusError::new(
|
814
|
+
magnus::exception::type_error(),
|
815
|
+
format!("Expected Boolean, got {:?}", other),
|
816
|
+
))
|
817
|
+
}
|
644
818
|
}
|
645
|
-
|
646
|
-
|
819
|
+
}
|
820
|
+
Ok(())
|
821
|
+
}
|
822
|
+
ParquetSchemaType::Date32 => {
|
823
|
+
let typed_builder = builder
|
824
|
+
.as_any_mut()
|
825
|
+
.downcast_mut::<Date32Builder>()
|
826
|
+
.expect("Builder mismatch: expected Date32Builder");
|
827
|
+
for val in values {
|
828
|
+
match val {
|
829
|
+
ParquetValue::Date32(d) => typed_builder.append_value(*d),
|
830
|
+
ParquetValue::Null => typed_builder.append_null(),
|
831
|
+
other => {
|
832
|
+
return Err(MagnusError::new(
|
833
|
+
magnus::exception::type_error(),
|
834
|
+
format!("Expected Date32, got {:?}", other),
|
835
|
+
))
|
836
|
+
}
|
647
837
|
}
|
648
|
-
|
649
|
-
|
838
|
+
}
|
839
|
+
Ok(())
|
840
|
+
}
|
841
|
+
ParquetSchemaType::TimestampMillis => {
|
842
|
+
let typed_builder = builder
|
843
|
+
.as_any_mut()
|
844
|
+
.downcast_mut::<TimestampMillisecondBuilder>()
|
845
|
+
.expect("Builder mismatch: expected TimestampMillisecondBuilder");
|
846
|
+
for val in values {
|
847
|
+
match val {
|
848
|
+
ParquetValue::TimestampMillis(ts, _tz) => typed_builder.append_value(*ts),
|
849
|
+
ParquetValue::Null => typed_builder.append_null(),
|
850
|
+
other => {
|
851
|
+
return Err(MagnusError::new(
|
852
|
+
magnus::exception::type_error(),
|
853
|
+
format!("Expected TimestampMillis, got {:?}", other),
|
854
|
+
))
|
855
|
+
}
|
650
856
|
}
|
651
|
-
|
652
|
-
|
857
|
+
}
|
858
|
+
Ok(())
|
859
|
+
}
|
860
|
+
ParquetSchemaType::TimestampMicros => {
|
861
|
+
let typed_builder = builder
|
862
|
+
.as_any_mut()
|
863
|
+
.downcast_mut::<TimestampMicrosecondBuilder>()
|
864
|
+
.expect("Builder mismatch: expected TimestampMicrosecondBuilder");
|
865
|
+
for val in values {
|
866
|
+
match val {
|
867
|
+
ParquetValue::TimestampMicros(ts, _tz) => typed_builder.append_value(*ts),
|
868
|
+
ParquetValue::Null => typed_builder.append_null(),
|
869
|
+
other => {
|
870
|
+
return Err(MagnusError::new(
|
871
|
+
magnus::exception::type_error(),
|
872
|
+
format!("Expected TimestampMicros, got {:?}", other),
|
873
|
+
))
|
874
|
+
}
|
653
875
|
}
|
654
|
-
|
655
|
-
|
876
|
+
}
|
877
|
+
Ok(())
|
878
|
+
}
|
879
|
+
|
880
|
+
// ------------------
|
881
|
+
// NESTED LIST - using helper function
|
882
|
+
// ------------------
|
883
|
+
ParquetSchemaType::List(list_field) => {
|
884
|
+
fill_list_builder(builder, &list_field.item_type, values)
|
885
|
+
}
|
886
|
+
|
887
|
+
// ------------------
|
888
|
+
// NESTED MAP - using helper function
|
889
|
+
// ------------------
|
890
|
+
ParquetSchemaType::Map(map_field) => {
|
891
|
+
fill_map_builder(builder, &map_field.key_type, &map_field.value_type, values)
|
892
|
+
}
|
893
|
+
|
894
|
+
// ------------------
|
895
|
+
// OTHER TYPES - keep as is for now
|
896
|
+
// ------------------
|
897
|
+
ParquetSchemaType::String => {
|
898
|
+
let typed_builder = builder
|
899
|
+
.as_any_mut()
|
900
|
+
.downcast_mut::<StringBuilder>()
|
901
|
+
.expect("Builder mismatch: expected StringBuilder");
|
902
|
+
for val in values {
|
903
|
+
match val {
|
904
|
+
ParquetValue::String(s) => typed_builder.append_value(s),
|
905
|
+
ParquetValue::Null => typed_builder.append_null(),
|
906
|
+
other => {
|
907
|
+
return Err(MagnusError::new(
|
908
|
+
magnus::exception::type_error(),
|
909
|
+
format!("Expected String, got {:?}", other),
|
910
|
+
))
|
911
|
+
}
|
656
912
|
}
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
913
|
+
}
|
914
|
+
Ok(())
|
915
|
+
}
|
916
|
+
ParquetSchemaType::Binary => {
|
917
|
+
let typed_builder = builder
|
918
|
+
.as_any_mut()
|
919
|
+
.downcast_mut::<BinaryBuilder>()
|
920
|
+
.expect("Builder mismatch: expected BinaryBuilder");
|
921
|
+
for val in values {
|
922
|
+
match val {
|
923
|
+
ParquetValue::Bytes(b) => typed_builder.append_value(&b),
|
924
|
+
ParquetValue::Null => typed_builder.append_null(),
|
925
|
+
other => {
|
926
|
+
return Err(MagnusError::new(
|
927
|
+
magnus::exception::type_error(),
|
928
|
+
format!("Expected Binary, got {:?}", other),
|
929
|
+
))
|
930
|
+
}
|
662
931
|
}
|
663
|
-
}
|
932
|
+
}
|
933
|
+
Ok(())
|
934
|
+
}
|
935
|
+
ParquetSchemaType::Struct(struct_field) => {
|
936
|
+
let typed_builder = builder
|
937
|
+
.as_any_mut()
|
938
|
+
.downcast_mut::<StructBuilder>()
|
939
|
+
.expect("Builder mismatch: expected StructBuilder");
|
664
940
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
ParquetValue::
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
ParquetValue::
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
941
|
+
for val in values {
|
942
|
+
match val {
|
943
|
+
ParquetValue::Null => {
|
944
|
+
// null struct
|
945
|
+
typed_builder.append(false);
|
946
|
+
}
|
947
|
+
ParquetValue::Map(map_data) => {
|
948
|
+
for (i, field) in struct_field.fields.iter().enumerate() {
|
949
|
+
let field_key = ParquetValue::String(field.name.clone());
|
950
|
+
if let Some(field_val) = map_data.get(&field_key) {
|
951
|
+
match field_val {
|
952
|
+
ParquetValue::Int8(x) => typed_builder
|
953
|
+
.field_builder::<Int8Builder>(i)
|
954
|
+
.ok_or_else(|| {
|
955
|
+
MagnusError::new(
|
956
|
+
magnus::exception::type_error(),
|
957
|
+
"Failed to coerce into Int8Builder",
|
958
|
+
)
|
959
|
+
})?
|
960
|
+
.append_value(*x),
|
961
|
+
ParquetValue::Int16(x) => typed_builder
|
962
|
+
.field_builder::<Int16Builder>(i)
|
963
|
+
.ok_or_else(|| {
|
964
|
+
MagnusError::new(
|
965
|
+
magnus::exception::type_error(),
|
966
|
+
"Failed to coerce into Int16Builder",
|
967
|
+
)
|
968
|
+
})?
|
969
|
+
.append_value(*x),
|
970
|
+
ParquetValue::Int32(x) => typed_builder
|
971
|
+
.field_builder::<Int32Builder>(i)
|
972
|
+
.ok_or_else(|| {
|
973
|
+
MagnusError::new(
|
974
|
+
magnus::exception::type_error(),
|
975
|
+
"Failed to coerce into Int32Builder",
|
976
|
+
)
|
977
|
+
})?
|
978
|
+
.append_value(*x),
|
979
|
+
ParquetValue::Int64(x) => typed_builder
|
980
|
+
.field_builder::<Int64Builder>(i)
|
981
|
+
.ok_or_else(|| {
|
982
|
+
MagnusError::new(
|
983
|
+
magnus::exception::type_error(),
|
984
|
+
"Failed to coerce into Int64Builder",
|
985
|
+
)
|
986
|
+
})?
|
987
|
+
.append_value(*x),
|
988
|
+
ParquetValue::UInt8(x) => typed_builder
|
989
|
+
.field_builder::<UInt8Builder>(i)
|
990
|
+
.ok_or_else(|| {
|
991
|
+
MagnusError::new(
|
992
|
+
magnus::exception::type_error(),
|
993
|
+
"Failed to coerce into UInt8Builder",
|
994
|
+
)
|
995
|
+
})?
|
996
|
+
.append_value(*x),
|
997
|
+
ParquetValue::UInt16(x) => typed_builder
|
998
|
+
.field_builder::<UInt16Builder>(i)
|
999
|
+
.ok_or_else(|| {
|
1000
|
+
MagnusError::new(
|
1001
|
+
magnus::exception::type_error(),
|
1002
|
+
"Failed to coerce into UInt16Builder",
|
1003
|
+
)
|
1004
|
+
})?
|
1005
|
+
.append_value(*x),
|
1006
|
+
ParquetValue::UInt32(x) => typed_builder
|
1007
|
+
.field_builder::<UInt32Builder>(i)
|
1008
|
+
.ok_or_else(|| {
|
1009
|
+
MagnusError::new(
|
1010
|
+
magnus::exception::type_error(),
|
1011
|
+
"Failed to coerce into UInt32Builder",
|
1012
|
+
)
|
1013
|
+
})?
|
1014
|
+
.append_value(*x),
|
1015
|
+
ParquetValue::UInt64(x) => typed_builder
|
1016
|
+
.field_builder::<UInt64Builder>(i)
|
1017
|
+
.ok_or_else(|| {
|
1018
|
+
MagnusError::new(
|
1019
|
+
magnus::exception::type_error(),
|
1020
|
+
"Failed to coerce into UInt64Builder",
|
1021
|
+
)
|
1022
|
+
})?
|
1023
|
+
.append_value(*x),
|
1024
|
+
ParquetValue::Float16(_) => {
|
1025
|
+
return Err(MagnusError::new(
|
1026
|
+
magnus::exception::runtime_error(),
|
1027
|
+
"Float16 not supported",
|
1028
|
+
))
|
1029
|
+
}
|
1030
|
+
ParquetValue::Float32(x) => typed_builder
|
1031
|
+
.field_builder::<Float32Builder>(i)
|
1032
|
+
.ok_or_else(|| {
|
1033
|
+
MagnusError::new(
|
1034
|
+
magnus::exception::type_error(),
|
1035
|
+
"Failed to coerce into Float32Builder",
|
1036
|
+
)
|
1037
|
+
})?
|
1038
|
+
.append_value(*x),
|
1039
|
+
ParquetValue::Float64(x) => typed_builder
|
1040
|
+
.field_builder::<Float64Builder>(i)
|
1041
|
+
.ok_or_else(|| {
|
1042
|
+
MagnusError::new(
|
1043
|
+
magnus::exception::type_error(),
|
1044
|
+
"Failed to coerce into Float64Builder",
|
1045
|
+
)
|
1046
|
+
})?
|
1047
|
+
.append_value(*x),
|
1048
|
+
ParquetValue::Boolean(x) => typed_builder
|
1049
|
+
.field_builder::<BooleanBuilder>(i)
|
1050
|
+
.ok_or_else(|| {
|
1051
|
+
MagnusError::new(
|
1052
|
+
magnus::exception::type_error(),
|
1053
|
+
"Failed to coerce into BooleanBuilder",
|
1054
|
+
)
|
1055
|
+
})?
|
1056
|
+
.append_value(*x),
|
1057
|
+
ParquetValue::String(x) => typed_builder
|
1058
|
+
.field_builder::<StringBuilder>(i)
|
1059
|
+
.ok_or_else(|| {
|
1060
|
+
MagnusError::new(
|
1061
|
+
magnus::exception::type_error(),
|
1062
|
+
"Failed to coerce into StringBuilder",
|
1063
|
+
)
|
1064
|
+
})?
|
1065
|
+
.append_value(x),
|
1066
|
+
ParquetValue::Bytes(bytes) => typed_builder
|
1067
|
+
.field_builder::<BinaryBuilder>(i)
|
1068
|
+
.ok_or_else(|| {
|
1069
|
+
MagnusError::new(
|
1070
|
+
magnus::exception::type_error(),
|
1071
|
+
"Failed to coerce into BinaryBuilder",
|
1072
|
+
)
|
1073
|
+
})?
|
1074
|
+
.append_value(bytes),
|
1075
|
+
ParquetValue::Date32(x) => typed_builder
|
1076
|
+
.field_builder::<Date32Builder>(i)
|
1077
|
+
.ok_or_else(|| {
|
1078
|
+
MagnusError::new(
|
1079
|
+
magnus::exception::type_error(),
|
1080
|
+
"Failed to coerce into Date32Builder",
|
1081
|
+
)
|
1082
|
+
})?
|
1083
|
+
.append_value(*x),
|
1084
|
+
ParquetValue::Date64(x) => typed_builder
|
1085
|
+
.field_builder::<Date64Builder>(i)
|
1086
|
+
.ok_or_else(|| {
|
1087
|
+
MagnusError::new(
|
1088
|
+
magnus::exception::type_error(),
|
1089
|
+
"Failed to coerce into Date64Builder",
|
1090
|
+
)
|
1091
|
+
})?
|
1092
|
+
.append_value(*x),
|
1093
|
+
ParquetValue::TimestampSecond(x, _tz) => typed_builder
|
1094
|
+
.field_builder::<TimestampSecondBuilder>(i)
|
1095
|
+
.ok_or_else(|| {
|
1096
|
+
MagnusError::new(
|
1097
|
+
magnus::exception::type_error(),
|
1098
|
+
"Failed to coerce into TimestampSecondBuilder",
|
1099
|
+
)
|
1100
|
+
})?
|
1101
|
+
.append_value(*x),
|
1102
|
+
ParquetValue::TimestampMillis(x, _tz) => typed_builder
|
1103
|
+
.field_builder::<TimestampMillisecondBuilder>(i)
|
1104
|
+
.ok_or_else(|| {
|
1105
|
+
MagnusError::new(
|
1106
|
+
magnus::exception::type_error(),
|
1107
|
+
"Failed to coerce into TimestampMillisecondBuilder",
|
1108
|
+
)
|
1109
|
+
})?
|
1110
|
+
.append_value(*x),
|
1111
|
+
ParquetValue::TimestampMicros(x, _tz) => typed_builder
|
1112
|
+
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1113
|
+
.ok_or_else(|| {
|
1114
|
+
MagnusError::new(
|
1115
|
+
magnus::exception::type_error(),
|
1116
|
+
"Failed to coerce into TimestampMicrosecondBuilder",
|
1117
|
+
)
|
1118
|
+
})?
|
1119
|
+
.append_value(*x),
|
1120
|
+
ParquetValue::TimestampNanos(x, _tz) => typed_builder
|
1121
|
+
.field_builder::<TimestampNanosecondBuilder>(i)
|
1122
|
+
.ok_or_else(|| {
|
1123
|
+
MagnusError::new(
|
1124
|
+
magnus::exception::type_error(),
|
1125
|
+
"Failed to coerce into TimestampNanosecondBuilder",
|
1126
|
+
)
|
1127
|
+
})?
|
1128
|
+
.append_value(*x),
|
1129
|
+
ParquetValue::List(items) => {
|
1130
|
+
let list_builder = typed_builder
|
1131
|
+
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1132
|
+
.ok_or_else(|| {
|
1133
|
+
MagnusError::new(
|
1134
|
+
magnus::exception::type_error(),
|
1135
|
+
"Failed to coerce into ListBuilder",
|
1136
|
+
)
|
1137
|
+
})?;
|
1138
|
+
fill_builder(
|
1139
|
+
list_builder.values(),
|
1140
|
+
&struct_field.fields[i].type_,
|
1141
|
+
items,
|
1142
|
+
)?;
|
1143
|
+
list_builder.append(true);
|
1144
|
+
}
|
1145
|
+
ParquetValue::Map(map_data) => {
|
1146
|
+
let maybe_map_builder = typed_builder
|
1147
|
+
.field_builder::<MapBuilder<
|
1148
|
+
Box<dyn ArrayBuilder>,
|
1149
|
+
Box<dyn ArrayBuilder>,
|
1150
|
+
>>(i);
|
1151
|
+
|
1152
|
+
if let Some(map_builder) = maybe_map_builder {
|
1153
|
+
fill_builder(
|
1154
|
+
map_builder,
|
1155
|
+
&struct_field.fields[i].type_,
|
1156
|
+
&[ParquetValue::Map(map_data.clone())],
|
1157
|
+
)?;
|
1158
|
+
map_builder.append(true).map_err(|e| {
|
1159
|
+
MagnusError::new(
|
1160
|
+
magnus::exception::runtime_error(),
|
1161
|
+
format!("Failed to append map: {}", e),
|
1162
|
+
)
|
1163
|
+
})?;
|
1164
|
+
} else {
|
1165
|
+
let child_struct_builder = typed_builder
|
1166
|
+
.field_builder::<StructBuilder>(i)
|
1167
|
+
.ok_or_else(|| {
|
1168
|
+
MagnusError::new(
|
1169
|
+
magnus::exception::type_error(),
|
1170
|
+
"Failed to coerce into StructBuilder",
|
1171
|
+
)
|
1172
|
+
})?;
|
1173
|
+
fill_builder(
|
1174
|
+
child_struct_builder,
|
1175
|
+
&struct_field.fields[i].type_,
|
1176
|
+
&[ParquetValue::Map(map_data.clone())],
|
1177
|
+
)?;
|
1178
|
+
}
|
1179
|
+
}
|
1180
|
+
ParquetValue::Null => match struct_field.fields[i].type_ {
|
1181
|
+
ParquetSchemaType::Int8 => typed_builder
|
1182
|
+
.field_builder::<Int8Builder>(i)
|
1183
|
+
.ok_or_else(|| {
|
1184
|
+
MagnusError::new(
|
1185
|
+
magnus::exception::type_error(),
|
1186
|
+
"Failed to coerce into Int8Builder",
|
1187
|
+
)
|
1188
|
+
})?
|
1189
|
+
.append_null(),
|
1190
|
+
ParquetSchemaType::Int16 => typed_builder
|
1191
|
+
.field_builder::<Int16Builder>(i)
|
1192
|
+
.ok_or_else(|| {
|
1193
|
+
MagnusError::new(
|
1194
|
+
magnus::exception::type_error(),
|
1195
|
+
"Failed to coerce into Int16Builder",
|
1196
|
+
)
|
1197
|
+
})?
|
1198
|
+
.append_null(),
|
1199
|
+
ParquetSchemaType::Int32 => typed_builder
|
1200
|
+
.field_builder::<Int32Builder>(i)
|
1201
|
+
.ok_or_else(|| {
|
1202
|
+
MagnusError::new(
|
1203
|
+
magnus::exception::type_error(),
|
1204
|
+
"Failed to coerce into Int32Builder",
|
1205
|
+
)
|
1206
|
+
})?
|
1207
|
+
.append_null(),
|
1208
|
+
ParquetSchemaType::Int64 => typed_builder
|
1209
|
+
.field_builder::<Int64Builder>(i)
|
1210
|
+
.ok_or_else(|| {
|
1211
|
+
MagnusError::new(
|
1212
|
+
magnus::exception::type_error(),
|
1213
|
+
"Failed to coerce into Int64Builder",
|
1214
|
+
)
|
1215
|
+
})?
|
1216
|
+
.append_null(),
|
1217
|
+
ParquetSchemaType::UInt8 => typed_builder
|
1218
|
+
.field_builder::<UInt8Builder>(i)
|
1219
|
+
.ok_or_else(|| {
|
1220
|
+
MagnusError::new(
|
1221
|
+
magnus::exception::type_error(),
|
1222
|
+
"Failed to coerce into UInt8Builder",
|
1223
|
+
)
|
1224
|
+
})?
|
1225
|
+
.append_null(),
|
1226
|
+
ParquetSchemaType::UInt16 => typed_builder
|
1227
|
+
.field_builder::<UInt16Builder>(i)
|
1228
|
+
.ok_or_else(|| {
|
1229
|
+
MagnusError::new(
|
1230
|
+
magnus::exception::type_error(),
|
1231
|
+
"Failed to coerce into UInt16Builder",
|
1232
|
+
)
|
1233
|
+
})?
|
1234
|
+
.append_null(),
|
1235
|
+
ParquetSchemaType::UInt32 => typed_builder
|
1236
|
+
.field_builder::<UInt32Builder>(i)
|
1237
|
+
.ok_or_else(|| {
|
1238
|
+
MagnusError::new(
|
1239
|
+
magnus::exception::type_error(),
|
1240
|
+
"Failed to coerce into UInt32Builder",
|
1241
|
+
)
|
1242
|
+
})?
|
1243
|
+
.append_null(),
|
1244
|
+
ParquetSchemaType::UInt64 => typed_builder
|
1245
|
+
.field_builder::<UInt64Builder>(i)
|
1246
|
+
.ok_or_else(|| {
|
1247
|
+
MagnusError::new(
|
1248
|
+
magnus::exception::type_error(),
|
1249
|
+
"Failed to coerce into UInt64Builder",
|
1250
|
+
)
|
1251
|
+
})?
|
1252
|
+
.append_null(),
|
1253
|
+
ParquetSchemaType::Float => typed_builder
|
1254
|
+
.field_builder::<Float32Builder>(i)
|
1255
|
+
.ok_or_else(|| {
|
1256
|
+
MagnusError::new(
|
1257
|
+
magnus::exception::type_error(),
|
1258
|
+
"Failed to coerce into Float32Builder",
|
1259
|
+
)
|
1260
|
+
})?
|
1261
|
+
.append_null(),
|
1262
|
+
ParquetSchemaType::Double => typed_builder
|
1263
|
+
.field_builder::<Float64Builder>(i)
|
1264
|
+
.ok_or_else(|| {
|
1265
|
+
MagnusError::new(
|
1266
|
+
magnus::exception::type_error(),
|
1267
|
+
"Failed to coerce into Float64Builder",
|
1268
|
+
)
|
1269
|
+
})?
|
1270
|
+
.append_null(),
|
1271
|
+
ParquetSchemaType::String => typed_builder
|
1272
|
+
.field_builder::<StringBuilder>(i)
|
1273
|
+
.ok_or_else(|| {
|
1274
|
+
MagnusError::new(
|
1275
|
+
magnus::exception::type_error(),
|
1276
|
+
"Failed to coerce into StringBuilder",
|
1277
|
+
)
|
1278
|
+
})?
|
1279
|
+
.append_null(),
|
1280
|
+
ParquetSchemaType::Binary => typed_builder
|
1281
|
+
.field_builder::<BinaryBuilder>(i)
|
1282
|
+
.ok_or_else(|| {
|
1283
|
+
MagnusError::new(
|
1284
|
+
magnus::exception::type_error(),
|
1285
|
+
"Failed to coerce into BinaryBuilder",
|
1286
|
+
)
|
1287
|
+
})?
|
1288
|
+
.append_null(),
|
1289
|
+
ParquetSchemaType::Boolean => typed_builder
|
1290
|
+
.field_builder::<BooleanBuilder>(i)
|
1291
|
+
.ok_or_else(|| {
|
1292
|
+
MagnusError::new(
|
1293
|
+
magnus::exception::type_error(),
|
1294
|
+
"Failed to coerce into BooleanBuilder",
|
1295
|
+
)
|
1296
|
+
})?
|
1297
|
+
.append_null(),
|
1298
|
+
ParquetSchemaType::Date32 => typed_builder
|
1299
|
+
.field_builder::<Date32Builder>(i)
|
1300
|
+
.ok_or_else(|| {
|
1301
|
+
MagnusError::new(
|
1302
|
+
magnus::exception::type_error(),
|
1303
|
+
"Failed to coerce into Date32Builder",
|
1304
|
+
)
|
1305
|
+
})?
|
1306
|
+
.append_null(),
|
1307
|
+
ParquetSchemaType::TimestampMillis => typed_builder
|
1308
|
+
.field_builder::<TimestampMillisecondBuilder>(i)
|
1309
|
+
.ok_or_else(|| {
|
1310
|
+
MagnusError::new(
|
1311
|
+
magnus::exception::type_error(),
|
1312
|
+
"Failed to coerce into TimestampMillisecondBuilder",
|
1313
|
+
)
|
1314
|
+
})?
|
1315
|
+
.append_null(),
|
1316
|
+
ParquetSchemaType::TimestampMicros => typed_builder
|
1317
|
+
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1318
|
+
.ok_or_else(|| {
|
1319
|
+
MagnusError::new(
|
1320
|
+
magnus::exception::type_error(),
|
1321
|
+
"Failed to coerce into TimestampMicrosecondBuilder",
|
1322
|
+
)
|
1323
|
+
})?
|
1324
|
+
.append_null(),
|
1325
|
+
ParquetSchemaType::List(_) => typed_builder
|
1326
|
+
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1327
|
+
.ok_or_else(|| {
|
1328
|
+
MagnusError::new(
|
1329
|
+
magnus::exception::type_error(),
|
1330
|
+
"Failed to coerce into ListBuilder",
|
1331
|
+
)
|
1332
|
+
})?
|
1333
|
+
.append(false),
|
1334
|
+
ParquetSchemaType::Map(_) => {
|
1335
|
+
typed_builder
|
1336
|
+
.field_builder::<MapBuilder<
|
1337
|
+
Box<dyn ArrayBuilder>,
|
1338
|
+
Box<dyn ArrayBuilder>,
|
1339
|
+
>>(i)
|
1340
|
+
.ok_or_else(|| {
|
1341
|
+
MagnusError::new(
|
1342
|
+
magnus::exception::type_error(),
|
1343
|
+
"Failed to coerce into MapBuilder",
|
1344
|
+
)
|
1345
|
+
})?
|
1346
|
+
.append(false)
|
1347
|
+
.map_err(|e| {
|
1348
|
+
MagnusError::new(
|
1349
|
+
magnus::exception::runtime_error(),
|
1350
|
+
format!("Failed to append map: {}", e),
|
1351
|
+
)
|
1352
|
+
})?;
|
1353
|
+
}
|
1354
|
+
ParquetSchemaType::Struct(_) => typed_builder
|
1355
|
+
.field_builder::<StructBuilder>(i)
|
1356
|
+
.ok_or_else(|| {
|
1357
|
+
MagnusError::new(
|
1358
|
+
magnus::exception::type_error(),
|
1359
|
+
"Failed to coerce into StructBuilder",
|
1360
|
+
)
|
1361
|
+
})?
|
1362
|
+
.append_null(),
|
1363
|
+
},
|
789
1364
|
}
|
1365
|
+
} else {
|
1366
|
+
return Err(MagnusError::new(
|
1367
|
+
magnus::exception::type_error(),
|
1368
|
+
format!("Field {} not found in map", i),
|
1369
|
+
));
|
790
1370
|
}
|
791
1371
|
}
|
1372
|
+
typed_builder.append(true);
|
792
1373
|
}
|
793
|
-
|
794
|
-
_ => {
|
1374
|
+
other => {
|
795
1375
|
return Err(MagnusError::new(
|
796
1376
|
magnus::exception::type_error(),
|
797
|
-
format!("Expected
|
798
|
-
))
|
1377
|
+
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
1378
|
+
));
|
799
1379
|
}
|
800
1380
|
}
|
801
1381
|
}
|
802
|
-
Ok(
|
803
|
-
}
|
804
|
-
ParquetSchemaType::Map(_map_field) => {
|
805
|
-
unimplemented!("Writing maps is not yet supported")
|
1382
|
+
Ok(())
|
806
1383
|
}
|
807
1384
|
}
|
808
1385
|
}
|
 
+/// Creates a final Arrow array from a list of ParquetValues and a schema type.
+/// This is your "unified" way to handle any nesting level.
+pub fn convert_parquet_values_to_arrow(
+    values: Vec<ParquetValue>,
+    type_: &ParquetSchemaType,
+) -> Result<Arc<dyn Array>, ReaderError> {
+    // Make sure we always have at least capacity 1 to avoid empty builders
+    let capacity = if values.is_empty() { 1 } else { values.len() };
+    let mut builder = create_arrow_builder_for_type(type_, Some(capacity))?;
+
+    fill_builder(&mut builder, type_, &values)?;
+
+    // Finish building the array
+    let array = builder.finish();
+
+    Ok(Arc::new(array))
+}
+
 pub fn convert_ruby_array_to_arrow(
     values: RArray,
     type_: &ParquetSchemaType,
-) -> Result<Arc<dyn Array>,
+) -> Result<Arc<dyn Array>, ReaderError> {
     let mut parquet_values = Vec::with_capacity(values.len());
     for value in values {
         if value.is_nil() {
             parquet_values.push(ParquetValue::Null);
             continue;
         }
-        let parquet_value = ParquetValue::from_value(value, type_)?;
+        let parquet_value = ParquetValue::from_value(value, type_, None)?;
         parquet_values.push(parquet_value);
     }
     convert_parquet_values_to_arrow(parquet_values, type_)