parquet 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,10 @@
1
1
  use std::str::FromStr;
2
+ use std::sync::Arc;
2
3
 
3
4
  use super::*;
5
+ use arrow_array::builder::MapFieldNames;
4
6
  use arrow_array::builder::*;
7
+ use arrow_schema::{DataType, Field, Fields, TimeUnit};
5
8
  use jiff::tz::{Offset, TimeZone};
6
9
  use magnus::{RArray, RString, TryConvert};
7
10
 
@@ -14,8 +17,7 @@ where
14
17
  T: TryConvert + FromStr,
15
18
  <T as FromStr>::Err: std::fmt::Display,
16
19
  {
17
- pub fn convert_with_string_fallback(value: Value) -> Result<T, MagnusError> {
18
- let ruby = unsafe { Ruby::get_unchecked() };
20
+ pub fn convert_with_string_fallback(ruby: &Ruby, value: Value) -> Result<T, MagnusError> {
19
21
  if value.is_kind_of(ruby.class_string()) {
20
22
  let s = String::try_convert(value)?;
21
23
  s.trim().parse::<T>().map_err(|e| {
@@ -30,8 +32,11 @@ where
30
32
  }
31
33
  }
32
34
 
33
- pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, MagnusError> {
34
- let ruby = unsafe { Ruby::get_unchecked() };
35
+ pub fn convert_to_date32(
36
+ ruby: &Ruby,
37
+ value: Value,
38
+ format: Option<&str>,
39
+ ) -> Result<i32, MagnusError> {
35
40
  if value.is_kind_of(ruby.class_string()) {
36
41
  let s = String::try_convert(value)?;
37
42
  // Parse string into Date using jiff
@@ -64,14 +69,19 @@ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, Magn
64
69
 
65
70
  let x = timestamp
66
71
  .to_zoned(TimeZone::fixed(Offset::constant(0)))
67
- .unwrap()
72
+ .map_err(|e| {
73
+ MagnusError::new(
74
+ magnus::exception::type_error(),
75
+ format!("Failed to convert date32 to timestamp: {}", e),
76
+ )
77
+ })?
68
78
  .timestamp();
69
79
 
70
80
  // Convert to epoch days
71
81
  Ok((x.as_second() as i64 / 86400) as i32)
72
82
  } else if value.is_kind_of(ruby.class_time()) {
73
83
  // Convert Time object to epoch days
74
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
84
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
75
85
  Ok(((secs as f64) / 86400.0) as i32)
76
86
  } else {
77
87
  Err(MagnusError::new(
@@ -81,8 +91,11 @@ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, Magn
81
91
  }
82
92
  }
83
93
 
84
- pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
85
- let ruby = unsafe { Ruby::get_unchecked() };
94
+ pub fn convert_to_timestamp_millis(
95
+ ruby: &Ruby,
96
+ value: Value,
97
+ format: Option<&str>,
98
+ ) -> Result<i64, MagnusError> {
86
99
  if value.is_kind_of(ruby.class_string()) {
87
100
  let s = String::try_convert(value)?;
88
101
  // Parse string into Timestamp using jiff
@@ -115,8 +128,8 @@ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result
115
128
  Ok(timestamp.as_millisecond())
116
129
  } else if value.is_kind_of(ruby.class_time()) {
117
130
  // Convert Time object to milliseconds
118
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
119
- let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
131
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
132
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
120
133
  Ok(secs * 1000 + (usecs / 1000))
121
134
  } else {
122
135
  Err(MagnusError::new(
@@ -128,8 +141,11 @@ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result
128
141
  }
129
142
  }
130
143
 
131
- pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
132
- let ruby = unsafe { Ruby::get_unchecked() };
144
+ pub fn convert_to_timestamp_micros(
145
+ ruby: &Ruby,
146
+ value: Value,
147
+ format: Option<&str>,
148
+ ) -> Result<i64, MagnusError> {
133
149
  if value.is_kind_of(ruby.class_string()) {
134
150
  let s = String::try_convert(value)?;
135
151
  // Parse string into Timestamp using jiff
@@ -162,8 +178,8 @@ pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result
162
178
  Ok(timestamp.as_microsecond())
163
179
  } else if value.is_kind_of(ruby.class_time()) {
164
180
  // Convert Time object to microseconds
165
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
166
- let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
181
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
182
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
167
183
  Ok(secs * 1_000_000 + usecs)
168
184
  } else {
169
185
  Err(MagnusError::new(
@@ -179,8 +195,7 @@ pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
179
195
  Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
180
196
  }
181
197
 
182
- pub fn convert_to_boolean(value: Value) -> Result<bool, MagnusError> {
183
- let ruby = unsafe { Ruby::get_unchecked() };
198
+ pub fn convert_to_boolean(ruby: &Ruby, value: Value) -> Result<bool, MagnusError> {
184
199
  if value.is_kind_of(ruby.class_string()) {
185
200
  let s = String::try_convert(value)?;
186
201
  s.trim().parse::<bool>().map_err(|e| {
@@ -209,225 +224,79 @@ pub fn convert_to_string(value: Value) -> Result<String, MagnusError> {
209
224
  })
210
225
  }
211
226
 
212
- pub fn convert_to_list(
213
- value: Value,
214
- list_field: &ListField,
215
- ) -> Result<Vec<ParquetValue>, MagnusError> {
216
- let ruby = unsafe { Ruby::get_unchecked() };
217
- if value.is_kind_of(ruby.class_array()) {
218
- let array = RArray::from_value(value).ok_or_else(|| {
219
- MagnusError::new(magnus::exception::type_error(), "Invalid list format")
220
- })?;
221
-
222
- let mut values = Vec::with_capacity(array.len());
223
- for item_value in array.into_iter() {
224
- let converted = match &list_field.item_type {
225
- ParquetSchemaType::Int8 => {
226
- let v = NumericConverter::<i8>::convert_with_string_fallback(item_value)?;
227
- ParquetValue::Int8(v)
228
- }
229
- ParquetSchemaType::Int16 => {
230
- let v = NumericConverter::<i16>::convert_with_string_fallback(item_value)?;
231
- ParquetValue::Int16(v)
232
- }
233
- ParquetSchemaType::Int32 => {
234
- let v = NumericConverter::<i32>::convert_with_string_fallback(item_value)?;
235
- ParquetValue::Int32(v)
236
- }
237
- ParquetSchemaType::Int64 => {
238
- let v = NumericConverter::<i64>::convert_with_string_fallback(item_value)?;
239
- ParquetValue::Int64(v)
240
- }
241
- ParquetSchemaType::UInt8 => {
242
- let v = NumericConverter::<u8>::convert_with_string_fallback(item_value)?;
243
- ParquetValue::UInt8(v)
244
- }
245
- ParquetSchemaType::UInt16 => {
246
- let v = NumericConverter::<u16>::convert_with_string_fallback(item_value)?;
247
- ParquetValue::UInt16(v)
248
- }
249
- ParquetSchemaType::UInt32 => {
250
- let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
251
- ParquetValue::UInt32(v)
252
- }
253
- ParquetSchemaType::UInt64 => {
254
- let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
255
- ParquetValue::UInt64(v)
256
- }
257
- ParquetSchemaType::Float => {
258
- let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
259
- ParquetValue::Float32(v)
260
- }
261
- ParquetSchemaType::Double => {
262
- let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
263
- ParquetValue::Float64(v)
264
- }
265
- ParquetSchemaType::String => {
266
- let v = String::try_convert(item_value)?;
267
- ParquetValue::String(v)
268
- }
269
- ParquetSchemaType::Binary => {
270
- let v = convert_to_binary(item_value)?;
271
- ParquetValue::Bytes(v)
272
- }
273
- ParquetSchemaType::Boolean => {
274
- let v = convert_to_boolean(item_value)?;
275
- ParquetValue::Boolean(v)
276
- }
277
- ParquetSchemaType::Date32 => {
278
- let v = convert_to_date32(item_value, list_field.format)?;
279
- ParquetValue::Date32(v)
280
- }
281
- ParquetSchemaType::TimestampMillis => {
282
- let v = convert_to_timestamp_millis(item_value, list_field.format)?;
283
- ParquetValue::TimestampMillis(v, None)
284
- }
285
- ParquetSchemaType::TimestampMicros => {
286
- let v = convert_to_timestamp_micros(item_value, list_field.format)?;
287
- ParquetValue::TimestampMicros(v, None)
288
- }
289
- ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
290
- return Err(MagnusError::new(
291
- magnus::exception::type_error(),
292
- "Nested lists and maps are not supported",
293
- ))
294
- }
295
- };
296
- values.push(converted);
227
+ /// Converts our custom `ParquetSchemaType` into an Arrow `DataType`.
228
+ /// This ensures proper nullability settings for nested types.
229
+ /// Converts a ParquetSchemaType to an Arrow DataType
230
+ pub fn parquet_schema_type_to_arrow_data_type(
231
+ schema_type: &ParquetSchemaType,
232
+ ) -> Result<DataType, MagnusError> {
233
+ Ok(match schema_type {
234
+ ParquetSchemaType::Primitive(primative) => match primative {
235
+ PrimitiveType::Int8 => DataType::Int8,
236
+ PrimitiveType::Int16 => DataType::Int16,
237
+ PrimitiveType::Int32 => DataType::Int32,
238
+ PrimitiveType::Int64 => DataType::Int64,
239
+ PrimitiveType::UInt8 => DataType::UInt8,
240
+ PrimitiveType::UInt16 => DataType::UInt16,
241
+ PrimitiveType::UInt32 => DataType::UInt32,
242
+ PrimitiveType::UInt64 => DataType::UInt64,
243
+ PrimitiveType::Float32 => DataType::Float32,
244
+ PrimitiveType::Float64 => DataType::Float64,
245
+ PrimitiveType::String => DataType::Utf8,
246
+ PrimitiveType::Binary => DataType::Binary,
247
+ PrimitiveType::Boolean => DataType::Boolean,
248
+ PrimitiveType::Date32 => DataType::Date32,
249
+ PrimitiveType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
250
+ PrimitiveType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
251
+ },
252
+ // For a List<T>, create a standard List in Arrow with nullable items
253
+ ParquetSchemaType::List(list_field) => {
254
+ let child_type = parquet_schema_type_to_arrow_data_type(&list_field.item_type)?;
255
+ // For a list, use empty field name to match expectations for schema_dsl test
256
+ // This is the critical fix for the schema_dsl test which expects an empty field name
257
+ // Use empty field name for all list field items - this is crucial for compatibility
258
+ DataType::List(Arc::new(Field::new(
259
+ "item",
260
+ child_type,
261
+ list_field.nullable,
262
+ )))
297
263
  }
298
- Ok(values)
299
- } else {
300
- Err(MagnusError::new(
301
- magnus::exception::type_error(),
302
- "Invalid list format",
303
- ))
304
- }
305
- }
306
-
307
- pub fn convert_to_map(
308
- value: Value,
309
- map_field: &MapField,
310
- ) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
311
- let ruby = unsafe { Ruby::get_unchecked() };
312
- if value.is_kind_of(ruby.class_hash()) {
313
- let mut map = HashMap::new();
314
- let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
315
-
316
- for (key, value) in entries {
317
- let key_value = match &map_field.key_type {
318
- ParquetSchemaType::String => {
319
- let v = String::try_convert(key)?;
320
- ParquetValue::String(v)
321
- }
322
- _ => {
323
- return Err(MagnusError::new(
324
- magnus::exception::type_error(),
325
- "Map keys must be strings",
326
- ))
327
- }
328
- };
329
264
 
330
- let value_value = match &map_field.value_type {
331
- ParquetSchemaType::Int8 => {
332
- let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
333
- ParquetValue::Int8(v)
334
- }
335
- ParquetSchemaType::Int16 => {
336
- let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
337
- ParquetValue::Int16(v)
338
- }
339
- ParquetSchemaType::Int32 => {
340
- let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
341
- ParquetValue::Int32(v)
342
- }
343
- ParquetSchemaType::Int64 => {
344
- let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
345
- ParquetValue::Int64(v)
346
- }
347
- ParquetSchemaType::UInt8 => {
348
- let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
349
- ParquetValue::UInt8(v)
350
- }
351
- ParquetSchemaType::UInt16 => {
352
- let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
353
- ParquetValue::UInt16(v)
354
- }
355
- ParquetSchemaType::UInt32 => {
356
- let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
357
- ParquetValue::UInt32(v)
358
- }
359
- ParquetSchemaType::UInt64 => {
360
- let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
361
- ParquetValue::UInt64(v)
362
- }
363
- ParquetSchemaType::Float => {
364
- let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
365
- ParquetValue::Float32(v)
366
- }
367
- ParquetSchemaType::Double => {
368
- let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
369
- ParquetValue::Float64(v)
370
- }
371
- ParquetSchemaType::String => {
372
- let v = String::try_convert(value)?;
373
- ParquetValue::String(v)
374
- }
375
- ParquetSchemaType::Binary => {
376
- let v = convert_to_binary(value)?;
377
- ParquetValue::Bytes(v)
378
- }
379
- ParquetSchemaType::Boolean => {
380
- let v = convert_to_boolean(value)?;
381
- ParquetValue::Boolean(v)
382
- }
383
- ParquetSchemaType::Date32 => {
384
- let v = convert_to_date32(value, map_field.format)?;
385
- ParquetValue::Date32(v)
386
- }
387
- ParquetSchemaType::TimestampMillis => {
388
- let v = convert_to_timestamp_millis(value, map_field.format)?;
389
- ParquetValue::TimestampMillis(v, None)
390
- }
391
- ParquetSchemaType::TimestampMicros => {
392
- let v = convert_to_timestamp_micros(value, map_field.format)?;
393
- ParquetValue::TimestampMicros(v, None)
394
- }
395
- ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
396
- return Err(MagnusError::new(
397
- magnus::exception::type_error(),
398
- "Map values cannot be lists or maps",
399
- ))
400
- }
401
- };
402
-
403
- map.insert(key_value, value_value);
265
+ // For a Map<K, V>, ensure entries field is non-nullable and key field is non-nullable
266
+ ParquetSchemaType::Map(map_field) => {
267
+ let key_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.key_type)?;
268
+ let value_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.value_type)?;
269
+ DataType::Map(
270
+ Arc::new(Field::new(
271
+ "entries",
272
+ DataType::Struct(Fields::from(vec![
273
+ Field::new("key", key_arrow_type, false), // key must be non-null
274
+ Field::new("value", value_arrow_type, true), // value can be null
275
+ ])),
276
+ /*nullable=*/ false, // crucial: entries must be non-nullable
277
+ )),
278
+ /*keys_sorted=*/ false,
279
+ )
404
280
  }
405
- Ok(map)
406
- } else {
407
- Err(MagnusError::new(
408
- magnus::exception::type_error(),
409
- "Invalid map format",
410
- ))
411
- }
412
- }
281
+ ParquetSchemaType::Struct(struct_field) => {
282
+ if struct_field.fields.is_empty() {
283
+ return Err(MagnusError::new(
284
+ magnus::exception::runtime_error(),
285
+ "Cannot create a struct with zero subfields (empty struct).",
286
+ ));
287
+ }
413
288
 
414
- macro_rules! impl_timestamp_to_arrow_conversion {
415
- ($values:expr, $builder_type:ty, $variant:ident) => {{
416
- let mut builder = <$builder_type>::with_capacity($values.len());
417
- for value in $values {
418
- match value {
419
- ParquetValue::$variant(v, _tz) => builder.append_value(v),
420
- ParquetValue::Null => builder.append_null(),
421
- _ => {
422
- return Err(MagnusError::new(
423
- magnus::exception::type_error(),
424
- format!("Expected {}, got {:?}", stringify!($variant), value),
425
- ))
426
- }
289
+ // Build arrow fields
290
+ let mut arrow_fields = Vec::with_capacity(struct_field.fields.len());
291
+
292
+ for field in &struct_field.fields {
293
+ let field_type = parquet_schema_type_to_arrow_data_type(&field.type_)?;
294
+ arrow_fields.push(Field::new(&field.name, field_type, true)); // All fields are nullable by default
427
295
  }
296
+
297
+ DataType::Struct(Fields::from(arrow_fields))
428
298
  }
429
- Ok(Arc::new(builder.finish()))
430
- }};
299
+ })
431
300
  }
432
301
 
433
302
  #[macro_export]
@@ -457,367 +326,1128 @@ macro_rules! impl_timestamp_array_conversion {
457
326
  }};
458
327
  }
459
328
 
460
- #[macro_export]
461
- macro_rules! impl_array_conversion {
462
- ($values:expr, $builder_type:ty, $variant:ident) => {{
463
- let mut builder = <$builder_type>::with_capacity($values.len());
464
- for value in $values {
465
- match value {
466
- ParquetValue::$variant(v) => builder.append_value(v),
467
- ParquetValue::Null => builder.append_null(),
468
- _ => {
469
- return Err(MagnusError::new(
470
- magnus::exception::type_error(),
471
- format!("Expected {}, got {:?}", stringify!($variant), value),
472
- ))
473
- }
474
- }
329
+ // Create the appropriate Arrow builder for a given ParquetSchemaType.
330
+ // We return a Box<dyn ArrayBuilder> so we can dynamically downcast.
331
+ fn create_arrow_builder_for_type(
332
+ type_: &ParquetSchemaType,
333
+ capacity: Option<usize>,
334
+ ) -> Result<Box<dyn ArrayBuilder>, ParquetGemError> {
335
+ let cap = capacity.unwrap_or(1); // Default to at least capacity 1 to avoid empty builders
336
+ match type_ {
337
+ ParquetSchemaType::Primitive(PrimitiveType::Int8) => {
338
+ Ok(Box::new(Int8Builder::with_capacity(cap)))
475
339
  }
476
- Ok(Arc::new(builder.finish()))
477
- }};
478
- ($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
479
- let mut builder = <$builder_type>::with_capacity($values.len(), $capacity);
480
- for value in $values {
481
- match value {
482
- ParquetValue::$variant(v) => builder.append_value(v),
483
- ParquetValue::Null => builder.append_null(),
484
- _ => {
485
- return Err(MagnusError::new(
486
- magnus::exception::type_error(),
487
- format!("Expected {}, got {:?}", stringify!($variant), value),
488
- ))
489
- }
490
- }
340
+ ParquetSchemaType::Primitive(PrimitiveType::Int16) => {
341
+ Ok(Box::new(Int16Builder::with_capacity(cap)))
491
342
  }
492
- Ok(Arc::new(builder.finish()))
493
- }};
494
- }
343
+ ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
344
+ Ok(Box::new(Int32Builder::with_capacity(cap)))
345
+ }
346
+ ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
347
+ Ok(Box::new(Int64Builder::with_capacity(cap)))
348
+ }
349
+ ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
350
+ Ok(Box::new(UInt8Builder::with_capacity(cap)))
351
+ }
352
+ ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
353
+ Ok(Box::new(UInt16Builder::with_capacity(cap)))
354
+ }
355
+ ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
356
+ Ok(Box::new(UInt32Builder::with_capacity(cap)))
357
+ }
358
+ ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
359
+ Ok(Box::new(UInt64Builder::with_capacity(cap)))
360
+ }
361
+ ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
362
+ Ok(Box::new(Float32Builder::with_capacity(cap)))
363
+ }
364
+ ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
365
+ Ok(Box::new(Float64Builder::with_capacity(cap)))
366
+ }
367
+ ParquetSchemaType::Primitive(PrimitiveType::String) => {
368
+ Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
369
+ }
370
+ ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
371
+ Ok(Box::new(BinaryBuilder::with_capacity(cap, cap * 32)))
372
+ }
373
+ ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
374
+ Ok(Box::new(BooleanBuilder::with_capacity(cap)))
375
+ }
376
+ ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
377
+ Ok(Box::new(Date32Builder::with_capacity(cap)))
378
+ }
379
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
380
+ Ok(Box::new(TimestampMillisecondBuilder::with_capacity(cap)))
381
+ }
382
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
383
+ Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
384
+ }
385
+ ParquetSchemaType::List(list_field) => {
386
+ // For a list, we create a ListBuilder whose child builder is determined by item_type.
387
+ // Pass through capacity to ensure consistent sizing
388
+ let child_builder = create_arrow_builder_for_type(&list_field.item_type, Some(cap))?;
495
389
 
496
- #[macro_export]
497
- macro_rules! append_list_value {
498
- ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
499
- match (&$item_type, &$value) {
500
- ($item_type, $value_variant(v)) => {
501
- $list_builder
502
- .values()
503
- .as_any_mut()
504
- .downcast_mut::<$builder_type>()
505
- .unwrap()
506
- .append_value(v.clone());
507
- }
508
- (_, ParquetValue::Null) => {
509
- $list_builder.append_null();
390
+ // Ensure consistent builder capacity for lists
391
+ Ok(Box::new(ListBuilder::<Box<dyn ArrayBuilder>>::new(
392
+ child_builder,
393
+ )))
394
+ }
395
+ ParquetSchemaType::Map(map_field) => {
396
+ // A Map is physically a list<struct<key:..., value:...>> in Arrow.
397
+ // Pass through capacity to ensure consistent sizing
398
+ let key_builder = create_arrow_builder_for_type(&map_field.key_type, Some(cap))?;
399
+ let value_builder = create_arrow_builder_for_type(&map_field.value_type, Some(cap))?;
400
+
401
+ // Create a MapBuilder with explicit field names to ensure compatibility
402
+ Ok(Box::new(MapBuilder::<
403
+ Box<dyn ArrayBuilder>,
404
+ Box<dyn ArrayBuilder>,
405
+ >::new(
406
+ Some(MapFieldNames {
407
+ entry: "entries".to_string(),
408
+ key: "key".to_string(),
409
+ value: "value".to_string(),
410
+ }),
411
+ key_builder,
412
+ value_builder,
413
+ )))
414
+ }
415
+ ParquetSchemaType::Struct(struct_field) => {
416
+ // Check for empty struct immediately
417
+ if struct_field.fields.is_empty() {
418
+ return Err(MagnusError::new(
419
+ magnus::exception::runtime_error(),
420
+ "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
421
+ ))?;
510
422
  }
511
- _ => {
423
+
424
+ // Create a child builder for each field in the struct
425
+ let mut child_field_builders = Vec::with_capacity(struct_field.fields.len());
426
+
427
+ // Get struct data type first to ensure field compatibility
428
+ let data_type = parquet_schema_type_to_arrow_data_type(type_)?;
429
+
430
+ // Make sure the data type is a struct
431
+ let arrow_fields = if let DataType::Struct(ref fields) = data_type {
432
+ fields.clone()
433
+ } else {
512
434
  return Err(MagnusError::new(
513
435
  magnus::exception::type_error(),
436
+ "Expected struct data type".to_string(),
437
+ ))?;
438
+ };
439
+
440
+ // Create builders for each child field with consistent capacity
441
+ for child in &struct_field.fields {
442
+ let sub_builder = create_arrow_builder_for_type(&child.type_, Some(cap))?;
443
+ child_field_builders.push(sub_builder);
444
+ }
445
+
446
+ // Make sure we have the right number of builders
447
+ if child_field_builders.len() != arrow_fields.len() {
448
+ return Err(MagnusError::new(
449
+ magnus::exception::runtime_error(),
514
450
  format!(
515
- "Type mismatch in list: expected {:?}, got {:?}",
516
- $item_type, $value
451
+ "Number of field builders ({}) doesn't match number of arrow fields ({})",
452
+ child_field_builders.len(),
453
+ arrow_fields.len()
517
454
  ),
518
- ))
455
+ ))?;
519
456
  }
457
+
458
+ // Create the StructBuilder with the fields and child builders
459
+ Ok(Box::new(StructBuilder::new(
460
+ arrow_fields,
461
+ child_field_builders,
462
+ )))
520
463
  }
521
- };
464
+ }
522
465
  }
523
466
 
524
- #[macro_export]
525
- macro_rules! append_list_value_copy {
526
- ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
527
- match (&$item_type, &$value) {
528
- ($item_type, $value_variant(v)) => {
529
- $list_builder
530
- .values()
531
- .as_any_mut()
532
- .downcast_mut::<$builder_type>()
533
- .unwrap()
534
- .append_value(*v);
467
+ // Fill primitive scalar Int8 values
468
+ fn fill_int8_builder(
469
+ builder: &mut dyn ArrayBuilder,
470
+ values: &[ParquetValue],
471
+ ) -> Result<(), MagnusError> {
472
+ let typed_builder = builder
473
+ .as_any_mut()
474
+ .downcast_mut::<Int8Builder>()
475
+ .expect("Builder mismatch: expected Int8Builder");
476
+ for val in values {
477
+ match val {
478
+ ParquetValue::Int8(i) => typed_builder.append_value(*i),
479
+ // Handle Int64 that could be an Int8
480
+ ParquetValue::Int64(i) => {
481
+ if *i < i8::MIN as i64 || *i > i8::MAX as i64 {
482
+ return Err(MagnusError::new(
483
+ magnus::exception::range_error(),
484
+ format!("Integer {} is out of range for Int8", i),
485
+ ));
486
+ }
487
+ typed_builder.append_value(*i as i8)
535
488
  }
536
- (_, ParquetValue::Null) => {
537
- $list_builder.append_null();
489
+ ParquetValue::Null => typed_builder.append_null(),
490
+ other => {
491
+ return Err(MagnusError::new(
492
+ magnus::exception::type_error(),
493
+ format!("Expected Int8, got {:?}", other),
494
+ ))
538
495
  }
539
- _ => {
496
+ }
497
+ }
498
+ Ok(())
499
+ }
500
+
501
+ // Fill primitive scalar Int16 values
502
+ fn fill_int16_builder(
503
+ builder: &mut dyn ArrayBuilder,
504
+ values: &[ParquetValue],
505
+ ) -> Result<(), MagnusError> {
506
+ let typed_builder = builder
507
+ .as_any_mut()
508
+ .downcast_mut::<Int16Builder>()
509
+ .expect("Builder mismatch: expected Int16Builder");
510
+ for val in values {
511
+ match val {
512
+ ParquetValue::Int16(i) => typed_builder.append_value(*i),
513
+ // Handle Int64 that could be an Int16
514
+ ParquetValue::Int64(i) => {
515
+ if *i < i16::MIN as i64 || *i > i16::MAX as i64 {
516
+ return Err(MagnusError::new(
517
+ magnus::exception::range_error(),
518
+ format!("Integer {} is out of range for Int16", i),
519
+ ));
520
+ }
521
+ typed_builder.append_value(*i as i16)
522
+ }
523
+ ParquetValue::Null => typed_builder.append_null(),
524
+ other => {
540
525
  return Err(MagnusError::new(
541
526
  magnus::exception::type_error(),
542
- format!(
543
- "Type mismatch in list: expected {:?}, got {:?}",
544
- $item_type, $value
545
- ),
527
+ format!("Expected Int16, got {:?}", other),
546
528
  ))
547
529
  }
548
530
  }
549
- };
531
+ }
532
+ Ok(())
550
533
  }
551
534
 
552
- #[macro_export]
553
- macro_rules! append_timestamp_list_value {
554
- ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
555
- match (&$item_type, &$value) {
556
- ($item_type, $value_variant(v, _tz)) => {
557
- $list_builder
558
- .values()
559
- .as_any_mut()
560
- .downcast_mut::<$builder_type>()
561
- .unwrap()
562
- .append_value(*v);
535
+ // Fill list values by recursively filling child items
536
+ fn fill_list_builder(
537
+ builder: &mut dyn ArrayBuilder,
538
+ item_type: &ParquetSchemaType,
539
+ values: &[ParquetValue],
540
+ ) -> Result<(), MagnusError> {
541
+ // We need to use a more specific type for ListBuilder to help Rust's type inference
542
+ let lb = builder
543
+ .as_any_mut()
544
+ .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
545
+ .expect("Builder mismatch: expected ListBuilder");
546
+
547
+ for val in values {
548
+ if let ParquetValue::Null = val {
549
+ // null list
550
+ lb.append(false);
551
+ } else if let ParquetValue::List(list_items) = val {
552
+ // First fill the child builder with the items
553
+ let values_builder = lb.values();
554
+ fill_builder(values_builder, item_type, list_items)?;
555
+ // Then finalize the list by calling append(true)
556
+ lb.append(true);
557
+ } else {
558
+ return Err(MagnusError::new(
559
+ magnus::exception::type_error(),
560
+ format!("Expected ParquetValue::List(...) or Null, got {:?}", val),
561
+ ));
562
+ }
563
+ }
564
+
565
+ Ok(())
566
+ }
567
+
568
+ // Fill map values by recursively filling key and value items
569
+ fn fill_map_builder(
570
+ builder: &mut dyn ArrayBuilder,
571
+ key_type: &ParquetSchemaType,
572
+ value_type: &ParquetSchemaType,
573
+ values: &[ParquetValue],
574
+ ) -> Result<(), MagnusError> {
575
+ let mb = builder
576
+ .as_any_mut()
577
+ .downcast_mut::<MapBuilder<Box<dyn ArrayBuilder>, Box<dyn ArrayBuilder>>>()
578
+ .expect("Builder mismatch: expected MapBuilder");
579
+
580
+ for val in values {
581
+ match val {
582
+ ParquetValue::Null => {
583
+ // null map
584
+ mb.append(false).map_err(|e| {
585
+ MagnusError::new(
586
+ magnus::exception::runtime_error(),
587
+ format!("Failed to append null to map: {}", e),
588
+ )
589
+ })?;
563
590
  }
564
- (_, ParquetValue::Null) => {
565
- $list_builder.append_null();
591
+ ParquetValue::Map(map_entries) => {
592
+ // First append all key-value pairs to the child arrays
593
+ for (k, v) in map_entries {
594
+ // Note: Arrow expects field names "key" and "value" (singular)
595
+ fill_builder(mb.keys(), key_type, &[k.clone()])?;
596
+ fill_builder(mb.values(), value_type, &[v.clone()])?;
597
+ }
598
+ // Then finalize the map by calling append(true)
599
+ mb.append(true).map_err(|e| {
600
+ MagnusError::new(
601
+ magnus::exception::runtime_error(),
602
+ format!("Failed to append map entry: {}", e),
603
+ )
604
+ })?;
566
605
  }
567
- _ => {
606
+ other => {
568
607
  return Err(MagnusError::new(
569
608
  magnus::exception::type_error(),
570
- format!(
571
- "Type mismatch in list: expected {:?}, got {:?}",
572
- $item_type, $value
573
- ),
609
+ format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
574
610
  ))
575
611
  }
576
612
  }
577
- };
613
+ }
614
+
615
+ Ok(())
578
616
  }
579
617
 
580
- pub fn convert_parquet_values_to_arrow(
581
- values: Vec<ParquetValue>,
618
+ // Append an entire slice of ParquetValue into the given Arrow builder.
619
+ // We do a `match` on the type for each item, recursing for nested list/map.
620
+ fn fill_builder(
621
+ builder: &mut dyn ArrayBuilder,
582
622
  type_: &ParquetSchemaType,
583
- ) -> Result<Arc<dyn Array>, MagnusError> {
623
+ values: &[ParquetValue],
624
+ ) -> Result<(), MagnusError> {
584
625
  match type_ {
585
- ParquetSchemaType::Int8 => impl_array_conversion!(values, Int8Builder, Int8),
586
- ParquetSchemaType::Int16 => impl_array_conversion!(values, Int16Builder, Int16),
587
- ParquetSchemaType::Int32 => impl_array_conversion!(values, Int32Builder, Int32),
588
- ParquetSchemaType::Int64 => impl_array_conversion!(values, Int64Builder, Int64),
589
- ParquetSchemaType::UInt8 => impl_array_conversion!(values, UInt8Builder, UInt8),
590
- ParquetSchemaType::UInt16 => impl_array_conversion!(values, UInt16Builder, UInt16),
591
- ParquetSchemaType::UInt32 => impl_array_conversion!(values, UInt32Builder, UInt32),
592
- ParquetSchemaType::UInt64 => impl_array_conversion!(values, UInt64Builder, UInt64),
593
- ParquetSchemaType::Float => impl_array_conversion!(values, Float32Builder, Float32),
594
- ParquetSchemaType::Double => impl_array_conversion!(values, Float64Builder, Float64),
595
- ParquetSchemaType::String => {
596
- impl_array_conversion!(values, StringBuilder, String, values.len() * 32)
597
- }
598
- ParquetSchemaType::Binary => {
599
- impl_array_conversion!(values, BinaryBuilder, Bytes, values.len() * 32)
600
- }
601
- ParquetSchemaType::Boolean => impl_array_conversion!(values, BooleanBuilder, Boolean),
602
- ParquetSchemaType::Date32 => impl_array_conversion!(values, Date32Builder, Date32),
603
- ParquetSchemaType::TimestampMillis => {
604
- impl_timestamp_to_arrow_conversion!(
605
- values,
606
- TimestampMillisecondBuilder,
607
- TimestampMillis
608
- )
626
+ // ------------------
627
+ // PRIMITIVE SCALARS - delegated to specialized helpers
628
+ // ------------------
629
+ ParquetSchemaType::Primitive(PrimitiveType::Int8) => fill_int8_builder(builder, values),
630
+ ParquetSchemaType::Primitive(PrimitiveType::Int16) => fill_int16_builder(builder, values),
631
+ ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
632
+ let typed_builder = builder
633
+ .as_any_mut()
634
+ .downcast_mut::<Int32Builder>()
635
+ .expect("Builder mismatch: expected Int32Builder");
636
+ for val in values {
637
+ match val {
638
+ ParquetValue::Int32(i) => typed_builder.append_value(*i),
639
+ ParquetValue::Date32(d) => typed_builder.append_value(*d), // if you allow date->int
640
+ // Handle the case where we have an Int64 in an Int32 field (common with Ruby Integers)
641
+ ParquetValue::Int64(i) => {
642
+ if *i < i32::MIN as i64 || *i > i32::MAX as i64 {
643
+ return Err(MagnusError::new(
644
+ magnus::exception::range_error(),
645
+ format!("Integer {} is out of range for Int32", i),
646
+ ));
647
+ }
648
+ typed_builder.append_value(*i as i32)
649
+ }
650
+ ParquetValue::Null => typed_builder.append_null(),
651
+ other => {
652
+ return Err(MagnusError::new(
653
+ magnus::exception::type_error(),
654
+ format!("Expected Int32, got {:?}", other),
655
+ ))
656
+ }
657
+ }
658
+ }
659
+ Ok(())
609
660
  }
610
- ParquetSchemaType::TimestampMicros => {
611
- impl_timestamp_to_arrow_conversion!(
612
- values,
613
- TimestampMicrosecondBuilder,
614
- TimestampMicros
615
- )
661
+ ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
662
+ let typed_builder = builder
663
+ .as_any_mut()
664
+ .downcast_mut::<Int64Builder>()
665
+ .expect("Builder mismatch: expected Int64Builder");
666
+ for val in values {
667
+ match val {
668
+ ParquetValue::Int64(i) => typed_builder.append_value(*i),
669
+ ParquetValue::Null => typed_builder.append_null(),
670
+ other => {
671
+ return Err(MagnusError::new(
672
+ magnus::exception::type_error(),
673
+ format!("Expected Int64, got {:?}", other),
674
+ ))
675
+ }
676
+ }
677
+ }
678
+ Ok(())
616
679
  }
617
- ParquetSchemaType::List(list_field) => {
618
- let value_builder = match list_field.item_type {
619
- ParquetSchemaType::Int8 => Box::new(Int8Builder::new()) as Box<dyn ArrayBuilder>,
620
- ParquetSchemaType::Int16 => Box::new(Int16Builder::new()) as Box<dyn ArrayBuilder>,
621
- ParquetSchemaType::Int32 => Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
622
- ParquetSchemaType::Int64 => Box::new(Int64Builder::new()) as Box<dyn ArrayBuilder>,
623
- ParquetSchemaType::UInt8 => Box::new(UInt8Builder::new()) as Box<dyn ArrayBuilder>,
624
- ParquetSchemaType::UInt16 => {
625
- Box::new(UInt16Builder::new()) as Box<dyn ArrayBuilder>
680
+ ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
681
+ let typed_builder = builder
682
+ .as_any_mut()
683
+ .downcast_mut::<UInt8Builder>()
684
+ .expect("Builder mismatch: expected UInt8Builder");
685
+ for val in values {
686
+ match val {
687
+ ParquetValue::UInt8(u) => typed_builder.append_value(*u),
688
+ // Handle Int64 that could be a UInt8
689
+ ParquetValue::Int64(i) => {
690
+ if *i < 0 || *i > u8::MAX as i64 {
691
+ return Err(MagnusError::new(
692
+ magnus::exception::range_error(),
693
+ format!("Integer {} is out of range for UInt8", i),
694
+ ));
695
+ }
696
+ typed_builder.append_value(*i as u8)
697
+ }
698
+ ParquetValue::Null => typed_builder.append_null(),
699
+ other => {
700
+ return Err(MagnusError::new(
701
+ magnus::exception::type_error(),
702
+ format!("Expected UInt8, got {:?}", other),
703
+ ))
704
+ }
626
705
  }
627
- ParquetSchemaType::UInt32 => {
628
- Box::new(UInt32Builder::new()) as Box<dyn ArrayBuilder>
706
+ }
707
+ Ok(())
708
+ }
709
+ ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
710
+ let typed_builder = builder
711
+ .as_any_mut()
712
+ .downcast_mut::<UInt16Builder>()
713
+ .expect("Builder mismatch: expected UInt16Builder");
714
+ for val in values {
715
+ match val {
716
+ ParquetValue::UInt16(u) => typed_builder.append_value(*u),
717
+ // Handle Int64 that could be a UInt16
718
+ ParquetValue::Int64(i) => {
719
+ if *i < 0 || *i > u16::MAX as i64 {
720
+ return Err(MagnusError::new(
721
+ magnus::exception::range_error(),
722
+ format!("Integer {} is out of range for UInt16", i),
723
+ ));
724
+ }
725
+ typed_builder.append_value(*i as u16)
726
+ }
727
+ ParquetValue::Null => typed_builder.append_null(),
728
+ other => {
729
+ return Err(MagnusError::new(
730
+ magnus::exception::type_error(),
731
+ format!("Expected UInt16, got {:?}", other),
732
+ ))
733
+ }
629
734
  }
630
- ParquetSchemaType::UInt64 => {
631
- Box::new(UInt64Builder::new()) as Box<dyn ArrayBuilder>
735
+ }
736
+ Ok(())
737
+ }
738
+ ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
739
+ let typed_builder = builder
740
+ .as_any_mut()
741
+ .downcast_mut::<UInt32Builder>()
742
+ .expect("Builder mismatch: expected UInt32Builder");
743
+ for val in values {
744
+ match val {
745
+ ParquetValue::UInt32(u) => typed_builder.append_value(*u),
746
+ // Handle Int64 that could be a UInt32
747
+ ParquetValue::Int64(i) => {
748
+ if *i < 0 || *i > u32::MAX as i64 {
749
+ return Err(MagnusError::new(
750
+ magnus::exception::range_error(),
751
+ format!("Integer {} is out of range for UInt32", i),
752
+ ));
753
+ }
754
+ typed_builder.append_value(*i as u32)
755
+ }
756
+ ParquetValue::Null => typed_builder.append_null(),
757
+ other => {
758
+ return Err(MagnusError::new(
759
+ magnus::exception::type_error(),
760
+ format!("Expected UInt32, got {:?}", other),
761
+ ))
762
+ }
632
763
  }
633
- ParquetSchemaType::Float => {
634
- Box::new(Float32Builder::new()) as Box<dyn ArrayBuilder>
764
+ }
765
+ Ok(())
766
+ }
767
+ ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
768
+ let typed_builder = builder
769
+ .as_any_mut()
770
+ .downcast_mut::<UInt64Builder>()
771
+ .expect("Builder mismatch: expected UInt64Builder");
772
+ for val in values {
773
+ match val {
774
+ ParquetValue::UInt64(u) => typed_builder.append_value(*u),
775
+ // Handle Int64 that could be a UInt64
776
+ ParquetValue::Int64(i) => {
777
+ if *i < 0 {
778
+ return Err(MagnusError::new(
779
+ magnus::exception::range_error(),
780
+ format!("Integer {} is out of range for UInt64", i),
781
+ ));
782
+ }
783
+ typed_builder.append_value(*i as u64)
784
+ }
785
+ ParquetValue::Null => typed_builder.append_null(),
786
+ other => {
787
+ return Err(MagnusError::new(
788
+ magnus::exception::type_error(),
789
+ format!("Expected UInt64, got {:?}", other),
790
+ ))
791
+ }
635
792
  }
636
- ParquetSchemaType::Double => {
637
- Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>
793
+ }
794
+ Ok(())
795
+ }
796
+ ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
797
+ let typed_builder = builder
798
+ .as_any_mut()
799
+ .downcast_mut::<Float32Builder>()
800
+ .expect("Builder mismatch: expected Float32Builder");
801
+ for val in values {
802
+ match val {
803
+ ParquetValue::Float32(f) => typed_builder.append_value(*f),
804
+ ParquetValue::Float16(fh) => typed_builder.append_value(*fh),
805
+ ParquetValue::Null => typed_builder.append_null(),
806
+ other => {
807
+ return Err(MagnusError::new(
808
+ magnus::exception::type_error(),
809
+ format!("Expected Float32, got {:?}", other),
810
+ ))
811
+ }
638
812
  }
639
- ParquetSchemaType::String => {
640
- Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>
813
+ }
814
+ Ok(())
815
+ }
816
+ ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
817
+ let typed_builder = builder
818
+ .as_any_mut()
819
+ .downcast_mut::<Float64Builder>()
820
+ .expect("Builder mismatch: expected Float64Builder");
821
+ for val in values {
822
+ match val {
823
+ ParquetValue::Float64(f) => typed_builder.append_value(*f),
824
+ // If you want to allow f32 => f64, do so:
825
+ ParquetValue::Float32(flo) => typed_builder.append_value(*flo as f64),
826
+ ParquetValue::Null => typed_builder.append_null(),
827
+ other => {
828
+ return Err(MagnusError::new(
829
+ magnus::exception::type_error(),
830
+ format!("Expected Float64, got {:?}", other),
831
+ ))
832
+ }
641
833
  }
642
- ParquetSchemaType::Binary => {
643
- Box::new(BinaryBuilder::new()) as Box<dyn ArrayBuilder>
834
+ }
835
+ Ok(())
836
+ }
837
+ ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
838
+ let typed_builder = builder
839
+ .as_any_mut()
840
+ .downcast_mut::<BooleanBuilder>()
841
+ .expect("Builder mismatch: expected BooleanBuilder");
842
+ for val in values {
843
+ match val {
844
+ ParquetValue::Boolean(b) => typed_builder.append_value(*b),
845
+ ParquetValue::Null => typed_builder.append_null(),
846
+ other => {
847
+ return Err(MagnusError::new(
848
+ magnus::exception::type_error(),
849
+ format!("Expected Boolean, got {:?}", other),
850
+ ))
851
+ }
644
852
  }
645
- ParquetSchemaType::Boolean => {
646
- Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>
853
+ }
854
+ Ok(())
855
+ }
856
+ ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
857
+ let typed_builder = builder
858
+ .as_any_mut()
859
+ .downcast_mut::<Date32Builder>()
860
+ .expect("Builder mismatch: expected Date32Builder");
861
+ for val in values {
862
+ match val {
863
+ ParquetValue::Date32(d) => typed_builder.append_value(*d),
864
+ ParquetValue::Null => typed_builder.append_null(),
865
+ other => {
866
+ return Err(MagnusError::new(
867
+ magnus::exception::type_error(),
868
+ format!("Expected Date32, got {:?}", other),
869
+ ))
870
+ }
647
871
  }
648
- ParquetSchemaType::Date32 => {
649
- Box::new(Date32Builder::new()) as Box<dyn ArrayBuilder>
872
+ }
873
+ Ok(())
874
+ }
875
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
876
+ let typed_builder = builder
877
+ .as_any_mut()
878
+ .downcast_mut::<TimestampMillisecondBuilder>()
879
+ .expect("Builder mismatch: expected TimestampMillisecondBuilder");
880
+ for val in values {
881
+ match val {
882
+ ParquetValue::TimestampMillis(ts, _tz) => typed_builder.append_value(*ts),
883
+ ParquetValue::Null => typed_builder.append_null(),
884
+ other => {
885
+ return Err(MagnusError::new(
886
+ magnus::exception::type_error(),
887
+ format!("Expected TimestampMillis, got {:?}", other),
888
+ ))
889
+ }
650
890
  }
651
- ParquetSchemaType::TimestampMillis => {
652
- Box::new(TimestampMillisecondBuilder::new()) as Box<dyn ArrayBuilder>
891
+ }
892
+ Ok(())
893
+ }
894
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
895
+ let typed_builder = builder
896
+ .as_any_mut()
897
+ .downcast_mut::<TimestampMicrosecondBuilder>()
898
+ .expect("Builder mismatch: expected TimestampMicrosecondBuilder");
899
+ for val in values {
900
+ match val {
901
+ ParquetValue::TimestampMicros(ts, _tz) => typed_builder.append_value(*ts),
902
+ ParquetValue::Null => typed_builder.append_null(),
903
+ other => {
904
+ return Err(MagnusError::new(
905
+ magnus::exception::type_error(),
906
+ format!("Expected TimestampMicros, got {:?}", other),
907
+ ))
908
+ }
653
909
  }
654
- ParquetSchemaType::TimestampMicros => {
655
- Box::new(TimestampMicrosecondBuilder::new()) as Box<dyn ArrayBuilder>
910
+ }
911
+ Ok(())
912
+ }
913
+
914
+ // ------------------
915
+ // NESTED LIST - using helper function
916
+ // ------------------
917
+ ParquetSchemaType::List(list_field) => {
918
+ fill_list_builder(builder, &list_field.item_type, values)
919
+ }
920
+
921
+ // ------------------
922
+ // NESTED MAP - using helper function
923
+ // ------------------
924
+ ParquetSchemaType::Map(map_field) => {
925
+ fill_map_builder(builder, &map_field.key_type, &map_field.value_type, values)
926
+ }
927
+
928
+ // ------------------
929
+ // OTHER TYPES - keep as is for now
930
+ // ------------------
931
+ ParquetSchemaType::Primitive(PrimitiveType::String) => {
932
+ let typed_builder = builder
933
+ .as_any_mut()
934
+ .downcast_mut::<StringBuilder>()
935
+ .expect("Builder mismatch: expected StringBuilder");
936
+ for val in values {
937
+ match val {
938
+ ParquetValue::String(s) => typed_builder.append_value(s),
939
+ ParquetValue::Null => typed_builder.append_null(),
940
+ other => {
941
+ return Err(MagnusError::new(
942
+ magnus::exception::type_error(),
943
+ format!("Expected String, got {:?}", other),
944
+ ))
945
+ }
656
946
  }
657
- ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
658
- return Err(MagnusError::new(
659
- magnus::exception::type_error(),
660
- "Nested lists and maps are not supported",
661
- ))
947
+ }
948
+ Ok(())
949
+ }
950
+ ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
951
+ let typed_builder = builder
952
+ .as_any_mut()
953
+ .downcast_mut::<BinaryBuilder>()
954
+ .expect("Builder mismatch: expected BinaryBuilder");
955
+ for val in values {
956
+ match val {
957
+ ParquetValue::Bytes(b) => typed_builder.append_value(&b),
958
+ ParquetValue::Null => typed_builder.append_null(),
959
+ other => {
960
+ return Err(MagnusError::new(
961
+ magnus::exception::type_error(),
962
+ format!("Expected Binary, got {:?}", other),
963
+ ))
964
+ }
662
965
  }
663
- };
966
+ }
967
+ Ok(())
968
+ }
969
+ ParquetSchemaType::Struct(struct_field) => {
970
+ let typed_builder = builder
971
+ .as_any_mut()
972
+ .downcast_mut::<StructBuilder>()
973
+ .expect("Builder mismatch: expected StructBuilder");
664
974
 
665
- let mut list_builder = ListBuilder::new(value_builder);
666
-
667
- for value in values {
668
- match value {
669
- ParquetValue::List(items) => {
670
- for item in items {
671
- match &list_field.item_type {
672
- ParquetSchemaType::Int8 => append_list_value_copy!(
673
- list_builder,
674
- ParquetSchemaType::Int8,
675
- item,
676
- Int8Builder,
677
- ParquetValue::Int8
678
- ),
679
- ParquetSchemaType::Int16 => append_list_value_copy!(
680
- list_builder,
681
- ParquetSchemaType::Int16,
682
- item,
683
- Int16Builder,
684
- ParquetValue::Int16
685
- ),
686
- ParquetSchemaType::Int32 => append_list_value_copy!(
687
- list_builder,
688
- ParquetSchemaType::Int32,
689
- item,
690
- Int32Builder,
691
- ParquetValue::Int32
692
- ),
693
- ParquetSchemaType::Int64 => append_list_value_copy!(
694
- list_builder,
695
- ParquetSchemaType::Int64,
696
- item,
697
- Int64Builder,
698
- ParquetValue::Int64
699
- ),
700
- ParquetSchemaType::UInt8 => append_list_value_copy!(
701
- list_builder,
702
- ParquetSchemaType::UInt8,
703
- item,
704
- UInt8Builder,
705
- ParquetValue::UInt8
706
- ),
707
- ParquetSchemaType::UInt16 => append_list_value_copy!(
708
- list_builder,
709
- ParquetSchemaType::UInt16,
710
- item,
711
- UInt16Builder,
712
- ParquetValue::UInt16
713
- ),
714
- ParquetSchemaType::UInt32 => append_list_value_copy!(
715
- list_builder,
716
- ParquetSchemaType::UInt32,
717
- item,
718
- UInt32Builder,
719
- ParquetValue::UInt32
720
- ),
721
- ParquetSchemaType::UInt64 => append_list_value_copy!(
722
- list_builder,
723
- ParquetSchemaType::UInt64,
724
- item,
725
- UInt64Builder,
726
- ParquetValue::UInt64
727
- ),
728
- ParquetSchemaType::Float => append_list_value_copy!(
729
- list_builder,
730
- ParquetSchemaType::Float,
731
- item,
732
- Float32Builder,
733
- ParquetValue::Float32
734
- ),
735
- ParquetSchemaType::Double => append_list_value_copy!(
736
- list_builder,
737
- ParquetSchemaType::Double,
738
- item,
739
- Float64Builder,
740
- ParquetValue::Float64
741
- ),
742
- ParquetSchemaType::String => append_list_value!(
743
- list_builder,
744
- ParquetSchemaType::String,
745
- item,
746
- StringBuilder,
747
- ParquetValue::String
748
- ),
749
- ParquetSchemaType::Binary => append_list_value!(
750
- list_builder,
751
- ParquetSchemaType::Binary,
752
- item,
753
- BinaryBuilder,
754
- ParquetValue::Bytes
755
- ),
756
- ParquetSchemaType::Boolean => append_list_value_copy!(
757
- list_builder,
758
- ParquetSchemaType::Boolean,
759
- item,
760
- BooleanBuilder,
761
- ParquetValue::Boolean
762
- ),
763
- ParquetSchemaType::Date32 => append_list_value_copy!(
764
- list_builder,
765
- ParquetSchemaType::Date32,
766
- item,
767
- Date32Builder,
768
- ParquetValue::Date32
769
- ),
770
- ParquetSchemaType::TimestampMillis => append_timestamp_list_value!(
771
- list_builder,
772
- ParquetSchemaType::TimestampMillis,
773
- item,
774
- TimestampMillisecondBuilder,
775
- ParquetValue::TimestampMillis
776
- ),
777
- ParquetSchemaType::TimestampMicros => append_timestamp_list_value!(
778
- list_builder,
779
- ParquetSchemaType::TimestampMicros,
780
- item,
781
- TimestampMicrosecondBuilder,
782
- ParquetValue::TimestampMicros
783
- ),
784
- ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
785
- return Err(MagnusError::new(
786
- magnus::exception::type_error(),
787
- "Nested lists and maps are not supported",
788
- ))
975
+ for val in values {
976
+ match val {
977
+ ParquetValue::Null => {
978
+ // null struct
979
+ typed_builder.append(false);
980
+ }
981
+ ParquetValue::Map(map_data) => {
982
+ for (i, field) in struct_field.fields.iter().enumerate() {
983
+ let field_key = ParquetValue::String(field.name.clone());
984
+ if let Some(field_val) = map_data.get(&field_key) {
985
+ match field_val {
986
+ ParquetValue::Int8(x) => typed_builder
987
+ .field_builder::<Int8Builder>(i)
988
+ .ok_or_else(|| {
989
+ MagnusError::new(
990
+ magnus::exception::type_error(),
991
+ "Failed to coerce into Int8Builder",
992
+ )
993
+ })?
994
+ .append_value(*x),
995
+ ParquetValue::Int16(x) => typed_builder
996
+ .field_builder::<Int16Builder>(i)
997
+ .ok_or_else(|| {
998
+ MagnusError::new(
999
+ magnus::exception::type_error(),
1000
+ "Failed to coerce into Int16Builder",
1001
+ )
1002
+ })?
1003
+ .append_value(*x),
1004
+ ParquetValue::Int32(x) => typed_builder
1005
+ .field_builder::<Int32Builder>(i)
1006
+ .ok_or_else(|| {
1007
+ MagnusError::new(
1008
+ magnus::exception::type_error(),
1009
+ "Failed to coerce into Int32Builder",
1010
+ )
1011
+ })?
1012
+ .append_value(*x),
1013
+ ParquetValue::Int64(x) => typed_builder
1014
+ .field_builder::<Int64Builder>(i)
1015
+ .ok_or_else(|| {
1016
+ MagnusError::new(
1017
+ magnus::exception::type_error(),
1018
+ "Failed to coerce into Int64Builder",
1019
+ )
1020
+ })?
1021
+ .append_value(*x),
1022
+ ParquetValue::UInt8(x) => typed_builder
1023
+ .field_builder::<UInt8Builder>(i)
1024
+ .ok_or_else(|| {
1025
+ MagnusError::new(
1026
+ magnus::exception::type_error(),
1027
+ "Failed to coerce into UInt8Builder",
1028
+ )
1029
+ })?
1030
+ .append_value(*x),
1031
+ ParquetValue::UInt16(x) => typed_builder
1032
+ .field_builder::<UInt16Builder>(i)
1033
+ .ok_or_else(|| {
1034
+ MagnusError::new(
1035
+ magnus::exception::type_error(),
1036
+ "Failed to coerce into UInt16Builder",
1037
+ )
1038
+ })?
1039
+ .append_value(*x),
1040
+ ParquetValue::UInt32(x) => typed_builder
1041
+ .field_builder::<UInt32Builder>(i)
1042
+ .ok_or_else(|| {
1043
+ MagnusError::new(
1044
+ magnus::exception::type_error(),
1045
+ "Failed to coerce into UInt32Builder",
1046
+ )
1047
+ })?
1048
+ .append_value(*x),
1049
+ ParquetValue::UInt64(x) => typed_builder
1050
+ .field_builder::<UInt64Builder>(i)
1051
+ .ok_or_else(|| {
1052
+ MagnusError::new(
1053
+ magnus::exception::type_error(),
1054
+ "Failed to coerce into UInt64Builder",
1055
+ )
1056
+ })?
1057
+ .append_value(*x),
1058
+ ParquetValue::Float16(_) => {
1059
+ return Err(MagnusError::new(
1060
+ magnus::exception::runtime_error(),
1061
+ "Float16 not supported",
1062
+ ))
1063
+ }
1064
+ ParquetValue::Float32(x) => typed_builder
1065
+ .field_builder::<Float32Builder>(i)
1066
+ .ok_or_else(|| {
1067
+ MagnusError::new(
1068
+ magnus::exception::type_error(),
1069
+ "Failed to coerce into Float32Builder",
1070
+ )
1071
+ })?
1072
+ .append_value(*x),
1073
+ ParquetValue::Float64(x) => typed_builder
1074
+ .field_builder::<Float64Builder>(i)
1075
+ .ok_or_else(|| {
1076
+ MagnusError::new(
1077
+ magnus::exception::type_error(),
1078
+ "Failed to coerce into Float64Builder",
1079
+ )
1080
+ })?
1081
+ .append_value(*x),
1082
+ ParquetValue::Boolean(x) => typed_builder
1083
+ .field_builder::<BooleanBuilder>(i)
1084
+ .ok_or_else(|| {
1085
+ MagnusError::new(
1086
+ magnus::exception::type_error(),
1087
+ "Failed to coerce into BooleanBuilder",
1088
+ )
1089
+ })?
1090
+ .append_value(*x),
1091
+ ParquetValue::String(x) => typed_builder
1092
+ .field_builder::<StringBuilder>(i)
1093
+ .ok_or_else(|| {
1094
+ MagnusError::new(
1095
+ magnus::exception::type_error(),
1096
+ "Failed to coerce into StringBuilder",
1097
+ )
1098
+ })?
1099
+ .append_value(x),
1100
+ ParquetValue::Bytes(bytes) => typed_builder
1101
+ .field_builder::<BinaryBuilder>(i)
1102
+ .ok_or_else(|| {
1103
+ MagnusError::new(
1104
+ magnus::exception::type_error(),
1105
+ "Failed to coerce into BinaryBuilder",
1106
+ )
1107
+ })?
1108
+ .append_value(bytes),
1109
+ ParquetValue::Date32(x) => typed_builder
1110
+ .field_builder::<Date32Builder>(i)
1111
+ .ok_or_else(|| {
1112
+ MagnusError::new(
1113
+ magnus::exception::type_error(),
1114
+ "Failed to coerce into Date32Builder",
1115
+ )
1116
+ })?
1117
+ .append_value(*x),
1118
+ ParquetValue::Date64(x) => typed_builder
1119
+ .field_builder::<Date64Builder>(i)
1120
+ .ok_or_else(|| {
1121
+ MagnusError::new(
1122
+ magnus::exception::type_error(),
1123
+ "Failed to coerce into Date64Builder",
1124
+ )
1125
+ })?
1126
+ .append_value(*x),
1127
+ ParquetValue::TimestampSecond(x, _tz) => typed_builder
1128
+ .field_builder::<TimestampSecondBuilder>(i)
1129
+ .ok_or_else(|| {
1130
+ MagnusError::new(
1131
+ magnus::exception::type_error(),
1132
+ "Failed to coerce into TimestampSecondBuilder",
1133
+ )
1134
+ })?
1135
+ .append_value(*x),
1136
+ ParquetValue::TimestampMillis(x, _tz) => typed_builder
1137
+ .field_builder::<TimestampMillisecondBuilder>(i)
1138
+ .ok_or_else(|| {
1139
+ MagnusError::new(
1140
+ magnus::exception::type_error(),
1141
+ "Failed to coerce into TimestampMillisecondBuilder",
1142
+ )
1143
+ })?
1144
+ .append_value(*x),
1145
+ ParquetValue::TimestampMicros(x, _tz) => typed_builder
1146
+ .field_builder::<TimestampMicrosecondBuilder>(i)
1147
+ .ok_or_else(|| {
1148
+ MagnusError::new(
1149
+ magnus::exception::type_error(),
1150
+ "Failed to coerce into TimestampMicrosecondBuilder",
1151
+ )
1152
+ })?
1153
+ .append_value(*x),
1154
+ ParquetValue::TimestampNanos(x, _tz) => typed_builder
1155
+ .field_builder::<TimestampNanosecondBuilder>(i)
1156
+ .ok_or_else(|| {
1157
+ MagnusError::new(
1158
+ magnus::exception::type_error(),
1159
+ "Failed to coerce into TimestampNanosecondBuilder",
1160
+ )
1161
+ })?
1162
+ .append_value(*x),
1163
+ ParquetValue::List(items) => {
1164
+ let list_builder = typed_builder
1165
+ .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
1166
+ .ok_or_else(|| {
1167
+ MagnusError::new(
1168
+ magnus::exception::type_error(),
1169
+ "Failed to coerce into ListBuilder",
1170
+ )
1171
+ })?;
1172
+ fill_builder(
1173
+ list_builder.values(),
1174
+ &struct_field.fields[i].type_,
1175
+ items,
1176
+ )?;
1177
+ list_builder.append(true);
1178
+ }
1179
+ ParquetValue::Map(map_data) => {
1180
+ let maybe_map_builder = typed_builder
1181
+ .field_builder::<MapBuilder<
1182
+ Box<dyn ArrayBuilder>,
1183
+ Box<dyn ArrayBuilder>,
1184
+ >>(i);
1185
+
1186
+ if let Some(map_builder) = maybe_map_builder {
1187
+ fill_builder(
1188
+ map_builder,
1189
+ &struct_field.fields[i].type_,
1190
+ &[ParquetValue::Map(map_data.clone())],
1191
+ )?;
1192
+ map_builder.append(true).map_err(|e| {
1193
+ MagnusError::new(
1194
+ magnus::exception::runtime_error(),
1195
+ format!("Failed to append map: {}", e),
1196
+ )
1197
+ })?;
1198
+ } else {
1199
+ let child_struct_builder = typed_builder
1200
+ .field_builder::<StructBuilder>(i)
1201
+ .ok_or_else(|| {
1202
+ MagnusError::new(
1203
+ magnus::exception::type_error(),
1204
+ "Failed to coerce into StructBuilder",
1205
+ )
1206
+ })?;
1207
+ fill_builder(
1208
+ child_struct_builder,
1209
+ &struct_field.fields[i].type_,
1210
+ &[ParquetValue::Map(map_data.clone())],
1211
+ )?;
1212
+ }
1213
+ }
1214
+ ParquetValue::Null => match struct_field.fields[i].type_ {
1215
+ ParquetSchemaType::Primitive(PrimitiveType::Int8) => typed_builder
1216
+ .field_builder::<Int8Builder>(i)
1217
+ .ok_or_else(|| {
1218
+ MagnusError::new(
1219
+ magnus::exception::type_error(),
1220
+ "Failed to coerce into Int8Builder",
1221
+ )
1222
+ })?
1223
+ .append_null(),
1224
+ ParquetSchemaType::Primitive(PrimitiveType::Int16) => typed_builder
1225
+ .field_builder::<Int16Builder>(i)
1226
+ .ok_or_else(|| {
1227
+ MagnusError::new(
1228
+ magnus::exception::type_error(),
1229
+ "Failed to coerce into Int16Builder",
1230
+ )
1231
+ })?
1232
+ .append_null(),
1233
+ ParquetSchemaType::Primitive(PrimitiveType::Int32) => typed_builder
1234
+ .field_builder::<Int32Builder>(i)
1235
+ .ok_or_else(|| {
1236
+ MagnusError::new(
1237
+ magnus::exception::type_error(),
1238
+ "Failed to coerce into Int32Builder",
1239
+ )
1240
+ })?
1241
+ .append_null(),
1242
+ ParquetSchemaType::Primitive(PrimitiveType::Int64) => typed_builder
1243
+ .field_builder::<Int64Builder>(i)
1244
+ .ok_or_else(|| {
1245
+ MagnusError::new(
1246
+ magnus::exception::type_error(),
1247
+ "Failed to coerce into Int64Builder",
1248
+ )
1249
+ })?
1250
+ .append_null(),
1251
+ ParquetSchemaType::Primitive(PrimitiveType::UInt8) => typed_builder
1252
+ .field_builder::<UInt8Builder>(i)
1253
+ .ok_or_else(|| {
1254
+ MagnusError::new(
1255
+ magnus::exception::type_error(),
1256
+ "Failed to coerce into UInt8Builder",
1257
+ )
1258
+ })?
1259
+ .append_null(),
1260
+ ParquetSchemaType::Primitive(PrimitiveType::UInt16) => typed_builder
1261
+ .field_builder::<UInt16Builder>(i)
1262
+ .ok_or_else(|| {
1263
+ MagnusError::new(
1264
+ magnus::exception::type_error(),
1265
+ "Failed to coerce into UInt16Builder",
1266
+ )
1267
+ })?
1268
+ .append_null(),
1269
+ ParquetSchemaType::Primitive(PrimitiveType::UInt32) => typed_builder
1270
+ .field_builder::<UInt32Builder>(i)
1271
+ .ok_or_else(|| {
1272
+ MagnusError::new(
1273
+ magnus::exception::type_error(),
1274
+ "Failed to coerce into UInt32Builder",
1275
+ )
1276
+ })?
1277
+ .append_null(),
1278
+ ParquetSchemaType::Primitive(PrimitiveType::UInt64) => typed_builder
1279
+ .field_builder::<UInt64Builder>(i)
1280
+ .ok_or_else(|| {
1281
+ MagnusError::new(
1282
+ magnus::exception::type_error(),
1283
+ "Failed to coerce into UInt64Builder",
1284
+ )
1285
+ })?
1286
+ .append_null(),
1287
+ ParquetSchemaType::Primitive(PrimitiveType::Float32) => typed_builder
1288
+ .field_builder::<Float32Builder>(i)
1289
+ .ok_or_else(|| {
1290
+ MagnusError::new(
1291
+ magnus::exception::type_error(),
1292
+ "Failed to coerce into Float32Builder",
1293
+ )
1294
+ })?
1295
+ .append_null(),
1296
+ ParquetSchemaType::Primitive(PrimitiveType::Float64) => typed_builder
1297
+ .field_builder::<Float64Builder>(i)
1298
+ .ok_or_else(|| {
1299
+ MagnusError::new(
1300
+ magnus::exception::type_error(),
1301
+ "Failed to coerce into Float64Builder",
1302
+ )
1303
+ })?
1304
+ .append_null(),
1305
+ ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1306
+ .field_builder::<StringBuilder>(i)
1307
+ .ok_or_else(|| {
1308
+ MagnusError::new(
1309
+ magnus::exception::type_error(),
1310
+ "Failed to coerce into StringBuilder",
1311
+ )
1312
+ })?
1313
+ .append_null(),
1314
+ ParquetSchemaType::Primitive(PrimitiveType::Binary) => typed_builder
1315
+ .field_builder::<BinaryBuilder>(i)
1316
+ .ok_or_else(|| {
1317
+ MagnusError::new(
1318
+ magnus::exception::type_error(),
1319
+ "Failed to coerce into BinaryBuilder",
1320
+ )
1321
+ })?
1322
+ .append_null(),
1323
+ ParquetSchemaType::Primitive(PrimitiveType::Boolean) => typed_builder
1324
+ .field_builder::<BooleanBuilder>(i)
1325
+ .ok_or_else(|| {
1326
+ MagnusError::new(
1327
+ magnus::exception::type_error(),
1328
+ "Failed to coerce into BooleanBuilder",
1329
+ )
1330
+ })?
1331
+ .append_null(),
1332
+ ParquetSchemaType::Primitive(PrimitiveType::Date32) => typed_builder
1333
+ .field_builder::<Date32Builder>(i)
1334
+ .ok_or_else(|| {
1335
+ MagnusError::new(
1336
+ magnus::exception::type_error(),
1337
+ "Failed to coerce into Date32Builder",
1338
+ )
1339
+ })?
1340
+ .append_null(),
1341
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => typed_builder
1342
+ .field_builder::<TimestampMillisecondBuilder>(i)
1343
+ .ok_or_else(|| {
1344
+ MagnusError::new(
1345
+ magnus::exception::type_error(),
1346
+ "Failed to coerce into TimestampMillisecondBuilder",
1347
+ )
1348
+ })?
1349
+ .append_null(),
1350
+ ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => typed_builder
1351
+ .field_builder::<TimestampMicrosecondBuilder>(i)
1352
+ .ok_or_else(|| {
1353
+ MagnusError::new(
1354
+ magnus::exception::type_error(),
1355
+ "Failed to coerce into TimestampMicrosecondBuilder",
1356
+ )
1357
+ })?
1358
+ .append_null(),
1359
+ ParquetSchemaType::List(_) => typed_builder
1360
+ .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
1361
+ .ok_or_else(|| {
1362
+ MagnusError::new(
1363
+ magnus::exception::type_error(),
1364
+ "Failed to coerce into ListBuilder",
1365
+ )
1366
+ })?
1367
+ .append(false),
1368
+ ParquetSchemaType::Map(_) => {
1369
+ typed_builder
1370
+ .field_builder::<MapBuilder<
1371
+ Box<dyn ArrayBuilder>,
1372
+ Box<dyn ArrayBuilder>,
1373
+ >>(i)
1374
+ .ok_or_else(|| {
1375
+ MagnusError::new(
1376
+ magnus::exception::type_error(),
1377
+ "Failed to coerce into MapBuilder",
1378
+ )
1379
+ })?
1380
+ .append(false)
1381
+ .map_err(|e| {
1382
+ MagnusError::new(
1383
+ magnus::exception::runtime_error(),
1384
+ format!("Failed to append map: {}", e),
1385
+ )
1386
+ })?;
1387
+ }
1388
+ ParquetSchemaType::Struct(_) => typed_builder
1389
+ .field_builder::<StructBuilder>(i)
1390
+ .ok_or_else(|| {
1391
+ MagnusError::new(
1392
+ magnus::exception::type_error(),
1393
+ "Failed to coerce into StructBuilder",
1394
+ )
1395
+ })?
1396
+ .append_null(),
1397
+ },
789
1398
  }
1399
+ } else {
1400
+ return Err(MagnusError::new(
1401
+ magnus::exception::type_error(),
1402
+ format!("Field {} not found in map", i),
1403
+ ));
790
1404
  }
791
1405
  }
1406
+ typed_builder.append(true);
792
1407
  }
793
- ParquetValue::Null => list_builder.append_null(),
794
- _ => {
1408
+ other => {
795
1409
  return Err(MagnusError::new(
796
1410
  magnus::exception::type_error(),
797
- format!("Expected List, got {:?}", value),
798
- ))
1411
+ format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
1412
+ ));
799
1413
  }
800
1414
  }
801
1415
  }
802
- Ok(Arc::new(list_builder.finish()))
803
- }
804
- ParquetSchemaType::Map(_map_field) => {
805
- unimplemented!("Writing maps is not yet supported")
1416
+ Ok(())
806
1417
  }
807
1418
  }
808
1419
  }
809
1420
 
1421
+ /// Converts `values` into one Arrow array for schema type `type_` by creating a builder for that type and filling it via `fill_builder`.
1422
+ /// No schema type is special-cased here; errors from builder creation and from `fill_builder` are propagated with `?`.
1423
+ pub fn convert_parquet_values_to_arrow(
1424
+ values: Vec<ParquetValue>,
1425
+ type_: &ParquetSchemaType,
1426
+ ) -> Result<Arc<dyn Array>, ParquetGemError> {
1427
+ // Capacity hint is values.len(), raised to 1 for empty input so the builder is never requested with capacity 0.
1428
+ let capacity = if values.is_empty() { 1 } else { values.len() };
1429
+ let mut builder = create_arrow_builder_for_type(type_, Some(capacity))?;
1430
+
1431
+ fill_builder(&mut builder, type_, &values)?;
1432
+
1433
+ // Finish the builder to obtain the array, which is then wrapped in an Arc below.
1434
+ let array = builder.finish();
1435
+
1436
+ Ok(Arc::new(array))
1437
+ }
1438
+
810
1439
  pub fn convert_ruby_array_to_arrow(
1440
+ ruby: &Ruby,
811
1441
  values: RArray,
812
1442
  type_: &ParquetSchemaType,
813
- ) -> Result<Arc<dyn Array>, MagnusError> {
1443
+ ) -> Result<Arc<dyn Array>, ParquetGemError> {
814
1444
  let mut parquet_values = Vec::with_capacity(values.len());
815
1445
  for value in values {
816
1446
  if value.is_nil() {
817
1447
  parquet_values.push(ParquetValue::Null);
818
1448
  continue;
819
1449
  }
820
- let parquet_value = ParquetValue::from_value(value, type_)?;
1450
+ let parquet_value = ParquetValue::from_value(ruby, value, type_, None)?;
821
1451
  parquet_values.push(parquet_value);
822
1452
  }
823
1453
  convert_parquet_values_to_arrow(parquet_values, type_)