parquet 0.0.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,753 @@
1
+ use std::str::FromStr;
2
+
3
+ use super::*;
4
+ use arrow_array::builder::*;
5
+ use jiff::tz::{Offset, TimeZone};
6
+ use magnus::{RArray, TryConvert};
7
+
8
/// Type-level namespace for numeric conversions that produce a `T`.
///
/// The only field is a `PhantomData<T>` marker, so a value of this type stores no data.
/// Its associated function is called as
/// `NumericConverter::<T>::convert_with_string_fallback(..)`.
pub struct NumericConverter<T> {
    // Marker that ties the otherwise-unused type parameter `T` to the struct.
    _phantom: std::marker::PhantomData<T>,
}
11
+
12
+ impl<T> NumericConverter<T>
13
+ where
14
+ T: TryConvert + FromStr,
15
+ <T as FromStr>::Err: std::fmt::Display,
16
+ {
17
+ pub fn convert_with_string_fallback(value: Value) -> Result<T, MagnusError> {
18
+ let ruby = unsafe { Ruby::get_unchecked() };
19
+ if value.is_kind_of(ruby.class_string()) {
20
+ let s = String::try_convert(value)?;
21
+ s.trim().parse::<T>().map_err(|e| {
22
+ MagnusError::new(
23
+ magnus::exception::type_error(),
24
+ format!("Failed to parse '{}' as numeric: {}", s, e),
25
+ )
26
+ })
27
+ } else {
28
+ T::try_convert(value)
29
+ }
30
+ }
31
+ }
32
+
33
+ pub fn convert_to_date32(value: Value) -> Result<i32, MagnusError> {
34
+ let ruby = unsafe { Ruby::get_unchecked() };
35
+ if value.is_kind_of(ruby.class_string()) {
36
+ let s = String::try_convert(value)?;
37
+ // Parse string into Timestamp using jiff
38
+ let date: jiff::civil::Date = s.parse().map_err(|e| {
39
+ MagnusError::new(
40
+ magnus::exception::type_error(),
41
+ format!("Failed to parse '{}' as date32: {}", s, e),
42
+ )
43
+ })?;
44
+
45
+ let timestamp = date.at(0, 0, 0, 0);
46
+
47
+ let x = timestamp
48
+ .to_zoned(TimeZone::fixed(Offset::constant(0)))
49
+ .unwrap()
50
+ .timestamp();
51
+
52
+ // Convert to epoch days
53
+ Ok((x.as_second() as i64 / 86400) as i32)
54
+ } else if value.is_kind_of(ruby.class_time()) {
55
+ // Convert Time object to epoch days
56
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
57
+ Ok(((secs as f64) / 86400.0) as i32)
58
+ } else {
59
+ Err(MagnusError::new(
60
+ magnus::exception::type_error(),
61
+ format!("Cannot convert {} to date32", unsafe { value.classname() }),
62
+ ))
63
+ }
64
+ }
65
+
66
+ pub fn convert_to_timestamp_millis(value: Value) -> Result<i64, MagnusError> {
67
+ let ruby = unsafe { Ruby::get_unchecked() };
68
+ if value.is_kind_of(ruby.class_string()) {
69
+ let s = String::try_convert(value)?;
70
+ // Parse string into Timestamp using jiff
71
+ let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
72
+ MagnusError::new(
73
+ magnus::exception::type_error(),
74
+ format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
75
+ )
76
+ })?;
77
+ // Convert to milliseconds
78
+ Ok(timestamp.as_millisecond())
79
+ } else if value.is_kind_of(ruby.class_time()) {
80
+ // Convert Time object to milliseconds
81
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
82
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
83
+ Ok(secs * 1000 + (usecs / 1000))
84
+ } else {
85
+ Err(MagnusError::new(
86
+ magnus::exception::type_error(),
87
+ format!("Cannot convert {} to timestamp_millis", unsafe {
88
+ value.classname()
89
+ }),
90
+ ))
91
+ }
92
+ }
93
+
94
+ pub fn convert_to_timestamp_micros(value: Value) -> Result<i64, MagnusError> {
95
+ let ruby = unsafe { Ruby::get_unchecked() };
96
+ if value.is_kind_of(ruby.class_string()) {
97
+ let s = String::try_convert(value)?;
98
+ // Parse string into Timestamp using jiff
99
+ let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
100
+ MagnusError::new(
101
+ magnus::exception::type_error(),
102
+ format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
103
+ )
104
+ })?;
105
+ // Convert to microseconds
106
+ Ok(timestamp.as_microsecond())
107
+ } else if value.is_kind_of(ruby.class_time()) {
108
+ // Convert Time object to microseconds
109
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
110
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
111
+ Ok(secs * 1_000_000 + usecs)
112
+ } else {
113
+ Err(MagnusError::new(
114
+ magnus::exception::type_error(),
115
+ format!("Cannot convert {} to timestamp_micros", unsafe {
116
+ value.classname()
117
+ }),
118
+ ))
119
+ }
120
+ }
121
+
122
+ pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
123
+ Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
124
+ }
125
+
126
+ pub fn convert_to_boolean(value: Value) -> Result<bool, MagnusError> {
127
+ let ruby = unsafe { Ruby::get_unchecked() };
128
+ if value.is_kind_of(ruby.class_string()) {
129
+ let s = String::try_convert(value)?;
130
+ s.trim().parse::<bool>().map_err(|e| {
131
+ MagnusError::new(
132
+ magnus::exception::type_error(),
133
+ format!("Failed to parse '{}' as boolean: {}", s, e),
134
+ )
135
+ })
136
+ } else {
137
+ bool::try_convert(value)
138
+ }
139
+ }
140
+
141
+ pub fn convert_to_list(
142
+ value: Value,
143
+ list_field: &ListField,
144
+ ) -> Result<Vec<ParquetValue>, MagnusError> {
145
+ let ruby = unsafe { Ruby::get_unchecked() };
146
+ if value.is_kind_of(ruby.class_array()) {
147
+ let array = RArray::from_value(value).ok_or_else(|| {
148
+ MagnusError::new(magnus::exception::type_error(), "Invalid list format")
149
+ })?;
150
+
151
+ let mut values = Vec::with_capacity(array.len());
152
+ for item_value in array.into_iter() {
153
+ let converted = match &list_field.item_type {
154
+ ParquetSchemaType::Int8 => {
155
+ let v = NumericConverter::<i8>::convert_with_string_fallback(item_value)?;
156
+ ParquetValue::Int8(v)
157
+ }
158
+ ParquetSchemaType::Int16 => {
159
+ let v = NumericConverter::<i16>::convert_with_string_fallback(item_value)?;
160
+ ParquetValue::Int16(v)
161
+ }
162
+ ParquetSchemaType::Int32 => {
163
+ let v = NumericConverter::<i32>::convert_with_string_fallback(item_value)?;
164
+ ParquetValue::Int32(v)
165
+ }
166
+ ParquetSchemaType::Int64 => {
167
+ let v = NumericConverter::<i64>::convert_with_string_fallback(item_value)?;
168
+ ParquetValue::Int64(v)
169
+ }
170
+ ParquetSchemaType::UInt8 => {
171
+ let v = NumericConverter::<u8>::convert_with_string_fallback(item_value)?;
172
+ ParquetValue::UInt8(v)
173
+ }
174
+ ParquetSchemaType::UInt16 => {
175
+ let v = NumericConverter::<u16>::convert_with_string_fallback(item_value)?;
176
+ ParquetValue::UInt16(v)
177
+ }
178
+ ParquetSchemaType::UInt32 => {
179
+ let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
180
+ ParquetValue::UInt32(v)
181
+ }
182
+ ParquetSchemaType::UInt64 => {
183
+ let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
184
+ ParquetValue::UInt64(v)
185
+ }
186
+ ParquetSchemaType::Float => {
187
+ let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
188
+ ParquetValue::Float32(v)
189
+ }
190
+ ParquetSchemaType::Double => {
191
+ let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
192
+ ParquetValue::Float64(v)
193
+ }
194
+ ParquetSchemaType::String => {
195
+ let v = String::try_convert(item_value)?;
196
+ ParquetValue::String(v)
197
+ }
198
+ ParquetSchemaType::Binary => {
199
+ let v = convert_to_binary(item_value)?;
200
+ ParquetValue::Bytes(v)
201
+ }
202
+ ParquetSchemaType::Boolean => {
203
+ let v = convert_to_boolean(item_value)?;
204
+ ParquetValue::Boolean(v)
205
+ }
206
+ ParquetSchemaType::Date32 => {
207
+ let v = convert_to_date32(item_value)?;
208
+ ParquetValue::Date32(v)
209
+ }
210
+ ParquetSchemaType::TimestampMillis => {
211
+ let v = convert_to_timestamp_millis(item_value)?;
212
+ ParquetValue::TimestampMillis(v, None)
213
+ }
214
+ ParquetSchemaType::TimestampMicros => {
215
+ let v = convert_to_timestamp_micros(item_value)?;
216
+ ParquetValue::TimestampMicros(v, None)
217
+ }
218
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
219
+ return Err(MagnusError::new(
220
+ magnus::exception::type_error(),
221
+ "Nested lists and maps are not supported",
222
+ ))
223
+ }
224
+ };
225
+ values.push(converted);
226
+ }
227
+ Ok(values)
228
+ } else {
229
+ Err(MagnusError::new(
230
+ magnus::exception::type_error(),
231
+ "Invalid list format",
232
+ ))
233
+ }
234
+ }
235
+
236
+ pub fn convert_to_map(
237
+ value: Value,
238
+ map_field: &MapField,
239
+ ) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
240
+ let ruby = unsafe { Ruby::get_unchecked() };
241
+ if value.is_kind_of(ruby.class_hash()) {
242
+ let mut map = HashMap::new();
243
+ let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
244
+
245
+ for (key, value) in entries {
246
+ let key_value = match &map_field.key_type {
247
+ ParquetSchemaType::String => {
248
+ let v = String::try_convert(key)?;
249
+ ParquetValue::String(v)
250
+ }
251
+ _ => {
252
+ return Err(MagnusError::new(
253
+ magnus::exception::type_error(),
254
+ "Map keys must be strings",
255
+ ))
256
+ }
257
+ };
258
+
259
+ let value_value = match &map_field.value_type {
260
+ ParquetSchemaType::Int8 => {
261
+ let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
262
+ ParquetValue::Int8(v)
263
+ }
264
+ ParquetSchemaType::Int16 => {
265
+ let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
266
+ ParquetValue::Int16(v)
267
+ }
268
+ ParquetSchemaType::Int32 => {
269
+ let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
270
+ ParquetValue::Int32(v)
271
+ }
272
+ ParquetSchemaType::Int64 => {
273
+ let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
274
+ ParquetValue::Int64(v)
275
+ }
276
+ ParquetSchemaType::UInt8 => {
277
+ let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
278
+ ParquetValue::UInt8(v)
279
+ }
280
+ ParquetSchemaType::UInt16 => {
281
+ let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
282
+ ParquetValue::UInt16(v)
283
+ }
284
+ ParquetSchemaType::UInt32 => {
285
+ let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
286
+ ParquetValue::UInt32(v)
287
+ }
288
+ ParquetSchemaType::UInt64 => {
289
+ let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
290
+ ParquetValue::UInt64(v)
291
+ }
292
+ ParquetSchemaType::Float => {
293
+ let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
294
+ ParquetValue::Float32(v)
295
+ }
296
+ ParquetSchemaType::Double => {
297
+ let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
298
+ ParquetValue::Float64(v)
299
+ }
300
+ ParquetSchemaType::String => {
301
+ let v = String::try_convert(value)?;
302
+ ParquetValue::String(v)
303
+ }
304
+ ParquetSchemaType::Binary => {
305
+ let v = convert_to_binary(value)?;
306
+ ParquetValue::Bytes(v)
307
+ }
308
+ ParquetSchemaType::Boolean => {
309
+ let v = convert_to_boolean(value)?;
310
+ ParquetValue::Boolean(v)
311
+ }
312
+ ParquetSchemaType::Date32 => {
313
+ let v = convert_to_date32(value)?;
314
+ ParquetValue::Date32(v)
315
+ }
316
+ ParquetSchemaType::TimestampMillis => {
317
+ let v = convert_to_timestamp_millis(value)?;
318
+ ParquetValue::TimestampMillis(v, None)
319
+ }
320
+ ParquetSchemaType::TimestampMicros => {
321
+ let v = convert_to_timestamp_micros(value)?;
322
+ ParquetValue::TimestampMicros(v, None)
323
+ }
324
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
325
+ return Err(MagnusError::new(
326
+ magnus::exception::type_error(),
327
+ "Map values cannot be lists or maps",
328
+ ))
329
+ }
330
+ };
331
+
332
+ map.insert(key_value, value_value);
333
+ }
334
+ Ok(map)
335
+ } else {
336
+ Err(MagnusError::new(
337
+ magnus::exception::type_error(),
338
+ "Invalid map format",
339
+ ))
340
+ }
341
+ }
342
+
343
// Builds an Arrow array from timestamp `ParquetValue`s.
//
// Arguments: the values to consume, the Arrow builder type, and the `ParquetValue`
// variant expected for every non-null entry. The variant's second field (timezone) is
// ignored. Expands to an expression evaluating to `Ok(Arc::new(<finished array>))`, and
// `return`s a `TypeError` from the enclosing function on any other variant.
macro_rules! impl_timestamp_to_arrow_conversion {
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(ticks, _tz) => array_builder.append_value(ticks),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
}
361
+
362
/// Converts a timestamp Arrow column into a collection of `ParquetValue`s.
///
/// Arguments: the column, the concrete Arrow array type to downcast to, the
/// `ParquetValue` variant to produce, and an optional timezone expression that is cloned
/// and converted with `Into` for every produced value. When the array reports itself as
/// nullable, null slots become `ParquetValue::Null`.
#[macro_export]
macro_rules! impl_timestamp_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
        let typed = downcast_array::<$array_type>($column);
        if !typed.is_nullable() {
            // No nulls possible: every raw value maps straight to the variant.
            typed
                .values()
                .iter()
                .map(|raw| ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())))
                .collect()
        } else {
            typed
                .values()
                .iter()
                .enumerate()
                .map(|(idx, raw)| match typed.is_null(idx) {
                    true => ParquetValue::Null,
                    false => ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())),
                })
                .collect()
        }
    }};
}
388
+
389
/// Builds an Arrow array from `ParquetValue`s of a single-field variant.
///
/// Arguments: the values to consume, the Arrow builder type, the expected `ParquetValue`
/// variant, and (second form only) an extra capacity argument passed as the second
/// parameter of the builder's `with_capacity`. Expands to an expression evaluating to
/// `Ok(Arc::new(<finished array>))`, and `return`s a `TypeError` from the enclosing
/// function when a value is neither the expected variant nor `ParquetValue::Null`.
#[macro_export]
macro_rules! impl_array_conversion {
    // Builders whose `with_capacity` takes only an item count.
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(inner) => array_builder.append_value(inner),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
    // Builders whose `with_capacity` takes an item count plus a second capacity.
    ($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len(), $capacity);
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(inner) => array_builder.append_value(inner),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
}
424
+
425
/// Appends one list element to the child builder of a list builder, cloning the payload.
///
/// Arguments: the list builder, the schema item type (a path, also used as a pattern),
/// the element value, the concrete child builder type, and the `ParquetValue` variant
/// that carries the payload.
///
/// A `ParquetValue::Null` element is appended as a null *element* of the child builder.
/// It must not call `append_null` on the list builder itself, because that would close
/// the list slot currently being filled and mark it as a null list.
///
/// Any other combination `return`s a `TypeError` from the enclosing function.
///
/// # Panics
///
/// Panics if the list builder's child builder is not a `$builder_type`.
#[macro_export]
macro_rules! append_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(v.clone());
            }
            (_, ParquetValue::Null) => {
                // Null element inside the list: record it on the child builder.
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
452
+
453
/// Appends one list element to the child builder of a list builder, copying the payload.
///
/// Same contract as a cloning append, but the payload is dereferenced (`*v`), so the
/// variant's field must be `Copy`. Arguments: the list builder, the schema item type (a
/// path, also used as a pattern), the element value, the concrete child builder type, and
/// the `ParquetValue` variant that carries the payload.
///
/// A `ParquetValue::Null` element is appended as a null *element* of the child builder.
/// It must not call `append_null` on the list builder itself, because that would close
/// the list slot currently being filled and mark it as a null list.
///
/// Any other combination `return`s a `TypeError` from the enclosing function.
///
/// # Panics
///
/// Panics if the list builder's child builder is not a `$builder_type`.
#[macro_export]
macro_rules! append_list_value_copy {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                // Null element inside the list: record it on the child builder.
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
480
+
481
/// Appends one timestamp list element to the child builder of a list builder.
///
/// The `ParquetValue` variant is expected to carry two fields; the first (the raw
/// timestamp) is copied into the child builder and the second (timezone) is ignored.
/// Arguments: the list builder, the schema item type (a path, also used as a pattern),
/// the element value, the concrete child builder type, and the variant path.
///
/// A `ParquetValue::Null` element is appended as a null *element* of the child builder.
/// It must not call `append_null` on the list builder itself, because that would close
/// the list slot currently being filled and mark it as a null list.
///
/// Any other combination `return`s a `TypeError` from the enclosing function.
///
/// # Panics
///
/// Panics if the list builder's child builder is not a `$builder_type`.
#[macro_export]
macro_rules! append_timestamp_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v, _tz)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                // Null element inside the list: record it on the child builder.
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
508
+
509
+ pub fn convert_parquet_values_to_arrow(
510
+ values: Vec<ParquetValue>,
511
+ type_: &ParquetSchemaType,
512
+ ) -> Result<Arc<dyn Array>, MagnusError> {
513
+ match type_ {
514
+ ParquetSchemaType::Int8 => impl_array_conversion!(values, Int8Builder, Int8),
515
+ ParquetSchemaType::Int16 => impl_array_conversion!(values, Int16Builder, Int16),
516
+ ParquetSchemaType::Int32 => impl_array_conversion!(values, Int32Builder, Int32),
517
+ ParquetSchemaType::Int64 => impl_array_conversion!(values, Int64Builder, Int64),
518
+ ParquetSchemaType::UInt8 => impl_array_conversion!(values, UInt8Builder, UInt8),
519
+ ParquetSchemaType::UInt16 => impl_array_conversion!(values, UInt16Builder, UInt16),
520
+ ParquetSchemaType::UInt32 => impl_array_conversion!(values, UInt32Builder, UInt32),
521
+ ParquetSchemaType::UInt64 => impl_array_conversion!(values, UInt64Builder, UInt64),
522
+ ParquetSchemaType::Float => impl_array_conversion!(values, Float32Builder, Float32),
523
+ ParquetSchemaType::Double => impl_array_conversion!(values, Float64Builder, Float64),
524
+ ParquetSchemaType::String => {
525
+ impl_array_conversion!(values, StringBuilder, String, values.len() * 32)
526
+ }
527
+ ParquetSchemaType::Binary => {
528
+ impl_array_conversion!(values, BinaryBuilder, Bytes, values.len() * 32)
529
+ }
530
+ ParquetSchemaType::Boolean => impl_array_conversion!(values, BooleanBuilder, Boolean),
531
+ ParquetSchemaType::Date32 => impl_array_conversion!(values, Date32Builder, Date32),
532
+ ParquetSchemaType::TimestampMillis => {
533
+ impl_timestamp_to_arrow_conversion!(
534
+ values,
535
+ TimestampMillisecondBuilder,
536
+ TimestampMillis
537
+ )
538
+ }
539
+ ParquetSchemaType::TimestampMicros => {
540
+ impl_timestamp_to_arrow_conversion!(
541
+ values,
542
+ TimestampMicrosecondBuilder,
543
+ TimestampMicros
544
+ )
545
+ }
546
+ ParquetSchemaType::List(list_field) => {
547
+ let value_builder = match list_field.item_type {
548
+ ParquetSchemaType::Int8 => Box::new(Int8Builder::new()) as Box<dyn ArrayBuilder>,
549
+ ParquetSchemaType::Int16 => Box::new(Int16Builder::new()) as Box<dyn ArrayBuilder>,
550
+ ParquetSchemaType::Int32 => Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
551
+ ParquetSchemaType::Int64 => Box::new(Int64Builder::new()) as Box<dyn ArrayBuilder>,
552
+ ParquetSchemaType::UInt8 => Box::new(UInt8Builder::new()) as Box<dyn ArrayBuilder>,
553
+ ParquetSchemaType::UInt16 => {
554
+ Box::new(UInt16Builder::new()) as Box<dyn ArrayBuilder>
555
+ }
556
+ ParquetSchemaType::UInt32 => {
557
+ Box::new(UInt32Builder::new()) as Box<dyn ArrayBuilder>
558
+ }
559
+ ParquetSchemaType::UInt64 => {
560
+ Box::new(UInt64Builder::new()) as Box<dyn ArrayBuilder>
561
+ }
562
+ ParquetSchemaType::Float => {
563
+ Box::new(Float32Builder::new()) as Box<dyn ArrayBuilder>
564
+ }
565
+ ParquetSchemaType::Double => {
566
+ Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>
567
+ }
568
+ ParquetSchemaType::String => {
569
+ Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>
570
+ }
571
+ ParquetSchemaType::Binary => {
572
+ Box::new(BinaryBuilder::new()) as Box<dyn ArrayBuilder>
573
+ }
574
+ ParquetSchemaType::Boolean => {
575
+ Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>
576
+ }
577
+ ParquetSchemaType::Date32 => {
578
+ Box::new(Date32Builder::new()) as Box<dyn ArrayBuilder>
579
+ }
580
+ ParquetSchemaType::TimestampMillis => {
581
+ Box::new(TimestampMillisecondBuilder::new()) as Box<dyn ArrayBuilder>
582
+ }
583
+ ParquetSchemaType::TimestampMicros => {
584
+ Box::new(TimestampMicrosecondBuilder::new()) as Box<dyn ArrayBuilder>
585
+ }
586
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
587
+ return Err(MagnusError::new(
588
+ magnus::exception::type_error(),
589
+ "Nested lists and maps are not supported",
590
+ ))
591
+ }
592
+ };
593
+
594
+ let mut list_builder = ListBuilder::new(value_builder);
595
+
596
+ for value in values {
597
+ match value {
598
+ ParquetValue::List(items) => {
599
+ for item in items {
600
+ match &list_field.item_type {
601
+ ParquetSchemaType::Int8 => append_list_value_copy!(
602
+ list_builder,
603
+ ParquetSchemaType::Int8,
604
+ item,
605
+ Int8Builder,
606
+ ParquetValue::Int8
607
+ ),
608
+ ParquetSchemaType::Int16 => append_list_value_copy!(
609
+ list_builder,
610
+ ParquetSchemaType::Int16,
611
+ item,
612
+ Int16Builder,
613
+ ParquetValue::Int16
614
+ ),
615
+ ParquetSchemaType::Int32 => append_list_value_copy!(
616
+ list_builder,
617
+ ParquetSchemaType::Int32,
618
+ item,
619
+ Int32Builder,
620
+ ParquetValue::Int32
621
+ ),
622
+ ParquetSchemaType::Int64 => append_list_value_copy!(
623
+ list_builder,
624
+ ParquetSchemaType::Int64,
625
+ item,
626
+ Int64Builder,
627
+ ParquetValue::Int64
628
+ ),
629
+ ParquetSchemaType::UInt8 => append_list_value_copy!(
630
+ list_builder,
631
+ ParquetSchemaType::UInt8,
632
+ item,
633
+ UInt8Builder,
634
+ ParquetValue::UInt8
635
+ ),
636
+ ParquetSchemaType::UInt16 => append_list_value_copy!(
637
+ list_builder,
638
+ ParquetSchemaType::UInt16,
639
+ item,
640
+ UInt16Builder,
641
+ ParquetValue::UInt16
642
+ ),
643
+ ParquetSchemaType::UInt32 => append_list_value_copy!(
644
+ list_builder,
645
+ ParquetSchemaType::UInt32,
646
+ item,
647
+ UInt32Builder,
648
+ ParquetValue::UInt32
649
+ ),
650
+ ParquetSchemaType::UInt64 => append_list_value_copy!(
651
+ list_builder,
652
+ ParquetSchemaType::UInt64,
653
+ item,
654
+ UInt64Builder,
655
+ ParquetValue::UInt64
656
+ ),
657
+ ParquetSchemaType::Float => append_list_value_copy!(
658
+ list_builder,
659
+ ParquetSchemaType::Float,
660
+ item,
661
+ Float32Builder,
662
+ ParquetValue::Float32
663
+ ),
664
+ ParquetSchemaType::Double => append_list_value_copy!(
665
+ list_builder,
666
+ ParquetSchemaType::Double,
667
+ item,
668
+ Float64Builder,
669
+ ParquetValue::Float64
670
+ ),
671
+ ParquetSchemaType::String => append_list_value!(
672
+ list_builder,
673
+ ParquetSchemaType::String,
674
+ item,
675
+ StringBuilder,
676
+ ParquetValue::String
677
+ ),
678
+ ParquetSchemaType::Binary => append_list_value!(
679
+ list_builder,
680
+ ParquetSchemaType::Binary,
681
+ item,
682
+ BinaryBuilder,
683
+ ParquetValue::Bytes
684
+ ),
685
+ ParquetSchemaType::Boolean => append_list_value_copy!(
686
+ list_builder,
687
+ ParquetSchemaType::Boolean,
688
+ item,
689
+ BooleanBuilder,
690
+ ParquetValue::Boolean
691
+ ),
692
+ ParquetSchemaType::Date32 => append_list_value_copy!(
693
+ list_builder,
694
+ ParquetSchemaType::Date32,
695
+ item,
696
+ Date32Builder,
697
+ ParquetValue::Date32
698
+ ),
699
+ ParquetSchemaType::TimestampMillis => append_timestamp_list_value!(
700
+ list_builder,
701
+ ParquetSchemaType::TimestampMillis,
702
+ item,
703
+ TimestampMillisecondBuilder,
704
+ ParquetValue::TimestampMillis
705
+ ),
706
+ ParquetSchemaType::TimestampMicros => append_timestamp_list_value!(
707
+ list_builder,
708
+ ParquetSchemaType::TimestampMicros,
709
+ item,
710
+ TimestampMicrosecondBuilder,
711
+ ParquetValue::TimestampMicros
712
+ ),
713
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
714
+ return Err(MagnusError::new(
715
+ magnus::exception::type_error(),
716
+ "Nested lists and maps are not supported",
717
+ ))
718
+ }
719
+ }
720
+ }
721
+ }
722
+ ParquetValue::Null => list_builder.append_null(),
723
+ _ => {
724
+ return Err(MagnusError::new(
725
+ magnus::exception::type_error(),
726
+ format!("Expected List, got {:?}", value),
727
+ ))
728
+ }
729
+ }
730
+ }
731
+ Ok(Arc::new(list_builder.finish()))
732
+ }
733
+ ParquetSchemaType::Map(_map_field) => {
734
+ unimplemented!("Writing maps is not yet supported")
735
+ }
736
+ }
737
+ }
738
+
739
+ pub fn convert_ruby_array_to_arrow(
740
+ values: RArray,
741
+ type_: &ParquetSchemaType,
742
+ ) -> Result<Arc<dyn Array>, MagnusError> {
743
+ let mut parquet_values = Vec::with_capacity(values.len());
744
+ for value in values {
745
+ if value.is_nil() {
746
+ parquet_values.push(ParquetValue::Null);
747
+ continue;
748
+ }
749
+ let parquet_value = ParquetValue::from_value(value, type_)?;
750
+ parquet_values.push(parquet_value);
751
+ }
752
+ convert_parquet_values_to_arrow(parquet_values, type_)
753
+ }