parquet 0.0.5 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,753 @@
1
+ use std::str::FromStr;
2
+
3
+ use super::*;
4
+ use arrow_array::builder::*;
5
+ use jiff::tz::{Offset, TimeZone};
6
+ use magnus::{RArray, TryConvert};
7
+
8
+ pub struct NumericConverter<T> {
9
+ _phantom: std::marker::PhantomData<T>,
10
+ }
11
+
12
+ impl<T> NumericConverter<T>
13
+ where
14
+ T: TryConvert + FromStr,
15
+ <T as FromStr>::Err: std::fmt::Display,
16
+ {
17
+ pub fn convert_with_string_fallback(value: Value) -> Result<T, MagnusError> {
18
+ let ruby = unsafe { Ruby::get_unchecked() };
19
+ if value.is_kind_of(ruby.class_string()) {
20
+ let s = String::try_convert(value)?;
21
+ s.trim().parse::<T>().map_err(|e| {
22
+ MagnusError::new(
23
+ magnus::exception::type_error(),
24
+ format!("Failed to parse '{}' as numeric: {}", s, e),
25
+ )
26
+ })
27
+ } else {
28
+ T::try_convert(value)
29
+ }
30
+ }
31
+ }
32
+
33
+ pub fn convert_to_date32(value: Value) -> Result<i32, MagnusError> {
34
+ let ruby = unsafe { Ruby::get_unchecked() };
35
+ if value.is_kind_of(ruby.class_string()) {
36
+ let s = String::try_convert(value)?;
37
+ // Parse string into Timestamp using jiff
38
+ let date: jiff::civil::Date = s.parse().map_err(|e| {
39
+ MagnusError::new(
40
+ magnus::exception::type_error(),
41
+ format!("Failed to parse '{}' as date32: {}", s, e),
42
+ )
43
+ })?;
44
+
45
+ let timestamp = date.at(0, 0, 0, 0);
46
+
47
+ let x = timestamp
48
+ .to_zoned(TimeZone::fixed(Offset::constant(0)))
49
+ .unwrap()
50
+ .timestamp();
51
+
52
+ // Convert to epoch days
53
+ Ok((x.as_second() as i64 / 86400) as i32)
54
+ } else if value.is_kind_of(ruby.class_time()) {
55
+ // Convert Time object to epoch days
56
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
57
+ Ok(((secs as f64) / 86400.0) as i32)
58
+ } else {
59
+ Err(MagnusError::new(
60
+ magnus::exception::type_error(),
61
+ format!("Cannot convert {} to date32", unsafe { value.classname() }),
62
+ ))
63
+ }
64
+ }
65
+
66
+ pub fn convert_to_timestamp_millis(value: Value) -> Result<i64, MagnusError> {
67
+ let ruby = unsafe { Ruby::get_unchecked() };
68
+ if value.is_kind_of(ruby.class_string()) {
69
+ let s = String::try_convert(value)?;
70
+ // Parse string into Timestamp using jiff
71
+ let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
72
+ MagnusError::new(
73
+ magnus::exception::type_error(),
74
+ format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
75
+ )
76
+ })?;
77
+ // Convert to milliseconds
78
+ Ok(timestamp.as_millisecond())
79
+ } else if value.is_kind_of(ruby.class_time()) {
80
+ // Convert Time object to milliseconds
81
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
82
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
83
+ Ok(secs * 1000 + (usecs / 1000))
84
+ } else {
85
+ Err(MagnusError::new(
86
+ magnus::exception::type_error(),
87
+ format!("Cannot convert {} to timestamp_millis", unsafe {
88
+ value.classname()
89
+ }),
90
+ ))
91
+ }
92
+ }
93
+
94
+ pub fn convert_to_timestamp_micros(value: Value) -> Result<i64, MagnusError> {
95
+ let ruby = unsafe { Ruby::get_unchecked() };
96
+ if value.is_kind_of(ruby.class_string()) {
97
+ let s = String::try_convert(value)?;
98
+ // Parse string into Timestamp using jiff
99
+ let timestamp: jiff::Timestamp = s.parse().map_err(|e| {
100
+ MagnusError::new(
101
+ magnus::exception::type_error(),
102
+ format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
103
+ )
104
+ })?;
105
+ // Convert to microseconds
106
+ Ok(timestamp.as_microsecond())
107
+ } else if value.is_kind_of(ruby.class_time()) {
108
+ // Convert Time object to microseconds
109
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
110
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
111
+ Ok(secs * 1_000_000 + usecs)
112
+ } else {
113
+ Err(MagnusError::new(
114
+ magnus::exception::type_error(),
115
+ format!("Cannot convert {} to timestamp_micros", unsafe {
116
+ value.classname()
117
+ }),
118
+ ))
119
+ }
120
+ }
121
+
122
+ pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
123
+ Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
124
+ }
125
+
126
+ pub fn convert_to_boolean(value: Value) -> Result<bool, MagnusError> {
127
+ let ruby = unsafe { Ruby::get_unchecked() };
128
+ if value.is_kind_of(ruby.class_string()) {
129
+ let s = String::try_convert(value)?;
130
+ s.trim().parse::<bool>().map_err(|e| {
131
+ MagnusError::new(
132
+ magnus::exception::type_error(),
133
+ format!("Failed to parse '{}' as boolean: {}", s, e),
134
+ )
135
+ })
136
+ } else {
137
+ bool::try_convert(value)
138
+ }
139
+ }
140
+
141
+ pub fn convert_to_list(
142
+ value: Value,
143
+ list_field: &ListField,
144
+ ) -> Result<Vec<ParquetValue>, MagnusError> {
145
+ let ruby = unsafe { Ruby::get_unchecked() };
146
+ if value.is_kind_of(ruby.class_array()) {
147
+ let array = RArray::from_value(value).ok_or_else(|| {
148
+ MagnusError::new(magnus::exception::type_error(), "Invalid list format")
149
+ })?;
150
+
151
+ let mut values = Vec::with_capacity(array.len());
152
+ for item_value in array.into_iter() {
153
+ let converted = match &list_field.item_type {
154
+ ParquetSchemaType::Int8 => {
155
+ let v = NumericConverter::<i8>::convert_with_string_fallback(item_value)?;
156
+ ParquetValue::Int8(v)
157
+ }
158
+ ParquetSchemaType::Int16 => {
159
+ let v = NumericConverter::<i16>::convert_with_string_fallback(item_value)?;
160
+ ParquetValue::Int16(v)
161
+ }
162
+ ParquetSchemaType::Int32 => {
163
+ let v = NumericConverter::<i32>::convert_with_string_fallback(item_value)?;
164
+ ParquetValue::Int32(v)
165
+ }
166
+ ParquetSchemaType::Int64 => {
167
+ let v = NumericConverter::<i64>::convert_with_string_fallback(item_value)?;
168
+ ParquetValue::Int64(v)
169
+ }
170
+ ParquetSchemaType::UInt8 => {
171
+ let v = NumericConverter::<u8>::convert_with_string_fallback(item_value)?;
172
+ ParquetValue::UInt8(v)
173
+ }
174
+ ParquetSchemaType::UInt16 => {
175
+ let v = NumericConverter::<u16>::convert_with_string_fallback(item_value)?;
176
+ ParquetValue::UInt16(v)
177
+ }
178
+ ParquetSchemaType::UInt32 => {
179
+ let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
180
+ ParquetValue::UInt32(v)
181
+ }
182
+ ParquetSchemaType::UInt64 => {
183
+ let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
184
+ ParquetValue::UInt64(v)
185
+ }
186
+ ParquetSchemaType::Float => {
187
+ let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
188
+ ParquetValue::Float32(v)
189
+ }
190
+ ParquetSchemaType::Double => {
191
+ let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
192
+ ParquetValue::Float64(v)
193
+ }
194
+ ParquetSchemaType::String => {
195
+ let v = String::try_convert(item_value)?;
196
+ ParquetValue::String(v)
197
+ }
198
+ ParquetSchemaType::Binary => {
199
+ let v = convert_to_binary(item_value)?;
200
+ ParquetValue::Bytes(v)
201
+ }
202
+ ParquetSchemaType::Boolean => {
203
+ let v = convert_to_boolean(item_value)?;
204
+ ParquetValue::Boolean(v)
205
+ }
206
+ ParquetSchemaType::Date32 => {
207
+ let v = convert_to_date32(item_value)?;
208
+ ParquetValue::Date32(v)
209
+ }
210
+ ParquetSchemaType::TimestampMillis => {
211
+ let v = convert_to_timestamp_millis(item_value)?;
212
+ ParquetValue::TimestampMillis(v, None)
213
+ }
214
+ ParquetSchemaType::TimestampMicros => {
215
+ let v = convert_to_timestamp_micros(item_value)?;
216
+ ParquetValue::TimestampMicros(v, None)
217
+ }
218
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
219
+ return Err(MagnusError::new(
220
+ magnus::exception::type_error(),
221
+ "Nested lists and maps are not supported",
222
+ ))
223
+ }
224
+ };
225
+ values.push(converted);
226
+ }
227
+ Ok(values)
228
+ } else {
229
+ Err(MagnusError::new(
230
+ magnus::exception::type_error(),
231
+ "Invalid list format",
232
+ ))
233
+ }
234
+ }
235
+
236
+ pub fn convert_to_map(
237
+ value: Value,
238
+ map_field: &MapField,
239
+ ) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
240
+ let ruby = unsafe { Ruby::get_unchecked() };
241
+ if value.is_kind_of(ruby.class_hash()) {
242
+ let mut map = HashMap::new();
243
+ let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
244
+
245
+ for (key, value) in entries {
246
+ let key_value = match &map_field.key_type {
247
+ ParquetSchemaType::String => {
248
+ let v = String::try_convert(key)?;
249
+ ParquetValue::String(v)
250
+ }
251
+ _ => {
252
+ return Err(MagnusError::new(
253
+ magnus::exception::type_error(),
254
+ "Map keys must be strings",
255
+ ))
256
+ }
257
+ };
258
+
259
+ let value_value = match &map_field.value_type {
260
+ ParquetSchemaType::Int8 => {
261
+ let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
262
+ ParquetValue::Int8(v)
263
+ }
264
+ ParquetSchemaType::Int16 => {
265
+ let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
266
+ ParquetValue::Int16(v)
267
+ }
268
+ ParquetSchemaType::Int32 => {
269
+ let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
270
+ ParquetValue::Int32(v)
271
+ }
272
+ ParquetSchemaType::Int64 => {
273
+ let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
274
+ ParquetValue::Int64(v)
275
+ }
276
+ ParquetSchemaType::UInt8 => {
277
+ let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
278
+ ParquetValue::UInt8(v)
279
+ }
280
+ ParquetSchemaType::UInt16 => {
281
+ let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
282
+ ParquetValue::UInt16(v)
283
+ }
284
+ ParquetSchemaType::UInt32 => {
285
+ let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
286
+ ParquetValue::UInt32(v)
287
+ }
288
+ ParquetSchemaType::UInt64 => {
289
+ let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
290
+ ParquetValue::UInt64(v)
291
+ }
292
+ ParquetSchemaType::Float => {
293
+ let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
294
+ ParquetValue::Float32(v)
295
+ }
296
+ ParquetSchemaType::Double => {
297
+ let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
298
+ ParquetValue::Float64(v)
299
+ }
300
+ ParquetSchemaType::String => {
301
+ let v = String::try_convert(value)?;
302
+ ParquetValue::String(v)
303
+ }
304
+ ParquetSchemaType::Binary => {
305
+ let v = convert_to_binary(value)?;
306
+ ParquetValue::Bytes(v)
307
+ }
308
+ ParquetSchemaType::Boolean => {
309
+ let v = convert_to_boolean(value)?;
310
+ ParquetValue::Boolean(v)
311
+ }
312
+ ParquetSchemaType::Date32 => {
313
+ let v = convert_to_date32(value)?;
314
+ ParquetValue::Date32(v)
315
+ }
316
+ ParquetSchemaType::TimestampMillis => {
317
+ let v = convert_to_timestamp_millis(value)?;
318
+ ParquetValue::TimestampMillis(v, None)
319
+ }
320
+ ParquetSchemaType::TimestampMicros => {
321
+ let v = convert_to_timestamp_micros(value)?;
322
+ ParquetValue::TimestampMicros(v, None)
323
+ }
324
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
325
+ return Err(MagnusError::new(
326
+ magnus::exception::type_error(),
327
+ "Map values cannot be lists or maps",
328
+ ))
329
+ }
330
+ };
331
+
332
+ map.insert(key_value, value_value);
333
+ }
334
+ Ok(map)
335
+ } else {
336
+ Err(MagnusError::new(
337
+ magnus::exception::type_error(),
338
+ "Invalid map format",
339
+ ))
340
+ }
341
+ }
342
+
343
// Builds an Arrow array from a collection of `ParquetValue`s using the given
// timestamp builder type. `$variant` names the two-field `ParquetValue`
// variant to accept; its second field is ignored. `ParquetValue::Null` becomes
// a null slot, and any other variant makes the enclosing function return a
// `TypeError`.
macro_rules! impl_timestamp_to_arrow_conversion {
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(ts, _) => array_builder.append_value(ts),
                other => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), other),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
}
361
+
362
// Turns an Arrow timestamp column into a collection of `ParquetValue`s.
// `$column` is downcast to `$array_type`; each raw value is wrapped in
// `ParquetValue::$variant` together with a converted clone of `$tz`. When the
// array reports itself as nullable, slots flagged as null become
// `ParquetValue::Null`.
#[macro_export]
macro_rules! impl_timestamp_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
        let typed = downcast_array::<$array_type>($column);
        if !typed.is_nullable() {
            // No null slots to consider: wrap every raw value directly.
            typed
                .values()
                .iter()
                .map(|raw| ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())))
                .collect()
        } else {
            typed
                .values()
                .iter()
                .enumerate()
                .map(|(idx, raw)| match typed.is_null(idx) {
                    true => ParquetValue::Null,
                    false => ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())),
                })
                .collect()
        }
    }};
}
388
+
389
// Builds an Arrow array from a collection of `ParquetValue`s with the given
// builder type. `$variant` names the single-field `ParquetValue` variant to
// accept; `ParquetValue::Null` becomes a null slot and any other variant makes
// the enclosing function return a `TypeError`.
//
// The four-argument form passes `$capacity` as a second argument to the
// builder's `with_capacity`.
#[macro_export]
macro_rules! impl_array_conversion {
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(inner) => array_builder.append_value(inner),
                other => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), other),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
    ($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
        let mut array_builder = <$builder_type>::with_capacity($values.len(), $capacity);
        for entry in $values {
            match entry {
                ParquetValue::Null => array_builder.append_null(),
                ParquetValue::$variant(inner) => array_builder.append_value(inner),
                other => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), other),
                    ))
                }
            }
        }
        Ok(Arc::new(array_builder.finish()))
    }};
}
424
+
425
// Appends one list element to the child (values) builder of `$list_builder`.
//
// `$value` must be the `$value_variant` variant; its payload is cloned into
// the child builder, which is downcast to `$builder_type`. A
// `ParquetValue::Null` element is appended as a null *element* of the child
// builder. It must not call `append_null` on the list builder itself: that
// would close the current list row as a null list in the middle of its items.
// Any other variant makes the enclosing function return a `TypeError`.
#[macro_export]
macro_rules! append_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(v.clone());
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
452
+
453
// Appends one list element to the child (values) builder of `$list_builder`.
//
// `$value` must be the `$value_variant` variant; its payload is copied (via
// dereference) into the child builder, which is downcast to `$builder_type`.
// A `ParquetValue::Null` element is appended as a null *element* of the child
// builder. It must not call `append_null` on the list builder itself: that
// would close the current list row as a null list in the middle of its items.
// Any other variant makes the enclosing function return a `TypeError`.
#[macro_export]
macro_rules! append_list_value_copy {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
480
+
481
// Appends one timestamp list element to the child (values) builder of
// `$list_builder`.
//
// `$value` must be the two-field `$value_variant` variant; its first field is
// copied into the child builder (downcast to `$builder_type`) and the second
// field is ignored. A `ParquetValue::Null` element is appended as a null
// *element* of the child builder. It must not call `append_null` on the list
// builder itself: that would close the current list row as a null list in the
// middle of its items. Any other variant makes the enclosing function return
// a `TypeError`.
#[macro_export]
macro_rules! append_timestamp_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v, _tz)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .expect("list child builder must match the declared item type")
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
508
+
509
+ pub fn convert_parquet_values_to_arrow(
510
+ values: Vec<ParquetValue>,
511
+ type_: &ParquetSchemaType,
512
+ ) -> Result<Arc<dyn Array>, MagnusError> {
513
+ match type_ {
514
+ ParquetSchemaType::Int8 => impl_array_conversion!(values, Int8Builder, Int8),
515
+ ParquetSchemaType::Int16 => impl_array_conversion!(values, Int16Builder, Int16),
516
+ ParquetSchemaType::Int32 => impl_array_conversion!(values, Int32Builder, Int32),
517
+ ParquetSchemaType::Int64 => impl_array_conversion!(values, Int64Builder, Int64),
518
+ ParquetSchemaType::UInt8 => impl_array_conversion!(values, UInt8Builder, UInt8),
519
+ ParquetSchemaType::UInt16 => impl_array_conversion!(values, UInt16Builder, UInt16),
520
+ ParquetSchemaType::UInt32 => impl_array_conversion!(values, UInt32Builder, UInt32),
521
+ ParquetSchemaType::UInt64 => impl_array_conversion!(values, UInt64Builder, UInt64),
522
+ ParquetSchemaType::Float => impl_array_conversion!(values, Float32Builder, Float32),
523
+ ParquetSchemaType::Double => impl_array_conversion!(values, Float64Builder, Float64),
524
+ ParquetSchemaType::String => {
525
+ impl_array_conversion!(values, StringBuilder, String, values.len() * 32)
526
+ }
527
+ ParquetSchemaType::Binary => {
528
+ impl_array_conversion!(values, BinaryBuilder, Bytes, values.len() * 32)
529
+ }
530
+ ParquetSchemaType::Boolean => impl_array_conversion!(values, BooleanBuilder, Boolean),
531
+ ParquetSchemaType::Date32 => impl_array_conversion!(values, Date32Builder, Date32),
532
+ ParquetSchemaType::TimestampMillis => {
533
+ impl_timestamp_to_arrow_conversion!(
534
+ values,
535
+ TimestampMillisecondBuilder,
536
+ TimestampMillis
537
+ )
538
+ }
539
+ ParquetSchemaType::TimestampMicros => {
540
+ impl_timestamp_to_arrow_conversion!(
541
+ values,
542
+ TimestampMicrosecondBuilder,
543
+ TimestampMicros
544
+ )
545
+ }
546
+ ParquetSchemaType::List(list_field) => {
547
+ let value_builder = match list_field.item_type {
548
+ ParquetSchemaType::Int8 => Box::new(Int8Builder::new()) as Box<dyn ArrayBuilder>,
549
+ ParquetSchemaType::Int16 => Box::new(Int16Builder::new()) as Box<dyn ArrayBuilder>,
550
+ ParquetSchemaType::Int32 => Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
551
+ ParquetSchemaType::Int64 => Box::new(Int64Builder::new()) as Box<dyn ArrayBuilder>,
552
+ ParquetSchemaType::UInt8 => Box::new(UInt8Builder::new()) as Box<dyn ArrayBuilder>,
553
+ ParquetSchemaType::UInt16 => {
554
+ Box::new(UInt16Builder::new()) as Box<dyn ArrayBuilder>
555
+ }
556
+ ParquetSchemaType::UInt32 => {
557
+ Box::new(UInt32Builder::new()) as Box<dyn ArrayBuilder>
558
+ }
559
+ ParquetSchemaType::UInt64 => {
560
+ Box::new(UInt64Builder::new()) as Box<dyn ArrayBuilder>
561
+ }
562
+ ParquetSchemaType::Float => {
563
+ Box::new(Float32Builder::new()) as Box<dyn ArrayBuilder>
564
+ }
565
+ ParquetSchemaType::Double => {
566
+ Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>
567
+ }
568
+ ParquetSchemaType::String => {
569
+ Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>
570
+ }
571
+ ParquetSchemaType::Binary => {
572
+ Box::new(BinaryBuilder::new()) as Box<dyn ArrayBuilder>
573
+ }
574
+ ParquetSchemaType::Boolean => {
575
+ Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>
576
+ }
577
+ ParquetSchemaType::Date32 => {
578
+ Box::new(Date32Builder::new()) as Box<dyn ArrayBuilder>
579
+ }
580
+ ParquetSchemaType::TimestampMillis => {
581
+ Box::new(TimestampMillisecondBuilder::new()) as Box<dyn ArrayBuilder>
582
+ }
583
+ ParquetSchemaType::TimestampMicros => {
584
+ Box::new(TimestampMicrosecondBuilder::new()) as Box<dyn ArrayBuilder>
585
+ }
586
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
587
+ return Err(MagnusError::new(
588
+ magnus::exception::type_error(),
589
+ "Nested lists and maps are not supported",
590
+ ))
591
+ }
592
+ };
593
+
594
+ let mut list_builder = ListBuilder::new(value_builder);
595
+ for value in values {
596
+ match value {
597
+ ParquetValue::List(items) => {
598
+ list_builder.append(true);
599
+ for item in items {
600
+ match list_field.item_type {
601
+ ParquetSchemaType::Int8 => append_list_value_copy!(
602
+ list_builder,
603
+ ParquetSchemaType::Int8,
604
+ item,
605
+ Int8Builder,
606
+ ParquetValue::Int8
607
+ ),
608
+ ParquetSchemaType::Int16 => append_list_value_copy!(
609
+ list_builder,
610
+ ParquetSchemaType::Int16,
611
+ item,
612
+ Int16Builder,
613
+ ParquetValue::Int16
614
+ ),
615
+ ParquetSchemaType::Int32 => append_list_value_copy!(
616
+ list_builder,
617
+ ParquetSchemaType::Int32,
618
+ item,
619
+ Int32Builder,
620
+ ParquetValue::Int32
621
+ ),
622
+ ParquetSchemaType::Int64 => append_list_value_copy!(
623
+ list_builder,
624
+ ParquetSchemaType::Int64,
625
+ item,
626
+ Int64Builder,
627
+ ParquetValue::Int64
628
+ ),
629
+ ParquetSchemaType::UInt8 => append_list_value_copy!(
630
+ list_builder,
631
+ ParquetSchemaType::UInt8,
632
+ item,
633
+ UInt8Builder,
634
+ ParquetValue::UInt8
635
+ ),
636
+ ParquetSchemaType::UInt16 => append_list_value_copy!(
637
+ list_builder,
638
+ ParquetSchemaType::UInt16,
639
+ item,
640
+ UInt16Builder,
641
+ ParquetValue::UInt16
642
+ ),
643
+ ParquetSchemaType::UInt32 => append_list_value_copy!(
644
+ list_builder,
645
+ ParquetSchemaType::UInt32,
646
+ item,
647
+ UInt32Builder,
648
+ ParquetValue::UInt32
649
+ ),
650
+ ParquetSchemaType::UInt64 => append_list_value_copy!(
651
+ list_builder,
652
+ ParquetSchemaType::UInt64,
653
+ item,
654
+ UInt64Builder,
655
+ ParquetValue::UInt64
656
+ ),
657
+ ParquetSchemaType::Float => append_list_value_copy!(
658
+ list_builder,
659
+ ParquetSchemaType::Float,
660
+ item,
661
+ Float32Builder,
662
+ ParquetValue::Float32
663
+ ),
664
+ ParquetSchemaType::Double => append_list_value_copy!(
665
+ list_builder,
666
+ ParquetSchemaType::Double,
667
+ item,
668
+ Float64Builder,
669
+ ParquetValue::Float64
670
+ ),
671
+ ParquetSchemaType::String => append_list_value!(
672
+ list_builder,
673
+ ParquetSchemaType::String,
674
+ item,
675
+ StringBuilder,
676
+ ParquetValue::String
677
+ ),
678
+ ParquetSchemaType::Binary => append_list_value!(
679
+ list_builder,
680
+ ParquetSchemaType::Binary,
681
+ item,
682
+ BinaryBuilder,
683
+ ParquetValue::Bytes
684
+ ),
685
+ ParquetSchemaType::Boolean => append_list_value_copy!(
686
+ list_builder,
687
+ ParquetSchemaType::Boolean,
688
+ item,
689
+ BooleanBuilder,
690
+ ParquetValue::Boolean
691
+ ),
692
+ ParquetSchemaType::Date32 => append_list_value_copy!(
693
+ list_builder,
694
+ ParquetSchemaType::Date32,
695
+ item,
696
+ Date32Builder,
697
+ ParquetValue::Date32
698
+ ),
699
+ ParquetSchemaType::TimestampMillis => append_timestamp_list_value!(
700
+ list_builder,
701
+ ParquetSchemaType::TimestampMillis,
702
+ item,
703
+ TimestampMillisecondBuilder,
704
+ ParquetValue::TimestampMillis
705
+ ),
706
+ ParquetSchemaType::TimestampMicros => append_timestamp_list_value!(
707
+ list_builder,
708
+ ParquetSchemaType::TimestampMicros,
709
+ item,
710
+ TimestampMicrosecondBuilder,
711
+ ParquetValue::TimestampMicros
712
+ ),
713
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
714
+ return Err(MagnusError::new(
715
+ magnus::exception::type_error(),
716
+ "Nested lists and maps are not supported",
717
+ ))
718
+ }
719
+ }
720
+ }
721
+ }
722
+ ParquetValue::Null => list_builder.append_null(),
723
+ _ => {
724
+ return Err(MagnusError::new(
725
+ magnus::exception::type_error(),
726
+ format!("Expected List, got {:?}", value),
727
+ ))
728
+ }
729
+ }
730
+ }
731
+ Ok(Arc::new(list_builder.finish()))
732
+ }
733
+ ParquetSchemaType::Map(_map_field) => {
734
+ unimplemented!("Writing maps is not yet supported")
735
+ }
736
+ }
737
+ }
738
+
739
+ pub fn convert_ruby_array_to_arrow(
740
+ values: RArray,
741
+ type_: &ParquetSchemaType,
742
+ ) -> Result<Arc<dyn Array>, MagnusError> {
743
+ let mut parquet_values = Vec::with_capacity(values.len());
744
+ for value in values {
745
+ if value.is_nil() {
746
+ parquet_values.push(ParquetValue::Null);
747
+ continue;
748
+ }
749
+ let parquet_value = ParquetValue::from_value(value, type_)?;
750
+ parquet_values.push(parquet_value);
751
+ }
752
+ convert_parquet_values_to_arrow(parquet_values, type_)
753
+ }