parquet 0.2.12-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,809 @@
1
+ use std::str::FromStr;
2
+
3
+ use super::*;
4
+ use arrow_array::builder::*;
5
+ use jiff::tz::{Offset, TimeZone};
6
+ use magnus::{RArray, TryConvert};
7
+
8
+ pub struct NumericConverter<T> {
9
+ _phantom: std::marker::PhantomData<T>,
10
+ }
11
+
12
+ impl<T> NumericConverter<T>
13
+ where
14
+ T: TryConvert + FromStr,
15
+ <T as FromStr>::Err: std::fmt::Display,
16
+ {
17
+ pub fn convert_with_string_fallback(value: Value) -> Result<T, MagnusError> {
18
+ let ruby = unsafe { Ruby::get_unchecked() };
19
+ if value.is_kind_of(ruby.class_string()) {
20
+ let s = String::try_convert(value)?;
21
+ s.trim().parse::<T>().map_err(|e| {
22
+ MagnusError::new(
23
+ magnus::exception::type_error(),
24
+ format!("Failed to parse '{}' as numeric: {}", s, e),
25
+ )
26
+ })
27
+ } else {
28
+ T::try_convert(value)
29
+ }
30
+ }
31
+ }
32
+
33
+ pub fn convert_to_date32(value: Value, format: Option<&str>) -> Result<i32, MagnusError> {
34
+ let ruby = unsafe { Ruby::get_unchecked() };
35
+ if value.is_kind_of(ruby.class_string()) {
36
+ let s = String::try_convert(value)?;
37
+ // Parse string into Date using jiff
38
+ let date = if let Some(fmt) = format {
39
+ jiff::civil::Date::strptime(&fmt, &s).or_else(|e1| {
40
+ // Try parsing as DateTime and convert to Date with zero offset
41
+ jiff::civil::DateTime::strptime(&fmt, &s)
42
+ .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
43
+ .map(|dt| dt.date())
44
+ .map_err(|e2| {
45
+ MagnusError::new(
46
+ magnus::exception::type_error(),
47
+ format!(
48
+ "Failed to parse '{}' with format '{}' as date32: {} (and as datetime: {})",
49
+ s, fmt, e1, e2
50
+ ),
51
+ )
52
+ })
53
+ })?
54
+ } else {
55
+ s.parse().map_err(|e| {
56
+ MagnusError::new(
57
+ magnus::exception::type_error(),
58
+ format!("Failed to parse '{}' as date32: {}", s, e),
59
+ )
60
+ })?
61
+ };
62
+
63
+ let timestamp = date.at(0, 0, 0, 0);
64
+
65
+ let x = timestamp
66
+ .to_zoned(TimeZone::fixed(Offset::constant(0)))
67
+ .unwrap()
68
+ .timestamp();
69
+
70
+ // Convert to epoch days
71
+ Ok((x.as_second() as i64 / 86400) as i32)
72
+ } else if value.is_kind_of(ruby.class_time()) {
73
+ // Convert Time object to epoch days
74
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
75
+ Ok(((secs as f64) / 86400.0) as i32)
76
+ } else {
77
+ Err(MagnusError::new(
78
+ magnus::exception::type_error(),
79
+ format!("Cannot convert {} to date32", unsafe { value.classname() }),
80
+ ))
81
+ }
82
+ }
83
+
84
+ pub fn convert_to_timestamp_millis(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
85
+ let ruby = unsafe { Ruby::get_unchecked() };
86
+ if value.is_kind_of(ruby.class_string()) {
87
+ let s = String::try_convert(value)?;
88
+ // Parse string into Timestamp using jiff
89
+ let timestamp = if let Some(fmt) = format {
90
+ jiff::Timestamp::strptime(&fmt, &s)
91
+ .or_else(|e1| {
92
+ // Try parsing as DateTime and convert to Timestamp with zero offset
93
+ jiff::civil::DateTime::strptime(&fmt, &s)
94
+ .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
95
+ .map(|dt| dt.timestamp())
96
+ .map_err(|e2| {
97
+ MagnusError::new(
98
+ magnus::exception::type_error(),
99
+ format!(
100
+ "Failed to parse '{}' with format '{}' as timestamp_millis: {} (and as datetime: {})",
101
+ s, fmt, e1, e2
102
+ ),
103
+ )
104
+ })
105
+ })?
106
+ } else {
107
+ s.parse().map_err(|e| {
108
+ MagnusError::new(
109
+ magnus::exception::type_error(),
110
+ format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
111
+ )
112
+ })?
113
+ };
114
+ // Convert to milliseconds
115
+ Ok(timestamp.as_millisecond())
116
+ } else if value.is_kind_of(ruby.class_time()) {
117
+ // Convert Time object to milliseconds
118
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
119
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
120
+ Ok(secs * 1000 + (usecs / 1000))
121
+ } else {
122
+ Err(MagnusError::new(
123
+ magnus::exception::type_error(),
124
+ format!("Cannot convert {} to timestamp_millis", unsafe {
125
+ value.classname()
126
+ }),
127
+ ))
128
+ }
129
+ }
130
+
131
+ pub fn convert_to_timestamp_micros(value: Value, format: Option<&str>) -> Result<i64, MagnusError> {
132
+ let ruby = unsafe { Ruby::get_unchecked() };
133
+ if value.is_kind_of(ruby.class_string()) {
134
+ let s = String::try_convert(value)?;
135
+ // Parse string into Timestamp using jiff
136
+ let timestamp = if let Some(fmt) = format {
137
+ jiff::Timestamp::strptime(&fmt, &s).or_else(|e1| {
138
+ // Try parsing as DateTime and convert to Timestamp with zero offset
139
+ jiff::civil::DateTime::strptime(&fmt, &s).and_then(|dt| {
140
+ dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
141
+ })
142
+ .map(|dt| dt.timestamp())
143
+ .map_err(|e2| {
144
+ MagnusError::new(
145
+ magnus::exception::type_error(),
146
+ format!(
147
+ "Failed to parse '{}' with format '{}' as timestamp_micros: {} (and as datetime: {})",
148
+ s, fmt, e1, e2
149
+ ),
150
+ )
151
+ })
152
+ })?
153
+ } else {
154
+ s.parse().map_err(|e| {
155
+ MagnusError::new(
156
+ magnus::exception::type_error(),
157
+ format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
158
+ )
159
+ })?
160
+ };
161
+ // Convert to microseconds
162
+ Ok(timestamp.as_microsecond())
163
+ } else if value.is_kind_of(ruby.class_time()) {
164
+ // Convert Time object to microseconds
165
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ()).unwrap())?;
166
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ()).unwrap())?;
167
+ Ok(secs * 1_000_000 + usecs)
168
+ } else {
169
+ Err(MagnusError::new(
170
+ magnus::exception::type_error(),
171
+ format!("Cannot convert {} to timestamp_micros", unsafe {
172
+ value.classname()
173
+ }),
174
+ ))
175
+ }
176
+ }
177
+
178
+ pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
179
+ Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
180
+ }
181
+
182
+ pub fn convert_to_boolean(value: Value) -> Result<bool, MagnusError> {
183
+ let ruby = unsafe { Ruby::get_unchecked() };
184
+ if value.is_kind_of(ruby.class_string()) {
185
+ let s = String::try_convert(value)?;
186
+ s.trim().parse::<bool>().map_err(|e| {
187
+ MagnusError::new(
188
+ magnus::exception::type_error(),
189
+ format!("Failed to parse '{}' as boolean: {}", s, e),
190
+ )
191
+ })
192
+ } else {
193
+ bool::try_convert(value)
194
+ }
195
+ }
196
+
197
+ pub fn convert_to_list(
198
+ value: Value,
199
+ list_field: &ListField,
200
+ ) -> Result<Vec<ParquetValue>, MagnusError> {
201
+ let ruby = unsafe { Ruby::get_unchecked() };
202
+ if value.is_kind_of(ruby.class_array()) {
203
+ let array = RArray::from_value(value).ok_or_else(|| {
204
+ MagnusError::new(magnus::exception::type_error(), "Invalid list format")
205
+ })?;
206
+
207
+ let mut values = Vec::with_capacity(array.len());
208
+ for item_value in array.into_iter() {
209
+ let converted = match &list_field.item_type {
210
+ ParquetSchemaType::Int8 => {
211
+ let v = NumericConverter::<i8>::convert_with_string_fallback(item_value)?;
212
+ ParquetValue::Int8(v)
213
+ }
214
+ ParquetSchemaType::Int16 => {
215
+ let v = NumericConverter::<i16>::convert_with_string_fallback(item_value)?;
216
+ ParquetValue::Int16(v)
217
+ }
218
+ ParquetSchemaType::Int32 => {
219
+ let v = NumericConverter::<i32>::convert_with_string_fallback(item_value)?;
220
+ ParquetValue::Int32(v)
221
+ }
222
+ ParquetSchemaType::Int64 => {
223
+ let v = NumericConverter::<i64>::convert_with_string_fallback(item_value)?;
224
+ ParquetValue::Int64(v)
225
+ }
226
+ ParquetSchemaType::UInt8 => {
227
+ let v = NumericConverter::<u8>::convert_with_string_fallback(item_value)?;
228
+ ParquetValue::UInt8(v)
229
+ }
230
+ ParquetSchemaType::UInt16 => {
231
+ let v = NumericConverter::<u16>::convert_with_string_fallback(item_value)?;
232
+ ParquetValue::UInt16(v)
233
+ }
234
+ ParquetSchemaType::UInt32 => {
235
+ let v = NumericConverter::<u32>::convert_with_string_fallback(item_value)?;
236
+ ParquetValue::UInt32(v)
237
+ }
238
+ ParquetSchemaType::UInt64 => {
239
+ let v = NumericConverter::<u64>::convert_with_string_fallback(item_value)?;
240
+ ParquetValue::UInt64(v)
241
+ }
242
+ ParquetSchemaType::Float => {
243
+ let v = NumericConverter::<f32>::convert_with_string_fallback(item_value)?;
244
+ ParquetValue::Float32(v)
245
+ }
246
+ ParquetSchemaType::Double => {
247
+ let v = NumericConverter::<f64>::convert_with_string_fallback(item_value)?;
248
+ ParquetValue::Float64(v)
249
+ }
250
+ ParquetSchemaType::String => {
251
+ let v = String::try_convert(item_value)?;
252
+ ParquetValue::String(v)
253
+ }
254
+ ParquetSchemaType::Binary => {
255
+ let v = convert_to_binary(item_value)?;
256
+ ParquetValue::Bytes(v)
257
+ }
258
+ ParquetSchemaType::Boolean => {
259
+ let v = convert_to_boolean(item_value)?;
260
+ ParquetValue::Boolean(v)
261
+ }
262
+ ParquetSchemaType::Date32 => {
263
+ let v = convert_to_date32(item_value, list_field.format)?;
264
+ ParquetValue::Date32(v)
265
+ }
266
+ ParquetSchemaType::TimestampMillis => {
267
+ let v = convert_to_timestamp_millis(item_value, list_field.format)?;
268
+ ParquetValue::TimestampMillis(v, None)
269
+ }
270
+ ParquetSchemaType::TimestampMicros => {
271
+ let v = convert_to_timestamp_micros(item_value, list_field.format)?;
272
+ ParquetValue::TimestampMicros(v, None)
273
+ }
274
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
275
+ return Err(MagnusError::new(
276
+ magnus::exception::type_error(),
277
+ "Nested lists and maps are not supported",
278
+ ))
279
+ }
280
+ };
281
+ values.push(converted);
282
+ }
283
+ Ok(values)
284
+ } else {
285
+ Err(MagnusError::new(
286
+ magnus::exception::type_error(),
287
+ "Invalid list format",
288
+ ))
289
+ }
290
+ }
291
+
292
+ pub fn convert_to_map(
293
+ value: Value,
294
+ map_field: &MapField,
295
+ ) -> Result<HashMap<ParquetValue, ParquetValue>, MagnusError> {
296
+ let ruby = unsafe { Ruby::get_unchecked() };
297
+ if value.is_kind_of(ruby.class_hash()) {
298
+ let mut map = HashMap::new();
299
+ let entries: Vec<(Value, Value)> = value.funcall("to_a", ())?;
300
+
301
+ for (key, value) in entries {
302
+ let key_value = match &map_field.key_type {
303
+ ParquetSchemaType::String => {
304
+ let v = String::try_convert(key)?;
305
+ ParquetValue::String(v)
306
+ }
307
+ _ => {
308
+ return Err(MagnusError::new(
309
+ magnus::exception::type_error(),
310
+ "Map keys must be strings",
311
+ ))
312
+ }
313
+ };
314
+
315
+ let value_value = match &map_field.value_type {
316
+ ParquetSchemaType::Int8 => {
317
+ let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
318
+ ParquetValue::Int8(v)
319
+ }
320
+ ParquetSchemaType::Int16 => {
321
+ let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
322
+ ParquetValue::Int16(v)
323
+ }
324
+ ParquetSchemaType::Int32 => {
325
+ let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
326
+ ParquetValue::Int32(v)
327
+ }
328
+ ParquetSchemaType::Int64 => {
329
+ let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
330
+ ParquetValue::Int64(v)
331
+ }
332
+ ParquetSchemaType::UInt8 => {
333
+ let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
334
+ ParquetValue::UInt8(v)
335
+ }
336
+ ParquetSchemaType::UInt16 => {
337
+ let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
338
+ ParquetValue::UInt16(v)
339
+ }
340
+ ParquetSchemaType::UInt32 => {
341
+ let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
342
+ ParquetValue::UInt32(v)
343
+ }
344
+ ParquetSchemaType::UInt64 => {
345
+ let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
346
+ ParquetValue::UInt64(v)
347
+ }
348
+ ParquetSchemaType::Float => {
349
+ let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
350
+ ParquetValue::Float32(v)
351
+ }
352
+ ParquetSchemaType::Double => {
353
+ let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
354
+ ParquetValue::Float64(v)
355
+ }
356
+ ParquetSchemaType::String => {
357
+ let v = String::try_convert(value)?;
358
+ ParquetValue::String(v)
359
+ }
360
+ ParquetSchemaType::Binary => {
361
+ let v = convert_to_binary(value)?;
362
+ ParquetValue::Bytes(v)
363
+ }
364
+ ParquetSchemaType::Boolean => {
365
+ let v = convert_to_boolean(value)?;
366
+ ParquetValue::Boolean(v)
367
+ }
368
+ ParquetSchemaType::Date32 => {
369
+ let v = convert_to_date32(value, map_field.format)?;
370
+ ParquetValue::Date32(v)
371
+ }
372
+ ParquetSchemaType::TimestampMillis => {
373
+ let v = convert_to_timestamp_millis(value, map_field.format)?;
374
+ ParquetValue::TimestampMillis(v, None)
375
+ }
376
+ ParquetSchemaType::TimestampMicros => {
377
+ let v = convert_to_timestamp_micros(value, map_field.format)?;
378
+ ParquetValue::TimestampMicros(v, None)
379
+ }
380
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
381
+ return Err(MagnusError::new(
382
+ magnus::exception::type_error(),
383
+ "Map values cannot be lists or maps",
384
+ ))
385
+ }
386
+ };
387
+
388
+ map.insert(key_value, value_value);
389
+ }
390
+ Ok(map)
391
+ } else {
392
+ Err(MagnusError::new(
393
+ magnus::exception::type_error(),
394
+ "Invalid map format",
395
+ ))
396
+ }
397
+ }
398
+
399
/// Expands to an expression that feeds `$values` into a `$builder_type` builder and
/// evaluates to `Ok(Arc::new(<finished array>))`.
///
/// `ParquetValue::$variant(v, _)` appends `v` (the second field is ignored),
/// `ParquetValue::Null` appends a null, and any other value makes the *enclosing
/// function* return a `TypeError`.
macro_rules! impl_timestamp_to_arrow_conversion {
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut timestamps = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => timestamps.append_null(),
                ParquetValue::$variant(instant, _) => timestamps.append_value(instant),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(timestamps.finish()))
    }};
}
417
+
418
/// Expands to an expression that downcasts `$column` to `$array_type` and collects its
/// values into `ParquetValue::$variant(value, tz)` entries.
///
/// `tz` is `$tz.clone()` with its contents converted via `Into`. When the array reports
/// itself as nullable, slots for which `is_null` is true become `ParquetValue::Null`.
#[macro_export]
macro_rules! impl_timestamp_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
        let typed = downcast_array::<$array_type>($column);
        if !typed.is_nullable() {
            // No null slots to consider: every raw value maps straight to the variant.
            typed
                .values()
                .iter()
                .map(|raw| ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())))
                .collect()
        } else {
            typed
                .values()
                .iter()
                .enumerate()
                .map(|(idx, raw)| {
                    if typed.is_null(idx) {
                        ParquetValue::Null
                    } else {
                        ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into()))
                    }
                })
                .collect()
        }
    }};
}
444
+
445
/// Expands to an expression that feeds `$values` into a `$builder_type` builder and
/// evaluates to `Ok(Arc::new(<finished array>))`.
///
/// `ParquetValue::$variant(v)` appends `v`, `ParquetValue::Null` appends a null, and
/// any other value makes the *enclosing function* return a `TypeError`.
///
/// The three-argument form calls `with_capacity(len)`; the four-argument form calls
/// `with_capacity(len, $capacity)` for builders that take a second capacity argument.
#[macro_export]
macro_rules! impl_array_conversion {
    ($values:expr, $builder_type:ty, $variant:ident) => {{
        let mut column = <$builder_type>::with_capacity($values.len());
        for entry in $values {
            match entry {
                ParquetValue::Null => column.append_null(),
                ParquetValue::$variant(inner) => column.append_value(inner),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(column.finish()))
    }};
    ($values:expr, $builder_type:ty, $variant:ident, $capacity:expr) => {{
        let mut column = <$builder_type>::with_capacity($values.len(), $capacity);
        for entry in $values {
            match entry {
                ParquetValue::Null => column.append_null(),
                ParquetValue::$variant(inner) => column.append_value(inner),
                unexpected => {
                    return Err(MagnusError::new(
                        magnus::exception::type_error(),
                        format!("Expected {}, got {:?}", stringify!($variant), unexpected),
                    ))
                }
            }
        }
        Ok(Arc::new(column.finish()))
    }};
}
480
+
481
/// Appends one list *element* to the child builder of `$list_builder`, cloning the
/// payload of `$value_variant`.
///
/// * `($item_type, $value_variant(v))` appends `v.clone()` to the child builder, which
///   is downcast to `$builder_type`.
/// * `ParquetValue::Null` appends a null *element* to the child builder. It must not
///   call `append_null` on the list builder itself, because that would close the
///   current list slot as a null list in the middle of its items.
/// * Anything else makes the *enclosing function* return a `TypeError`.
///
/// # Panics
/// Panics if the child builder of `$list_builder` is not a `$builder_type`.
#[macro_export]
macro_rules! append_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_value(v.clone());
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
508
+
509
/// Appends one list *element* to the child builder of `$list_builder`, copying the
/// payload of `$value_variant` by dereference (for `Copy` payloads).
///
/// * `($item_type, $value_variant(v))` appends `*v` to the child builder, which is
///   downcast to `$builder_type`.
/// * `ParquetValue::Null` appends a null *element* to the child builder. It must not
///   call `append_null` on the list builder itself, because that would close the
///   current list slot as a null list in the middle of its items.
/// * Anything else makes the *enclosing function* return a `TypeError`.
///
/// # Panics
/// Panics if the child builder of `$list_builder` is not a `$builder_type`.
#[macro_export]
macro_rules! append_list_value_copy {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
536
+
537
/// Appends one timestamp list *element* to the child builder of `$list_builder`.
///
/// * `($item_type, $value_variant(v, _))` appends `*v` to the child builder, which is
///   downcast to `$builder_type`; the second field of the variant is ignored.
/// * `ParquetValue::Null` appends a null *element* to the child builder. It must not
///   call `append_null` on the list builder itself, because that would close the
///   current list slot as a null list in the middle of its items.
/// * Anything else makes the *enclosing function* return a `TypeError`.
///
/// # Panics
/// Panics if the child builder of `$list_builder` is not a `$builder_type`.
#[macro_export]
macro_rules! append_timestamp_list_value {
    ($list_builder:expr, $item_type:path, $value:expr, $builder_type:ty, $value_variant:path) => {
        match (&$item_type, &$value) {
            ($item_type, $value_variant(v, _tz)) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_value(*v);
            }
            (_, ParquetValue::Null) => {
                $list_builder
                    .values()
                    .as_any_mut()
                    .downcast_mut::<$builder_type>()
                    .unwrap()
                    .append_null();
            }
            _ => {
                return Err(MagnusError::new(
                    magnus::exception::type_error(),
                    format!(
                        "Type mismatch in list: expected {:?}, got {:?}",
                        $item_type, $value
                    ),
                ))
            }
        }
    };
}
564
+
565
+ pub fn convert_parquet_values_to_arrow(
566
+ values: Vec<ParquetValue>,
567
+ type_: &ParquetSchemaType,
568
+ ) -> Result<Arc<dyn Array>, MagnusError> {
569
+ match type_ {
570
+ ParquetSchemaType::Int8 => impl_array_conversion!(values, Int8Builder, Int8),
571
+ ParquetSchemaType::Int16 => impl_array_conversion!(values, Int16Builder, Int16),
572
+ ParquetSchemaType::Int32 => impl_array_conversion!(values, Int32Builder, Int32),
573
+ ParquetSchemaType::Int64 => impl_array_conversion!(values, Int64Builder, Int64),
574
+ ParquetSchemaType::UInt8 => impl_array_conversion!(values, UInt8Builder, UInt8),
575
+ ParquetSchemaType::UInt16 => impl_array_conversion!(values, UInt16Builder, UInt16),
576
+ ParquetSchemaType::UInt32 => impl_array_conversion!(values, UInt32Builder, UInt32),
577
+ ParquetSchemaType::UInt64 => impl_array_conversion!(values, UInt64Builder, UInt64),
578
+ ParquetSchemaType::Float => impl_array_conversion!(values, Float32Builder, Float32),
579
+ ParquetSchemaType::Double => impl_array_conversion!(values, Float64Builder, Float64),
580
+ ParquetSchemaType::String => {
581
+ impl_array_conversion!(values, StringBuilder, String, values.len() * 32)
582
+ }
583
+ ParquetSchemaType::Binary => {
584
+ impl_array_conversion!(values, BinaryBuilder, Bytes, values.len() * 32)
585
+ }
586
+ ParquetSchemaType::Boolean => impl_array_conversion!(values, BooleanBuilder, Boolean),
587
+ ParquetSchemaType::Date32 => impl_array_conversion!(values, Date32Builder, Date32),
588
+ ParquetSchemaType::TimestampMillis => {
589
+ impl_timestamp_to_arrow_conversion!(
590
+ values,
591
+ TimestampMillisecondBuilder,
592
+ TimestampMillis
593
+ )
594
+ }
595
+ ParquetSchemaType::TimestampMicros => {
596
+ impl_timestamp_to_arrow_conversion!(
597
+ values,
598
+ TimestampMicrosecondBuilder,
599
+ TimestampMicros
600
+ )
601
+ }
602
+ ParquetSchemaType::List(list_field) => {
603
+ let value_builder = match list_field.item_type {
604
+ ParquetSchemaType::Int8 => Box::new(Int8Builder::new()) as Box<dyn ArrayBuilder>,
605
+ ParquetSchemaType::Int16 => Box::new(Int16Builder::new()) as Box<dyn ArrayBuilder>,
606
+ ParquetSchemaType::Int32 => Box::new(Int32Builder::new()) as Box<dyn ArrayBuilder>,
607
+ ParquetSchemaType::Int64 => Box::new(Int64Builder::new()) as Box<dyn ArrayBuilder>,
608
+ ParquetSchemaType::UInt8 => Box::new(UInt8Builder::new()) as Box<dyn ArrayBuilder>,
609
+ ParquetSchemaType::UInt16 => {
610
+ Box::new(UInt16Builder::new()) as Box<dyn ArrayBuilder>
611
+ }
612
+ ParquetSchemaType::UInt32 => {
613
+ Box::new(UInt32Builder::new()) as Box<dyn ArrayBuilder>
614
+ }
615
+ ParquetSchemaType::UInt64 => {
616
+ Box::new(UInt64Builder::new()) as Box<dyn ArrayBuilder>
617
+ }
618
+ ParquetSchemaType::Float => {
619
+ Box::new(Float32Builder::new()) as Box<dyn ArrayBuilder>
620
+ }
621
+ ParquetSchemaType::Double => {
622
+ Box::new(Float64Builder::new()) as Box<dyn ArrayBuilder>
623
+ }
624
+ ParquetSchemaType::String => {
625
+ Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>
626
+ }
627
+ ParquetSchemaType::Binary => {
628
+ Box::new(BinaryBuilder::new()) as Box<dyn ArrayBuilder>
629
+ }
630
+ ParquetSchemaType::Boolean => {
631
+ Box::new(BooleanBuilder::new()) as Box<dyn ArrayBuilder>
632
+ }
633
+ ParquetSchemaType::Date32 => {
634
+ Box::new(Date32Builder::new()) as Box<dyn ArrayBuilder>
635
+ }
636
+ ParquetSchemaType::TimestampMillis => {
637
+ Box::new(TimestampMillisecondBuilder::new()) as Box<dyn ArrayBuilder>
638
+ }
639
+ ParquetSchemaType::TimestampMicros => {
640
+ Box::new(TimestampMicrosecondBuilder::new()) as Box<dyn ArrayBuilder>
641
+ }
642
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
643
+ return Err(MagnusError::new(
644
+ magnus::exception::type_error(),
645
+ "Nested lists and maps are not supported",
646
+ ))
647
+ }
648
+ };
649
+
650
+ let mut list_builder = ListBuilder::new(value_builder);
651
+
652
+ for value in values {
653
+ match value {
654
+ ParquetValue::List(items) => {
655
+ for item in items {
656
+ match &list_field.item_type {
657
+ ParquetSchemaType::Int8 => append_list_value_copy!(
658
+ list_builder,
659
+ ParquetSchemaType::Int8,
660
+ item,
661
+ Int8Builder,
662
+ ParquetValue::Int8
663
+ ),
664
+ ParquetSchemaType::Int16 => append_list_value_copy!(
665
+ list_builder,
666
+ ParquetSchemaType::Int16,
667
+ item,
668
+ Int16Builder,
669
+ ParquetValue::Int16
670
+ ),
671
+ ParquetSchemaType::Int32 => append_list_value_copy!(
672
+ list_builder,
673
+ ParquetSchemaType::Int32,
674
+ item,
675
+ Int32Builder,
676
+ ParquetValue::Int32
677
+ ),
678
+ ParquetSchemaType::Int64 => append_list_value_copy!(
679
+ list_builder,
680
+ ParquetSchemaType::Int64,
681
+ item,
682
+ Int64Builder,
683
+ ParquetValue::Int64
684
+ ),
685
+ ParquetSchemaType::UInt8 => append_list_value_copy!(
686
+ list_builder,
687
+ ParquetSchemaType::UInt8,
688
+ item,
689
+ UInt8Builder,
690
+ ParquetValue::UInt8
691
+ ),
692
+ ParquetSchemaType::UInt16 => append_list_value_copy!(
693
+ list_builder,
694
+ ParquetSchemaType::UInt16,
695
+ item,
696
+ UInt16Builder,
697
+ ParquetValue::UInt16
698
+ ),
699
+ ParquetSchemaType::UInt32 => append_list_value_copy!(
700
+ list_builder,
701
+ ParquetSchemaType::UInt32,
702
+ item,
703
+ UInt32Builder,
704
+ ParquetValue::UInt32
705
+ ),
706
+ ParquetSchemaType::UInt64 => append_list_value_copy!(
707
+ list_builder,
708
+ ParquetSchemaType::UInt64,
709
+ item,
710
+ UInt64Builder,
711
+ ParquetValue::UInt64
712
+ ),
713
+ ParquetSchemaType::Float => append_list_value_copy!(
714
+ list_builder,
715
+ ParquetSchemaType::Float,
716
+ item,
717
+ Float32Builder,
718
+ ParquetValue::Float32
719
+ ),
720
+ ParquetSchemaType::Double => append_list_value_copy!(
721
+ list_builder,
722
+ ParquetSchemaType::Double,
723
+ item,
724
+ Float64Builder,
725
+ ParquetValue::Float64
726
+ ),
727
+ ParquetSchemaType::String => append_list_value!(
728
+ list_builder,
729
+ ParquetSchemaType::String,
730
+ item,
731
+ StringBuilder,
732
+ ParquetValue::String
733
+ ),
734
+ ParquetSchemaType::Binary => append_list_value!(
735
+ list_builder,
736
+ ParquetSchemaType::Binary,
737
+ item,
738
+ BinaryBuilder,
739
+ ParquetValue::Bytes
740
+ ),
741
+ ParquetSchemaType::Boolean => append_list_value_copy!(
742
+ list_builder,
743
+ ParquetSchemaType::Boolean,
744
+ item,
745
+ BooleanBuilder,
746
+ ParquetValue::Boolean
747
+ ),
748
+ ParquetSchemaType::Date32 => append_list_value_copy!(
749
+ list_builder,
750
+ ParquetSchemaType::Date32,
751
+ item,
752
+ Date32Builder,
753
+ ParquetValue::Date32
754
+ ),
755
+ ParquetSchemaType::TimestampMillis => append_timestamp_list_value!(
756
+ list_builder,
757
+ ParquetSchemaType::TimestampMillis,
758
+ item,
759
+ TimestampMillisecondBuilder,
760
+ ParquetValue::TimestampMillis
761
+ ),
762
+ ParquetSchemaType::TimestampMicros => append_timestamp_list_value!(
763
+ list_builder,
764
+ ParquetSchemaType::TimestampMicros,
765
+ item,
766
+ TimestampMicrosecondBuilder,
767
+ ParquetValue::TimestampMicros
768
+ ),
769
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => {
770
+ return Err(MagnusError::new(
771
+ magnus::exception::type_error(),
772
+ "Nested lists and maps are not supported",
773
+ ))
774
+ }
775
+ }
776
+ }
777
+ }
778
+ ParquetValue::Null => list_builder.append_null(),
779
+ _ => {
780
+ return Err(MagnusError::new(
781
+ magnus::exception::type_error(),
782
+ format!("Expected List, got {:?}", value),
783
+ ))
784
+ }
785
+ }
786
+ }
787
+ Ok(Arc::new(list_builder.finish()))
788
+ }
789
+ ParquetSchemaType::Map(_map_field) => {
790
+ unimplemented!("Writing maps is not yet supported")
791
+ }
792
+ }
793
+ }
794
+
795
+ pub fn convert_ruby_array_to_arrow(
796
+ values: RArray,
797
+ type_: &ParquetSchemaType,
798
+ ) -> Result<Arc<dyn Array>, MagnusError> {
799
+ let mut parquet_values = Vec::with_capacity(values.len());
800
+ for value in values {
801
+ if value.is_nil() {
802
+ parquet_values.push(ParquetValue::Null);
803
+ continue;
804
+ }
805
+ let parquet_value = ParquetValue::from_value(value, type_)?;
806
+ parquet_values.push(parquet_value);
807
+ }
808
+ convert_parquet_values_to_arrow(parquet_values, type_)
809
+ }