parquet 0.0.5 → 0.2.5

@@ -0,0 +1,458 @@
+ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
+
+ use super::*;
+
+ #[derive(Debug, Clone)]
+ pub enum ParquetValue {
+     Int8(i8),
+     Int16(i16),
+     Int32(i32),
+     Int64(i64),
+     UInt8(u8),
+     UInt16(u16),
+     UInt32(u32),
+     UInt64(u64),
+     Float16(f32), // f16 converted to f32
+     Float32(f32),
+     Float64(f64),
+     Boolean(bool),
+     String(String),
+     Bytes(Vec<u8>),
+     Date32(i32),
+     Date64(i64),
+     TimestampSecond(i64, Option<Arc<str>>),
+     TimestampMillis(i64, Option<Arc<str>>),
+     TimestampMicros(i64, Option<Arc<str>>),
+     TimestampNanos(i64, Option<Arc<str>>),
+     List(Vec<ParquetValue>),
+     Map(HashMap<ParquetValue, ParquetValue>),
+     Null,
+ }
+
+ impl PartialEq for ParquetValue {
+     fn eq(&self, other: &Self) -> bool {
+         match (self, other) {
+             (ParquetValue::Int8(a), ParquetValue::Int8(b)) => a == b,
+             (ParquetValue::Int16(a), ParquetValue::Int16(b)) => a == b,
+             (ParquetValue::Int32(a), ParquetValue::Int32(b)) => a == b,
+             (ParquetValue::Int64(a), ParquetValue::Int64(b)) => a == b,
+             (ParquetValue::UInt8(a), ParquetValue::UInt8(b)) => a == b,
+             (ParquetValue::UInt16(a), ParquetValue::UInt16(b)) => a == b,
+             (ParquetValue::UInt32(a), ParquetValue::UInt32(b)) => a == b,
+             (ParquetValue::UInt64(a), ParquetValue::UInt64(b)) => a == b,
+             (ParquetValue::Float16(a), ParquetValue::Float16(b)) => a == b,
+             (ParquetValue::Float32(a), ParquetValue::Float32(b)) => a == b,
+             (ParquetValue::Float64(a), ParquetValue::Float64(b)) => a == b,
+             (ParquetValue::Boolean(a), ParquetValue::Boolean(b)) => a == b,
+             (ParquetValue::String(a), ParquetValue::String(b)) => a == b,
+             (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
+             (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
+             (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
+             (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
+             (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
+             (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
+             (ParquetValue::TimestampNanos(a, _), ParquetValue::TimestampNanos(b, _)) => a == b,
+             (ParquetValue::List(a), ParquetValue::List(b)) => a == b,
+             (ParquetValue::Null, ParquetValue::Null) => true,
+             _ => false,
+         }
+     }
+ }
+
+ impl Eq for ParquetValue {}
+
+ impl std::hash::Hash for ParquetValue {
+     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+         match self {
+             ParquetValue::Int8(i) => i.hash(state),
+             ParquetValue::Int16(i) => i.hash(state),
+             ParquetValue::Int32(i) => i.hash(state),
+             ParquetValue::Int64(i) => i.hash(state),
+             ParquetValue::UInt8(i) => i.hash(state),
+             ParquetValue::UInt16(i) => i.hash(state),
+             ParquetValue::UInt32(i) => i.hash(state),
+             ParquetValue::UInt64(i) => i.hash(state),
+             ParquetValue::Float16(f) => f.to_bits().hash(state),
+             ParquetValue::Float32(f) => f.to_bits().hash(state),
+             ParquetValue::Float64(f) => f.to_bits().hash(state),
+             ParquetValue::Boolean(b) => b.hash(state),
+             ParquetValue::String(s) => s.hash(state),
+             ParquetValue::Bytes(b) => b.hash(state),
+             ParquetValue::Date32(d) => d.hash(state),
+             ParquetValue::Date64(d) => d.hash(state),
+             ParquetValue::TimestampSecond(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMillis(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMicros(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampNanos(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::List(l) => l.hash(state),
+             ParquetValue::Map(_m) => panic!("Map is not hashable"),
+             ParquetValue::Null => 0_i32.hash(state),
+         }
+     }
+ }
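
The two impls above give timestamps value-only equality (eq compares the i64 and ignores the timezone, while hash folds both in) and hash floats by their bit pattern, which is what lets ParquetValue act as a HashMap key for the Map variant. A minimal test sketch of those semantics, assuming it lives in the same module as ParquetValue; illustrative only, not part of this diff:

#[cfg(test)]
mod value_semantics_tests {
    use super::*;

    #[test]
    fn timestamp_eq_ignores_timezone() {
        // Equality looks only at the i64 timestamp, so differing timezones still compare equal.
        let utc = ParquetValue::TimestampMillis(1_000, Some("UTC".into()));
        let none = ParquetValue::TimestampMillis(1_000, None);
        assert_eq!(utc, none);
    }

    #[test]
    fn values_can_key_a_hash_map() {
        // Eq + Hash let ParquetValue act as a HashMap key, which the Map variant relies on.
        let mut m = HashMap::new();
        m.insert(ParquetValue::String("id".into()), ParquetValue::Int64(42));
        assert_eq!(
            m.get(&ParquetValue::String("id".into())),
            Some(&ParquetValue::Int64(42))
        );
    }
}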
+
+ impl IntoValue for ParquetValue {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ParquetValue::Int8(i) => i.into_value_with(handle),
+             ParquetValue::Int16(i) => i.into_value_with(handle),
+             ParquetValue::Int32(i) => i.into_value_with(handle),
+             ParquetValue::Int64(i) => i.into_value_with(handle),
+             ParquetValue::UInt8(i) => i.into_value_with(handle),
+             ParquetValue::UInt16(i) => i.into_value_with(handle),
+             ParquetValue::UInt32(i) => i.into_value_with(handle),
+             ParquetValue::UInt64(i) => i.into_value_with(handle),
+             ParquetValue::Float16(f) => f.into_value_with(handle),
+             ParquetValue::Float32(f) => f.into_value_with(handle),
+             ParquetValue::Float64(f) => f.into_value_with(handle),
+             ParquetValue::Boolean(b) => b.into_value_with(handle),
+             ParquetValue::String(s) => s.into_value_with(handle),
+             ParquetValue::Bytes(b) => handle.str_from_slice(&b).as_value(),
+             ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
+             ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
+             timestamp @ ParquetValue::TimestampSecond(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampSecond, handle)
+             }
+             timestamp @ ParquetValue::TimestampMillis(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMillis, handle)
+             }
+             timestamp @ ParquetValue::TimestampMicros(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMicros, handle)
+             }
+             timestamp @ ParquetValue::TimestampNanos(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
+             }
+             ParquetValue::List(l) => {
+                 let ary = handle.ary_new_capa(l.len());
+                 l.into_iter()
+                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ParquetValue::Map(m) => {
+                 let hash = handle.hash_new_capa(m.len());
+                 m.into_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             ParquetValue::Null => handle.qnil().as_value(),
+         }
+     }
+ }
+
+ impl ParquetValue {
+     pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
+         match type_ {
+             ParquetSchemaType::Int8 => {
+                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int8(v))
+             }
+             ParquetSchemaType::Int16 => {
+                 let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int16(v))
+             }
+             ParquetSchemaType::Int32 => {
+                 let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int32(v))
+             }
+             ParquetSchemaType::Int64 => {
+                 let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int64(v))
+             }
+             ParquetSchemaType::UInt8 => {
+                 let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt8(v))
+             }
+             ParquetSchemaType::UInt16 => {
+                 let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt16(v))
+             }
+             ParquetSchemaType::UInt32 => {
+                 let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt32(v))
+             }
+             ParquetSchemaType::UInt64 => {
+                 let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt64(v))
+             }
+             ParquetSchemaType::Float => {
+                 let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float32(v))
+             }
+             ParquetSchemaType::Double => {
+                 let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float64(v))
+             }
+             ParquetSchemaType::String => {
+                 let v = String::try_convert(value)?;
+                 Ok(ParquetValue::String(v))
+             }
+             ParquetSchemaType::Binary => {
+                 let v = convert_to_binary(value)?;
+                 Ok(ParquetValue::Bytes(v))
+             }
+             ParquetSchemaType::Boolean => {
+                 let v = convert_to_boolean(value)?;
+                 Ok(ParquetValue::Boolean(v))
+             }
+             ParquetSchemaType::Date32 => {
+                 let v = convert_to_date32(value)?;
+                 Ok(ParquetValue::Date32(v))
+             }
+             ParquetSchemaType::TimestampMillis => {
+                 let v = convert_to_timestamp_millis(value)?;
+                 Ok(ParquetValue::TimestampMillis(v, None))
+             }
+             ParquetSchemaType::TimestampMicros => {
+                 let v = convert_to_timestamp_micros(value)?;
+                 Ok(ParquetValue::TimestampMicros(v, None))
+             }
+             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
+                 magnus::exception::type_error(),
+                 "Nested lists and maps are not supported",
+             )),
+         }
+     }
+ }
+
+ #[derive(Debug)]
+ pub struct ParquetValueVec(Vec<ParquetValue>);
+
+ impl ParquetValueVec {
+     pub fn into_inner(self) -> Vec<ParquetValue> {
+         self.0
+     }
+ }
+
+ impl IntoIterator for ParquetValueVec {
+     type Item = ParquetValue;
+     type IntoIter = std::vec::IntoIter<ParquetValue>;
+
+     fn into_iter(self) -> Self::IntoIter {
+         self.0.into_iter()
+     }
+ }
+
+ impl std::cmp::PartialEq for ParquetValueVec {
+     fn eq(&self, other: &Self) -> bool {
+         self.0 == other.0
+     }
+ }
+
+ impl std::cmp::Eq for ParquetValueVec {}
+
+ impl TryFrom<Arc<dyn Array>> for ParquetValueVec {
+     type Error = String;
+
+     fn try_from(column: Arc<dyn Array>) -> Result<Self, Self::Error> {
+         ParquetValueVec::try_from(&*column)
+     }
+ }
+
+ macro_rules! impl_numeric_array_conversion {
+     ($column:expr, $array_type:ty, $variant:ident) => {{
+         let array = downcast_array::<$array_type>($column);
+         if array.is_nullable() {
+             array
+                 .values()
+                 .iter()
+                 .enumerate()
+                 .map(|(i, x)| {
+                     if array.is_null(i) {
+                         ParquetValue::Null
+                     } else {
+                         ParquetValue::$variant(*x)
+                     }
+                 })
+                 .collect()
+         } else {
+             array
+                 .values()
+                 .iter()
+                 .map(|x| ParquetValue::$variant(*x))
+                 .collect()
+         }
+     }};
+ }
+ macro_rules! impl_boolean_array_conversion {
+     ($column:expr, $array_type:ty, $variant:ident) => {{
+         let array = downcast_array::<$array_type>($column);
+         if array.is_nullable() {
+             array
+                 .values()
+                 .iter()
+                 .enumerate()
+                 .map(|(i, x)| {
+                     if array.is_null(i) {
+                         ParquetValue::Null
+                     } else {
+                         ParquetValue::$variant(x)
+                     }
+                 })
+                 .collect()
+         } else {
+             array
+                 .values()
+                 .iter()
+                 .map(|x| ParquetValue::$variant(x))
+                 .collect()
+         }
+     }};
+ }
+
+ impl TryFrom<&dyn Array> for ParquetValueVec {
+     type Error = String;
+
+     fn try_from(column: &dyn Array) -> Result<Self, Self::Error> {
+         let tmp_vec = match column.data_type() {
+             DataType::Boolean => impl_boolean_array_conversion!(column, BooleanArray, Boolean),
+             DataType::Int8 => impl_numeric_array_conversion!(column, Int8Array, Int8),
+             DataType::Int16 => impl_numeric_array_conversion!(column, Int16Array, Int16),
+             DataType::Int32 => impl_numeric_array_conversion!(column, Int32Array, Int32),
+             DataType::Int64 => impl_numeric_array_conversion!(column, Int64Array, Int64),
+             DataType::UInt8 => impl_numeric_array_conversion!(column, UInt8Array, UInt8),
+             DataType::UInt16 => impl_numeric_array_conversion!(column, UInt16Array, UInt16),
+             DataType::UInt32 => impl_numeric_array_conversion!(column, UInt32Array, UInt32),
+             DataType::UInt64 => impl_numeric_array_conversion!(column, UInt64Array, UInt64),
+             DataType::Float32 => impl_numeric_array_conversion!(column, Float32Array, Float32),
+             DataType::Float64 => impl_numeric_array_conversion!(column, Float64Array, Float64),
+             DataType::Date32 => impl_numeric_array_conversion!(column, Date32Array, Date32),
+             DataType::Date64 => impl_numeric_array_conversion!(column, Date64Array, Date64),
+             DataType::Timestamp(TimeUnit::Second, tz) => {
+                 impl_timestamp_array_conversion!(column, TimestampSecondArray, TimestampSecond, tz)
+             }
+             DataType::Timestamp(TimeUnit::Millisecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampMillisecondArray,
+                     TimestampMillis,
+                     tz
+                 )
+             }
+             DataType::Timestamp(TimeUnit::Microsecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampMicrosecondArray,
+                     TimestampMicros,
+                     tz
+                 )
+             }
+             DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampNanosecondArray,
+                     TimestampNanos,
+                     tz
+                 )
+             }
+             DataType::Float16 => {
+                 let array = downcast_array::<Float16Array>(column);
+                 if array.is_nullable() {
+                     array
+                         .values()
+                         .iter()
+                         .enumerate()
+                         .map(|(i, x)| {
+                             if array.is_null(i) {
+                                 ParquetValue::Null
+                             } else {
+                                 ParquetValue::Float16(f32::from(*x))
+                             }
+                         })
+                         .collect()
+                 } else {
+                     array
+                         .values()
+                         .iter()
+                         .map(|x| ParquetValue::Float16(f32::from(*x)))
+                         .collect()
+                 }
+             }
+             DataType::Utf8 => {
+                 let array = downcast_array::<StringArray>(column);
+                 array
+                     .iter()
+                     .map(|opt_x| match opt_x {
+                         Some(x) => ParquetValue::String(x.to_string()),
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::Binary => {
+                 let array = downcast_array::<BinaryArray>(column);
+                 array
+                     .iter()
+                     .map(|opt_x| match opt_x {
+                         Some(x) => ParquetValue::Bytes(x.to_vec()),
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::List(_field) => {
+                 let list_array = downcast_array::<ListArray>(column);
+                 list_array
+                     .iter()
+                     .map(|x| match x {
+                         Some(values) => match ParquetValueVec::try_from(values) {
+                             Ok(vec) => ParquetValue::List(vec.into_inner()),
+                             Err(e) => {
+                                 panic!("Error converting list array to ParquetValueVec: {}", e)
+                             }
+                         },
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::Struct(_) => {
+                 let struct_array = downcast_array::<StructArray>(column);
+                 let mut values = Vec::with_capacity(struct_array.len());
+                 for i in 0..struct_array.len() {
+                     if struct_array.is_null(i) {
+                         values.push(ParquetValue::Null);
+                         continue;
+                     }
+
+                     let mut map = std::collections::HashMap::new();
+                     for (field_idx, field) in struct_array.fields().iter().enumerate() {
+                         let column = struct_array.column(field_idx);
+                         let field_values = match ParquetValueVec::try_from(column.slice(i, 1)) {
+                             Ok(vec) => vec.into_inner(),
+                             Err(e) => {
+                                 panic!("Error converting struct field to ParquetValueVec: {}", e)
+                             }
+                         };
+                         map.insert(
+                             ParquetValue::String(field.name().to_string()),
+                             field_values.into_iter().next().unwrap(),
+                         );
+                     }
+                     values.push(ParquetValue::Map(map));
+                 }
+                 values
+             }
+             DataType::Null => {
+                 let x = downcast_array::<NullArray>(column);
+                 vec![ParquetValue::Null; x.len()]
+             }
+             _ => {
+                 return Err(format!("Unsupported data type: {:?}", column.data_type()));
+             }
+         };
+         Ok(ParquetValueVec(tmp_vec))
+     }
+ }
@@ -0,0 +1,204 @@
+ use itertools::Itertools;
+
+ use super::*;
+
+ #[derive(Debug)]
+ pub enum RowRecord<S: BuildHasher + Default> {
+     Vec(Vec<ParquetField>),
+     Map(HashMap<StringCacheKey, ParquetField, S>),
+ }
+
+ #[derive(Debug)]
+ pub enum ColumnRecord<S: BuildHasher + Default> {
+     Vec(Vec<Vec<ParquetValue>>),
+     Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
+ }
+
+ #[derive(Debug)]
+ pub struct ParquetField(pub Field);
+
+ impl<S: BuildHasher + Default> IntoValue for RowRecord<S> {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             RowRecord::Vec(vec) => {
+                 let ary = handle.ary_new_capa(vec.len());
+                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
+                 handle.into_value(ary)
+             }
+             RowRecord::Map(map) => {
+                 let hash = handle.hash_new_capa(map.len());
+
+                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
+                 let mut i = 0;
+
+                 for chunk in &map.into_iter().chunks(64) {
+                     // Reduced to 64 to ensure space for pairs
+                     for (k, v) in chunk {
+                         if i + 1 >= values.len() {
+                             // Bulk insert current batch if array is full
+                             hash.bulk_insert(&values[..i]).unwrap();
+                             values[..i].fill(handle.qnil().as_value());
+                             i = 0;
+                         }
+                         values[i] = handle.into_value(k);
+                         values[i + 1] = handle.into_value(v);
+                         i += 2;
+                     }
+                     // Insert any remaining pairs
+                     if i > 0 {
+                         hash.bulk_insert(&values[..i]).unwrap();
+                         values[..i].fill(handle.qnil().as_value());
+                         i = 0;
+                     }
+                 }
+
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
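
RowRecord::Map above stages key/value pairs two slots at a time into a fixed 128-element Value buffer and flushes it with bulk_insert whenever it fills, instead of inserting one pair at a time; ColumnRecord::Map below reuses the same pattern. A standalone sketch of that batching scheme using plain integers (hypothetical helper, not part of this diff; it omits the chunks(64) grouping):

// Stand-in for hash.bulk_insert: consumes one flushed batch of interleaved key/value slots.
fn flush(batch: &[i32]) {
    println!("flushing {} slots ({} pairs)", batch.len(), batch.len() / 2);
}

fn main() {
    let pairs = (0..10).map(|k| (k, k * 10));
    // Two slots per pair; analogous to the 128-slot Value buffer above.
    let mut buf = [0_i32; 8];
    let mut i = 0;
    for (k, v) in pairs {
        if i + 1 >= buf.len() {
            // Buffer full: flush the staged pairs and start over.
            flush(&buf[..i]);
            i = 0;
        }
        buf[i] = k;
        buf[i + 1] = v;
        i += 2;
    }
    // Flush whatever remains.
    if i > 0 {
        flush(&buf[..i]);
    }
}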
+
+ impl<S: BuildHasher + Default> IntoValue for ColumnRecord<S> {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ColumnRecord::Vec(vec) => {
+                 let ary = handle.ary_new_capa(vec.len());
+                 vec.into_iter()
+                     .try_for_each(|v| {
+                         let nested_ary = handle.ary_new_capa(v.len());
+                         v.into_iter().try_for_each(|v| nested_ary.push(v)).unwrap();
+                         ary.push(nested_ary.into_value_with(handle))
+                     })
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ColumnRecord::Map(map) => {
+                 let hash = handle.hash_new_capa(map.len());
+
+                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
+                 let mut i = 0;
+
+                 for chunk in &map.into_iter().chunks(64) {
+                     // Reduced to 64 to ensure space for pairs
+                     for (k, v) in chunk {
+                         if i + 1 >= values.len() {
+                             // Bulk insert current batch if array is full
+                             hash.bulk_insert(&values[..i]).unwrap();
+                             values[..i].fill(handle.qnil().as_value());
+                             i = 0;
+                         }
+                         values[i] = handle.into_value(k);
+                         let ary = handle.ary_new_capa(v.len());
+                         v.into_iter().try_for_each(|v| ary.push(v)).unwrap();
+                         values[i + 1] = handle.into_value(ary);
+                         i += 2;
+                     }
+                     // Insert any remaining pairs
+                     if i > 0 {
+                         hash.bulk_insert(&values[..i]).unwrap();
+                         values[..i].fill(handle.qnil().as_value());
+                         i = 0;
+                     }
+                 }
+
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
+
+ impl IntoValue for ParquetField {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self.0 {
+             Field::Null => handle.qnil().as_value(),
+             Field::Bool(b) => b.into_value_with(handle),
+             Field::Short(s) => s.into_value_with(handle),
+             Field::Int(i) => i.into_value_with(handle),
+             Field::Long(l) => l.into_value_with(handle),
+             Field::UByte(ub) => ub.into_value_with(handle),
+             Field::UShort(us) => us.into_value_with(handle),
+             Field::UInt(ui) => ui.into_value_with(handle),
+             Field::ULong(ul) => ul.into_value_with(handle),
+             Field::Float16(f) => f32::from(f).into_value_with(handle),
+             Field::Float(f) => f.into_value_with(handle),
+             Field::Double(d) => d.into_value_with(handle),
+             Field::Str(s) => s.into_value_with(handle),
+             Field::Byte(b) => b.into_value_with(handle),
+             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
+             Field::Date(d) => {
+                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
+                 let formatted = ts.strftime("%Y-%m-%d").to_string();
+                 formatted.into_value_with(handle)
+             }
+             Field::TimestampMillis(ts) => {
+                 let ts = jiff::Timestamp::from_millisecond(ts).unwrap();
+                 let time_class = handle.class_time();
+                 time_class
+                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
+                     .unwrap()
+                     .into_value_with(handle)
+             }
+             Field::TimestampMicros(ts) => {
+                 let ts = jiff::Timestamp::from_microsecond(ts).unwrap();
+                 let time_class = handle.class_time();
+                 time_class
+                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
+                     .unwrap()
+                     .into_value_with(handle)
+             }
+             Field::ListInternal(list) => {
+                 let elements = list.elements();
+                 let ary = handle.ary_new_capa(elements.len());
+                 elements
+                     .iter()
+                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             Field::MapInternal(map) => {
+                 let entries = map.entries();
+                 let hash = handle.hash_new_capa(entries.len());
+                 entries
+                     .iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(
+                             ParquetField(k.clone()).into_value_with(handle),
+                             ParquetField(v.clone()).into_value_with(handle),
+                         )
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             Field::Decimal(d) => {
+                 let value = match d {
+                     Decimal::Int32 { value, scale, .. } => {
+                         let unscaled = i32::from_be_bytes(value);
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                     Decimal::Int64 { value, scale, .. } => {
+                         let unscaled = i64::from_be_bytes(value);
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                     Decimal::Bytes { value, scale, .. } => {
+                         // Convert bytes to string representation of unscaled value
+                         let unscaled = String::from_utf8_lossy(value.data());
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                 };
+                 handle.eval(&format!("BigDecimal(\"{value}\")")).unwrap()
+             }
+             Field::Group(row) => {
+                 let hash = handle.hash_new();
+                 row.get_column_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(
+                             k.clone().into_value_with(handle),
+                             ParquetField(v.clone()).into_value_with(handle),
+                         )
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
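
The Field::Decimal arm above formats the unscaled integer and its scale as a scientific-notation literal ("<unscaled>e-<scale>") and hands that string to Ruby's BigDecimal via eval. A standalone sketch of that string construction for the Int32 case (hypothetical helper, not part of this diff):

// Standalone sketch: how an unscaled big-endian i32 plus a scale becomes the
// "<unscaled>e-<scale>" string that the Decimal::Int32 arm passes to BigDecimal().
fn decimal_literal(be_bytes: [u8; 4], scale: i32) -> String {
    let unscaled = i32::from_be_bytes(be_bytes);
    format!("{}e-{}", unscaled, scale)
}

fn main() {
    // An unscaled value of 12345 with scale 2 represents 123.45.
    assert_eq!(decimal_literal(12345_i32.to_be_bytes(), 2), "12345e-2");
    println!("ok");
}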