parquet 0.0.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,462 @@
1
+ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
2
+
3
+ use super::*;
4
+
5
/// A single decoded cell value, independent of the Arrow array it came from.
///
/// Equality and hashing are implemented by hand (see below) so that values can
/// be used as `HashMap` keys inside [`ParquetValue::Map`].
#[derive(Debug, Clone)]
pub enum ParquetValue {
    Int8(i8),
    Int16(i16),
    Int32(i32),
    Int64(i64),
    UInt8(u8),
    UInt16(u16),
    UInt32(u32),
    UInt64(u64),
    Float16(f32), // f16 converted to f32
    Float32(f32),
    Float64(f64),
    Boolean(bool),
    String(String),
    Bytes(Vec<u8>),
    Date32(i32),
    Date64(i64),
    // Timestamps carry the raw integer plus an optional zone label.
    TimestampSecond(i64, Option<Arc<str>>),
    TimestampMillis(i64, Option<Arc<str>>),
    TimestampMicros(i64, Option<Arc<str>>),
    TimestampNanos(i64, Option<Arc<str>>),
    List(Vec<ParquetValue>),
    Map(HashMap<ParquetValue, ParquetValue>),
    Null,
}

/// Structural equality between two values of the same variant.
///
/// * Floats are compared by bit pattern, which keeps equality reflexive for NaN
///   (required by the `Eq` impl below) and consistent with the `Hash` impl.
///   As a consequence `0.0` and `-0.0` are distinct.
/// * Timestamps compare the integer only; the zone label is ignored.
/// * Maps compare entry by entry. This hashes the keys, so it panics if a key
///   is itself a `Map` (see the `Hash` impl).
/// * Values of different variants are never equal.
impl PartialEq for ParquetValue {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (ParquetValue::Int8(a), ParquetValue::Int8(b)) => a == b,
            (ParquetValue::Int16(a), ParquetValue::Int16(b)) => a == b,
            (ParquetValue::Int32(a), ParquetValue::Int32(b)) => a == b,
            (ParquetValue::Int64(a), ParquetValue::Int64(b)) => a == b,
            (ParquetValue::UInt8(a), ParquetValue::UInt8(b)) => a == b,
            (ParquetValue::UInt16(a), ParquetValue::UInt16(b)) => a == b,
            (ParquetValue::UInt32(a), ParquetValue::UInt32(b)) => a == b,
            (ParquetValue::UInt64(a), ParquetValue::UInt64(b)) => a == b,
            (ParquetValue::Float16(a), ParquetValue::Float16(b)) => a.to_bits() == b.to_bits(),
            (ParquetValue::Float32(a), ParquetValue::Float32(b)) => a.to_bits() == b.to_bits(),
            (ParquetValue::Float64(a), ParquetValue::Float64(b)) => a.to_bits() == b.to_bits(),
            (ParquetValue::Boolean(a), ParquetValue::Boolean(b)) => a == b,
            (ParquetValue::String(a), ParquetValue::String(b)) => a == b,
            (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
            (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
            (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
            (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
            (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
            (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
            (ParquetValue::TimestampNanos(a, _), ParquetValue::TimestampNanos(b, _)) => a == b,
            (ParquetValue::List(a), ParquetValue::List(b)) => a == b,
            (ParquetValue::Map(a), ParquetValue::Map(b)) => a == b,
            (ParquetValue::Null, ParquetValue::Null) => true,
            _ => false,
        }
    }
}

impl Eq for ParquetValue {}

/// Hashing that agrees with the `PartialEq` impl above: whenever two values
/// compare equal they feed identical data to the hasher.
///
/// # Panics
///
/// Hashing a [`ParquetValue::Map`] panics; maps cannot be used as map keys.
impl std::hash::Hash for ParquetValue {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Mix in the variant first so that equal payloads held by different
        // variants (e.g. `Float16(1.0)` and `Float32(1.0)`) do not collide.
        std::mem::discriminant(self).hash(state);
        match self {
            ParquetValue::Int8(i) => i.hash(state),
            ParquetValue::Int16(i) => i.hash(state),
            ParquetValue::Int32(i) => i.hash(state),
            ParquetValue::Int64(i) => i.hash(state),
            ParquetValue::UInt8(i) => i.hash(state),
            ParquetValue::UInt16(i) => i.hash(state),
            ParquetValue::UInt32(i) => i.hash(state),
            ParquetValue::UInt64(i) => i.hash(state),
            ParquetValue::Float16(f) => f.to_bits().hash(state),
            ParquetValue::Float32(f) => f.to_bits().hash(state),
            ParquetValue::Float64(f) => f.to_bits().hash(state),
            ParquetValue::Boolean(b) => b.hash(state),
            ParquetValue::String(s) => s.hash(state),
            ParquetValue::Bytes(b) => b.hash(state),
            ParquetValue::Date32(d) => d.hash(state),
            ParquetValue::Date64(d) => d.hash(state),
            // Equality ignores the zone label, so the hash must ignore it too;
            // otherwise two equal timestamps could land in different buckets.
            ParquetValue::TimestampSecond(ts, _)
            | ParquetValue::TimestampMillis(ts, _)
            | ParquetValue::TimestampMicros(ts, _)
            | ParquetValue::TimestampNanos(ts, _) => ts.hash(state),
            ParquetValue::List(l) => l.hash(state),
            ParquetValue::Map(_m) => panic!("Map is not hashable"),
            // The discriminant alone identifies `Null`.
            ParquetValue::Null => {}
        }
    }
}
105
+
106
+ impl IntoValue for ParquetValue {
107
+ fn into_value_with(self, handle: &Ruby) -> Value {
108
+ match self {
109
+ ParquetValue::Int8(i) => i.into_value_with(handle),
110
+ ParquetValue::Int16(i) => i.into_value_with(handle),
111
+ ParquetValue::Int32(i) => i.into_value_with(handle),
112
+ ParquetValue::Int64(i) => i.into_value_with(handle),
113
+ ParquetValue::UInt8(i) => i.into_value_with(handle),
114
+ ParquetValue::UInt16(i) => i.into_value_with(handle),
115
+ ParquetValue::UInt32(i) => i.into_value_with(handle),
116
+ ParquetValue::UInt64(i) => i.into_value_with(handle),
117
+ ParquetValue::Float16(f) => f.into_value_with(handle),
118
+ ParquetValue::Float32(f) => f.into_value_with(handle),
119
+ ParquetValue::Float64(f) => f.into_value_with(handle),
120
+ ParquetValue::Boolean(b) => b.into_value_with(handle),
121
+ ParquetValue::String(s) => s.into_value_with(handle),
122
+ ParquetValue::Bytes(b) => handle.str_from_slice(&b).as_value(),
123
+ ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
124
+ ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
125
+ timestamp @ ParquetValue::TimestampSecond(_, _) => {
126
+ impl_timestamp_conversion!(timestamp, TimestampSecond, handle)
127
+ }
128
+ timestamp @ ParquetValue::TimestampMillis(_, _) => {
129
+ impl_timestamp_conversion!(timestamp, TimestampMillis, handle)
130
+ }
131
+ timestamp @ ParquetValue::TimestampMicros(_, _) => {
132
+ impl_timestamp_conversion!(timestamp, TimestampMicros, handle)
133
+ }
134
+ timestamp @ ParquetValue::TimestampNanos(_, _) => {
135
+ impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
136
+ }
137
+ ParquetValue::List(l) => {
138
+ let ary = handle.ary_new_capa(l.len());
139
+ l.into_iter()
140
+ .try_for_each(|v| ary.push(v.into_value_with(handle)))
141
+ .unwrap();
142
+ ary.into_value_with(handle)
143
+ }
144
+ ParquetValue::Map(m) => {
145
+ let hash = handle.hash_new_capa(m.len());
146
+ m.into_iter()
147
+ .try_for_each(|(k, v)| {
148
+ hash.aset(k.into_value_with(handle), v.into_value_with(handle))
149
+ })
150
+ .unwrap();
151
+ hash.into_value_with(handle)
152
+ }
153
+ ParquetValue::Null => handle.qnil().as_value(),
154
+ }
155
+ }
156
+ }
157
+
158
+ impl ParquetValue {
159
+ pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
160
+ if value.is_nil() {
161
+ return Ok(ParquetValue::Null);
162
+ }
163
+
164
+ match type_ {
165
+ ParquetSchemaType::Int8 => {
166
+ let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
167
+ Ok(ParquetValue::Int8(v))
168
+ }
169
+ ParquetSchemaType::Int16 => {
170
+ let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
171
+ Ok(ParquetValue::Int16(v))
172
+ }
173
+ ParquetSchemaType::Int32 => {
174
+ let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
175
+ Ok(ParquetValue::Int32(v))
176
+ }
177
+ ParquetSchemaType::Int64 => {
178
+ let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
179
+ Ok(ParquetValue::Int64(v))
180
+ }
181
+ ParquetSchemaType::UInt8 => {
182
+ let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
183
+ Ok(ParquetValue::UInt8(v))
184
+ }
185
+ ParquetSchemaType::UInt16 => {
186
+ let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
187
+ Ok(ParquetValue::UInt16(v))
188
+ }
189
+ ParquetSchemaType::UInt32 => {
190
+ let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
191
+ Ok(ParquetValue::UInt32(v))
192
+ }
193
+ ParquetSchemaType::UInt64 => {
194
+ let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
195
+ Ok(ParquetValue::UInt64(v))
196
+ }
197
+ ParquetSchemaType::Float => {
198
+ let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
199
+ Ok(ParquetValue::Float32(v))
200
+ }
201
+ ParquetSchemaType::Double => {
202
+ let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
203
+ Ok(ParquetValue::Float64(v))
204
+ }
205
+ ParquetSchemaType::String => {
206
+ let v = String::try_convert(value)?;
207
+ Ok(ParquetValue::String(v))
208
+ }
209
+ ParquetSchemaType::Binary => {
210
+ let v = convert_to_binary(value)?;
211
+ Ok(ParquetValue::Bytes(v))
212
+ }
213
+ ParquetSchemaType::Boolean => {
214
+ let v = convert_to_boolean(value)?;
215
+ Ok(ParquetValue::Boolean(v))
216
+ }
217
+ ParquetSchemaType::Date32 => {
218
+ let v = convert_to_date32(value)?;
219
+ Ok(ParquetValue::Date32(v))
220
+ }
221
+ ParquetSchemaType::TimestampMillis => {
222
+ let v = convert_to_timestamp_millis(value)?;
223
+ Ok(ParquetValue::TimestampMillis(v, None))
224
+ }
225
+ ParquetSchemaType::TimestampMicros => {
226
+ let v = convert_to_timestamp_micros(value)?;
227
+ Ok(ParquetValue::TimestampMicros(v, None))
228
+ }
229
+ ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
230
+ magnus::exception::type_error(),
231
+ "Nested lists and maps are not supported",
232
+ )),
233
+ }
234
+ }
235
+ }
236
+
237
+ #[derive(Debug)]
238
+ pub struct ParquetValueVec(Vec<ParquetValue>);
239
+
240
+ impl ParquetValueVec {
241
+ pub fn into_inner(self) -> Vec<ParquetValue> {
242
+ self.0
243
+ }
244
+ }
245
+
246
+ impl IntoIterator for ParquetValueVec {
247
+ type Item = ParquetValue;
248
+ type IntoIter = std::vec::IntoIter<ParquetValue>;
249
+
250
+ fn into_iter(self) -> Self::IntoIter {
251
+ self.0.into_iter()
252
+ }
253
+ }
254
+
255
+ impl std::cmp::PartialEq for ParquetValueVec {
256
+ fn eq(&self, other: &Self) -> bool {
257
+ self.0 == other.0
258
+ }
259
+ }
260
+
261
+ impl std::cmp::Eq for ParquetValueVec {}
262
+
263
+ impl TryFrom<Arc<dyn Array>> for ParquetValueVec {
264
+ type Error = String;
265
+
266
+ fn try_from(column: Arc<dyn Array>) -> Result<Self, Self::Error> {
267
+ ParquetValueVec::try_from(&*column)
268
+ }
269
+ }
270
+
271
// Expands to an expression that downcasts `$column` to `$array_type` and
// collects its values as `ParquetValue::$variant`, emitting
// `ParquetValue::Null` for slots the array marks as null. The per-slot null
// check is skipped when the array reports that it is not nullable.
macro_rules! impl_numeric_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident) => {{
        let typed = downcast_array::<$array_type>($column);
        let raw = typed.values().iter();
        if !typed.is_nullable() {
            raw.map(|v| ParquetValue::$variant(*v)).collect()
        } else {
            raw.enumerate()
                .map(|(idx, v)| {
                    if typed.is_null(idx) {
                        ParquetValue::Null
                    } else {
                        ParquetValue::$variant(*v)
                    }
                })
                .collect()
        }
    }};
}
296
// Boolean counterpart of the numeric conversion macro: the values iterator
// yields items by value here, so no dereference is needed. Null slots become
// `ParquetValue::Null`; the null check is skipped when the array reports that
// it is not nullable.
macro_rules! impl_boolean_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident) => {{
        let typed = downcast_array::<$array_type>($column);
        let raw = typed.values().iter();
        if !typed.is_nullable() {
            raw.map(|v| ParquetValue::$variant(v)).collect()
        } else {
            raw.enumerate()
                .map(|(idx, v)| {
                    if typed.is_null(idx) {
                        ParquetValue::Null
                    } else {
                        ParquetValue::$variant(v)
                    }
                })
                .collect()
        }
    }};
}
321
+
322
+ impl TryFrom<&dyn Array> for ParquetValueVec {
323
+ type Error = String;
324
+
325
+ fn try_from(column: &dyn Array) -> Result<Self, Self::Error> {
326
+ let tmp_vec = match column.data_type() {
327
+ DataType::Boolean => impl_boolean_array_conversion!(column, BooleanArray, Boolean),
328
+ DataType::Int8 => impl_numeric_array_conversion!(column, Int8Array, Int8),
329
+ DataType::Int16 => impl_numeric_array_conversion!(column, Int16Array, Int16),
330
+ DataType::Int32 => impl_numeric_array_conversion!(column, Int32Array, Int32),
331
+ DataType::Int64 => impl_numeric_array_conversion!(column, Int64Array, Int64),
332
+ DataType::UInt8 => impl_numeric_array_conversion!(column, UInt8Array, UInt8),
333
+ DataType::UInt16 => impl_numeric_array_conversion!(column, UInt16Array, UInt16),
334
+ DataType::UInt32 => impl_numeric_array_conversion!(column, UInt32Array, UInt32),
335
+ DataType::UInt64 => impl_numeric_array_conversion!(column, UInt64Array, UInt64),
336
+ DataType::Float32 => impl_numeric_array_conversion!(column, Float32Array, Float32),
337
+ DataType::Float64 => impl_numeric_array_conversion!(column, Float64Array, Float64),
338
+ DataType::Date32 => impl_numeric_array_conversion!(column, Date32Array, Date32),
339
+ DataType::Date64 => impl_numeric_array_conversion!(column, Date64Array, Date64),
340
+ DataType::Timestamp(TimeUnit::Second, tz) => {
341
+ impl_timestamp_array_conversion!(column, TimestampSecondArray, TimestampSecond, tz)
342
+ }
343
+ DataType::Timestamp(TimeUnit::Millisecond, tz) => {
344
+ impl_timestamp_array_conversion!(
345
+ column,
346
+ TimestampMillisecondArray,
347
+ TimestampMillis,
348
+ tz
349
+ )
350
+ }
351
+ DataType::Timestamp(TimeUnit::Microsecond, tz) => {
352
+ impl_timestamp_array_conversion!(
353
+ column,
354
+ TimestampMicrosecondArray,
355
+ TimestampMicros,
356
+ tz
357
+ )
358
+ }
359
+ DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
360
+ impl_timestamp_array_conversion!(
361
+ column,
362
+ TimestampNanosecondArray,
363
+ TimestampNanos,
364
+ tz
365
+ )
366
+ }
367
+ DataType::Float16 => {
368
+ let array = downcast_array::<Float16Array>(column);
369
+ if array.is_nullable() {
370
+ array
371
+ .values()
372
+ .iter()
373
+ .enumerate()
374
+ .map(|(i, x)| {
375
+ if array.is_null(i) {
376
+ ParquetValue::Null
377
+ } else {
378
+ ParquetValue::Float16(f32::from(*x))
379
+ }
380
+ })
381
+ .collect()
382
+ } else {
383
+ array
384
+ .values()
385
+ .iter()
386
+ .map(|x| ParquetValue::Float16(f32::from(*x)))
387
+ .collect()
388
+ }
389
+ }
390
+ DataType::Utf8 => {
391
+ let array = downcast_array::<StringArray>(column);
392
+ array
393
+ .iter()
394
+ .map(|opt_x| match opt_x {
395
+ Some(x) => ParquetValue::String(x.to_string()),
396
+ None => ParquetValue::Null,
397
+ })
398
+ .collect()
399
+ }
400
+ DataType::Binary => {
401
+ let array = downcast_array::<BinaryArray>(column);
402
+ array
403
+ .iter()
404
+ .map(|opt_x| match opt_x {
405
+ Some(x) => ParquetValue::Bytes(x.to_vec()),
406
+ None => ParquetValue::Null,
407
+ })
408
+ .collect()
409
+ }
410
+ DataType::List(_field) => {
411
+ let list_array = downcast_array::<ListArray>(column);
412
+ list_array
413
+ .iter()
414
+ .map(|x| match x {
415
+ Some(values) => match ParquetValueVec::try_from(values) {
416
+ Ok(vec) => ParquetValue::List(vec.into_inner()),
417
+ Err(e) => {
418
+ panic!("Error converting list array to ParquetValueVec: {}", e)
419
+ }
420
+ },
421
+ None => ParquetValue::Null,
422
+ })
423
+ .collect()
424
+ }
425
+ DataType::Struct(_) => {
426
+ let struct_array = downcast_array::<StructArray>(column);
427
+ let mut values = Vec::with_capacity(struct_array.len());
428
+ for i in 0..struct_array.len() {
429
+ if struct_array.is_null(i) {
430
+ values.push(ParquetValue::Null);
431
+ continue;
432
+ }
433
+
434
+ let mut map = std::collections::HashMap::new();
435
+ for (field_idx, field) in struct_array.fields().iter().enumerate() {
436
+ let column = struct_array.column(field_idx);
437
+ let field_values = match ParquetValueVec::try_from(column.slice(i, 1)) {
438
+ Ok(vec) => vec.into_inner(),
439
+ Err(e) => {
440
+ panic!("Error converting struct field to ParquetValueVec: {}", e)
441
+ }
442
+ };
443
+ map.insert(
444
+ ParquetValue::String(field.name().to_string()),
445
+ field_values.into_iter().next().unwrap(),
446
+ );
447
+ }
448
+ values.push(ParquetValue::Map(map));
449
+ }
450
+ values
451
+ }
452
+ DataType::Null => {
453
+ let x = downcast_array::<NullArray>(column);
454
+ vec![ParquetValue::Null; x.len()]
455
+ }
456
+ _ => {
457
+ return Err(format!("Unsupported data type: {:?}", column.data_type()));
458
+ }
459
+ };
460
+ Ok(ParquetValueVec(tmp_vec))
461
+ }
462
+ }
@@ -0,0 +1,204 @@
1
+ use itertools::Itertools;
2
+
3
+ use super::*;
4
+
5
+ #[derive(Debug)]
6
+ pub enum RowRecord<S: BuildHasher + Default> {
7
+ Vec(Vec<ParquetField>),
8
+ Map(HashMap<StringCacheKey, ParquetField, S>),
9
+ }
10
+
11
+ #[derive(Debug)]
12
+ pub enum ColumnRecord<S: BuildHasher + Default> {
13
+ Vec(Vec<Vec<ParquetValue>>),
14
+ Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
15
+ }
16
+
17
+ #[derive(Debug)]
18
+ pub struct ParquetField(pub Field);
19
+
20
+ impl<S: BuildHasher + Default> IntoValue for RowRecord<S> {
21
+ fn into_value_with(self, handle: &Ruby) -> Value {
22
+ match self {
23
+ RowRecord::Vec(vec) => {
24
+ let ary = handle.ary_new_capa(vec.len());
25
+ vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
26
+ handle.into_value(ary)
27
+ }
28
+ RowRecord::Map(map) => {
29
+ let hash = handle.hash_new_capa(map.len());
30
+
31
+ let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
32
+ let mut i = 0;
33
+
34
+ for chunk in &map.into_iter().chunks(64) {
35
+ // Reduced to 64 to ensure space for pairs
36
+ for (k, v) in chunk {
37
+ if i + 1 >= values.len() {
38
+ // Bulk insert current batch if array is full
39
+ hash.bulk_insert(&values[..i]).unwrap();
40
+ values[..i].fill(handle.qnil().as_value());
41
+ i = 0;
42
+ }
43
+ values[i] = handle.into_value(k);
44
+ values[i + 1] = handle.into_value(v);
45
+ i += 2;
46
+ }
47
+ // Insert any remaining pairs
48
+ if i > 0 {
49
+ hash.bulk_insert(&values[..i]).unwrap();
50
+ values[..i].fill(handle.qnil().as_value());
51
+ i = 0;
52
+ }
53
+ }
54
+
55
+ hash.into_value_with(handle)
56
+ }
57
+ }
58
+ }
59
+ }
60
+
61
+ impl<S: BuildHasher + Default> IntoValue for ColumnRecord<S> {
62
+ fn into_value_with(self, handle: &Ruby) -> Value {
63
+ match self {
64
+ ColumnRecord::Vec(vec) => {
65
+ let ary = handle.ary_new_capa(vec.len());
66
+ vec.into_iter()
67
+ .try_for_each(|v| {
68
+ let nested_ary = handle.ary_new_capa(v.len());
69
+ v.into_iter().try_for_each(|v| nested_ary.push(v)).unwrap();
70
+ ary.push(nested_ary.into_value_with(handle))
71
+ })
72
+ .unwrap();
73
+ ary.into_value_with(handle)
74
+ }
75
+ ColumnRecord::Map(map) => {
76
+ let hash = handle.hash_new_capa(map.len());
77
+
78
+ let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
79
+ let mut i = 0;
80
+
81
+ for chunk in &map.into_iter().chunks(64) {
82
+ // Reduced to 64 to ensure space for pairs
83
+ for (k, v) in chunk {
84
+ if i + 1 >= values.len() {
85
+ // Bulk insert current batch if array is full
86
+ hash.bulk_insert(&values[..i]).unwrap();
87
+ values[..i].fill(handle.qnil().as_value());
88
+ i = 0;
89
+ }
90
+ values[i] = handle.into_value(k);
91
+ let ary = handle.ary_new_capa(v.len());
92
+ v.into_iter().try_for_each(|v| ary.push(v)).unwrap();
93
+ values[i + 1] = handle.into_value(ary);
94
+ i += 2;
95
+ }
96
+ // Insert any remaining pairs
97
+ if i > 0 {
98
+ hash.bulk_insert(&values[..i]).unwrap();
99
+ values[..i].fill(handle.qnil().as_value());
100
+ i = 0;
101
+ }
102
+ }
103
+
104
+ hash.into_value_with(handle)
105
+ }
106
+ }
107
+ }
108
+ }
109
+
110
+ impl IntoValue for ParquetField {
111
+ fn into_value_with(self, handle: &Ruby) -> Value {
112
+ match self.0 {
113
+ Field::Null => handle.qnil().as_value(),
114
+ Field::Bool(b) => b.into_value_with(handle),
115
+ Field::Short(s) => s.into_value_with(handle),
116
+ Field::Int(i) => i.into_value_with(handle),
117
+ Field::Long(l) => l.into_value_with(handle),
118
+ Field::UByte(ub) => ub.into_value_with(handle),
119
+ Field::UShort(us) => us.into_value_with(handle),
120
+ Field::UInt(ui) => ui.into_value_with(handle),
121
+ Field::ULong(ul) => ul.into_value_with(handle),
122
+ Field::Float16(f) => f32::from(f).into_value_with(handle),
123
+ Field::Float(f) => f.into_value_with(handle),
124
+ Field::Double(d) => d.into_value_with(handle),
125
+ Field::Str(s) => s.into_value_with(handle),
126
+ Field::Byte(b) => b.into_value_with(handle),
127
+ Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
128
+ Field::Date(d) => {
129
+ let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
130
+ let formatted = ts.strftime("%Y-%m-%d").to_string();
131
+ formatted.into_value_with(handle)
132
+ }
133
+ Field::TimestampMillis(ts) => {
134
+ let ts = jiff::Timestamp::from_millisecond(ts).unwrap();
135
+ let time_class = handle.class_time();
136
+ time_class
137
+ .funcall::<_, _, Value>("parse", (ts.to_string(),))
138
+ .unwrap()
139
+ .into_value_with(handle)
140
+ }
141
+ Field::TimestampMicros(ts) => {
142
+ let ts = jiff::Timestamp::from_microsecond(ts).unwrap();
143
+ let time_class = handle.class_time();
144
+ time_class
145
+ .funcall::<_, _, Value>("parse", (ts.to_string(),))
146
+ .unwrap()
147
+ .into_value_with(handle)
148
+ }
149
+ Field::ListInternal(list) => {
150
+ let elements = list.elements();
151
+ let ary = handle.ary_new_capa(elements.len());
152
+ elements
153
+ .iter()
154
+ .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
155
+ .unwrap();
156
+ ary.into_value_with(handle)
157
+ }
158
+ Field::MapInternal(map) => {
159
+ let entries = map.entries();
160
+ let hash = handle.hash_new_capa(entries.len());
161
+ entries
162
+ .iter()
163
+ .try_for_each(|(k, v)| {
164
+ hash.aset(
165
+ ParquetField(k.clone()).into_value_with(handle),
166
+ ParquetField(v.clone()).into_value_with(handle),
167
+ )
168
+ })
169
+ .unwrap();
170
+ hash.into_value_with(handle)
171
+ }
172
+ Field::Decimal(d) => {
173
+ let value = match d {
174
+ Decimal::Int32 { value, scale, .. } => {
175
+ let unscaled = i32::from_be_bytes(value);
176
+ format!("{}e-{}", unscaled, scale)
177
+ }
178
+ Decimal::Int64 { value, scale, .. } => {
179
+ let unscaled = i64::from_be_bytes(value);
180
+ format!("{}e-{}", unscaled, scale)
181
+ }
182
+ Decimal::Bytes { value, scale, .. } => {
183
+ // Convert bytes to string representation of unscaled value
184
+ let unscaled = String::from_utf8_lossy(value.data());
185
+ format!("{}e-{}", unscaled, scale)
186
+ }
187
+ };
188
+ handle.eval(&format!("BigDecimal(\"{value}\")")).unwrap()
189
+ }
190
+ Field::Group(row) => {
191
+ let hash = handle.hash_new();
192
+ row.get_column_iter()
193
+ .try_for_each(|(k, v)| {
194
+ hash.aset(
195
+ k.clone().into_value_with(handle),
196
+ ParquetField(v.clone()).into_value_with(handle),
197
+ )
198
+ })
199
+ .unwrap();
200
+ hash.into_value_with(handle)
201
+ }
202
+ }
203
+ }
204
+ }