parquet 0.0.5 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,458 @@
+ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
+
+ use super::*;
+
+ #[derive(Debug, Clone)]
+ pub enum ParquetValue {
+     Int8(i8),
+     Int16(i16),
+     Int32(i32),
+     Int64(i64),
+     UInt8(u8),
+     UInt16(u16),
+     UInt32(u32),
+     UInt64(u64),
+     Float16(f32), // f16 converted to f32
+     Float32(f32),
+     Float64(f64),
+     Boolean(bool),
+     String(String),
+     Bytes(Vec<u8>),
+     Date32(i32),
+     Date64(i64),
+     TimestampSecond(i64, Option<Arc<str>>),
+     TimestampMillis(i64, Option<Arc<str>>),
+     TimestampMicros(i64, Option<Arc<str>>),
+     TimestampNanos(i64, Option<Arc<str>>),
+     List(Vec<ParquetValue>),
+     Map(HashMap<ParquetValue, ParquetValue>),
+     Null,
+ }
+
+ impl PartialEq for ParquetValue {
+     fn eq(&self, other: &Self) -> bool {
+         match (self, other) {
+             (ParquetValue::Int8(a), ParquetValue::Int8(b)) => a == b,
+             (ParquetValue::Int16(a), ParquetValue::Int16(b)) => a == b,
+             (ParquetValue::Int32(a), ParquetValue::Int32(b)) => a == b,
+             (ParquetValue::Int64(a), ParquetValue::Int64(b)) => a == b,
+             (ParquetValue::UInt8(a), ParquetValue::UInt8(b)) => a == b,
+             (ParquetValue::UInt16(a), ParquetValue::UInt16(b)) => a == b,
+             (ParquetValue::UInt32(a), ParquetValue::UInt32(b)) => a == b,
+             (ParquetValue::UInt64(a), ParquetValue::UInt64(b)) => a == b,
+             (ParquetValue::Float16(a), ParquetValue::Float16(b)) => a == b,
+             (ParquetValue::Float32(a), ParquetValue::Float32(b)) => a == b,
+             (ParquetValue::Float64(a), ParquetValue::Float64(b)) => a == b,
+             (ParquetValue::Boolean(a), ParquetValue::Boolean(b)) => a == b,
+             (ParquetValue::String(a), ParquetValue::String(b)) => a == b,
+             (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
+             (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
+             (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
+             (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
+             (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
+             (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
+             (ParquetValue::TimestampNanos(a, _), ParquetValue::TimestampNanos(b, _)) => a == b,
+             (ParquetValue::List(a), ParquetValue::List(b)) => a == b,
+             (ParquetValue::Null, ParquetValue::Null) => true,
+             _ => false,
+         }
+     }
+ }
+
+ impl Eq for ParquetValue {}
+
+ impl std::hash::Hash for ParquetValue {
+     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+         match self {
+             ParquetValue::Int8(i) => i.hash(state),
+             ParquetValue::Int16(i) => i.hash(state),
+             ParquetValue::Int32(i) => i.hash(state),
+             ParquetValue::Int64(i) => i.hash(state),
+             ParquetValue::UInt8(i) => i.hash(state),
+             ParquetValue::UInt16(i) => i.hash(state),
+             ParquetValue::UInt32(i) => i.hash(state),
+             ParquetValue::UInt64(i) => i.hash(state),
+             ParquetValue::Float16(f) => f.to_bits().hash(state),
+             ParquetValue::Float32(f) => f.to_bits().hash(state),
+             ParquetValue::Float64(f) => f.to_bits().hash(state),
+             ParquetValue::Boolean(b) => b.hash(state),
+             ParquetValue::String(s) => s.hash(state),
+             ParquetValue::Bytes(b) => b.hash(state),
+             ParquetValue::Date32(d) => d.hash(state),
+             ParquetValue::Date64(d) => d.hash(state),
+             ParquetValue::TimestampSecond(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMillis(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMicros(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampNanos(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::List(l) => l.hash(state),
+             ParquetValue::Map(_m) => panic!("Map is not hashable"),
+             ParquetValue::Null => 0_i32.hash(state),
+         }
+     }
+ }
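
A note on the Hash implementation above: f32 and f64 do not implement std::hash::Hash, so the float variants are hashed through their raw bit patterns via to_bits(), while equality still goes through ==. The standalone sketch below (hypothetical stand-in type, not part of the diff) illustrates the consequence: two values with the same NaN bit pattern hash identically but never compare equal.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Stand-in for the float-carrying variant: equality derived (IEEE ==),
// hashing done manually over the bit pattern, mirroring the diff.
#[derive(Debug, PartialEq)]
enum Key {
    Float64(f64),
}

impl Hash for Key {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Key::Float64(f) => f.to_bits().hash(state),
        }
    }
}

fn main() {
    let mut h1 = DefaultHasher::new();
    let mut h2 = DefaultHasher::new();
    Key::Float64(f64::NAN).hash(&mut h1);
    Key::Float64(f64::NAN).hash(&mut h2);
    assert_eq!(h1.finish(), h2.finish()); // same bit pattern, same hash
    assert_ne!(Key::Float64(f64::NAN), Key::Float64(f64::NAN)); // but NaN != NaN under ==
}
```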
+
+ impl IntoValue for ParquetValue {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ParquetValue::Int8(i) => i.into_value_with(handle),
+             ParquetValue::Int16(i) => i.into_value_with(handle),
+             ParquetValue::Int32(i) => i.into_value_with(handle),
+             ParquetValue::Int64(i) => i.into_value_with(handle),
+             ParquetValue::UInt8(i) => i.into_value_with(handle),
+             ParquetValue::UInt16(i) => i.into_value_with(handle),
+             ParquetValue::UInt32(i) => i.into_value_with(handle),
+             ParquetValue::UInt64(i) => i.into_value_with(handle),
+             ParquetValue::Float16(f) => f.into_value_with(handle),
+             ParquetValue::Float32(f) => f.into_value_with(handle),
+             ParquetValue::Float64(f) => f.into_value_with(handle),
+             ParquetValue::Boolean(b) => b.into_value_with(handle),
+             ParquetValue::String(s) => s.into_value_with(handle),
+             ParquetValue::Bytes(b) => handle.str_from_slice(&b).as_value(),
+             ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
+             ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
+             timestamp @ ParquetValue::TimestampSecond(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampSecond, handle)
+             }
+             timestamp @ ParquetValue::TimestampMillis(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMillis, handle)
+             }
+             timestamp @ ParquetValue::TimestampMicros(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMicros, handle)
+             }
+             timestamp @ ParquetValue::TimestampNanos(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
+             }
+             ParquetValue::List(l) => {
+                 let ary = handle.ary_new_capa(l.len());
+                 l.into_iter()
+                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ParquetValue::Map(m) => {
+                 let hash = handle.hash_new_capa(m.len());
+                 m.into_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             ParquetValue::Null => handle.qnil().as_value(),
+         }
+     }
+ }
+
+ impl ParquetValue {
+     pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
+         match type_ {
+             ParquetSchemaType::Int8 => {
+                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int8(v))
+             }
+             ParquetSchemaType::Int16 => {
+                 let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int16(v))
+             }
+             ParquetSchemaType::Int32 => {
+                 let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int32(v))
+             }
+             ParquetSchemaType::Int64 => {
+                 let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int64(v))
+             }
+             ParquetSchemaType::UInt8 => {
+                 let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt8(v))
+             }
+             ParquetSchemaType::UInt16 => {
+                 let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt16(v))
+             }
+             ParquetSchemaType::UInt32 => {
+                 let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt32(v))
+             }
+             ParquetSchemaType::UInt64 => {
+                 let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt64(v))
+             }
+             ParquetSchemaType::Float => {
+                 let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float32(v))
+             }
+             ParquetSchemaType::Double => {
+                 let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float64(v))
+             }
+             ParquetSchemaType::String => {
+                 let v = String::try_convert(value)?;
+                 Ok(ParquetValue::String(v))
+             }
+             ParquetSchemaType::Binary => {
+                 let v = convert_to_binary(value)?;
+                 Ok(ParquetValue::Bytes(v))
+             }
+             ParquetSchemaType::Boolean => {
+                 let v = convert_to_boolean(value)?;
+                 Ok(ParquetValue::Boolean(v))
+             }
+             ParquetSchemaType::Date32 => {
+                 let v = convert_to_date32(value)?;
+                 Ok(ParquetValue::Date32(v))
+             }
+             ParquetSchemaType::TimestampMillis => {
+                 let v = convert_to_timestamp_millis(value)?;
+                 Ok(ParquetValue::TimestampMillis(v, None))
+             }
+             ParquetSchemaType::TimestampMicros => {
+                 let v = convert_to_timestamp_micros(value)?;
+                 Ok(ParquetValue::TimestampMicros(v, None))
+             }
+             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
+                 magnus::exception::type_error(),
+                 "Nested lists and maps are not supported",
+             )),
+         }
+     }
+ }
+
+ #[derive(Debug)]
+ pub struct ParquetValueVec(Vec<ParquetValue>);
+
+ impl ParquetValueVec {
+     pub fn into_inner(self) -> Vec<ParquetValue> {
+         self.0
+     }
+ }
+
+ impl IntoIterator for ParquetValueVec {
+     type Item = ParquetValue;
+     type IntoIter = std::vec::IntoIter<ParquetValue>;
+
+     fn into_iter(self) -> Self::IntoIter {
+         self.0.into_iter()
+     }
+ }
+
+ impl std::cmp::PartialEq for ParquetValueVec {
+     fn eq(&self, other: &Self) -> bool {
+         self.0 == other.0
+     }
+ }
+
+ impl std::cmp::Eq for ParquetValueVec {}
+
+ impl TryFrom<Arc<dyn Array>> for ParquetValueVec {
+     type Error = String;
+
+     fn try_from(column: Arc<dyn Array>) -> Result<Self, Self::Error> {
+         ParquetValueVec::try_from(&*column)
+     }
+ }
+
+ macro_rules! impl_numeric_array_conversion {
+     ($column:expr, $array_type:ty, $variant:ident) => {{
+         let array = downcast_array::<$array_type>($column);
+         if array.is_nullable() {
+             array
+                 .values()
+                 .iter()
+                 .enumerate()
+                 .map(|(i, x)| {
+                     if array.is_null(i) {
+                         ParquetValue::Null
+                     } else {
+                         ParquetValue::$variant(*x)
+                     }
+                 })
+                 .collect()
+         } else {
+             array
+                 .values()
+                 .iter()
+                 .map(|x| ParquetValue::$variant(*x))
+                 .collect()
+         }
+     }};
+ }
+ macro_rules! impl_boolean_array_conversion {
+     ($column:expr, $array_type:ty, $variant:ident) => {{
+         let array = downcast_array::<$array_type>($column);
+         if array.is_nullable() {
+             array
+                 .values()
+                 .iter()
+                 .enumerate()
+                 .map(|(i, x)| {
+                     if array.is_null(i) {
+                         ParquetValue::Null
+                     } else {
+                         ParquetValue::$variant(x)
+                     }
+                 })
+                 .collect()
+         } else {
+             array
+                 .values()
+                 .iter()
+                 .map(|x| ParquetValue::$variant(x))
+                 .collect()
+         }
+     }};
+ }
+
+ impl TryFrom<&dyn Array> for ParquetValueVec {
+     type Error = String;
+
+     fn try_from(column: &dyn Array) -> Result<Self, Self::Error> {
+         let tmp_vec = match column.data_type() {
+             DataType::Boolean => impl_boolean_array_conversion!(column, BooleanArray, Boolean),
+             DataType::Int8 => impl_numeric_array_conversion!(column, Int8Array, Int8),
+             DataType::Int16 => impl_numeric_array_conversion!(column, Int16Array, Int16),
+             DataType::Int32 => impl_numeric_array_conversion!(column, Int32Array, Int32),
+             DataType::Int64 => impl_numeric_array_conversion!(column, Int64Array, Int64),
+             DataType::UInt8 => impl_numeric_array_conversion!(column, UInt8Array, UInt8),
+             DataType::UInt16 => impl_numeric_array_conversion!(column, UInt16Array, UInt16),
+             DataType::UInt32 => impl_numeric_array_conversion!(column, UInt32Array, UInt32),
+             DataType::UInt64 => impl_numeric_array_conversion!(column, UInt64Array, UInt64),
+             DataType::Float32 => impl_numeric_array_conversion!(column, Float32Array, Float32),
+             DataType::Float64 => impl_numeric_array_conversion!(column, Float64Array, Float64),
+             DataType::Date32 => impl_numeric_array_conversion!(column, Date32Array, Date32),
+             DataType::Date64 => impl_numeric_array_conversion!(column, Date64Array, Date64),
+             DataType::Timestamp(TimeUnit::Second, tz) => {
+                 impl_timestamp_array_conversion!(column, TimestampSecondArray, TimestampSecond, tz)
+             }
+             DataType::Timestamp(TimeUnit::Millisecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampMillisecondArray,
+                     TimestampMillis,
+                     tz
+                 )
+             }
+             DataType::Timestamp(TimeUnit::Microsecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampMicrosecondArray,
+                     TimestampMicros,
+                     tz
+                 )
+             }
+             DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
+                 impl_timestamp_array_conversion!(
+                     column,
+                     TimestampNanosecondArray,
+                     TimestampNanos,
+                     tz
+                 )
+             }
+             DataType::Float16 => {
+                 let array = downcast_array::<Float16Array>(column);
+                 if array.is_nullable() {
+                     array
+                         .values()
+                         .iter()
+                         .enumerate()
+                         .map(|(i, x)| {
+                             if array.is_null(i) {
+                                 ParquetValue::Null
+                             } else {
+                                 ParquetValue::Float16(f32::from(*x))
+                             }
+                         })
+                         .collect()
+                 } else {
+                     array
+                         .values()
+                         .iter()
+                         .map(|x| ParquetValue::Float16(f32::from(*x)))
+                         .collect()
+                 }
+             }
+             DataType::Utf8 => {
+                 let array = downcast_array::<StringArray>(column);
+                 array
+                     .iter()
+                     .map(|opt_x| match opt_x {
+                         Some(x) => ParquetValue::String(x.to_string()),
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::Binary => {
+                 let array = downcast_array::<BinaryArray>(column);
+                 array
+                     .iter()
+                     .map(|opt_x| match opt_x {
+                         Some(x) => ParquetValue::Bytes(x.to_vec()),
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::List(_field) => {
+                 let list_array = downcast_array::<ListArray>(column);
+                 list_array
+                     .iter()
+                     .map(|x| match x {
+                         Some(values) => match ParquetValueVec::try_from(values) {
+                             Ok(vec) => ParquetValue::List(vec.into_inner()),
+                             Err(e) => {
+                                 panic!("Error converting list array to ParquetValueVec: {}", e)
+                             }
+                         },
+                         None => ParquetValue::Null,
+                     })
+                     .collect()
+             }
+             DataType::Struct(_) => {
+                 let struct_array = downcast_array::<StructArray>(column);
+                 let mut values = Vec::with_capacity(struct_array.len());
+                 for i in 0..struct_array.len() {
+                     if struct_array.is_null(i) {
+                         values.push(ParquetValue::Null);
+                         continue;
+                     }
+
+                     let mut map = std::collections::HashMap::new();
+                     for (field_idx, field) in struct_array.fields().iter().enumerate() {
+                         let column = struct_array.column(field_idx);
+                         let field_values = match ParquetValueVec::try_from(column.slice(i, 1)) {
+                             Ok(vec) => vec.into_inner(),
+                             Err(e) => {
+                                 panic!("Error converting struct field to ParquetValueVec: {}", e)
+                             }
+                         };
+                         map.insert(
+                             ParquetValue::String(field.name().to_string()),
+                             field_values.into_iter().next().unwrap(),
+                         );
+                     }
+                     values.push(ParquetValue::Map(map));
+                 }
+                 values
+             }
+             DataType::Null => {
+                 let x = downcast_array::<NullArray>(column);
+                 vec![ParquetValue::Null; x.len()]
+             }
+             _ => {
+                 return Err(format!("Unsupported data type: {:?}", column.data_type()));
+             }
+         };
+         Ok(ParquetValueVec(tmp_vec))
+     }
+ }
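
For orientation, here is a minimal, self-contained sketch (not part of the published diff; it assumes the arrow crate that the code above builds on) of the null-aware column walk that impl_numeric_array_conversion! expands to, specialized to Int32Array:

```rust
use arrow::array::{Array, Int32Array};

// Mirrors the macro's shape: when the array carries nulls, consult the null
// bitmap per index while walking the raw values buffer; otherwise copy the
// values buffer directly.
fn int32_column_to_options(array: &Int32Array) -> Vec<Option<i32>> {
    if array.is_nullable() {
        array
            .values()
            .iter()
            .enumerate()
            .map(|(i, x)| if array.is_null(i) { None } else { Some(*x) })
            .collect()
    } else {
        array.values().iter().map(|x| Some(*x)).collect()
    }
}

fn main() {
    let array = Int32Array::from(vec![Some(1), None, Some(3)]);
    assert_eq!(int32_column_to_options(&array), vec![Some(1), None, Some(3)]);
}
```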
@@ -0,0 +1,204 @@
+ use itertools::Itertools;
+
+ use super::*;
+
+ #[derive(Debug)]
+ pub enum RowRecord<S: BuildHasher + Default> {
+     Vec(Vec<ParquetField>),
+     Map(HashMap<StringCacheKey, ParquetField, S>),
+ }
+
+ #[derive(Debug)]
+ pub enum ColumnRecord<S: BuildHasher + Default> {
+     Vec(Vec<Vec<ParquetValue>>),
+     Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
+ }
+
+ #[derive(Debug)]
+ pub struct ParquetField(pub Field);
+
+ impl<S: BuildHasher + Default> IntoValue for RowRecord<S> {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             RowRecord::Vec(vec) => {
+                 let ary = handle.ary_new_capa(vec.len());
+                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
+                 handle.into_value(ary)
+             }
+             RowRecord::Map(map) => {
+                 let hash = handle.hash_new_capa(map.len());
+
+                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
+                 let mut i = 0;
+
+                 for chunk in &map.into_iter().chunks(64) {
+                     // Reduced to 64 to ensure space for pairs
+                     for (k, v) in chunk {
+                         if i + 1 >= values.len() {
+                             // Bulk insert current batch if array is full
+                             hash.bulk_insert(&values[..i]).unwrap();
+                             values[..i].fill(handle.qnil().as_value());
+                             i = 0;
+                         }
+                         values[i] = handle.into_value(k);
+                         values[i + 1] = handle.into_value(v);
+                         i += 2;
+                     }
+                     // Insert any remaining pairs
+                     if i > 0 {
+                         hash.bulk_insert(&values[..i]).unwrap();
+                         values[..i].fill(handle.qnil().as_value());
+                         i = 0;
+                     }
+                 }
+
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
+
+ impl<S: BuildHasher + Default> IntoValue for ColumnRecord<S> {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ColumnRecord::Vec(vec) => {
+                 let ary = handle.ary_new_capa(vec.len());
+                 vec.into_iter()
+                     .try_for_each(|v| {
+                         let nested_ary = handle.ary_new_capa(v.len());
+                         v.into_iter().try_for_each(|v| nested_ary.push(v)).unwrap();
+                         ary.push(nested_ary.into_value_with(handle))
+                     })
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ColumnRecord::Map(map) => {
+                 let hash = handle.hash_new_capa(map.len());
+
+                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
+                 let mut i = 0;
+
+                 for chunk in &map.into_iter().chunks(64) {
+                     // Reduced to 64 to ensure space for pairs
+                     for (k, v) in chunk {
+                         if i + 1 >= values.len() {
+                             // Bulk insert current batch if array is full
+                             hash.bulk_insert(&values[..i]).unwrap();
+                             values[..i].fill(handle.qnil().as_value());
+                             i = 0;
+                         }
+                         values[i] = handle.into_value(k);
+                         let ary = handle.ary_new_capa(v.len());
+                         v.into_iter().try_for_each(|v| ary.push(v)).unwrap();
+                         values[i + 1] = handle.into_value(ary);
+                         i += 2;
+                     }
+                     // Insert any remaining pairs
+                     if i > 0 {
+                         hash.bulk_insert(&values[..i]).unwrap();
+                         values[..i].fill(handle.qnil().as_value());
+                         i = 0;
+                     }
+                 }
+
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
+
+ impl IntoValue for ParquetField {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self.0 {
+             Field::Null => handle.qnil().as_value(),
+             Field::Bool(b) => b.into_value_with(handle),
+             Field::Short(s) => s.into_value_with(handle),
+             Field::Int(i) => i.into_value_with(handle),
+             Field::Long(l) => l.into_value_with(handle),
+             Field::UByte(ub) => ub.into_value_with(handle),
+             Field::UShort(us) => us.into_value_with(handle),
+             Field::UInt(ui) => ui.into_value_with(handle),
+             Field::ULong(ul) => ul.into_value_with(handle),
+             Field::Float16(f) => f32::from(f).into_value_with(handle),
+             Field::Float(f) => f.into_value_with(handle),
+             Field::Double(d) => d.into_value_with(handle),
+             Field::Str(s) => s.into_value_with(handle),
+             Field::Byte(b) => b.into_value_with(handle),
+             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
+             Field::Date(d) => {
+                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
+                 let formatted = ts.strftime("%Y-%m-%d").to_string();
+                 formatted.into_value_with(handle)
+             }
+             Field::TimestampMillis(ts) => {
+                 let ts = jiff::Timestamp::from_millisecond(ts).unwrap();
+                 let time_class = handle.class_time();
+                 time_class
+                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
+                     .unwrap()
+                     .into_value_with(handle)
+             }
+             Field::TimestampMicros(ts) => {
+                 let ts = jiff::Timestamp::from_microsecond(ts).unwrap();
+                 let time_class = handle.class_time();
+                 time_class
+                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
+                     .unwrap()
+                     .into_value_with(handle)
+             }
+             Field::ListInternal(list) => {
+                 let elements = list.elements();
+                 let ary = handle.ary_new_capa(elements.len());
+                 elements
+                     .iter()
+                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             Field::MapInternal(map) => {
+                 let entries = map.entries();
+                 let hash = handle.hash_new_capa(entries.len());
+                 entries
+                     .iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(
+                             ParquetField(k.clone()).into_value_with(handle),
+                             ParquetField(v.clone()).into_value_with(handle),
+                         )
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             Field::Decimal(d) => {
+                 let value = match d {
+                     Decimal::Int32 { value, scale, .. } => {
+                         let unscaled = i32::from_be_bytes(value);
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                     Decimal::Int64 { value, scale, .. } => {
+                         let unscaled = i64::from_be_bytes(value);
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                     Decimal::Bytes { value, scale, .. } => {
+                         // Convert bytes to string representation of unscaled value
+                         let unscaled = String::from_utf8_lossy(value.data());
+                         format!("{}e-{}", unscaled, scale)
+                     }
+                 };
+                 handle.eval(&format!("BigDecimal(\"{value}\")")).unwrap()
+             }
+             Field::Group(row) => {
+                 let hash = handle.hash_new();
+                 row.get_column_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(
+                             k.clone().into_value_with(handle),
+                             ParquetField(v.clone()).into_value_with(handle),
+                         )
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+         }
+     }
+ }
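
The RowRecord::Map and ColumnRecord::Map branches above stage key/value pairs in a fixed 128-slot Value buffer and flush it with bulk_insert whenever it fills, rather than calling aset once per pair. A simplified, magnus-free sketch of that batching pattern (hypothetical helper, not part of the diff):

```rust
// Stage items in a fixed-size batch and flush whenever the batch fills, so the
// (comparatively expensive) sink call runs once per batch instead of once per
// item. The diff above does the same with a [Value; 128] buffer holding
// alternating keys and values.
fn insert_batched<T>(items: impl IntoIterator<Item = T>, mut flush: impl FnMut(&[T])) {
    const BATCH: usize = 64; // mirrors the chunks(64) sizing in the diff
    let mut buf: Vec<T> = Vec::with_capacity(BATCH);
    for item in items {
        buf.push(item);
        if buf.len() == BATCH {
            flush(&buf);
            buf.clear();
        }
    }
    if !buf.is_empty() {
        flush(&buf); // flush the remainder
    }
}

fn main() {
    let mut total = 0usize;
    insert_batched((0..150).map(|i| (i, i * 2)), |batch| total += batch.len());
    assert_eq!(total, 150);
}
```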