parquet 0.0.4 → 0.2.5

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,232 +1,8 @@
- use std::{borrow::Cow, collections::HashMap, hash::BuildHasher, sync::Arc};
+ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};

- use arrow_array::cast::downcast_array;
- use arrow_array::{
-     Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Float16Array, Float32Array,
-     Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, NullArray, StringArray,
-     StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-     TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
- };
- use arrow_schema::{DataType, TimeUnit};
- use itertools::Itertools;
- use magnus::{value::ReprValue, IntoValue, Ruby, Value};
- use parquet::data_type::Decimal;
- use parquet::record::Field;
-
- use crate::header_cache::StringCacheKey;
-
- #[derive(Debug)]
- pub enum RowRecord<S: BuildHasher + Default> {
-     Vec(Vec<ParquetField>),
-     Map(HashMap<StringCacheKey, ParquetField, S>),
- }
-
- #[derive(Debug)]
- pub enum ColumnRecord<S: BuildHasher + Default> {
-     Vec(Vec<Vec<ParquetValue>>),
-     Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
- }
-
- impl<S: BuildHasher + Default> IntoValue for RowRecord<S> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             RowRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                 handle.into_value(ary)
-             }
-             RowRecord::Map(map) => {
-                 let hash = handle.hash_new_capa(map.len());
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i]).unwrap();
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         values[i + 1] = handle.into_value(v);
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i]).unwrap();
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
-
- impl<S: BuildHasher + Default> IntoValue for ColumnRecord<S> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             ColumnRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter()
-                     .try_for_each(|v| {
-                         let nested_ary = handle.ary_new_capa(v.len());
-                         v.into_iter().try_for_each(|v| nested_ary.push(v)).unwrap();
-                         ary.push(nested_ary.into_value_with(handle))
-                     })
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             ColumnRecord::Map(map) => {
-                 let hash = handle.hash_new_capa(map.len());
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i]).unwrap();
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         let ary = handle.ary_new_capa(v.len());
-                         v.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                         values[i + 1] = handle.into_value(ary);
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i]).unwrap();
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
+ use super::*;

  #[derive(Debug, Clone)]
- pub struct CowValue<'a>(pub Cow<'a, str>);
-
- impl<'a> IntoValue for CowValue<'a> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         self.0.into_value_with(handle)
-     }
- }
-
- #[derive(Debug)]
- pub struct ParquetField(pub Field);
-
- impl IntoValue for ParquetField {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self.0 {
-             Field::Null => handle.qnil().as_value(),
-             Field::Bool(b) => b.into_value_with(handle),
-             Field::Short(s) => s.into_value_with(handle),
-             Field::Int(i) => i.into_value_with(handle),
-             Field::Long(l) => l.into_value_with(handle),
-             Field::UByte(ub) => ub.into_value_with(handle),
-             Field::UShort(us) => us.into_value_with(handle),
-             Field::UInt(ui) => ui.into_value_with(handle),
-             Field::ULong(ul) => ul.into_value_with(handle),
-             Field::Float16(f) => f32::from(f).into_value_with(handle),
-             Field::Float(f) => f.into_value_with(handle),
-             Field::Double(d) => d.into_value_with(handle),
-             Field::Str(s) => s.into_value_with(handle),
-             Field::Byte(b) => b.into_value_with(handle),
-             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
-             Field::Date(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             Field::TimestampMillis(ts) => {
-                 let ts = jiff::Timestamp::from_millisecond(ts).unwrap();
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             Field::TimestampMicros(ts) => {
-                 let ts = jiff::Timestamp::from_microsecond(ts).unwrap();
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             Field::ListInternal(list) => {
-                 let elements = list.elements();
-                 let ary = handle.ary_new_capa(elements.len());
-                 elements
-                     .iter()
-                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             Field::MapInternal(map) => {
-                 let entries = map.entries();
-                 let hash = handle.hash_new_capa(entries.len());
-                 entries
-                     .iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             ParquetField(k.clone()).into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             Field::Decimal(d) => {
-                 let value = match d {
-                     Decimal::Int32 { value, scale, .. } => {
-                         let unscaled = i32::from_be_bytes(value);
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                     Decimal::Int64 { value, scale, .. } => {
-                         let unscaled = i64::from_be_bytes(value);
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                     Decimal::Bytes { value, scale, .. } => {
-                         // Convert bytes to string representation of unscaled value
-                         let unscaled = String::from_utf8_lossy(value.data());
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                 };
-                 handle.eval(&format!("BigDecimal(\"{value}\")")).unwrap()
-             }
-             Field::Group(row) => {
-                 let hash = handle.hash_new();
-                 row.get_column_iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             k.clone().into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
-
- #[allow(dead_code)]
- #[derive(Clone, Debug)]
  pub enum ParquetValue {
      Int8(i8),
      Int16(i16),
@@ -285,6 +61,175 @@ impl PartialEq for ParquetValue {

  impl Eq for ParquetValue {}

+ impl std::hash::Hash for ParquetValue {
+     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+         match self {
+             ParquetValue::Int8(i) => i.hash(state),
+             ParquetValue::Int16(i) => i.hash(state),
+             ParquetValue::Int32(i) => i.hash(state),
+             ParquetValue::Int64(i) => i.hash(state),
+             ParquetValue::UInt8(i) => i.hash(state),
+             ParquetValue::UInt16(i) => i.hash(state),
+             ParquetValue::UInt32(i) => i.hash(state),
+             ParquetValue::UInt64(i) => i.hash(state),
+             ParquetValue::Float16(f) => f.to_bits().hash(state),
+             ParquetValue::Float32(f) => f.to_bits().hash(state),
+             ParquetValue::Float64(f) => f.to_bits().hash(state),
+             ParquetValue::Boolean(b) => b.hash(state),
+             ParquetValue::String(s) => s.hash(state),
+             ParquetValue::Bytes(b) => b.hash(state),
+             ParquetValue::Date32(d) => d.hash(state),
+             ParquetValue::Date64(d) => d.hash(state),
+             ParquetValue::TimestampSecond(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMillis(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMicros(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampNanos(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::List(l) => l.hash(state),
+             ParquetValue::Map(_m) => panic!("Map is not hashable"),
+             ParquetValue::Null => 0_i32.hash(state),
+         }
+     }
+ }
+
+ impl IntoValue for ParquetValue {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ParquetValue::Int8(i) => i.into_value_with(handle),
+             ParquetValue::Int16(i) => i.into_value_with(handle),
+             ParquetValue::Int32(i) => i.into_value_with(handle),
+             ParquetValue::Int64(i) => i.into_value_with(handle),
+             ParquetValue::UInt8(i) => i.into_value_with(handle),
+             ParquetValue::UInt16(i) => i.into_value_with(handle),
+             ParquetValue::UInt32(i) => i.into_value_with(handle),
+             ParquetValue::UInt64(i) => i.into_value_with(handle),
+             ParquetValue::Float16(f) => f.into_value_with(handle),
+             ParquetValue::Float32(f) => f.into_value_with(handle),
+             ParquetValue::Float64(f) => f.into_value_with(handle),
+             ParquetValue::Boolean(b) => b.into_value_with(handle),
+             ParquetValue::String(s) => s.into_value_with(handle),
+             ParquetValue::Bytes(b) => handle.str_from_slice(&b).as_value(),
+             ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
+             ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
+             timestamp @ ParquetValue::TimestampSecond(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampSecond, handle)
+             }
+             timestamp @ ParquetValue::TimestampMillis(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMillis, handle)
+             }
+             timestamp @ ParquetValue::TimestampMicros(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMicros, handle)
+             }
+             timestamp @ ParquetValue::TimestampNanos(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
+             }
+             ParquetValue::List(l) => {
+                 let ary = handle.ary_new_capa(l.len());
+                 l.into_iter()
+                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ParquetValue::Map(m) => {
+                 let hash = handle.hash_new_capa(m.len());
+                 m.into_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             ParquetValue::Null => handle.qnil().as_value(),
+         }
+     }
+ }
+
+ impl ParquetValue {
+     pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
+         match type_ {
+             ParquetSchemaType::Int8 => {
+                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int8(v))
+             }
+             ParquetSchemaType::Int16 => {
+                 let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int16(v))
+             }
+             ParquetSchemaType::Int32 => {
+                 let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int32(v))
+             }
+             ParquetSchemaType::Int64 => {
+                 let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int64(v))
+             }
+             ParquetSchemaType::UInt8 => {
+                 let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt8(v))
+             }
+             ParquetSchemaType::UInt16 => {
+                 let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt16(v))
+             }
+             ParquetSchemaType::UInt32 => {
+                 let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt32(v))
+             }
+             ParquetSchemaType::UInt64 => {
+                 let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt64(v))
+             }
+             ParquetSchemaType::Float => {
+                 let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float32(v))
+             }
+             ParquetSchemaType::Double => {
+                 let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float64(v))
+             }
+             ParquetSchemaType::String => {
+                 let v = String::try_convert(value)?;
+                 Ok(ParquetValue::String(v))
+             }
+             ParquetSchemaType::Binary => {
+                 let v = convert_to_binary(value)?;
+                 Ok(ParquetValue::Bytes(v))
+             }
+             ParquetSchemaType::Boolean => {
+                 let v = convert_to_boolean(value)?;
+                 Ok(ParquetValue::Boolean(v))
+             }
+             ParquetSchemaType::Date32 => {
+                 let v = convert_to_date32(value)?;
+                 Ok(ParquetValue::Date32(v))
+             }
+             ParquetSchemaType::TimestampMillis => {
+                 let v = convert_to_timestamp_millis(value)?;
+                 Ok(ParquetValue::TimestampMillis(v, None))
+             }
+             ParquetSchemaType::TimestampMicros => {
+                 let v = convert_to_timestamp_micros(value)?;
+                 Ok(ParquetValue::TimestampMicros(v, None))
+             }
+             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
+                 magnus::exception::type_error(),
+                 "Nested lists and maps are not supported",
+             )),
+         }
+     }
+ }
+
  #[derive(Debug)]
  pub struct ParquetValueVec(Vec<ParquetValue>);

@@ -319,7 +264,6 @@ impl TryFrom<Arc<dyn Array>> for ParquetValueVec {
      }
  }

- // Add macro for handling numeric array conversions
  macro_rules! impl_numeric_array_conversion {
      ($column:expr, $array_type:ty, $variant:ident) => {{
          let array = downcast_array::<$array_type>($column);
@@ -345,8 +289,6 @@ macro_rules! impl_numeric_array_conversion {
          }
      }};
  }
-
- // Add macro for handling boolean array conversions
  macro_rules! impl_boolean_array_conversion {
      ($column:expr, $array_type:ty, $variant:ident) => {{
          let array = downcast_array::<$array_type>($column);
@@ -373,33 +315,6 @@ macro_rules! impl_boolean_array_conversion {
      }};
  }

- // Add macro for handling timestamp array conversions
- macro_rules! impl_timestamp_array_conversion {
-     ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
-         let array = downcast_array::<$array_type>($column);
-         if array.is_nullable() {
-             array
-                 .values()
-                 .iter()
-                 .enumerate()
-                 .map(|(i, x)| {
-                     if array.is_null(i) {
-                         ParquetValue::Null
-                     } else {
-                         ParquetValue::$variant(*x, $tz.clone())
-                     }
-                 })
-                 .collect()
-         } else {
-             array
-                 .values()
-                 .iter()
-                 .map(|x| ParquetValue::$variant(*x, $tz.clone()))
-                 .collect()
-         }
-     }};
- }
-

  impl TryFrom<&dyn Array> for ParquetValueVec {
      type Error = String;
@@ -445,7 +360,6 @@ impl TryFrom<&dyn Array> for ParquetValueVec {
                      tz
                  )
              }
-             // Because f16 is unstable in Rust, we convert it to f32
             DataType::Float16 => {
                 let array = downcast_array::<Float16Array>(column);
                 if array.is_nullable() {
@@ -542,181 +456,3 @@ impl TryFrom<&dyn Array> for ParquetValueVec {
          Ok(ParquetValueVec(tmp_vec))
      }
  }
-
- impl std::hash::Hash for ParquetValue {
-     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-         match self {
-             ParquetValue::Int8(i) => i.hash(state),
-             ParquetValue::Int16(i) => i.hash(state),
-             ParquetValue::Int32(i) => i.hash(state),
-             ParquetValue::Int64(i) => i.hash(state),
-             ParquetValue::UInt8(i) => i.hash(state),
-             ParquetValue::UInt16(i) => i.hash(state),
-             ParquetValue::UInt32(i) => i.hash(state),
-             ParquetValue::UInt64(i) => i.hash(state),
-             ParquetValue::Float16(f) => f.to_bits().hash(state),
-             ParquetValue::Float32(f) => f.to_bits().hash(state),
-             ParquetValue::Float64(f) => f.to_bits().hash(state),
-             ParquetValue::Boolean(b) => b.hash(state),
-             ParquetValue::String(s) => s.hash(state),
-             ParquetValue::Bytes(b) => b.hash(state),
-             ParquetValue::Date32(d) => d.hash(state),
-             ParquetValue::Date64(d) => d.hash(state),
-             ParquetValue::TimestampSecond(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampMillis(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampMicros(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampNanos(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::List(l) => l.hash(state),
-             ParquetValue::Map(_m) => panic!("Map is not hashable"),
-             ParquetValue::Null => 0_i32.hash(state),
-         }
-     }
- }
-
- impl IntoValue for ParquetValue {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             ParquetValue::Int8(i) => i.into_value_with(handle),
-             ParquetValue::Int16(i) => i.into_value_with(handle),
-             ParquetValue::Int32(i) => i.into_value_with(handle),
-             ParquetValue::Int64(i) => i.into_value_with(handle),
-             ParquetValue::UInt8(i) => i.into_value_with(handle),
-             ParquetValue::UInt16(i) => i.into_value_with(handle),
-             ParquetValue::UInt32(i) => i.into_value_with(handle),
-             ParquetValue::UInt64(i) => i.into_value_with(handle),
-             ParquetValue::Float16(f) => f.into_value_with(handle),
-             ParquetValue::Float32(f) => f.into_value_with(handle),
-             ParquetValue::Float64(f) => f.into_value_with(handle),
-             ParquetValue::Boolean(b) => b.into_value_with(handle),
-             ParquetValue::String(s) => s.into_value_with(handle),
-             ParquetValue::Bytes(b) => b.into_value_with(handle),
-             ParquetValue::Date32(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             ParquetValue::Date64(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             ParquetValue::TimestampSecond(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampSecond(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampMillis(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampMillis(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampMicros(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampMicros(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampNanos(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampNanos(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::List(l) => {
-                 let ary = handle.ary_new_capa(l.len());
-                 l.into_iter()
-                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             ParquetValue::Map(m) => {
-                 let hash = handle.hash_new_capa(m.len());
-                 m.into_iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             ParquetValue::Null => handle.qnil().as_value(),
-         }
-     }
- }
-
- fn parse_zoned_timestamp(value: &ParquetValue) -> jiff::Timestamp {
-     let (ts, tz) = match value {
-         ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
-         ParquetValue::TimestampMillis(ts, tz) => {
-             (jiff::Timestamp::from_millisecond(*ts).unwrap(), tz)
-         }
-         ParquetValue::TimestampMicros(ts, tz) => {
-             (jiff::Timestamp::from_microsecond(*ts).unwrap(), tz)
-         }
-         ParquetValue::TimestampNanos(ts, tz) => {
-             (jiff::Timestamp::from_nanosecond(*ts as i128).unwrap(), tz)
-         }
-         _ => panic!("Invalid timestamp value"),
-     };
-
-     // If timezone is provided, convert to zoned timestamp
-     if let Some(tz) = tz {
-         // Handle fixed offset timezones like "+09:00" first
-         if tz.starts_with('+') || tz.starts_with('-') {
-             // Parse the offset string into hours and minutes
-             let (hours, minutes) = if tz.len() >= 5 && tz.contains(':') {
-                 // Format: "+09:00" or "-09:00"
-                 let h = tz[1..3].parse::<i32>().unwrap_or(0);
-                 let m = tz[4..6].parse::<i32>().unwrap_or(0);
-                 (h, m)
-             } else if tz.len() >= 3 {
-                 // Format: "+09" or "-09"
-                 let h = tz[1..3].parse::<i32>().unwrap_or(0);
-                 (h, 0)
-             } else {
-                 (0, 0)
-             };
-
-             // Apply sign
-             let total_minutes = if tz.starts_with('-') {
-                 -(hours * 60 + minutes)
-             } else {
-                 hours * 60 + minutes
-             };
-
-             // Create fixed timezone
-             let tz = jiff::tz::TimeZone::fixed(jiff::tz::offset((total_minutes / 60) as i8));
-             ts.to_zoned(tz).timestamp()
-         } else {
-             // Try IANA timezone
-             match ts.intz(&tz) {
-                 Ok(zoned) => zoned.timestamp(),
-                 Err(_) => ts, // Fall back to UTC if timezone is invalid
-             }
-         }
-     } else {
-         // No timezone provided - treat as UTC
-         ts
-     }
- }
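
Note on the removed timezone handling: the fixed-offset branch of the old parse_zoned_timestamp (deleted above; 0.2.5 pulls the equivalent logic in through the shared impl_timestamp_conversion machinery) first reduces an offset string to a signed minute count before building a jiff::tz::TimeZone::fixed. The standalone sketch below, which is not part of the gem, isolates that arithmetic so the "+HH:MM" / "+HH" handling is easy to verify.

// Standalone sketch (not from the gem): the offset-string parsing used by the
// removed parse_zoned_timestamp, reduced to plain string arithmetic.
fn offset_to_minutes(tz: &str) -> i32 {
    let (hours, minutes) = if tz.len() >= 5 && tz.contains(':') {
        // "+09:00" or "-09:00"
        let h = tz[1..3].parse::<i32>().unwrap_or(0);
        let m = tz[4..6].parse::<i32>().unwrap_or(0);
        (h, m)
    } else if tz.len() >= 3 {
        // "+09" or "-09"
        (tz[1..3].parse::<i32>().unwrap_or(0), 0)
    } else {
        (0, 0)
    };
    if tz.starts_with('-') {
        -(hours * 60 + minutes)
    } else {
        hours * 60 + minutes
    }
}

fn main() {
    // "+09:00" -> 540 minutes; the original code then builds
    // jiff::tz::TimeZone::fixed(jiff::tz::offset((540 / 60) as i8)), i.e. a +9h offset.
    assert_eq!(offset_to_minutes("+09:00"), 540);
    assert_eq!(offset_to_minutes("-05:30"), -330);
}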