parquet 0.0.4 → 0.2.5

@@ -1,232 +1,8 @@
- use std::{borrow::Cow, collections::HashMap, hash::BuildHasher, sync::Arc};
+ use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};

- use arrow_array::cast::downcast_array;
- use arrow_array::{
-     Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Float16Array, Float32Array,
-     Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, NullArray, StringArray,
-     StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-     TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
- };
- use arrow_schema::{DataType, TimeUnit};
- use itertools::Itertools;
- use magnus::{value::ReprValue, IntoValue, Ruby, Value};
- use parquet::data_type::Decimal;
- use parquet::record::Field;
-
- use crate::header_cache::StringCacheKey;
-
- #[derive(Debug)]
- pub enum RowRecord<S: BuildHasher + Default> {
-     Vec(Vec<ParquetField>),
-     Map(HashMap<StringCacheKey, ParquetField, S>),
- }
-
- #[derive(Debug)]
- pub enum ColumnRecord<S: BuildHasher + Default> {
-     Vec(Vec<Vec<ParquetValue>>),
-     Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
- }
-
- impl<S: BuildHasher + Default> IntoValue for RowRecord<S> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             RowRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                 handle.into_value(ary)
-             }
-             RowRecord::Map(map) => {
-                 let hash = handle.hash_new_capa(map.len());
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i]).unwrap();
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         values[i + 1] = handle.into_value(v);
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i]).unwrap();
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
-
- impl<S: BuildHasher + Default> IntoValue for ColumnRecord<S> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             ColumnRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter()
-                     .try_for_each(|v| {
-                         let nested_ary = handle.ary_new_capa(v.len());
-                         v.into_iter().try_for_each(|v| nested_ary.push(v)).unwrap();
-                         ary.push(nested_ary.into_value_with(handle))
-                     })
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             ColumnRecord::Map(map) => {
-                 let hash = handle.hash_new_capa(map.len());
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i]).unwrap();
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         let ary = handle.ary_new_capa(v.len());
-                         v.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                         values[i + 1] = handle.into_value(ary);
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i]).unwrap();
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
+ use super::*;

  #[derive(Debug, Clone)]
- pub struct CowValue<'a>(pub Cow<'a, str>);
-
- impl<'a> IntoValue for CowValue<'a> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         self.0.into_value_with(handle)
-     }
- }
-
- #[derive(Debug)]
- pub struct ParquetField(pub Field);
-
- impl IntoValue for ParquetField {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self.0 {
-             Field::Null => handle.qnil().as_value(),
-             Field::Bool(b) => b.into_value_with(handle),
-             Field::Short(s) => s.into_value_with(handle),
-             Field::Int(i) => i.into_value_with(handle),
-             Field::Long(l) => l.into_value_with(handle),
-             Field::UByte(ub) => ub.into_value_with(handle),
-             Field::UShort(us) => us.into_value_with(handle),
-             Field::UInt(ui) => ui.into_value_with(handle),
-             Field::ULong(ul) => ul.into_value_with(handle),
-             Field::Float16(f) => f32::from(f).into_value_with(handle),
-             Field::Float(f) => f.into_value_with(handle),
-             Field::Double(d) => d.into_value_with(handle),
-             Field::Str(s) => s.into_value_with(handle),
-             Field::Byte(b) => b.into_value_with(handle),
-             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
-             Field::Date(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             Field::TimestampMillis(ts) => {
-                 let ts = jiff::Timestamp::from_millisecond(ts).unwrap();
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             Field::TimestampMicros(ts) => {
-                 let ts = jiff::Timestamp::from_microsecond(ts).unwrap();
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             Field::ListInternal(list) => {
-                 let elements = list.elements();
-                 let ary = handle.ary_new_capa(elements.len());
-                 elements
-                     .iter()
-                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             Field::MapInternal(map) => {
-                 let entries = map.entries();
-                 let hash = handle.hash_new_capa(entries.len());
-                 entries
-                     .iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             ParquetField(k.clone()).into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             Field::Decimal(d) => {
-                 let value = match d {
-                     Decimal::Int32 { value, scale, .. } => {
-                         let unscaled = i32::from_be_bytes(value);
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                     Decimal::Int64 { value, scale, .. } => {
-                         let unscaled = i64::from_be_bytes(value);
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                     Decimal::Bytes { value, scale, .. } => {
-                         // Convert bytes to string representation of unscaled value
-                         let unscaled = String::from_utf8_lossy(value.data());
-                         format!("{}e-{}", unscaled, scale)
-                     }
-                 };
-                 handle.eval(&format!("BigDecimal(\"{value}\")")).unwrap()
-             }
-             Field::Group(row) => {
-                 let hash = handle.hash_new();
-                 row.get_column_iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             k.clone().into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-         }
-     }
- }
-
- #[allow(dead_code)]
- #[derive(Clone, Debug)]
  pub enum ParquetValue {
      Int8(i8),
      Int16(i16),
@@ -285,6 +61,175 @@ impl PartialEq for ParquetValue {

  impl Eq for ParquetValue {}

+ impl std::hash::Hash for ParquetValue {
+     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+         match self {
+             ParquetValue::Int8(i) => i.hash(state),
+             ParquetValue::Int16(i) => i.hash(state),
+             ParquetValue::Int32(i) => i.hash(state),
+             ParquetValue::Int64(i) => i.hash(state),
+             ParquetValue::UInt8(i) => i.hash(state),
+             ParquetValue::UInt16(i) => i.hash(state),
+             ParquetValue::UInt32(i) => i.hash(state),
+             ParquetValue::UInt64(i) => i.hash(state),
+             ParquetValue::Float16(f) => f.to_bits().hash(state),
+             ParquetValue::Float32(f) => f.to_bits().hash(state),
+             ParquetValue::Float64(f) => f.to_bits().hash(state),
+             ParquetValue::Boolean(b) => b.hash(state),
+             ParquetValue::String(s) => s.hash(state),
+             ParquetValue::Bytes(b) => b.hash(state),
+             ParquetValue::Date32(d) => d.hash(state),
+             ParquetValue::Date64(d) => d.hash(state),
+             ParquetValue::TimestampSecond(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMillis(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampMicros(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::TimestampNanos(ts, tz) => {
+                 ts.hash(state);
+                 tz.hash(state);
+             }
+             ParquetValue::List(l) => l.hash(state),
+             ParquetValue::Map(_m) => panic!("Map is not hashable"),
+             ParquetValue::Null => 0_i32.hash(state),
+         }
+     }
+ }
+
+ impl IntoValue for ParquetValue {
+     fn into_value_with(self, handle: &Ruby) -> Value {
+         match self {
+             ParquetValue::Int8(i) => i.into_value_with(handle),
+             ParquetValue::Int16(i) => i.into_value_with(handle),
+             ParquetValue::Int32(i) => i.into_value_with(handle),
+             ParquetValue::Int64(i) => i.into_value_with(handle),
+             ParquetValue::UInt8(i) => i.into_value_with(handle),
+             ParquetValue::UInt16(i) => i.into_value_with(handle),
+             ParquetValue::UInt32(i) => i.into_value_with(handle),
+             ParquetValue::UInt64(i) => i.into_value_with(handle),
+             ParquetValue::Float16(f) => f.into_value_with(handle),
+             ParquetValue::Float32(f) => f.into_value_with(handle),
+             ParquetValue::Float64(f) => f.into_value_with(handle),
+             ParquetValue::Boolean(b) => b.into_value_with(handle),
+             ParquetValue::String(s) => s.into_value_with(handle),
+             ParquetValue::Bytes(b) => handle.str_from_slice(&b).as_value(),
+             ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
+             ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
+             timestamp @ ParquetValue::TimestampSecond(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampSecond, handle)
+             }
+             timestamp @ ParquetValue::TimestampMillis(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMillis, handle)
+             }
+             timestamp @ ParquetValue::TimestampMicros(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampMicros, handle)
+             }
+             timestamp @ ParquetValue::TimestampNanos(_, _) => {
+                 impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
+             }
+             ParquetValue::List(l) => {
+                 let ary = handle.ary_new_capa(l.len());
+                 l.into_iter()
+                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
+                     .unwrap();
+                 ary.into_value_with(handle)
+             }
+             ParquetValue::Map(m) => {
+                 let hash = handle.hash_new_capa(m.len());
+                 m.into_iter()
+                     .try_for_each(|(k, v)| {
+                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
+                     })
+                     .unwrap();
+                 hash.into_value_with(handle)
+             }
+             ParquetValue::Null => handle.qnil().as_value(),
+         }
+     }
+ }
+
+ impl ParquetValue {
+     pub fn from_value(value: Value, type_: &ParquetSchemaType) -> Result<Self, MagnusError> {
+         match type_ {
+             ParquetSchemaType::Int8 => {
+                 let v = NumericConverter::<i8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int8(v))
+             }
+             ParquetSchemaType::Int16 => {
+                 let v = NumericConverter::<i16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int16(v))
+             }
+             ParquetSchemaType::Int32 => {
+                 let v = NumericConverter::<i32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int32(v))
+             }
+             ParquetSchemaType::Int64 => {
+                 let v = NumericConverter::<i64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Int64(v))
+             }
+             ParquetSchemaType::UInt8 => {
+                 let v = NumericConverter::<u8>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt8(v))
+             }
+             ParquetSchemaType::UInt16 => {
+                 let v = NumericConverter::<u16>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt16(v))
+             }
+             ParquetSchemaType::UInt32 => {
+                 let v = NumericConverter::<u32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt32(v))
+             }
+             ParquetSchemaType::UInt64 => {
+                 let v = NumericConverter::<u64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::UInt64(v))
+             }
+             ParquetSchemaType::Float => {
+                 let v = NumericConverter::<f32>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float32(v))
+             }
+             ParquetSchemaType::Double => {
+                 let v = NumericConverter::<f64>::convert_with_string_fallback(value)?;
+                 Ok(ParquetValue::Float64(v))
+             }
+             ParquetSchemaType::String => {
+                 let v = String::try_convert(value)?;
+                 Ok(ParquetValue::String(v))
+             }
+             ParquetSchemaType::Binary => {
+                 let v = convert_to_binary(value)?;
+                 Ok(ParquetValue::Bytes(v))
+             }
+             ParquetSchemaType::Boolean => {
+                 let v = convert_to_boolean(value)?;
+                 Ok(ParquetValue::Boolean(v))
+             }
+             ParquetSchemaType::Date32 => {
+                 let v = convert_to_date32(value)?;
+                 Ok(ParquetValue::Date32(v))
+             }
+             ParquetSchemaType::TimestampMillis => {
+                 let v = convert_to_timestamp_millis(value)?;
+                 Ok(ParquetValue::TimestampMillis(v, None))
+             }
+             ParquetSchemaType::TimestampMicros => {
+                 let v = convert_to_timestamp_micros(value)?;
+                 Ok(ParquetValue::TimestampMicros(v, None))
+             }
+             ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
+                 magnus::exception::type_error(),
+                 "Nested lists and maps are not supported",
+             )),
+         }
+     }
+ }
+
  #[derive(Debug)]
  pub struct ParquetValueVec(Vec<ParquetValue>);

@@ -319,7 +264,6 @@ impl TryFrom<Arc<dyn Array>> for ParquetValueVec {
      }
  }

- // Add macro for handling numeric array conversions
  macro_rules! impl_numeric_array_conversion {
      ($column:expr, $array_type:ty, $variant:ident) => {{
          let array = downcast_array::<$array_type>($column);
@@ -345,8 +289,6 @@ macro_rules! impl_numeric_array_conversion {
          }
      }};
  }
-
- // Add macro for handling boolean array conversions
  macro_rules! impl_boolean_array_conversion {
      ($column:expr, $array_type:ty, $variant:ident) => {{
          let array = downcast_array::<$array_type>($column);
@@ -373,33 +315,6 @@ macro_rules! impl_boolean_array_conversion {
      }};
  }

- // Add macro for handling timestamp array conversions
- macro_rules! impl_timestamp_array_conversion {
-     ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
-         let array = downcast_array::<$array_type>($column);
-         if array.is_nullable() {
-             array
-                 .values()
-                 .iter()
-                 .enumerate()
-                 .map(|(i, x)| {
-                     if array.is_null(i) {
-                         ParquetValue::Null
-                     } else {
-                         ParquetValue::$variant(*x, $tz.clone())
-                     }
-                 })
-                 .collect()
-         } else {
-             array
-                 .values()
-                 .iter()
-                 .map(|x| ParquetValue::$variant(*x, $tz.clone()))
-                 .collect()
-         }
-     }};
- }
-
  impl TryFrom<&dyn Array> for ParquetValueVec {
      type Error = String;

@@ -445,7 +360,6 @@ impl TryFrom<&dyn Array> for ParquetValueVec {
                      tz
                  )
              }
-             // Because f16 is unstable in Rust, we convert it to f32
              DataType::Float16 => {
                  let array = downcast_array::<Float16Array>(column);
                  if array.is_nullable() {
@@ -542,181 +456,3 @@ impl TryFrom<&dyn Array> for ParquetValueVec {
          Ok(ParquetValueVec(tmp_vec))
      }
  }
-
- impl std::hash::Hash for ParquetValue {
-     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-         match self {
-             ParquetValue::Int8(i) => i.hash(state),
-             ParquetValue::Int16(i) => i.hash(state),
-             ParquetValue::Int32(i) => i.hash(state),
-             ParquetValue::Int64(i) => i.hash(state),
-             ParquetValue::UInt8(i) => i.hash(state),
-             ParquetValue::UInt16(i) => i.hash(state),
-             ParquetValue::UInt32(i) => i.hash(state),
-             ParquetValue::UInt64(i) => i.hash(state),
-             ParquetValue::Float16(f) => f.to_bits().hash(state),
-             ParquetValue::Float32(f) => f.to_bits().hash(state),
-             ParquetValue::Float64(f) => f.to_bits().hash(state),
-             ParquetValue::Boolean(b) => b.hash(state),
-             ParquetValue::String(s) => s.hash(state),
-             ParquetValue::Bytes(b) => b.hash(state),
-             ParquetValue::Date32(d) => d.hash(state),
-             ParquetValue::Date64(d) => d.hash(state),
-             ParquetValue::TimestampSecond(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampMillis(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampMicros(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::TimestampNanos(ts, tz) => {
-                 ts.hash(state);
-                 tz.hash(state);
-             }
-             ParquetValue::List(l) => l.hash(state),
-             ParquetValue::Map(_m) => panic!("Map is not hashable"),
-             ParquetValue::Null => 0_i32.hash(state),
-         }
-     }
- }
-
- impl IntoValue for ParquetValue {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             ParquetValue::Int8(i) => i.into_value_with(handle),
-             ParquetValue::Int16(i) => i.into_value_with(handle),
-             ParquetValue::Int32(i) => i.into_value_with(handle),
-             ParquetValue::Int64(i) => i.into_value_with(handle),
-             ParquetValue::UInt8(i) => i.into_value_with(handle),
-             ParquetValue::UInt16(i) => i.into_value_with(handle),
-             ParquetValue::UInt32(i) => i.into_value_with(handle),
-             ParquetValue::UInt64(i) => i.into_value_with(handle),
-             ParquetValue::Float16(f) => f.into_value_with(handle),
-             ParquetValue::Float32(f) => f.into_value_with(handle),
-             ParquetValue::Float64(f) => f.into_value_with(handle),
-             ParquetValue::Boolean(b) => b.into_value_with(handle),
-             ParquetValue::String(s) => s.into_value_with(handle),
-             ParquetValue::Bytes(b) => b.into_value_with(handle),
-             ParquetValue::Date32(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             ParquetValue::Date64(d) => {
-                 let ts = jiff::Timestamp::from_second((d as i64) * 86400).unwrap();
-                 let formatted = ts.strftime("%Y-%m-%d").to_string();
-                 formatted.into_value_with(handle)
-             }
-             ParquetValue::TimestampSecond(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampSecond(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampMillis(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampMillis(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampMicros(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampMicros(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::TimestampNanos(ts, tz) => {
-                 let ts = parse_zoned_timestamp(&ParquetValue::TimestampNanos(ts, tz));
-                 let time_class = handle.class_time();
-                 time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))
-                     .unwrap()
-                     .into_value_with(handle)
-             }
-             ParquetValue::List(l) => {
-                 let ary = handle.ary_new_capa(l.len());
-                 l.into_iter()
-                     .try_for_each(|v| ary.push(v.into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             ParquetValue::Map(m) => {
-                 let hash = handle.hash_new_capa(m.len());
-                 m.into_iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(k.into_value_with(handle), v.into_value_with(handle))
-                     })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             ParquetValue::Null => handle.qnil().as_value(),
-         }
-     }
- }
-
- fn parse_zoned_timestamp(value: &ParquetValue) -> jiff::Timestamp {
-     let (ts, tz) = match value {
-         ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
-         ParquetValue::TimestampMillis(ts, tz) => {
-             (jiff::Timestamp::from_millisecond(*ts).unwrap(), tz)
-         }
-         ParquetValue::TimestampMicros(ts, tz) => {
-             (jiff::Timestamp::from_microsecond(*ts).unwrap(), tz)
-         }
-         ParquetValue::TimestampNanos(ts, tz) => {
-             (jiff::Timestamp::from_nanosecond(*ts as i128).unwrap(), tz)
-         }
-         _ => panic!("Invalid timestamp value"),
-     };
-
-     // If timezone is provided, convert to zoned timestamp
-     if let Some(tz) = tz {
-         // Handle fixed offset timezones like "+09:00" first
-         if tz.starts_with('+') || tz.starts_with('-') {
-             // Parse the offset string into hours and minutes
-             let (hours, minutes) = if tz.len() >= 5 && tz.contains(':') {
-                 // Format: "+09:00" or "-09:00"
-                 let h = tz[1..3].parse::<i32>().unwrap_or(0);
-                 let m = tz[4..6].parse::<i32>().unwrap_or(0);
-                 (h, m)
-             } else if tz.len() >= 3 {
-                 // Format: "+09" or "-09"
-                 let h = tz[1..3].parse::<i32>().unwrap_or(0);
-                 (h, 0)
-             } else {
-                 (0, 0)
-             };
-
-             // Apply sign
-             let total_minutes = if tz.starts_with('-') {
-                 -(hours * 60 + minutes)
-             } else {
-                 hours * 60 + minutes
-             };
-
-             // Create fixed timezone
-             let tz = jiff::tz::TimeZone::fixed(jiff::tz::offset((total_minutes / 60) as i8));
-             ts.to_zoned(tz).timestamp()
-         } else {
-             // Try IANA timezone
-             match ts.intz(&tz) {
-                 Ok(zoned) => zoned.timestamp(),
-                 Err(_) => ts, // Fall back to UTC if timezone is invalid
-             }
-         }
-     } else {
-         // No timezone provided - treat as UTC
-         ts
-     }
- }
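
Note: the conversion macros imported at the top of the new file (impl_date_conversion!, impl_timestamp_conversion!, impl_timestamp_array_conversion!) are defined elsewhere in the crate and are not part of this diff. Judging from the inline code they replace in the removed Date32/Date64 and Timestamp* arms above, they plausibly expand to something like the following sketch; the gem's actual macro definitions may differ.

// Hypothetical sketch only -- reconstructed from the removed inline conversion code,
// not copied from the crate's real macro definitions.
#[macro_export]
macro_rules! impl_date_conversion {
    ($days:expr, $handle:expr) => {{
        // Days since the Unix epoch -> "YYYY-MM-DD" Ruby string, as in the removed inline code.
        let ts = jiff::Timestamp::from_second(($days as i64) * 86400).unwrap();
        ts.strftime("%Y-%m-%d").to_string().into_value_with($handle)
    }};
}

#[macro_export]
macro_rules! impl_timestamp_conversion {
    ($value:expr, $variant:ident, $handle:expr) => {{
        match $value {
            ParquetValue::$variant(ts, tz) => {
                // Same Time.parse round-trip the removed inline code used,
                // assuming a parse_zoned_timestamp helper still exists in the crate.
                let ts = parse_zoned_timestamp(&ParquetValue::$variant(ts, tz));
                $handle
                    .class_time()
                    .funcall::<_, _, Value>("parse", (ts.to_string(),))
                    .unwrap()
                    .into_value_with($handle)
            }
            _ => unreachable!(),
        }
    }};
}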