parquet 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/parquet/src/header_cache.rs +4 -9
- data/ext/parquet/src/logger.rs +2 -2
- data/ext/parquet/src/reader/common.rs +12 -15
- data/ext/parquet/src/reader/mod.rs +0 -56
- data/ext/parquet/src/reader/parquet_column_reader.rs +20 -16
- data/ext/parquet/src/reader/parquet_row_reader.rs +21 -14
- data/ext/parquet/src/ruby_reader.rs +18 -17
- data/ext/parquet/src/types/core_types.rs +2 -17
- data/ext/parquet/src/types/mod.rs +56 -0
- data/ext/parquet/src/types/parquet_value.rs +106 -95
- data/ext/parquet/src/types/record_types.rs +27 -17
- data/ext/parquet/src/types/schema_converter.rs +4 -109
- data/ext/parquet/src/types/timestamp.rs +3 -5
- data/ext/parquet/src/types/type_conversion.rs +116 -81
- data/ext/parquet/src/types/writer_types.rs +26 -54
- data/ext/parquet/src/writer/mod.rs +176 -840
- data/ext/parquet/src/writer/write_columns.rs +226 -0
- data/ext/parquet/src/writer/write_rows.rs +486 -0
- data/lib/parquet/version.rb +1 -1
- metadata +4 -2
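
The structural change that drives most of the hunks below is the collapse of `ParquetSchemaType`'s flat primitive variants into a single `Primitive(PrimitiveType)` wrapper, so converters can forward the inner value instead of enumerating sixteen arms. A minimal sketch of that shape, assuming payload details not shown in this excerpt (the real definitions live in the gem's `types` module, e.g. `core_types.rs`):

```rust
// Sketch of the enum shape implied by the 0.5.x diff; variant names follow the
// diff, payload details are assumptions made only to keep the sketch compilable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrimitiveType {
    Int8, Int16, Int32, Int64,
    UInt8, UInt16, UInt32, UInt64,
    Float32, Float64,
    String, Binary, Boolean, Date32,
    TimestampMillis, TimestampMicros,
}

#[derive(Debug, Clone)]
pub struct ListField {
    pub item_type: Box<ParquetSchemaType>, // placeholder payload for the sketch
}

#[derive(Debug, Clone)]
pub enum ParquetSchemaType {
    // One wrapper variant replaces the sixteen flat primitive variants...
    Primitive(PrimitiveType),
    // ...while nested variants such as List (and Map/Struct in the gem) keep their payloads.
    List(ListField),
}

// Call sites can forward the inner value instead of matching every primitive,
// which is what schema_converter.rs does below with `parquet_type: *primative`.
fn primitive_of(t: &ParquetSchemaType) -> Option<PrimitiveType> {
    match t {
        ParquetSchemaType::Primitive(p) => Some(*p),
        ParquetSchemaType::List(_) => None,
    }
}

fn main() {
    let t = ParquetSchemaType::Primitive(PrimitiveType::Int32);
    assert_eq!(primitive_of(&t), Some(PrimitiveType::Int32));
}
```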
data/ext/parquet/src/types/parquet_value.rs

@@ -1,7 +1,4 @@
-use crate::{
-    impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion,
-    reader::{MagnusErrorWrapper, ReaderError},
-};
+use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};

 use super::*;
 use arrow_array::MapArray;

@@ -115,7 +112,7 @@ impl std::hash::Hash for ParquetValue {
 }

 impl TryIntoValue for ParquetValue {
-    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, …
+    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
         match self {
             ParquetValue::Int8(i) => Ok(i.into_value_with(handle)),
             ParquetValue::Int16(i) => Ok(i.into_value_with(handle)),

@@ -153,7 +150,7 @@ impl TryIntoValue for ParquetValue {
                 let ary = handle.ary_new_capa(l.len());
                 l.into_iter().try_for_each(|v| {
                     ary.push(v.try_into_value_with(handle)?)?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;

                 // The complex_types test expects double_list to be nil when empty,

@@ -163,13 +160,18 @@ impl TryIntoValue for ParquetValue {
                 Ok(ary.into_value_with(handle))
             }
             ParquetValue::Map(m) => {
+                #[cfg(ruby_lt_3_2)]
                 let hash = handle.hash_new_capa(m.len());
+
+                #[cfg(not(ruby_lt_3_2))]
+                let hash = handle.hash_new();
+
                 m.into_iter().try_for_each(|(k, v)| {
                     hash.aset(
                         k.try_into_value_with(handle)?,
                         v.try_into_value_with(handle)?,
                     )?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(hash.into_value_with(handle))
             }
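
The `#[cfg(ruby_lt_3_2)]` / `#[cfg(not(ruby_lt_3_2))]` pair above keeps `hash_new_capa` for Rubies older than 3.2 and falls back to `hash_new` otherwise. A cfg flag like that has to be emitted by the crate's build script; the sketch below shows one hypothetical way a `build.rs` could set it by probing the installed Ruby (the gem's actual build script is not part of this diff):

```rust
// build.rs -- hypothetical sketch: emit a `ruby_lt_3_2` cfg flag when the Ruby
// found on PATH is older than 3.2, so `#[cfg(ruby_lt_3_2)]` blocks compile in.
use std::process::Command;

fn main() {
    // Ask Ruby for its version string, e.g. "3.1.4".
    let output = Command::new("ruby")
        .args(["-e", "print RUBY_VERSION"])
        .output()
        .expect("failed to run ruby");
    let version = String::from_utf8_lossy(&output.stdout).to_string();

    let mut parts = version.split('.').map(|p| p.parse::<u32>().unwrap_or(0));
    let major = parts.next().unwrap_or(0);
    let minor = parts.next().unwrap_or(0);

    if (major, minor) < (3, 2) {
        // Activates the `#[cfg(ruby_lt_3_2)]` branches in the crate.
        println!("cargo:rustc-cfg=ruby_lt_3_2");
    }
}
```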
@@ -180,6 +182,7 @@ impl TryIntoValue for ParquetValue {

 impl ParquetValue {
     pub fn from_value(
+        ruby: &Ruby,
         value: Value,
         type_: &ParquetSchemaType,
         format: Option<&str>,

@@ -189,70 +192,72 @@ impl ParquetValue {
         }

         match type_ {
-            ParquetSchemaType::…
-… (removed lines 193-255 truncated in the source diff)
+            ParquetSchemaType::Primitive(primative) => match primative {
+                PrimitiveType::Int8 => {
+                    let v = NumericConverter::<i8>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Int8(v))
+                }
+                PrimitiveType::Int16 => {
+                    let v = NumericConverter::<i16>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Int16(v))
+                }
+                PrimitiveType::Int32 => {
+                    let v = NumericConverter::<i32>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Int32(v))
+                }
+                PrimitiveType::Int64 => {
+                    let v = NumericConverter::<i64>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Int64(v))
+                }
+                PrimitiveType::UInt8 => {
+                    let v = NumericConverter::<u8>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::UInt8(v))
+                }
+                PrimitiveType::UInt16 => {
+                    let v = NumericConverter::<u16>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::UInt16(v))
+                }
+                PrimitiveType::UInt32 => {
+                    let v = NumericConverter::<u32>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::UInt32(v))
+                }
+                PrimitiveType::UInt64 => {
+                    let v = NumericConverter::<u64>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::UInt64(v))
+                }
+                PrimitiveType::Float32 => {
+                    let v = NumericConverter::<f32>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Float32(v))
+                }
+                PrimitiveType::Float64 => {
+                    let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
+                    Ok(ParquetValue::Float64(v))
+                }
+                PrimitiveType::String => {
+                    let v = convert_to_string(value)?;
+                    Ok(ParquetValue::String(v))
+                }
+                PrimitiveType::Binary => {
+                    let v = convert_to_binary(value)?;
+                    Ok(ParquetValue::Bytes(v))
+                }
+                PrimitiveType::Boolean => {
+                    let v = convert_to_boolean(ruby, value)?;
+                    Ok(ParquetValue::Boolean(v))
+                }
+                PrimitiveType::Date32 => {
+                    let v = convert_to_date32(ruby, value, format)?;
+                    Ok(ParquetValue::Date32(v))
+                }
+                PrimitiveType::TimestampMillis => {
+                    let v = convert_to_timestamp_millis(ruby, value, format)?;
+                    Ok(ParquetValue::TimestampMillis(v, None))
+                }
+                PrimitiveType::TimestampMicros => {
+                    let v = convert_to_timestamp_micros(ruby, value, format)?;
+                    Ok(ParquetValue::TimestampMicros(v, None))
+                }
+            },
             ParquetSchemaType::List(list_field) => {
                 // We expect the Ruby object to be an Array, each item converting
                 // to the item_type. We gather them into ParquetValue::List(...)
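
Each numeric arm above delegates to `NumericConverter::<T>::convert_with_string_fallback(ruby, value)`, whose implementation is not shown in this excerpt. The sketch below illustrates the general idea behind that name (accept a native number, or fall back to parsing a string) in plain Rust, without the magnus `Value` plumbing; it is an analogue for illustration, not the gem's converter:

```rust
// Illustrative analogue of a "numeric with string fallback" converter: accept
// either a native number or a string that parses to the target numeric type.
use std::str::FromStr;

#[derive(Debug)]
enum RubyLike {
    Integer(i64),
    Str(String),
}

fn convert_with_string_fallback<T>(value: &RubyLike) -> Result<T, String>
where
    T: TryFrom<i64> + FromStr,
    <T as TryFrom<i64>>::Error: std::fmt::Display,
    <T as FromStr>::Err: std::fmt::Display,
{
    match value {
        // Native integer: range-checked conversion into the target type.
        RubyLike::Integer(i) => T::try_from(*i).map_err(|e| format!("out of range: {e}")),
        // String fallback: trim and parse, reporting a conversion error otherwise.
        RubyLike::Str(s) => s.trim().parse().map_err(|e| format!("bad string: {e}")),
    }
}

fn main() {
    let a: i32 = convert_with_string_fallback(&RubyLike::Integer(42)).unwrap();
    let b: i32 = convert_with_string_fallback(&RubyLike::Str(" 7 ".into())).unwrap();
    assert_eq!((a, b), (42, 7));
}
```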
@@ -271,6 +276,7 @@ impl ParquetValue {
                 let mut items = Vec::with_capacity(array.len());
                 for (index, item_val) in array.into_iter().enumerate() {
                     match ParquetValue::from_value(
+                        ruby,
                         item_val,
                         &list_field.item_type,
                         list_field.format,

@@ -292,10 +298,18 @@ impl ParquetValue {
                 let hash_pairs: Vec<(Value, Value)> = value.funcall("to_a", ())?;
                 let mut result = HashMap::with_capacity(hash_pairs.len());
                 for (k, v) in hash_pairs {
-                    let key_val = …
-… (removed lines 296-298 truncated in the source diff)
+                    let key_val = ParquetValue::from_value(
+                        ruby,
+                        k,
+                        &map_field.key_type,
+                        map_field.key_format,
+                    )?;
+                    let val_val = ParquetValue::from_value(
+                        ruby,
+                        v,
+                        &map_field.value_type,
+                        map_field.value_format,
+                    )?;
                     result.insert(key_val, val_val);
                 }
                 Ok(ParquetValue::Map(result))

@@ -316,9 +330,7 @@ impl ParquetValue {
                 // For each field in the struct definition, try to find a matching key in the hash
                 for field in &struct_field.fields {
                     let field_name = ParquetValue::String(field.name.clone());
-                    let ruby_field_name = …
-                        .str_new(&field.name)
-                        .as_value();
+                    let ruby_field_name = ruby.str_new(&field.name).as_value();

                     // Try to get the field value using Ruby's [] method
                     let field_value_obj =

@@ -328,6 +340,7 @@ impl ParquetValue {
                         ParquetValue::Null // Field not provided or nil, treat as null
                     } else {
                         ParquetValue::from_value(
+                            ruby,
                             field_value_obj,
                             &field.type_,
                             field.format.as_deref(),
@@ -427,7 +440,7 @@ pub struct ArrayWrapper<'a> {
 }

 impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
-    type Error = …
+    type Error = ParquetGemError;

     fn try_from(column: ArrayWrapper<'a>) -> Result<Self, Self::Error> {
         match column.array.data_type() {

@@ -515,11 +528,11 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
                 let iter = array.iter().map(|opt_x| match opt_x {
                     Some(x) => {
                         if column.strict {
-                            Ok::<_, …
+                            Ok::<_, ParquetGemError>(ParquetValue::String(
                                 simdutf8::basic::from_utf8(x.as_bytes())?.to_string(),
                             ))
                         } else {
-                            Ok::<_, …
+                            Ok::<_, ParquetGemError>(ParquetValue::String(x.to_string()))
                         }
                     }
                     None => Ok(ParquetValue::Null),

@@ -551,10 +564,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
                         strict: column.strict,
                     }) {
                         Ok(vec) => Ok(ParquetValue::List(vec.into_inner())),
-                        Err(e) => Err( …
+                        Err(e) => Err(MagnusError::new(
                             magnus::exception::type_error(),
                             format!("Error converting list array to ParquetValueVec: {}", e),
-                        ))
+                        ))?,
                     },
                     None => Ok(ParquetValue::Null),
                 })

@@ -579,24 +592,22 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
                 }) {
                     Ok(vec) => vec.into_inner(),
                     Err(e) => {
-                        return Err( …
-… (removed lines 583-586 truncated in the source diff)
-                            e
-                        ),
+                        return Err(MagnusError::new(
+                            magnus::exception::type_error(),
+                            format!(
+                                "Error converting struct field to ParquetValueVec: {}",
+                                e
                             ),
-                        ))
+                        ))?;
                     }
                 };
                 map.insert(
                     ParquetValue::String(field.name().to_string()),
                     field_values.into_iter().next().ok_or_else(|| {
-… (removed line 596 truncated in the source diff)
+                        MagnusError::new(
                             magnus::exception::type_error(),
                             "Expected a single value for struct field".to_string(),
-                        )
+                        )
                     })?,
                 );
             }

@@ -667,10 +678,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
                 Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
             }
             _ => {
-                return Err( …
+                return Err(MagnusError::new(
                     magnus::exception::type_error(),
                     format!("Unsupported data type: {:?}", column.array.data_type()),
-                ))
+                ))?;
             }
         }
     }
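
Several arms above now end in `Err(MagnusError::new(...))?` inside functions and closures whose error type is `ParquetGemError`. That pattern only compiles when the gem's error type implements `From<MagnusError>`, letting `?` do the wrapping and early return. A minimal sketch of that shape, with simplified stand-in types (the real `ParquetGemError` lives in `types/mod.rs`, which this excerpt lists but does not show):

```rust
// Hypothetical reduction of the error-wrapping pattern used in the diff.
// `MagnusError` stands in for magnus::Error; the real ParquetGemError has more variants.
#[derive(Debug)]
struct MagnusError(String);

#[derive(Debug)]
enum ParquetGemError {
    Ruby(MagnusError),
    // ...Utf8Error and other variants in the real type.
}

impl From<MagnusError> for ParquetGemError {
    fn from(e: MagnusError) -> Self {
        ParquetGemError::Ruby(e)
    }
}

fn unsupported(dtype: &str) -> Result<(), ParquetGemError> {
    // Mirrors `_ => Err(MagnusError::new(...))?` in the diff: build the Ruby-level
    // error, then let `?` convert it into ParquetGemError via the From impl above.
    Err(MagnusError(format!("Unsupported data type: {dtype}")))?
}

fn main() {
    assert!(matches!(unsupported("Decimal256"), Err(ParquetGemError::Ruby(_))));
}
```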
data/ext/parquet/src/types/record_types.rs

@@ -1,7 +1,5 @@
 use itertools::Itertools;

-use crate::reader::ReaderError;
-
 use super::*;

 #[derive(Debug)]

@@ -20,19 +18,23 @@ pub enum ColumnRecord<S: BuildHasher + Default> {
 pub struct ParquetField(pub Field, pub bool);

 impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
-    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, …
+    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
         match self {
             RowRecord::Vec(vec) => {
                 let ary = handle.ary_new_capa(vec.len());
                 vec.into_iter().try_for_each(|v| {
                     ary.push(v.try_into_value_with(handle)?)?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(handle.into_value(ary))
             }
             RowRecord::Map(map) => {
+                #[cfg(ruby_lt_3_2)]
                 let hash = handle.hash_new_capa(map.len());

+                #[cfg(not(ruby_lt_3_2))]
+                let hash = handle.hash_new();
+
                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
                 let mut i = 0;

@@ -64,7 +66,7 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
 }

 impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
-    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, …
+    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
         match self {
             ColumnRecord::Vec(vec) => {
                 let ary = handle.ary_new_capa(vec.len());

@@ -72,16 +74,20 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
                     let nested_ary = handle.ary_new_capa(v.len());
                     v.into_iter().try_for_each(|v| {
                         nested_ary.push(v.try_into_value_with(handle)?)?;
-                        Ok::<_, …
+                        Ok::<_, ParquetGemError>(())
                     })?;
                     ary.push(nested_ary.into_value_with(handle))?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(ary.into_value_with(handle))
             }
             ColumnRecord::Map(map) => {
+                #[cfg(ruby_lt_3_2)]
                 let hash = handle.hash_new_capa(map.len());

+                #[cfg(not(ruby_lt_3_2))]
+                let hash = handle.hash_new();
+
                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
                 let mut i = 0;

@@ -98,7 +104,7 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
                 let ary = handle.ary_new_capa(v.len());
                 v.into_iter().try_for_each(|v| {
                     ary.push(v.try_into_value_with(handle)?)?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 values[i + 1] = handle.into_value(ary);
                 i += 2;

@@ -118,11 +124,11 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
 }

 pub trait TryIntoValue {
-    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, …
+    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError>;
 }

 impl TryIntoValue for ParquetField {
-    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, …
+    fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
         match self.0 {
             Field::Null => Ok(handle.qnil().as_value()),
             Field::Bool(b) => Ok(b.into_value_with(handle)),

@@ -139,7 +145,7 @@ impl TryIntoValue for ParquetField {
             Field::Str(s) => {
                 if self.1 {
                     Ok(simdutf8::basic::from_utf8(s.as_bytes())
-                        .map_err(|e| …
+                        .map_err(|e| ParquetGemError::Utf8Error(e))
                         .and_then(|s| Ok(s.into_value_with(handle)))?)
                 } else {
                     let s = String::from_utf8_lossy(s.as_bytes());

@@ -172,19 +178,23 @@ impl TryIntoValue for ParquetField {
                 let ary = handle.ary_new_capa(elements.len());
                 elements.iter().try_for_each(|e| {
                     ary.push(ParquetField(e.clone(), self.1).try_into_value_with(handle)?)?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(ary.into_value_with(handle))
             }
             Field::MapInternal(map) => {
-… (removed line 180 truncated in the source diff)
-                let hash = handle.hash_new_capa( …
-… (removed line 182 truncated in the source diff)
+                #[cfg(ruby_lt_3_2)]
+                let hash = handle.hash_new_capa(map.len());
+
+                #[cfg(not(ruby_lt_3_2))]
+                let hash = handle.hash_new();
+
+                map.entries().iter().try_for_each(|(k, v)| {
                     hash.aset(
                         ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
                         ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
                     )?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(hash.into_value_with(handle))
             }

@@ -213,7 +223,7 @@ impl TryIntoValue for ParquetField {
                         k.clone().into_value_with(handle),
                         ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
                     )?;
-                    Ok::<_, …
+                    Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(hash.into_value_with(handle))
             }
data/ext/parquet/src/types/schema_converter.rs

@@ -7,122 +7,17 @@ use crate::utils::parse_string_or_symbol;
 /// Recursively converts a SchemaField to a SchemaNode for any level of nesting
 fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
     match &field.type_ {
-        PST::…
+        PST::Primitive(primative) => SchemaNode::Primitive {
             name: field.name.clone(),
             nullable: field.nullable,
-            parquet_type: …
-            format: field.format.clone(),
-        },
-        PST::Int16 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Int16,
-            format: field.format.clone(),
-        },
-        PST::Int32 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Int32,
-            format: field.format.clone(),
-        },
-        PST::Int64 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Int64,
-            format: field.format.clone(),
-        },
-        PST::UInt8 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::UInt8,
-            format: field.format.clone(),
-        },
-        PST::UInt16 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::UInt16,
-            format: field.format.clone(),
-        },
-        PST::UInt32 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::UInt32,
-            format: field.format.clone(),
-        },
-        PST::UInt64 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::UInt64,
-            format: field.format.clone(),
-        },
-        PST::Float => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Float32,
-            format: field.format.clone(),
-        },
-        PST::Double => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Float64,
-            format: field.format.clone(),
-        },
-        PST::String => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::String,
-            format: field.format.clone(),
-        },
-        PST::Binary => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Binary,
-            format: field.format.clone(),
-        },
-        PST::Boolean => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Boolean,
-            format: field.format.clone(),
-        },
-        PST::Date32 => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::Date32,
-            format: field.format.clone(),
-        },
-        PST::TimestampMillis => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::TimestampMillis,
-            format: field.format.clone(),
-        },
-        PST::TimestampMicros => SchemaNode::Primitive {
-            name: field.name.clone(),
-            nullable: field.nullable,
-            parquet_type: PrimitiveType::TimestampMicros,
+            parquet_type: *primative,
             format: field.format.clone(),
         },
         PST::List(list_field) => {
             // Create item node by recursively converting the list item type to a node
             let item_node = match &list_field.item_type {
                 // For primitive types, create a primitive node with name "item"
-                PST::…
-                | PST::Int16
-                | PST::Int32
-                | PST::Int64
-                | PST::UInt8
-                | PST::UInt16
-                | PST::UInt32
-                | PST::UInt64
-                | PST::Float
-                | PST::Double
-                | PST::String
-                | PST::Binary
-                | PST::Boolean
-                | PST::Date32
-                | PST::TimestampMillis
-                | PST::TimestampMicros => {
+                PST::Primitive(_) => {
                     // Use a temporary SchemaField to convert item type
                     let item_field = SchemaField {
                         name: "item".to_string(),

@@ -341,7 +236,7 @@ pub fn infer_schema_from_first_row(
     Ok((0..array.len())
         .map(|i| SchemaField {
             name: format!("f{}", i),
-            type_: PST::String, // Default to String type when inferring
+            type_: PST::Primitive(PrimitiveType::String), // Default to String type when inferring
             format: None,
             nullable,
         })
data/ext/parquet/src/types/timestamp.rs

@@ -1,8 +1,6 @@
-use crate::reader::ReaderError;
-
 use super::*;

-pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, …
+pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
     let (ts, tz) = match value {
         ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
         ParquetValue::TimestampMillis(ts, tz) => {

@@ -75,10 +73,10 @@ macro_rules! impl_timestamp_conversion {
                 .funcall::<_, _, Value>("parse", (ts.to_string(),))?
                 .into_value_with($handle))
         }
-        _ => Err( …
+        _ => Err(MagnusError::new(
             magnus::exception::type_error(),
             "Invalid timestamp type".to_string(),
-        ))
+        ))?,
     }
 }};
 }
|