parquet 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/parquet/src/header_cache.rs +4 -9
- data/ext/parquet/src/logger.rs +2 -2
- data/ext/parquet/src/reader/common.rs +12 -15
- data/ext/parquet/src/reader/mod.rs +0 -56
- data/ext/parquet/src/reader/parquet_column_reader.rs +20 -16
- data/ext/parquet/src/reader/parquet_row_reader.rs +21 -14
- data/ext/parquet/src/ruby_reader.rs +37 -25
- data/ext/parquet/src/types/core_types.rs +2 -17
- data/ext/parquet/src/types/mod.rs +56 -0
- data/ext/parquet/src/types/parquet_value.rs +101 -95
- data/ext/parquet/src/types/record_types.rs +12 -14
- data/ext/parquet/src/types/schema_converter.rs +4 -109
- data/ext/parquet/src/types/timestamp.rs +3 -5
- data/ext/parquet/src/types/type_conversion.rs +116 -81
- data/ext/parquet/src/types/writer_types.rs +26 -54
- data/ext/parquet/src/writer/mod.rs +176 -839
- data/ext/parquet/src/writer/write_columns.rs +226 -0
- data/ext/parquet/src/writer/write_rows.rs +484 -0
- data/lib/parquet/version.rb +1 -1
- metadata +3 -1
@@ -1,7 +1,4 @@
|
|
1
|
-
use crate::{
|
2
|
-
impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion,
|
3
|
-
reader::{MagnusErrorWrapper, ReaderError},
|
4
|
-
};
|
1
|
+
use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
|
5
2
|
|
6
3
|
use super::*;
|
7
4
|
use arrow_array::MapArray;
|
@@ -115,7 +112,7 @@ impl std::hash::Hash for ParquetValue {
|
|
115
112
|
}
|
116
113
|
|
117
114
|
impl TryIntoValue for ParquetValue {
|
118
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
115
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
119
116
|
match self {
|
120
117
|
ParquetValue::Int8(i) => Ok(i.into_value_with(handle)),
|
121
118
|
ParquetValue::Int16(i) => Ok(i.into_value_with(handle)),
|
@@ -153,7 +150,7 @@ impl TryIntoValue for ParquetValue {
|
|
153
150
|
let ary = handle.ary_new_capa(l.len());
|
154
151
|
l.into_iter().try_for_each(|v| {
|
155
152
|
ary.push(v.try_into_value_with(handle)?)?;
|
156
|
-
Ok::<_,
|
153
|
+
Ok::<_, ParquetGemError>(())
|
157
154
|
})?;
|
158
155
|
|
159
156
|
// The complex_types test expects double_list to be nil when empty,
|
@@ -169,7 +166,7 @@ impl TryIntoValue for ParquetValue {
|
|
169
166
|
k.try_into_value_with(handle)?,
|
170
167
|
v.try_into_value_with(handle)?,
|
171
168
|
)?;
|
172
|
-
Ok::<_,
|
169
|
+
Ok::<_, ParquetGemError>(())
|
173
170
|
})?;
|
174
171
|
Ok(hash.into_value_with(handle))
|
175
172
|
}
|
@@ -180,6 +177,7 @@ impl TryIntoValue for ParquetValue {
|
|
180
177
|
|
181
178
|
impl ParquetValue {
|
182
179
|
pub fn from_value(
|
180
|
+
ruby: &Ruby,
|
183
181
|
value: Value,
|
184
182
|
type_: &ParquetSchemaType,
|
185
183
|
format: Option<&str>,
|
@@ -189,70 +187,72 @@ impl ParquetValue {
|
|
189
187
|
}
|
190
188
|
|
191
189
|
match type_ {
|
192
|
-
ParquetSchemaType::
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
190
|
+
ParquetSchemaType::Primitive(primative) => match primative {
|
191
|
+
PrimitiveType::Int8 => {
|
192
|
+
let v = NumericConverter::<i8>::convert_with_string_fallback(ruby, value)?;
|
193
|
+
Ok(ParquetValue::Int8(v))
|
194
|
+
}
|
195
|
+
PrimitiveType::Int16 => {
|
196
|
+
let v = NumericConverter::<i16>::convert_with_string_fallback(ruby, value)?;
|
197
|
+
Ok(ParquetValue::Int16(v))
|
198
|
+
}
|
199
|
+
PrimitiveType::Int32 => {
|
200
|
+
let v = NumericConverter::<i32>::convert_with_string_fallback(ruby, value)?;
|
201
|
+
Ok(ParquetValue::Int32(v))
|
202
|
+
}
|
203
|
+
PrimitiveType::Int64 => {
|
204
|
+
let v = NumericConverter::<i64>::convert_with_string_fallback(ruby, value)?;
|
205
|
+
Ok(ParquetValue::Int64(v))
|
206
|
+
}
|
207
|
+
PrimitiveType::UInt8 => {
|
208
|
+
let v = NumericConverter::<u8>::convert_with_string_fallback(ruby, value)?;
|
209
|
+
Ok(ParquetValue::UInt8(v))
|
210
|
+
}
|
211
|
+
PrimitiveType::UInt16 => {
|
212
|
+
let v = NumericConverter::<u16>::convert_with_string_fallback(ruby, value)?;
|
213
|
+
Ok(ParquetValue::UInt16(v))
|
214
|
+
}
|
215
|
+
PrimitiveType::UInt32 => {
|
216
|
+
let v = NumericConverter::<u32>::convert_with_string_fallback(ruby, value)?;
|
217
|
+
Ok(ParquetValue::UInt32(v))
|
218
|
+
}
|
219
|
+
PrimitiveType::UInt64 => {
|
220
|
+
let v = NumericConverter::<u64>::convert_with_string_fallback(ruby, value)?;
|
221
|
+
Ok(ParquetValue::UInt64(v))
|
222
|
+
}
|
223
|
+
PrimitiveType::Float32 => {
|
224
|
+
let v = NumericConverter::<f32>::convert_with_string_fallback(ruby, value)?;
|
225
|
+
Ok(ParquetValue::Float32(v))
|
226
|
+
}
|
227
|
+
PrimitiveType::Float64 => {
|
228
|
+
let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
|
229
|
+
Ok(ParquetValue::Float64(v))
|
230
|
+
}
|
231
|
+
PrimitiveType::String => {
|
232
|
+
let v = convert_to_string(value)?;
|
233
|
+
Ok(ParquetValue::String(v))
|
234
|
+
}
|
235
|
+
PrimitiveType::Binary => {
|
236
|
+
let v = convert_to_binary(value)?;
|
237
|
+
Ok(ParquetValue::Bytes(v))
|
238
|
+
}
|
239
|
+
PrimitiveType::Boolean => {
|
240
|
+
let v = convert_to_boolean(ruby, value)?;
|
241
|
+
Ok(ParquetValue::Boolean(v))
|
242
|
+
}
|
243
|
+
PrimitiveType::Date32 => {
|
244
|
+
let v = convert_to_date32(ruby, value, format)?;
|
245
|
+
Ok(ParquetValue::Date32(v))
|
246
|
+
}
|
247
|
+
PrimitiveType::TimestampMillis => {
|
248
|
+
let v = convert_to_timestamp_millis(ruby, value, format)?;
|
249
|
+
Ok(ParquetValue::TimestampMillis(v, None))
|
250
|
+
}
|
251
|
+
PrimitiveType::TimestampMicros => {
|
252
|
+
let v = convert_to_timestamp_micros(ruby, value, format)?;
|
253
|
+
Ok(ParquetValue::TimestampMicros(v, None))
|
254
|
+
}
|
255
|
+
},
|
256
256
|
ParquetSchemaType::List(list_field) => {
|
257
257
|
// We expect the Ruby object to be an Array, each item converting
|
258
258
|
// to the item_type. We gather them into ParquetValue::List(...)
|
@@ -271,6 +271,7 @@ impl ParquetValue {
|
|
271
271
|
let mut items = Vec::with_capacity(array.len());
|
272
272
|
for (index, item_val) in array.into_iter().enumerate() {
|
273
273
|
match ParquetValue::from_value(
|
274
|
+
ruby,
|
274
275
|
item_val,
|
275
276
|
&list_field.item_type,
|
276
277
|
list_field.format,
|
@@ -292,10 +293,18 @@ impl ParquetValue {
|
|
292
293
|
let hash_pairs: Vec<(Value, Value)> = value.funcall("to_a", ())?;
|
293
294
|
let mut result = HashMap::with_capacity(hash_pairs.len());
|
294
295
|
for (k, v) in hash_pairs {
|
295
|
-
let key_val =
|
296
|
-
|
297
|
-
|
298
|
-
|
296
|
+
let key_val = ParquetValue::from_value(
|
297
|
+
ruby,
|
298
|
+
k,
|
299
|
+
&map_field.key_type,
|
300
|
+
map_field.key_format,
|
301
|
+
)?;
|
302
|
+
let val_val = ParquetValue::from_value(
|
303
|
+
ruby,
|
304
|
+
v,
|
305
|
+
&map_field.value_type,
|
306
|
+
map_field.value_format,
|
307
|
+
)?;
|
299
308
|
result.insert(key_val, val_val);
|
300
309
|
}
|
301
310
|
Ok(ParquetValue::Map(result))
|
@@ -316,9 +325,7 @@ impl ParquetValue {
|
|
316
325
|
// For each field in the struct definition, try to find a matching key in the hash
|
317
326
|
for field in &struct_field.fields {
|
318
327
|
let field_name = ParquetValue::String(field.name.clone());
|
319
|
-
let ruby_field_name =
|
320
|
-
.str_new(&field.name)
|
321
|
-
.as_value();
|
328
|
+
let ruby_field_name = ruby.str_new(&field.name).as_value();
|
322
329
|
|
323
330
|
// Try to get the field value using Ruby's [] method
|
324
331
|
let field_value_obj =
|
@@ -328,6 +335,7 @@ impl ParquetValue {
|
|
328
335
|
ParquetValue::Null // Field not provided or nil, treat as null
|
329
336
|
} else {
|
330
337
|
ParquetValue::from_value(
|
338
|
+
ruby,
|
331
339
|
field_value_obj,
|
332
340
|
&field.type_,
|
333
341
|
field.format.as_deref(),
|
@@ -427,7 +435,7 @@ pub struct ArrayWrapper<'a> {
|
|
427
435
|
}
|
428
436
|
|
429
437
|
impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
430
|
-
type Error =
|
438
|
+
type Error = ParquetGemError;
|
431
439
|
|
432
440
|
fn try_from(column: ArrayWrapper<'a>) -> Result<Self, Self::Error> {
|
433
441
|
match column.array.data_type() {
|
@@ -515,11 +523,11 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
515
523
|
let iter = array.iter().map(|opt_x| match opt_x {
|
516
524
|
Some(x) => {
|
517
525
|
if column.strict {
|
518
|
-
Ok::<_,
|
526
|
+
Ok::<_, ParquetGemError>(ParquetValue::String(
|
519
527
|
simdutf8::basic::from_utf8(x.as_bytes())?.to_string(),
|
520
528
|
))
|
521
529
|
} else {
|
522
|
-
Ok::<_,
|
530
|
+
Ok::<_, ParquetGemError>(ParquetValue::String(x.to_string()))
|
523
531
|
}
|
524
532
|
}
|
525
533
|
None => Ok(ParquetValue::Null),
|
@@ -551,10 +559,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
551
559
|
strict: column.strict,
|
552
560
|
}) {
|
553
561
|
Ok(vec) => Ok(ParquetValue::List(vec.into_inner())),
|
554
|
-
Err(e) => Err(
|
562
|
+
Err(e) => Err(MagnusError::new(
|
555
563
|
magnus::exception::type_error(),
|
556
564
|
format!("Error converting list array to ParquetValueVec: {}", e),
|
557
|
-
))
|
565
|
+
))?,
|
558
566
|
},
|
559
567
|
None => Ok(ParquetValue::Null),
|
560
568
|
})
|
@@ -579,24 +587,22 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
579
587
|
}) {
|
580
588
|
Ok(vec) => vec.into_inner(),
|
581
589
|
Err(e) => {
|
582
|
-
return Err(
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
e
|
588
|
-
),
|
590
|
+
return Err(MagnusError::new(
|
591
|
+
magnus::exception::type_error(),
|
592
|
+
format!(
|
593
|
+
"Error converting struct field to ParquetValueVec: {}",
|
594
|
+
e
|
589
595
|
),
|
590
|
-
))
|
596
|
+
))?;
|
591
597
|
}
|
592
598
|
};
|
593
599
|
map.insert(
|
594
600
|
ParquetValue::String(field.name().to_string()),
|
595
601
|
field_values.into_iter().next().ok_or_else(|| {
|
596
|
-
|
602
|
+
MagnusError::new(
|
597
603
|
magnus::exception::type_error(),
|
598
604
|
"Expected a single value for struct field".to_string(),
|
599
|
-
)
|
605
|
+
)
|
600
606
|
})?,
|
601
607
|
);
|
602
608
|
}
|
@@ -667,10 +673,10 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
667
673
|
Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
|
668
674
|
}
|
669
675
|
_ => {
|
670
|
-
return Err(
|
676
|
+
return Err(MagnusError::new(
|
671
677
|
magnus::exception::type_error(),
|
672
678
|
format!("Unsupported data type: {:?}", column.array.data_type()),
|
673
|
-
))
|
679
|
+
))?;
|
674
680
|
}
|
675
681
|
}
|
676
682
|
}
|
@@ -1,7 +1,5 @@
|
|
1
1
|
use itertools::Itertools;
|
2
2
|
|
3
|
-
use crate::reader::ReaderError;
|
4
|
-
|
5
3
|
use super::*;
|
6
4
|
|
7
5
|
#[derive(Debug)]
|
@@ -20,13 +18,13 @@ pub enum ColumnRecord<S: BuildHasher + Default> {
|
|
20
18
|
pub struct ParquetField(pub Field, pub bool);
|
21
19
|
|
22
20
|
impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
23
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
21
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
24
22
|
match self {
|
25
23
|
RowRecord::Vec(vec) => {
|
26
24
|
let ary = handle.ary_new_capa(vec.len());
|
27
25
|
vec.into_iter().try_for_each(|v| {
|
28
26
|
ary.push(v.try_into_value_with(handle)?)?;
|
29
|
-
Ok::<_,
|
27
|
+
Ok::<_, ParquetGemError>(())
|
30
28
|
})?;
|
31
29
|
Ok(handle.into_value(ary))
|
32
30
|
}
|
@@ -64,7 +62,7 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
|
64
62
|
}
|
65
63
|
|
66
64
|
impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
67
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
65
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
68
66
|
match self {
|
69
67
|
ColumnRecord::Vec(vec) => {
|
70
68
|
let ary = handle.ary_new_capa(vec.len());
|
@@ -72,10 +70,10 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
72
70
|
let nested_ary = handle.ary_new_capa(v.len());
|
73
71
|
v.into_iter().try_for_each(|v| {
|
74
72
|
nested_ary.push(v.try_into_value_with(handle)?)?;
|
75
|
-
Ok::<_,
|
73
|
+
Ok::<_, ParquetGemError>(())
|
76
74
|
})?;
|
77
75
|
ary.push(nested_ary.into_value_with(handle))?;
|
78
|
-
Ok::<_,
|
76
|
+
Ok::<_, ParquetGemError>(())
|
79
77
|
})?;
|
80
78
|
Ok(ary.into_value_with(handle))
|
81
79
|
}
|
@@ -98,7 +96,7 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
98
96
|
let ary = handle.ary_new_capa(v.len());
|
99
97
|
v.into_iter().try_for_each(|v| {
|
100
98
|
ary.push(v.try_into_value_with(handle)?)?;
|
101
|
-
Ok::<_,
|
99
|
+
Ok::<_, ParquetGemError>(())
|
102
100
|
})?;
|
103
101
|
values[i + 1] = handle.into_value(ary);
|
104
102
|
i += 2;
|
@@ -118,11 +116,11 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
118
116
|
}
|
119
117
|
|
120
118
|
pub trait TryIntoValue {
|
121
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
119
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError>;
|
122
120
|
}
|
123
121
|
|
124
122
|
impl TryIntoValue for ParquetField {
|
125
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
123
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
126
124
|
match self.0 {
|
127
125
|
Field::Null => Ok(handle.qnil().as_value()),
|
128
126
|
Field::Bool(b) => Ok(b.into_value_with(handle)),
|
@@ -139,7 +137,7 @@ impl TryIntoValue for ParquetField {
|
|
139
137
|
Field::Str(s) => {
|
140
138
|
if self.1 {
|
141
139
|
Ok(simdutf8::basic::from_utf8(s.as_bytes())
|
142
|
-
.map_err(|e|
|
140
|
+
.map_err(|e| ParquetGemError::Utf8Error(e))
|
143
141
|
.and_then(|s| Ok(s.into_value_with(handle)))?)
|
144
142
|
} else {
|
145
143
|
let s = String::from_utf8_lossy(s.as_bytes());
|
@@ -172,7 +170,7 @@ impl TryIntoValue for ParquetField {
|
|
172
170
|
let ary = handle.ary_new_capa(elements.len());
|
173
171
|
elements.iter().try_for_each(|e| {
|
174
172
|
ary.push(ParquetField(e.clone(), self.1).try_into_value_with(handle)?)?;
|
175
|
-
Ok::<_,
|
173
|
+
Ok::<_, ParquetGemError>(())
|
176
174
|
})?;
|
177
175
|
Ok(ary.into_value_with(handle))
|
178
176
|
}
|
@@ -184,7 +182,7 @@ impl TryIntoValue for ParquetField {
|
|
184
182
|
ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
|
185
183
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
186
184
|
)?;
|
187
|
-
Ok::<_,
|
185
|
+
Ok::<_, ParquetGemError>(())
|
188
186
|
})?;
|
189
187
|
Ok(hash.into_value_with(handle))
|
190
188
|
}
|
@@ -213,7 +211,7 @@ impl TryIntoValue for ParquetField {
|
|
213
211
|
k.clone().into_value_with(handle),
|
214
212
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
215
213
|
)?;
|
216
|
-
Ok::<_,
|
214
|
+
Ok::<_, ParquetGemError>(())
|
217
215
|
})?;
|
218
216
|
Ok(hash.into_value_with(handle))
|
219
217
|
}
|
@@ -7,122 +7,17 @@ use crate::utils::parse_string_or_symbol;
|
|
7
7
|
/// Recursively converts a SchemaField to a SchemaNode for any level of nesting
|
8
8
|
fn convert_schema_field_to_node(field: &SchemaField) -> SchemaNode {
|
9
9
|
match &field.type_ {
|
10
|
-
PST::
|
10
|
+
PST::Primitive(primative) => SchemaNode::Primitive {
|
11
11
|
name: field.name.clone(),
|
12
12
|
nullable: field.nullable,
|
13
|
-
parquet_type:
|
14
|
-
format: field.format.clone(),
|
15
|
-
},
|
16
|
-
PST::Int16 => SchemaNode::Primitive {
|
17
|
-
name: field.name.clone(),
|
18
|
-
nullable: field.nullable,
|
19
|
-
parquet_type: PrimitiveType::Int16,
|
20
|
-
format: field.format.clone(),
|
21
|
-
},
|
22
|
-
PST::Int32 => SchemaNode::Primitive {
|
23
|
-
name: field.name.clone(),
|
24
|
-
nullable: field.nullable,
|
25
|
-
parquet_type: PrimitiveType::Int32,
|
26
|
-
format: field.format.clone(),
|
27
|
-
},
|
28
|
-
PST::Int64 => SchemaNode::Primitive {
|
29
|
-
name: field.name.clone(),
|
30
|
-
nullable: field.nullable,
|
31
|
-
parquet_type: PrimitiveType::Int64,
|
32
|
-
format: field.format.clone(),
|
33
|
-
},
|
34
|
-
PST::UInt8 => SchemaNode::Primitive {
|
35
|
-
name: field.name.clone(),
|
36
|
-
nullable: field.nullable,
|
37
|
-
parquet_type: PrimitiveType::UInt8,
|
38
|
-
format: field.format.clone(),
|
39
|
-
},
|
40
|
-
PST::UInt16 => SchemaNode::Primitive {
|
41
|
-
name: field.name.clone(),
|
42
|
-
nullable: field.nullable,
|
43
|
-
parquet_type: PrimitiveType::UInt16,
|
44
|
-
format: field.format.clone(),
|
45
|
-
},
|
46
|
-
PST::UInt32 => SchemaNode::Primitive {
|
47
|
-
name: field.name.clone(),
|
48
|
-
nullable: field.nullable,
|
49
|
-
parquet_type: PrimitiveType::UInt32,
|
50
|
-
format: field.format.clone(),
|
51
|
-
},
|
52
|
-
PST::UInt64 => SchemaNode::Primitive {
|
53
|
-
name: field.name.clone(),
|
54
|
-
nullable: field.nullable,
|
55
|
-
parquet_type: PrimitiveType::UInt64,
|
56
|
-
format: field.format.clone(),
|
57
|
-
},
|
58
|
-
PST::Float => SchemaNode::Primitive {
|
59
|
-
name: field.name.clone(),
|
60
|
-
nullable: field.nullable,
|
61
|
-
parquet_type: PrimitiveType::Float32,
|
62
|
-
format: field.format.clone(),
|
63
|
-
},
|
64
|
-
PST::Double => SchemaNode::Primitive {
|
65
|
-
name: field.name.clone(),
|
66
|
-
nullable: field.nullable,
|
67
|
-
parquet_type: PrimitiveType::Float64,
|
68
|
-
format: field.format.clone(),
|
69
|
-
},
|
70
|
-
PST::String => SchemaNode::Primitive {
|
71
|
-
name: field.name.clone(),
|
72
|
-
nullable: field.nullable,
|
73
|
-
parquet_type: PrimitiveType::String,
|
74
|
-
format: field.format.clone(),
|
75
|
-
},
|
76
|
-
PST::Binary => SchemaNode::Primitive {
|
77
|
-
name: field.name.clone(),
|
78
|
-
nullable: field.nullable,
|
79
|
-
parquet_type: PrimitiveType::Binary,
|
80
|
-
format: field.format.clone(),
|
81
|
-
},
|
82
|
-
PST::Boolean => SchemaNode::Primitive {
|
83
|
-
name: field.name.clone(),
|
84
|
-
nullable: field.nullable,
|
85
|
-
parquet_type: PrimitiveType::Boolean,
|
86
|
-
format: field.format.clone(),
|
87
|
-
},
|
88
|
-
PST::Date32 => SchemaNode::Primitive {
|
89
|
-
name: field.name.clone(),
|
90
|
-
nullable: field.nullable,
|
91
|
-
parquet_type: PrimitiveType::Date32,
|
92
|
-
format: field.format.clone(),
|
93
|
-
},
|
94
|
-
PST::TimestampMillis => SchemaNode::Primitive {
|
95
|
-
name: field.name.clone(),
|
96
|
-
nullable: field.nullable,
|
97
|
-
parquet_type: PrimitiveType::TimestampMillis,
|
98
|
-
format: field.format.clone(),
|
99
|
-
},
|
100
|
-
PST::TimestampMicros => SchemaNode::Primitive {
|
101
|
-
name: field.name.clone(),
|
102
|
-
nullable: field.nullable,
|
103
|
-
parquet_type: PrimitiveType::TimestampMicros,
|
13
|
+
parquet_type: *primative,
|
104
14
|
format: field.format.clone(),
|
105
15
|
},
|
106
16
|
PST::List(list_field) => {
|
107
17
|
// Create item node by recursively converting the list item type to a node
|
108
18
|
let item_node = match &list_field.item_type {
|
109
19
|
// For primitive types, create a primitive node with name "item"
|
110
|
-
PST::
|
111
|
-
| PST::Int16
|
112
|
-
| PST::Int32
|
113
|
-
| PST::Int64
|
114
|
-
| PST::UInt8
|
115
|
-
| PST::UInt16
|
116
|
-
| PST::UInt32
|
117
|
-
| PST::UInt64
|
118
|
-
| PST::Float
|
119
|
-
| PST::Double
|
120
|
-
| PST::String
|
121
|
-
| PST::Binary
|
122
|
-
| PST::Boolean
|
123
|
-
| PST::Date32
|
124
|
-
| PST::TimestampMillis
|
125
|
-
| PST::TimestampMicros => {
|
20
|
+
PST::Primitive(_) => {
|
126
21
|
// Use a temporary SchemaField to convert item type
|
127
22
|
let item_field = SchemaField {
|
128
23
|
name: "item".to_string(),
|
@@ -341,7 +236,7 @@ pub fn infer_schema_from_first_row(
|
|
341
236
|
Ok((0..array.len())
|
342
237
|
.map(|i| SchemaField {
|
343
238
|
name: format!("f{}", i),
|
344
|
-
type_: PST::String, // Default to String type when inferring
|
239
|
+
type_: PST::Primitive(PrimitiveType::String), // Default to String type when inferring
|
345
240
|
format: None,
|
346
241
|
nullable,
|
347
242
|
})
|
@@ -1,8 +1,6 @@
|
|
1
|
-
use crate::reader::ReaderError;
|
2
|
-
|
3
1
|
use super::*;
|
4
2
|
|
5
|
-
pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp,
|
3
|
+
pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
|
6
4
|
let (ts, tz) = match value {
|
7
5
|
ParquetValue::TimestampSecond(ts, tz) => (jiff::Timestamp::from_second(*ts).unwrap(), tz),
|
8
6
|
ParquetValue::TimestampMillis(ts, tz) => {
|
@@ -75,10 +73,10 @@ macro_rules! impl_timestamp_conversion {
|
|
75
73
|
.funcall::<_, _, Value>("parse", (ts.to_string(),))?
|
76
74
|
.into_value_with($handle))
|
77
75
|
}
|
78
|
-
_ => Err(
|
76
|
+
_ => Err(MagnusError::new(
|
79
77
|
magnus::exception::type_error(),
|
80
78
|
"Invalid timestamp type".to_string(),
|
81
|
-
))
|
79
|
+
))?,
|
82
80
|
}
|
83
81
|
}};
|
84
82
|
}
|