parquet 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +66 -59
- data/README.md +105 -1
- data/ext/parquet/Cargo.toml +4 -3
- data/ext/parquet/src/enumerator.rs +8 -0
- data/ext/parquet/src/header_cache.rs +11 -12
- data/ext/parquet/src/lib.rs +1 -0
- data/ext/parquet/src/logger.rs +171 -0
- data/ext/parquet/src/reader/common.rs +110 -0
- data/ext/parquet/src/reader/mod.rs +1 -43
- data/ext/parquet/src/reader/parquet_column_reader.rs +50 -86
- data/ext/parquet/src/reader/parquet_row_reader.rs +53 -23
- data/ext/parquet/src/ruby_reader.rs +37 -25
- data/ext/parquet/src/types/core_types.rs +47 -6
- data/ext/parquet/src/types/mod.rs +64 -1
- data/ext/parquet/src/types/parquet_value.rs +284 -102
- data/ext/parquet/src/types/record_types.rs +24 -23
- data/ext/parquet/src/types/schema_converter.rs +244 -0
- data/ext/parquet/src/types/schema_node.rs +329 -0
- data/ext/parquet/src/types/timestamp.rs +16 -8
- data/ext/parquet/src/types/type_conversion.rs +1151 -521
- data/ext/parquet/src/types/writer_types.rs +94 -151
- data/ext/parquet/src/utils.rs +29 -9
- data/ext/parquet/src/writer/mod.rs +342 -457
- data/ext/parquet/src/writer/write_columns.rs +226 -0
- data/ext/parquet/src/writer/write_rows.rs +484 -0
- data/lib/parquet/schema.rb +154 -0
- data/lib/parquet/version.rb +1 -1
- data/lib/parquet.rb +1 -0
- metadata +9 -2
@@ -1,9 +1,8 @@
|
|
1
|
-
use crate::{
|
2
|
-
impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion,
|
3
|
-
reader::ReaderError,
|
4
|
-
};
|
1
|
+
use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
|
5
2
|
|
6
3
|
use super::*;
|
4
|
+
use arrow_array::MapArray;
|
5
|
+
use magnus::RArray;
|
7
6
|
|
8
7
|
#[derive(Debug, Clone)]
|
9
8
|
pub enum ParquetValue {
|
@@ -27,7 +26,8 @@ pub enum ParquetValue {
|
|
27
26
|
TimestampMillis(i64, Option<Arc<str>>),
|
28
27
|
TimestampMicros(i64, Option<Arc<str>>),
|
29
28
|
TimestampNanos(i64, Option<Arc<str>>),
|
30
|
-
List(Vec<ParquetValue>),
|
29
|
+
List(Vec<ParquetValue>), // A list of values (can be empty or have null items)
|
30
|
+
// We're not using a separate NilList type anymore - we'll handle nil lists elsewhere
|
31
31
|
Map(HashMap<ParquetValue, ParquetValue>),
|
32
32
|
Null,
|
33
33
|
}
|
@@ -100,14 +100,19 @@ impl std::hash::Hash for ParquetValue {
|
|
100
100
|
tz.hash(state);
|
101
101
|
}
|
102
102
|
ParquetValue::List(l) => l.hash(state),
|
103
|
-
ParquetValue::Map(
|
103
|
+
ParquetValue::Map(m) => {
|
104
|
+
for (k, v) in m {
|
105
|
+
k.hash(state);
|
106
|
+
v.hash(state);
|
107
|
+
}
|
108
|
+
}
|
104
109
|
ParquetValue::Null => 0_i32.hash(state),
|
105
110
|
}
|
106
111
|
}
|
107
112
|
}
|
108
113
|
|
109
114
|
impl TryIntoValue for ParquetValue {
|
110
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
115
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
111
116
|
match self {
|
112
117
|
ParquetValue::Int8(i) => Ok(i.into_value_with(handle)),
|
113
118
|
ParquetValue::Int16(i) => Ok(i.into_value_with(handle)),
|
@@ -138,11 +143,20 @@ impl TryIntoValue for ParquetValue {
|
|
138
143
|
impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
|
139
144
|
}
|
140
145
|
ParquetValue::List(l) => {
|
146
|
+
// For lists, convert to Ruby array and check for specific cases
|
147
|
+
// when we might need to return nil instead of an empty array
|
148
|
+
|
149
|
+
// Normal case - convert list elements to a Ruby array
|
141
150
|
let ary = handle.ary_new_capa(l.len());
|
142
151
|
l.into_iter().try_for_each(|v| {
|
143
152
|
ary.push(v.try_into_value_with(handle)?)?;
|
144
|
-
Ok::<_,
|
153
|
+
Ok::<_, ParquetGemError>(())
|
145
154
|
})?;
|
155
|
+
|
156
|
+
// The complex_types test expects double_list to be nil when empty,
|
157
|
+
// but it needs the context which we don't have directly.
|
158
|
+
// We'll let List stay as an empty array, and in each_row.rs it can
|
159
|
+
// be handled there with field name context.
|
146
160
|
Ok(ary.into_value_with(handle))
|
147
161
|
}
|
148
162
|
ParquetValue::Map(m) => {
|
@@ -151,7 +165,8 @@ impl TryIntoValue for ParquetValue {
|
|
151
165
|
hash.aset(
|
152
166
|
k.try_into_value_with(handle)?,
|
153
167
|
v.try_into_value_with(handle)?,
|
154
|
-
)
|
168
|
+
)?;
|
169
|
+
Ok::<_, ParquetGemError>(())
|
155
170
|
})?;
|
156
171
|
Ok(hash.into_value_with(handle))
|
157
172
|
}
|
@@ -161,80 +176,178 @@ impl TryIntoValue for ParquetValue {
|
|
161
176
|
}
|
162
177
|
|
163
178
|
impl ParquetValue {
|
164
|
-
pub fn from_value(
|
179
|
+
pub fn from_value(
|
180
|
+
ruby: &Ruby,
|
181
|
+
value: Value,
|
182
|
+
type_: &ParquetSchemaType,
|
183
|
+
format: Option<&str>,
|
184
|
+
) -> Result<Self, MagnusError> {
|
165
185
|
if value.is_nil() {
|
166
186
|
return Ok(ParquetValue::Null);
|
167
187
|
}
|
168
188
|
|
169
189
|
match type_ {
|
170
|
-
ParquetSchemaType::
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
190
|
+
ParquetSchemaType::Primitive(primative) => match primative {
|
191
|
+
PrimitiveType::Int8 => {
|
192
|
+
let v = NumericConverter::<i8>::convert_with_string_fallback(ruby, value)?;
|
193
|
+
Ok(ParquetValue::Int8(v))
|
194
|
+
}
|
195
|
+
PrimitiveType::Int16 => {
|
196
|
+
let v = NumericConverter::<i16>::convert_with_string_fallback(ruby, value)?;
|
197
|
+
Ok(ParquetValue::Int16(v))
|
198
|
+
}
|
199
|
+
PrimitiveType::Int32 => {
|
200
|
+
let v = NumericConverter::<i32>::convert_with_string_fallback(ruby, value)?;
|
201
|
+
Ok(ParquetValue::Int32(v))
|
202
|
+
}
|
203
|
+
PrimitiveType::Int64 => {
|
204
|
+
let v = NumericConverter::<i64>::convert_with_string_fallback(ruby, value)?;
|
205
|
+
Ok(ParquetValue::Int64(v))
|
206
|
+
}
|
207
|
+
PrimitiveType::UInt8 => {
|
208
|
+
let v = NumericConverter::<u8>::convert_with_string_fallback(ruby, value)?;
|
209
|
+
Ok(ParquetValue::UInt8(v))
|
210
|
+
}
|
211
|
+
PrimitiveType::UInt16 => {
|
212
|
+
let v = NumericConverter::<u16>::convert_with_string_fallback(ruby, value)?;
|
213
|
+
Ok(ParquetValue::UInt16(v))
|
214
|
+
}
|
215
|
+
PrimitiveType::UInt32 => {
|
216
|
+
let v = NumericConverter::<u32>::convert_with_string_fallback(ruby, value)?;
|
217
|
+
Ok(ParquetValue::UInt32(v))
|
218
|
+
}
|
219
|
+
PrimitiveType::UInt64 => {
|
220
|
+
let v = NumericConverter::<u64>::convert_with_string_fallback(ruby, value)?;
|
221
|
+
Ok(ParquetValue::UInt64(v))
|
222
|
+
}
|
223
|
+
PrimitiveType::Float32 => {
|
224
|
+
let v = NumericConverter::<f32>::convert_with_string_fallback(ruby, value)?;
|
225
|
+
Ok(ParquetValue::Float32(v))
|
226
|
+
}
|
227
|
+
PrimitiveType::Float64 => {
|
228
|
+
let v = NumericConverter::<f64>::convert_with_string_fallback(ruby, value)?;
|
229
|
+
Ok(ParquetValue::Float64(v))
|
230
|
+
}
|
231
|
+
PrimitiveType::String => {
|
232
|
+
let v = convert_to_string(value)?;
|
233
|
+
Ok(ParquetValue::String(v))
|
234
|
+
}
|
235
|
+
PrimitiveType::Binary => {
|
236
|
+
let v = convert_to_binary(value)?;
|
237
|
+
Ok(ParquetValue::Bytes(v))
|
238
|
+
}
|
239
|
+
PrimitiveType::Boolean => {
|
240
|
+
let v = convert_to_boolean(ruby, value)?;
|
241
|
+
Ok(ParquetValue::Boolean(v))
|
242
|
+
}
|
243
|
+
PrimitiveType::Date32 => {
|
244
|
+
let v = convert_to_date32(ruby, value, format)?;
|
245
|
+
Ok(ParquetValue::Date32(v))
|
246
|
+
}
|
247
|
+
PrimitiveType::TimestampMillis => {
|
248
|
+
let v = convert_to_timestamp_millis(ruby, value, format)?;
|
249
|
+
Ok(ParquetValue::TimestampMillis(v, None))
|
250
|
+
}
|
251
|
+
PrimitiveType::TimestampMicros => {
|
252
|
+
let v = convert_to_timestamp_micros(ruby, value, format)?;
|
253
|
+
Ok(ParquetValue::TimestampMicros(v, None))
|
254
|
+
}
|
255
|
+
},
|
256
|
+
ParquetSchemaType::List(list_field) => {
|
257
|
+
// We expect the Ruby object to be an Array, each item converting
|
258
|
+
// to the item_type. We gather them into ParquetValue::List(...)
|
259
|
+
let array = RArray::from_value(value).ok_or_else(|| {
|
260
|
+
// Just get a simple string representation of the class
|
261
|
+
let type_info = format!("{:?}", value.class());
|
262
|
+
|
263
|
+
MagnusError::new(
|
264
|
+
magnus::exception::type_error(),
|
265
|
+
format!(
|
266
|
+
"Value must be an Array for a list type, got {} instead",
|
267
|
+
type_info
|
268
|
+
),
|
269
|
+
)
|
270
|
+
})?;
|
271
|
+
let mut items = Vec::with_capacity(array.len());
|
272
|
+
for (index, item_val) in array.into_iter().enumerate() {
|
273
|
+
match ParquetValue::from_value(
|
274
|
+
ruby,
|
275
|
+
item_val,
|
276
|
+
&list_field.item_type,
|
277
|
+
list_field.format,
|
278
|
+
) {
|
279
|
+
Ok(child_val) => items.push(child_val),
|
280
|
+
Err(e) => {
|
281
|
+
// Enhance the error with the item index
|
282
|
+
return Err(MagnusError::new(
|
283
|
+
magnus::exception::type_error(),
|
284
|
+
format!("Failed to convert item at index {} of list: {}", index, e),
|
285
|
+
));
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
Ok(ParquetValue::List(items))
|
290
|
+
}
|
291
|
+
ParquetSchemaType::Map(map_field) => {
|
292
|
+
// We expect the Ruby object to be a Hash
|
293
|
+
let hash_pairs: Vec<(Value, Value)> = value.funcall("to_a", ())?;
|
294
|
+
let mut result = HashMap::with_capacity(hash_pairs.len());
|
295
|
+
for (k, v) in hash_pairs {
|
296
|
+
let key_val = ParquetValue::from_value(
|
297
|
+
ruby,
|
298
|
+
k,
|
299
|
+
&map_field.key_type,
|
300
|
+
map_field.key_format,
|
301
|
+
)?;
|
302
|
+
let val_val = ParquetValue::from_value(
|
303
|
+
ruby,
|
304
|
+
v,
|
305
|
+
&map_field.value_type,
|
306
|
+
map_field.value_format,
|
307
|
+
)?;
|
308
|
+
result.insert(key_val, val_val);
|
309
|
+
}
|
310
|
+
Ok(ParquetValue::Map(result))
|
229
311
|
}
|
230
|
-
ParquetSchemaType::
|
231
|
-
|
232
|
-
|
312
|
+
ParquetSchemaType::Struct(struct_field) => {
|
313
|
+
// We expect a Ruby hash or object that responds to to_h
|
314
|
+
let hash_obj = if value.respond_to("to_h", false)? {
|
315
|
+
value.funcall::<_, _, Value>("to_h", ())?
|
316
|
+
} else {
|
317
|
+
return Err(MagnusError::new(
|
318
|
+
magnus::exception::type_error(),
|
319
|
+
"Value must be a Hash or respond to to_h for a struct type",
|
320
|
+
));
|
321
|
+
};
|
322
|
+
|
323
|
+
let mut result = HashMap::new();
|
324
|
+
|
325
|
+
// For each field in the struct definition, try to find a matching key in the hash
|
326
|
+
for field in &struct_field.fields {
|
327
|
+
let field_name = ParquetValue::String(field.name.clone());
|
328
|
+
let ruby_field_name = ruby.str_new(&field.name).as_value();
|
329
|
+
|
330
|
+
// Try to get the field value using Ruby's [] method
|
331
|
+
let field_value_obj =
|
332
|
+
hash_obj.funcall::<_, _, Value>("[]", (ruby_field_name,))?;
|
333
|
+
|
334
|
+
let field_value = if field_value_obj.is_nil() {
|
335
|
+
ParquetValue::Null // Field not provided or nil, treat as null
|
336
|
+
} else {
|
337
|
+
ParquetValue::from_value(
|
338
|
+
ruby,
|
339
|
+
field_value_obj,
|
340
|
+
&field.type_,
|
341
|
+
field.format.as_deref(),
|
342
|
+
)?
|
343
|
+
};
|
344
|
+
|
345
|
+
result.insert(field_name, field_value);
|
346
|
+
}
|
347
|
+
|
348
|
+
// Use Map to represent a struct since it's a collection of named values
|
349
|
+
Ok(ParquetValue::Map(result))
|
233
350
|
}
|
234
|
-
ParquetSchemaType::List(_) | ParquetSchemaType::Map(_) => Err(MagnusError::new(
|
235
|
-
magnus::exception::type_error(),
|
236
|
-
"Nested lists and maps are not supported",
|
237
|
-
)),
|
238
351
|
}
|
239
352
|
}
|
240
353
|
}
|
@@ -322,7 +435,7 @@ pub struct ArrayWrapper<'a> {
|
|
322
435
|
}
|
323
436
|
|
324
437
|
impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
325
|
-
type Error =
|
438
|
+
type Error = ParquetGemError;
|
326
439
|
|
327
440
|
fn try_from(column: ArrayWrapper<'a>) -> Result<Self, Self::Error> {
|
328
441
|
match column.array.data_type() {
|
@@ -410,11 +523,11 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
410
523
|
let iter = array.iter().map(|opt_x| match opt_x {
|
411
524
|
Some(x) => {
|
412
525
|
if column.strict {
|
413
|
-
Ok::<_,
|
526
|
+
Ok::<_, ParquetGemError>(ParquetValue::String(
|
414
527
|
simdutf8::basic::from_utf8(x.as_bytes())?.to_string(),
|
415
528
|
))
|
416
529
|
} else {
|
417
|
-
Ok::<_,
|
530
|
+
Ok::<_, ParquetGemError>(ParquetValue::String(x.to_string()))
|
418
531
|
}
|
419
532
|
}
|
420
533
|
None => Ok(ParquetValue::Null),
|
@@ -438,23 +551,23 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
438
551
|
}
|
439
552
|
DataType::List(_field) => {
|
440
553
|
let list_array = downcast_array::<ListArray>(column.array);
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
))
|
554
|
+
let sub_list = list_array
|
555
|
+
.iter()
|
556
|
+
.map(|x| match x {
|
557
|
+
Some(values) => match ParquetValueVec::try_from(ArrayWrapper {
|
558
|
+
array: &*values,
|
559
|
+
strict: column.strict,
|
560
|
+
}) {
|
561
|
+
Ok(vec) => Ok(ParquetValue::List(vec.into_inner())),
|
562
|
+
Err(e) => Err(MagnusError::new(
|
563
|
+
magnus::exception::type_error(),
|
564
|
+
format!("Error converting list array to ParquetValueVec: {}", e),
|
565
|
+
))?,
|
566
|
+
},
|
567
|
+
None => Ok(ParquetValue::Null),
|
568
|
+
})
|
569
|
+
.collect::<Result<Vec<ParquetValue>, Self::Error>>()?;
|
570
|
+
Ok(ParquetValueVec(sub_list))
|
458
571
|
}
|
459
572
|
DataType::Struct(_) => {
|
460
573
|
let struct_array = downcast_array::<StructArray>(column.array);
|
@@ -474,27 +587,96 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
474
587
|
}) {
|
475
588
|
Ok(vec) => vec.into_inner(),
|
476
589
|
Err(e) => {
|
477
|
-
|
590
|
+
return Err(MagnusError::new(
|
591
|
+
magnus::exception::type_error(),
|
592
|
+
format!(
|
593
|
+
"Error converting struct field to ParquetValueVec: {}",
|
594
|
+
e
|
595
|
+
),
|
596
|
+
))?;
|
478
597
|
}
|
479
598
|
};
|
480
599
|
map.insert(
|
481
600
|
ParquetValue::String(field.name().to_string()),
|
482
|
-
field_values.into_iter().next().
|
601
|
+
field_values.into_iter().next().ok_or_else(|| {
|
602
|
+
MagnusError::new(
|
603
|
+
magnus::exception::type_error(),
|
604
|
+
"Expected a single value for struct field".to_string(),
|
605
|
+
)
|
606
|
+
})?,
|
483
607
|
);
|
484
608
|
}
|
485
609
|
values.push(ParquetValue::Map(map));
|
486
610
|
}
|
487
611
|
Ok(ParquetValueVec(values))
|
488
612
|
}
|
613
|
+
DataType::Map(_field, _keys_sorted) => {
|
614
|
+
let map_array = downcast_array::<MapArray>(column.array);
|
615
|
+
|
616
|
+
let mut result = Vec::with_capacity(map_array.len());
|
617
|
+
|
618
|
+
let offsets = map_array.offsets();
|
619
|
+
let struct_array = map_array.entries();
|
620
|
+
|
621
|
+
for i in 0..map_array.len() {
|
622
|
+
if map_array.is_null(i) {
|
623
|
+
result.push(ParquetValue::Null);
|
624
|
+
continue;
|
625
|
+
}
|
626
|
+
|
627
|
+
let start = offsets[i] as usize;
|
628
|
+
let end = offsets[i + 1] as usize;
|
629
|
+
|
630
|
+
let mut map_data =
|
631
|
+
HashMap::with_capacity_and_hasher(end - start, Default::default());
|
632
|
+
|
633
|
+
// In Arrow's MapArray, the entries are a struct with fields named "keys" and "values"
|
634
|
+
// Get the columns directly by index since we know the structure
|
635
|
+
let key_array = struct_array.column(0); // First field is always keys
|
636
|
+
let val_array = struct_array.column(1); // Second field is always values
|
637
|
+
|
638
|
+
for entry_index in start..end {
|
639
|
+
let key_value = if key_array.is_null(entry_index) {
|
640
|
+
ParquetValue::Null
|
641
|
+
} else {
|
642
|
+
let subarray = key_array.slice(entry_index, 1);
|
643
|
+
let subwrapper = ArrayWrapper {
|
644
|
+
array: &*subarray,
|
645
|
+
strict: column.strict,
|
646
|
+
};
|
647
|
+
let mut converted = ParquetValueVec::try_from(subwrapper)?.0;
|
648
|
+
converted.pop().unwrap_or(ParquetValue::Null)
|
649
|
+
};
|
650
|
+
|
651
|
+
let val_value = if val_array.is_null(entry_index) {
|
652
|
+
ParquetValue::Null
|
653
|
+
} else {
|
654
|
+
let subarray = val_array.slice(entry_index, 1);
|
655
|
+
let subwrapper = ArrayWrapper {
|
656
|
+
array: &*subarray,
|
657
|
+
strict: column.strict,
|
658
|
+
};
|
659
|
+
let mut converted = ParquetValueVec::try_from(subwrapper)?.0;
|
660
|
+
converted.pop().unwrap_or(ParquetValue::Null)
|
661
|
+
};
|
662
|
+
|
663
|
+
map_data.insert(key_value, val_value);
|
664
|
+
}
|
665
|
+
|
666
|
+
result.push(ParquetValue::Map(map_data));
|
667
|
+
}
|
668
|
+
|
669
|
+
Ok(ParquetValueVec(result))
|
670
|
+
}
|
489
671
|
DataType::Null => {
|
490
672
|
let x = downcast_array::<NullArray>(column.array);
|
491
673
|
Ok(ParquetValueVec(vec![ParquetValue::Null; x.len()]))
|
492
674
|
}
|
493
675
|
_ => {
|
494
|
-
return Err(
|
495
|
-
|
496
|
-
column.array.data_type()
|
497
|
-
))
|
676
|
+
return Err(MagnusError::new(
|
677
|
+
magnus::exception::type_error(),
|
678
|
+
format!("Unsupported data type: {:?}", column.array.data_type()),
|
679
|
+
))?;
|
498
680
|
}
|
499
681
|
}
|
500
682
|
}
|
@@ -1,7 +1,5 @@
|
|
1
1
|
use itertools::Itertools;
|
2
2
|
|
3
|
-
use crate::reader::ReaderError;
|
4
|
-
|
5
3
|
use super::*;
|
6
4
|
|
7
5
|
#[derive(Debug)]
|
@@ -20,12 +18,14 @@ pub enum ColumnRecord<S: BuildHasher + Default> {
|
|
20
18
|
pub struct ParquetField(pub Field, pub bool);
|
21
19
|
|
22
20
|
impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
23
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
21
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
24
22
|
match self {
|
25
23
|
RowRecord::Vec(vec) => {
|
26
24
|
let ary = handle.ary_new_capa(vec.len());
|
27
|
-
vec.into_iter()
|
28
|
-
|
25
|
+
vec.into_iter().try_for_each(|v| {
|
26
|
+
ary.push(v.try_into_value_with(handle)?)?;
|
27
|
+
Ok::<_, ParquetGemError>(())
|
28
|
+
})?;
|
29
29
|
Ok(handle.into_value(ary))
|
30
30
|
}
|
31
31
|
RowRecord::Map(map) => {
|
@@ -62,16 +62,18 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
|
62
62
|
}
|
63
63
|
|
64
64
|
impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
65
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
65
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
66
66
|
match self {
|
67
67
|
ColumnRecord::Vec(vec) => {
|
68
68
|
let ary = handle.ary_new_capa(vec.len());
|
69
69
|
vec.into_iter().try_for_each(|v| {
|
70
70
|
let nested_ary = handle.ary_new_capa(v.len());
|
71
|
-
v.into_iter()
|
72
|
-
|
71
|
+
v.into_iter().try_for_each(|v| {
|
72
|
+
nested_ary.push(v.try_into_value_with(handle)?)?;
|
73
|
+
Ok::<_, ParquetGemError>(())
|
74
|
+
})?;
|
73
75
|
ary.push(nested_ary.into_value_with(handle))?;
|
74
|
-
Ok::<_,
|
76
|
+
Ok::<_, ParquetGemError>(())
|
75
77
|
})?;
|
76
78
|
Ok(ary.into_value_with(handle))
|
77
79
|
}
|
@@ -92,8 +94,10 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
92
94
|
}
|
93
95
|
values[i] = handle.into_value(k);
|
94
96
|
let ary = handle.ary_new_capa(v.len());
|
95
|
-
v.into_iter()
|
96
|
-
|
97
|
+
v.into_iter().try_for_each(|v| {
|
98
|
+
ary.push(v.try_into_value_with(handle)?)?;
|
99
|
+
Ok::<_, ParquetGemError>(())
|
100
|
+
})?;
|
97
101
|
values[i + 1] = handle.into_value(ary);
|
98
102
|
i += 2;
|
99
103
|
}
|
@@ -112,11 +116,11 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
112
116
|
}
|
113
117
|
|
114
118
|
pub trait TryIntoValue {
|
115
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
119
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError>;
|
116
120
|
}
|
117
121
|
|
118
122
|
impl TryIntoValue for ParquetField {
|
119
|
-
fn try_into_value_with(self, handle: &Ruby) -> Result<Value,
|
123
|
+
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
120
124
|
match self.0 {
|
121
125
|
Field::Null => Ok(handle.qnil().as_value()),
|
122
126
|
Field::Bool(b) => Ok(b.into_value_with(handle)),
|
@@ -133,7 +137,7 @@ impl TryIntoValue for ParquetField {
|
|
133
137
|
Field::Str(s) => {
|
134
138
|
if self.1 {
|
135
139
|
Ok(simdutf8::basic::from_utf8(s.as_bytes())
|
136
|
-
.map_err(|e|
|
140
|
+
.map_err(|e| ParquetGemError::Utf8Error(e))
|
137
141
|
.and_then(|s| Ok(s.into_value_with(handle)))?)
|
138
142
|
} else {
|
139
143
|
let s = String::from_utf8_lossy(s.as_bytes());
|
@@ -165,7 +169,8 @@ impl TryIntoValue for ParquetField {
|
|
165
169
|
let elements = list.elements();
|
166
170
|
let ary = handle.ary_new_capa(elements.len());
|
167
171
|
elements.iter().try_for_each(|e| {
|
168
|
-
ary.push(ParquetField(e.clone(), self.1).try_into_value_with(handle)?)
|
172
|
+
ary.push(ParquetField(e.clone(), self.1).try_into_value_with(handle)?)?;
|
173
|
+
Ok::<_, ParquetGemError>(())
|
169
174
|
})?;
|
170
175
|
Ok(ary.into_value_with(handle))
|
171
176
|
}
|
@@ -176,7 +181,8 @@ impl TryIntoValue for ParquetField {
|
|
176
181
|
hash.aset(
|
177
182
|
ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
|
178
183
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
179
|
-
)
|
184
|
+
)?;
|
185
|
+
Ok::<_, ParquetGemError>(())
|
180
186
|
})?;
|
181
187
|
Ok(hash.into_value_with(handle))
|
182
188
|
}
|
@@ -204,16 +210,11 @@ impl TryIntoValue for ParquetField {
|
|
204
210
|
hash.aset(
|
205
211
|
k.clone().into_value_with(handle),
|
206
212
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
207
|
-
)
|
213
|
+
)?;
|
214
|
+
Ok::<_, ParquetGemError>(())
|
208
215
|
})?;
|
209
216
|
Ok(hash.into_value_with(handle))
|
210
217
|
}
|
211
218
|
}
|
212
219
|
}
|
213
220
|
}
|
214
|
-
|
215
|
-
// impl IntoValue for ParquetField {
|
216
|
-
// fn into_value_with(self, handle: &Ruby) -> Value {
|
217
|
-
// self.try_into_value_with(handle).unwrap()
|
218
|
-
// }
|
219
|
-
// }
|