parquet 0.5.13 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +3 -0
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -605
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -1,1949 +0,0 @@
|
|
1
|
-
use std::str::FromStr;
|
2
|
-
use std::sync::Arc;
|
3
|
-
|
4
|
-
use super::*;
|
5
|
-
use arrow_array::builder::MapFieldNames;
|
6
|
-
use arrow_array::builder::*;
|
7
|
-
use arrow_schema::{DataType, Field, Fields, TimeUnit};
|
8
|
-
use jiff::tz::{Offset, TimeZone};
|
9
|
-
use magnus::{RArray, RString, TryConvert};
|
10
|
-
|
11
|
-
pub struct NumericConverter<T> {
|
12
|
-
_phantom: std::marker::PhantomData<T>,
|
13
|
-
}
|
14
|
-
|
15
|
-
impl<T> NumericConverter<T>
|
16
|
-
where
|
17
|
-
T: TryConvert + FromStr,
|
18
|
-
<T as FromStr>::Err: std::fmt::Display,
|
19
|
-
{
|
20
|
-
pub fn convert_with_string_fallback(ruby: &Ruby, value: Value) -> Result<T, MagnusError> {
|
21
|
-
if value.is_kind_of(ruby.class_string()) {
|
22
|
-
let s = String::try_convert(value)?;
|
23
|
-
s.trim().parse::<T>().map_err(|e| {
|
24
|
-
MagnusError::new(
|
25
|
-
magnus::exception::type_error(),
|
26
|
-
format!("Failed to parse '{}' as numeric: {}", s, e),
|
27
|
-
)
|
28
|
-
})
|
29
|
-
} else {
|
30
|
-
T::try_convert(value)
|
31
|
-
}
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
pub fn convert_to_date32(
|
36
|
-
ruby: &Ruby,
|
37
|
-
value: Value,
|
38
|
-
format: Option<&str>,
|
39
|
-
) -> Result<i32, MagnusError> {
|
40
|
-
if value.is_kind_of(ruby.class_string()) {
|
41
|
-
let s = String::try_convert(value)?;
|
42
|
-
// Parse string into Date using jiff
|
43
|
-
let date = if let Some(fmt) = format {
|
44
|
-
jiff::civil::Date::strptime(fmt, &s).or_else(|e1| {
|
45
|
-
// Try parsing as DateTime and convert to Date with zero offset
|
46
|
-
jiff::civil::DateTime::strptime(fmt, &s)
|
47
|
-
.and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
|
48
|
-
.map(|dt| dt.date())
|
49
|
-
.map_err(|e2| {
|
50
|
-
MagnusError::new(
|
51
|
-
magnus::exception::type_error(),
|
52
|
-
format!(
|
53
|
-
"Failed to parse '{}' with format '{}' as date32: {} (and as datetime: {})",
|
54
|
-
s, fmt, e1, e2
|
55
|
-
),
|
56
|
-
)
|
57
|
-
})
|
58
|
-
})?
|
59
|
-
} else {
|
60
|
-
s.parse().map_err(|e| {
|
61
|
-
MagnusError::new(
|
62
|
-
magnus::exception::type_error(),
|
63
|
-
format!("Failed to parse '{}' as date32: {}", s, e),
|
64
|
-
)
|
65
|
-
})?
|
66
|
-
};
|
67
|
-
|
68
|
-
let timestamp = date.at(0, 0, 0, 0);
|
69
|
-
|
70
|
-
let x = timestamp
|
71
|
-
.to_zoned(TimeZone::fixed(Offset::constant(0)))
|
72
|
-
.map_err(|e| {
|
73
|
-
MagnusError::new(
|
74
|
-
magnus::exception::type_error(),
|
75
|
-
format!("Failed to convert date32 to timestamp: {}", e),
|
76
|
-
)
|
77
|
-
})?
|
78
|
-
.timestamp();
|
79
|
-
|
80
|
-
// Convert to epoch days
|
81
|
-
Ok((x.as_second() / 86400) as i32)
|
82
|
-
} else if value.is_kind_of(ruby.class_time()) {
|
83
|
-
// Convert Time object to epoch days
|
84
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
85
|
-
Ok(((secs as f64) / 86400.0) as i32)
|
86
|
-
} else {
|
87
|
-
Err(MagnusError::new(
|
88
|
-
magnus::exception::type_error(),
|
89
|
-
format!("Cannot convert {} to date32", unsafe { value.classname() }),
|
90
|
-
))
|
91
|
-
}
|
92
|
-
}
|
93
|
-
|
94
|
-
pub fn convert_to_timestamp_millis(
|
95
|
-
ruby: &Ruby,
|
96
|
-
value: Value,
|
97
|
-
format: Option<&str>,
|
98
|
-
) -> Result<i64, MagnusError> {
|
99
|
-
if value.is_kind_of(ruby.class_string()) {
|
100
|
-
let s = String::try_convert(value)?;
|
101
|
-
// Parse string into Timestamp using jiff
|
102
|
-
let timestamp = if let Some(fmt) = format {
|
103
|
-
jiff::Timestamp::strptime(fmt, &s)
|
104
|
-
.or_else(|e1| {
|
105
|
-
// Try parsing as DateTime and convert to Timestamp with zero offset
|
106
|
-
jiff::civil::DateTime::strptime(fmt, &s)
|
107
|
-
.and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
|
108
|
-
.map(|dt| dt.timestamp())
|
109
|
-
.map_err(|e2| {
|
110
|
-
MagnusError::new(
|
111
|
-
magnus::exception::type_error(),
|
112
|
-
format!(
|
113
|
-
"Failed to parse '{}' with format '{}' as timestamp_millis: {} (and as datetime: {})",
|
114
|
-
s, fmt, e1, e2
|
115
|
-
),
|
116
|
-
)
|
117
|
-
})
|
118
|
-
})?
|
119
|
-
} else {
|
120
|
-
s.parse().map_err(|e| {
|
121
|
-
MagnusError::new(
|
122
|
-
magnus::exception::type_error(),
|
123
|
-
format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
|
124
|
-
)
|
125
|
-
})?
|
126
|
-
};
|
127
|
-
// Convert to milliseconds
|
128
|
-
Ok(timestamp.as_millisecond())
|
129
|
-
} else if value.is_kind_of(ruby.class_time()) {
|
130
|
-
// Convert Time object to milliseconds
|
131
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
132
|
-
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
133
|
-
Ok(secs * 1000 + (usecs / 1000))
|
134
|
-
} else {
|
135
|
-
Err(MagnusError::new(
|
136
|
-
magnus::exception::type_error(),
|
137
|
-
format!("Cannot convert {} to timestamp_millis", unsafe {
|
138
|
-
value.classname()
|
139
|
-
}),
|
140
|
-
))
|
141
|
-
}
|
142
|
-
}
|
143
|
-
|
144
|
-
pub fn convert_to_timestamp_micros(
|
145
|
-
ruby: &Ruby,
|
146
|
-
value: Value,
|
147
|
-
format: Option<&str>,
|
148
|
-
) -> Result<i64, MagnusError> {
|
149
|
-
if value.is_kind_of(ruby.class_string()) {
|
150
|
-
let s = String::try_convert(value)?;
|
151
|
-
// Parse string into Timestamp using jiff
|
152
|
-
let timestamp = if let Some(fmt) = format {
|
153
|
-
jiff::Timestamp::strptime(fmt, &s).or_else(|e1| {
|
154
|
-
// Try parsing as DateTime and convert to Timestamp with zero offset
|
155
|
-
jiff::civil::DateTime::strptime(fmt, &s).and_then(|dt| {
|
156
|
-
dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
|
157
|
-
})
|
158
|
-
.map(|dt| dt.timestamp())
|
159
|
-
.map_err(|e2| {
|
160
|
-
MagnusError::new(
|
161
|
-
magnus::exception::type_error(),
|
162
|
-
format!(
|
163
|
-
"Failed to parse '{}' with format '{}' as timestamp_micros: {} (and as datetime: {})",
|
164
|
-
s, fmt, e1, e2
|
165
|
-
),
|
166
|
-
)
|
167
|
-
})
|
168
|
-
})?
|
169
|
-
} else {
|
170
|
-
s.parse().map_err(|e| {
|
171
|
-
MagnusError::new(
|
172
|
-
magnus::exception::type_error(),
|
173
|
-
format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
|
174
|
-
)
|
175
|
-
})?
|
176
|
-
};
|
177
|
-
// Convert to microseconds
|
178
|
-
Ok(timestamp.as_microsecond())
|
179
|
-
} else if value.is_kind_of(ruby.class_time()) {
|
180
|
-
// Convert Time object to microseconds
|
181
|
-
let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
|
182
|
-
let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
183
|
-
Ok(secs * 1_000_000 + usecs)
|
184
|
-
} else {
|
185
|
-
Err(MagnusError::new(
|
186
|
-
magnus::exception::type_error(),
|
187
|
-
format!("Cannot convert {} to timestamp_micros", unsafe {
|
188
|
-
value.classname()
|
189
|
-
}),
|
190
|
-
))
|
191
|
-
}
|
192
|
-
}
|
193
|
-
|
194
|
-
/// Copies the bytes of a Ruby value, converted to a Ruby string, into an owned `Vec<u8>`.
///
/// # Errors
/// Propagates the error from `to_r_string` when the value cannot be turned into a string.
pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
    let ruby_string = value.to_r_string()?;
    // SAFETY: the borrowed slice is copied into an owned Vec immediately, within this
    // same expression, so it is not held across any further calls made here.
    let bytes = unsafe { ruby_string.as_slice() }.to_vec();
    Ok(bytes)
}
|
197
|
-
|
198
|
-
pub fn convert_to_boolean(ruby: &Ruby, value: Value) -> Result<bool, MagnusError> {
|
199
|
-
if value.is_kind_of(ruby.class_string()) {
|
200
|
-
let s = String::try_convert(value)?;
|
201
|
-
s.trim().parse::<bool>().map_err(|e| {
|
202
|
-
MagnusError::new(
|
203
|
-
magnus::exception::type_error(),
|
204
|
-
format!("Failed to parse '{}' as boolean: {}", s, e),
|
205
|
-
)
|
206
|
-
})
|
207
|
-
} else {
|
208
|
-
bool::try_convert(value)
|
209
|
-
}
|
210
|
-
}
|
211
|
-
|
212
|
-
pub fn convert_to_string(value: Value) -> Result<String, MagnusError> {
|
213
|
-
String::try_convert(value).or_else(|_| {
|
214
|
-
if value.respond_to("to_s", false)? {
|
215
|
-
value.funcall::<_, _, RString>("to_s", ())?.to_string()
|
216
|
-
} else if value.respond_to("to_str", false)? {
|
217
|
-
value.funcall::<_, _, RString>("to_str", ())?.to_string()
|
218
|
-
} else {
|
219
|
-
Err(MagnusError::new(
|
220
|
-
magnus::exception::type_error(),
|
221
|
-
format!("Not able to convert {:?} to String", value),
|
222
|
-
))
|
223
|
-
}
|
224
|
-
})
|
225
|
-
}
|
226
|
-
|
227
|
-
/// Converts our custom `ParquetSchemaType` into an Arrow `DataType`.
|
228
|
-
/// This ensures proper nullability settings for nested types.
|
229
|
-
/// Converts a ParquetSchemaType to an Arrow DataType
|
230
|
-
pub fn parquet_schema_type_to_arrow_data_type(
|
231
|
-
schema_type: &ParquetSchemaType,
|
232
|
-
) -> Result<DataType, MagnusError> {
|
233
|
-
Ok(match schema_type {
|
234
|
-
ParquetSchemaType::Primitive(primative) => match primative {
|
235
|
-
PrimitiveType::Int8 => DataType::Int8,
|
236
|
-
PrimitiveType::Int16 => DataType::Int16,
|
237
|
-
PrimitiveType::Int32 => DataType::Int32,
|
238
|
-
PrimitiveType::Int64 => DataType::Int64,
|
239
|
-
PrimitiveType::UInt8 => DataType::UInt8,
|
240
|
-
PrimitiveType::UInt16 => DataType::UInt16,
|
241
|
-
PrimitiveType::UInt32 => DataType::UInt32,
|
242
|
-
PrimitiveType::UInt64 => DataType::UInt64,
|
243
|
-
PrimitiveType::Float32 => DataType::Float32,
|
244
|
-
PrimitiveType::Float64 => DataType::Float64,
|
245
|
-
PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
|
246
|
-
PrimitiveType::Decimal256(precision, scale) => DataType::Decimal256(*precision, *scale),
|
247
|
-
PrimitiveType::String => DataType::Utf8,
|
248
|
-
PrimitiveType::Binary => DataType::Binary,
|
249
|
-
PrimitiveType::Boolean => DataType::Boolean,
|
250
|
-
PrimitiveType::Date32 => DataType::Date32,
|
251
|
-
PrimitiveType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
|
252
|
-
PrimitiveType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
|
253
|
-
PrimitiveType::TimeMillis => DataType::Time32(TimeUnit::Millisecond),
|
254
|
-
PrimitiveType::TimeMicros => DataType::Time64(TimeUnit::Microsecond),
|
255
|
-
},
|
256
|
-
// For a List<T>, create a standard List in Arrow with nullable items
|
257
|
-
ParquetSchemaType::List(list_field) => {
|
258
|
-
let child_type = parquet_schema_type_to_arrow_data_type(&list_field.item_type)?;
|
259
|
-
// For a list, use empty field name to match expectations for schema_dsl test
|
260
|
-
// This is the critical fix for the schema_dsl test which expects an empty field name
|
261
|
-
// Use empty field name for all list field items - this is crucial for compatibility
|
262
|
-
DataType::List(Arc::new(Field::new(
|
263
|
-
"item",
|
264
|
-
child_type,
|
265
|
-
list_field.nullable,
|
266
|
-
)))
|
267
|
-
}
|
268
|
-
|
269
|
-
// For a Map<K, V>, ensure entries field is non-nullable and key field is non-nullable
|
270
|
-
ParquetSchemaType::Map(map_field) => {
|
271
|
-
let key_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.key_type)?;
|
272
|
-
let value_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.value_type)?;
|
273
|
-
DataType::Map(
|
274
|
-
Arc::new(Field::new(
|
275
|
-
"entries",
|
276
|
-
DataType::Struct(Fields::from(vec![
|
277
|
-
Field::new("key", key_arrow_type, false), // key must be non-null
|
278
|
-
Field::new("value", value_arrow_type, true), // value can be null
|
279
|
-
])),
|
280
|
-
/*nullable=*/ false, // crucial: entries must be non-nullable
|
281
|
-
)),
|
282
|
-
/*keys_sorted=*/ false,
|
283
|
-
)
|
284
|
-
}
|
285
|
-
ParquetSchemaType::Struct(struct_field) => {
|
286
|
-
if struct_field.fields.is_empty() {
|
287
|
-
return Err(MagnusError::new(
|
288
|
-
magnus::exception::runtime_error(),
|
289
|
-
"Cannot create a struct with zero subfields (empty struct).",
|
290
|
-
));
|
291
|
-
}
|
292
|
-
|
293
|
-
// Build arrow fields
|
294
|
-
let mut arrow_fields = Vec::with_capacity(struct_field.fields.len());
|
295
|
-
|
296
|
-
for field in &struct_field.fields {
|
297
|
-
let field_type = parquet_schema_type_to_arrow_data_type(&field.type_)?;
|
298
|
-
arrow_fields.push(Field::new(&field.name, field_type, true)); // All fields are nullable by default
|
299
|
-
}
|
300
|
-
|
301
|
-
DataType::Struct(Fields::from(arrow_fields))
|
302
|
-
}
|
303
|
-
})
|
304
|
-
}
|
305
|
-
|
306
|
-
/// Expands to an expression that converts an Arrow timestamp column into
/// `Ok(ParquetValueVec(..))`.
///
/// * `$column` — the array to downcast to `$array_type`.
/// * `$variant` — the `ParquetValue` variant built from each raw value.
/// * `$tz` — an optional timezone; it is cloned and converted with `into()` per element.
///
/// When the array reports itself as nullable, null slots become `ParquetValue::Null`;
/// otherwise every raw value is wrapped without a null check.
#[macro_export]
macro_rules! impl_timestamp_array_conversion {
    ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
        let array = downcast_array::<$array_type>($column);
        let raw_values = array.values();
        let converted = if array.is_nullable() {
            raw_values
                .iter()
                .enumerate()
                .map(|(index, raw)| match array.is_null(index) {
                    true => ParquetValue::Null,
                    false => ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())),
                })
                .collect()
        } else {
            raw_values
                .iter()
                .map(|raw| ParquetValue::$variant(*raw, $tz.clone().map(|s| s.into())))
                .collect()
        };
        Ok(ParquetValueVec(converted))
    }};
}
|
332
|
-
|
333
|
-
// Create the appropriate Arrow builder for a given ParquetSchemaType.
|
334
|
-
// We return a Box<dyn ArrayBuilder> so we can dynamically downcast.
|
335
|
-
fn create_arrow_builder_for_type(
|
336
|
-
type_: &ParquetSchemaType,
|
337
|
-
capacity: Option<usize>,
|
338
|
-
) -> Result<Box<dyn ArrayBuilder>, ParquetGemError> {
|
339
|
-
let cap = capacity.unwrap_or(1); // Default to at least capacity 1 to avoid empty builders
|
340
|
-
match type_ {
|
341
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int8) => {
|
342
|
-
Ok(Box::new(Int8Builder::with_capacity(cap)))
|
343
|
-
}
|
344
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int16) => {
|
345
|
-
Ok(Box::new(Int16Builder::with_capacity(cap)))
|
346
|
-
}
|
347
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
|
348
|
-
Ok(Box::new(Int32Builder::with_capacity(cap)))
|
349
|
-
}
|
350
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
|
351
|
-
Ok(Box::new(Int64Builder::with_capacity(cap)))
|
352
|
-
}
|
353
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
|
354
|
-
Ok(Box::new(UInt8Builder::with_capacity(cap)))
|
355
|
-
}
|
356
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
|
357
|
-
Ok(Box::new(UInt16Builder::with_capacity(cap)))
|
358
|
-
}
|
359
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
|
360
|
-
Ok(Box::new(UInt32Builder::with_capacity(cap)))
|
361
|
-
}
|
362
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
|
363
|
-
Ok(Box::new(UInt64Builder::with_capacity(cap)))
|
364
|
-
}
|
365
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
|
366
|
-
Ok(Box::new(Float32Builder::with_capacity(cap)))
|
367
|
-
}
|
368
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
|
369
|
-
Ok(Box::new(Float64Builder::with_capacity(cap)))
|
370
|
-
}
|
371
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
|
372
|
-
// Create a Decimal128Builder with specific precision and scale
|
373
|
-
let builder = Decimal128Builder::with_capacity(cap);
|
374
|
-
|
375
|
-
// Set precision and scale for the decimal and return the new builder
|
376
|
-
let builder_with_precision = builder
|
377
|
-
.with_precision_and_scale(*precision, *scale)
|
378
|
-
.map_err(|e| {
|
379
|
-
MagnusError::new(
|
380
|
-
magnus::exception::runtime_error(),
|
381
|
-
format!("Failed to set precision and scale: {}", e),
|
382
|
-
)
|
383
|
-
})?;
|
384
|
-
|
385
|
-
Ok(Box::new(builder_with_precision))
|
386
|
-
}
|
387
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(precision, scale)) => {
|
388
|
-
// Create a Decimal128Builder since we're truncating Decimal256 to Decimal128
|
389
|
-
let builder = Decimal256Builder::with_capacity(cap);
|
390
|
-
|
391
|
-
// Set precision and scale for the decimal and return the new builder
|
392
|
-
let builder_with_precision = builder
|
393
|
-
.with_precision_and_scale(*precision, *scale)
|
394
|
-
.map_err(|e| {
|
395
|
-
MagnusError::new(
|
396
|
-
magnus::exception::runtime_error(),
|
397
|
-
format!("Failed to set precision and scale: {}", e),
|
398
|
-
)
|
399
|
-
})?;
|
400
|
-
|
401
|
-
Ok(Box::new(builder_with_precision))
|
402
|
-
}
|
403
|
-
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
404
|
-
Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
|
405
|
-
}
|
406
|
-
ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
|
407
|
-
Ok(Box::new(BinaryBuilder::with_capacity(cap, cap * 32)))
|
408
|
-
}
|
409
|
-
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
|
410
|
-
Ok(Box::new(BooleanBuilder::with_capacity(cap)))
|
411
|
-
}
|
412
|
-
ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
|
413
|
-
Ok(Box::new(Date32Builder::with_capacity(cap)))
|
414
|
-
}
|
415
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
|
416
|
-
Ok(Box::new(TimestampMillisecondBuilder::with_capacity(cap)))
|
417
|
-
}
|
418
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
|
419
|
-
Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
|
420
|
-
}
|
421
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
|
422
|
-
Ok(Box::new(Time32MillisecondBuilder::with_capacity(cap)))
|
423
|
-
}
|
424
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
|
425
|
-
Ok(Box::new(Time64MicrosecondBuilder::with_capacity(cap)))
|
426
|
-
}
|
427
|
-
ParquetSchemaType::List(list_field) => {
|
428
|
-
// For a list, we create a ListBuilder whose child builder is determined by item_type.
|
429
|
-
// Pass through capacity to ensure consistent sizing
|
430
|
-
let child_builder = create_arrow_builder_for_type(&list_field.item_type, Some(cap))?;
|
431
|
-
|
432
|
-
// Ensure consistent builder capacity for lists
|
433
|
-
Ok(Box::new(ListBuilder::<Box<dyn ArrayBuilder>>::new(
|
434
|
-
child_builder,
|
435
|
-
)))
|
436
|
-
}
|
437
|
-
ParquetSchemaType::Map(map_field) => {
|
438
|
-
// A Map is physically a list<struct<key:..., value:...>> in Arrow.
|
439
|
-
// Pass through capacity to ensure consistent sizing
|
440
|
-
let key_builder = create_arrow_builder_for_type(&map_field.key_type, Some(cap))?;
|
441
|
-
let value_builder = create_arrow_builder_for_type(&map_field.value_type, Some(cap))?;
|
442
|
-
|
443
|
-
// Create a MapBuilder with explicit field names to ensure compatibility
|
444
|
-
Ok(Box::new(MapBuilder::<
|
445
|
-
Box<dyn ArrayBuilder>,
|
446
|
-
Box<dyn ArrayBuilder>,
|
447
|
-
>::new(
|
448
|
-
Some(MapFieldNames {
|
449
|
-
entry: "entries".to_string(),
|
450
|
-
key: "key".to_string(),
|
451
|
-
value: "value".to_string(),
|
452
|
-
}),
|
453
|
-
key_builder,
|
454
|
-
value_builder,
|
455
|
-
)))
|
456
|
-
}
|
457
|
-
ParquetSchemaType::Struct(struct_field) => {
|
458
|
-
// Check for empty struct immediately
|
459
|
-
if struct_field.fields.is_empty() {
|
460
|
-
Err(MagnusError::new(
|
461
|
-
magnus::exception::runtime_error(),
|
462
|
-
"Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
|
463
|
-
))?;
|
464
|
-
}
|
465
|
-
|
466
|
-
// Create a child builder for each field in the struct
|
467
|
-
let mut child_field_builders = Vec::with_capacity(struct_field.fields.len());
|
468
|
-
|
469
|
-
// Get struct data type first to ensure field compatibility
|
470
|
-
let data_type = parquet_schema_type_to_arrow_data_type(type_)?;
|
471
|
-
|
472
|
-
// Make sure the data type is a struct
|
473
|
-
let arrow_fields = if let DataType::Struct(ref fields) = data_type {
|
474
|
-
fields.clone()
|
475
|
-
} else {
|
476
|
-
return Err(MagnusError::new(
|
477
|
-
magnus::exception::type_error(),
|
478
|
-
"Expected struct data type".to_string(),
|
479
|
-
))?;
|
480
|
-
};
|
481
|
-
|
482
|
-
// Create builders for each child field with consistent capacity
|
483
|
-
for child in &struct_field.fields {
|
484
|
-
let sub_builder = create_arrow_builder_for_type(&child.type_, Some(cap))?;
|
485
|
-
child_field_builders.push(sub_builder);
|
486
|
-
}
|
487
|
-
|
488
|
-
// Make sure we have the right number of builders
|
489
|
-
if child_field_builders.len() != arrow_fields.len() {
|
490
|
-
Err(MagnusError::new(
|
491
|
-
magnus::exception::runtime_error(),
|
492
|
-
format!(
|
493
|
-
"Number of field builders ({}) doesn't match number of arrow fields ({})",
|
494
|
-
child_field_builders.len(),
|
495
|
-
arrow_fields.len()
|
496
|
-
),
|
497
|
-
))?;
|
498
|
-
}
|
499
|
-
|
500
|
-
// Create the StructBuilder with the fields and child builders
|
501
|
-
Ok(Box::new(StructBuilder::new(
|
502
|
-
arrow_fields,
|
503
|
-
child_field_builders,
|
504
|
-
)))
|
505
|
-
}
|
506
|
-
}
|
507
|
-
}
|
508
|
-
|
509
|
-
// Fill primitive scalar Int8 values
|
510
|
-
fn fill_int8_builder(
|
511
|
-
builder: &mut dyn ArrayBuilder,
|
512
|
-
values: &[ParquetValue],
|
513
|
-
) -> Result<(), MagnusError> {
|
514
|
-
let typed_builder = builder
|
515
|
-
.as_any_mut()
|
516
|
-
.downcast_mut::<Int8Builder>()
|
517
|
-
.expect("Builder mismatch: expected Int8Builder");
|
518
|
-
for val in values {
|
519
|
-
match val {
|
520
|
-
ParquetValue::Int8(i) => typed_builder.append_value(*i),
|
521
|
-
// Handle Int64 that could be an Int8
|
522
|
-
ParquetValue::Int64(i) => {
|
523
|
-
if *i < i8::MIN as i64 || *i > i8::MAX as i64 {
|
524
|
-
return Err(MagnusError::new(
|
525
|
-
magnus::exception::range_error(),
|
526
|
-
format!("Integer {} is out of range for Int8", i),
|
527
|
-
));
|
528
|
-
}
|
529
|
-
typed_builder.append_value(*i as i8)
|
530
|
-
}
|
531
|
-
ParquetValue::Null => typed_builder.append_null(),
|
532
|
-
other => {
|
533
|
-
return Err(MagnusError::new(
|
534
|
-
magnus::exception::type_error(),
|
535
|
-
format!("Expected Int8, got {:?}", other),
|
536
|
-
))
|
537
|
-
}
|
538
|
-
}
|
539
|
-
}
|
540
|
-
Ok(())
|
541
|
-
}
|
542
|
-
|
543
|
-
// Fill primitive scalar Int16 values
|
544
|
-
fn fill_int16_builder(
|
545
|
-
builder: &mut dyn ArrayBuilder,
|
546
|
-
values: &[ParquetValue],
|
547
|
-
) -> Result<(), MagnusError> {
|
548
|
-
let typed_builder = builder
|
549
|
-
.as_any_mut()
|
550
|
-
.downcast_mut::<Int16Builder>()
|
551
|
-
.expect("Builder mismatch: expected Int16Builder");
|
552
|
-
for val in values {
|
553
|
-
match val {
|
554
|
-
ParquetValue::Int16(i) => typed_builder.append_value(*i),
|
555
|
-
// Handle Int64 that could be an Int16
|
556
|
-
ParquetValue::Int64(i) => {
|
557
|
-
if *i < i16::MIN as i64 || *i > i16::MAX as i64 {
|
558
|
-
return Err(MagnusError::new(
|
559
|
-
magnus::exception::range_error(),
|
560
|
-
format!("Integer {} is out of range for Int16", i),
|
561
|
-
));
|
562
|
-
}
|
563
|
-
typed_builder.append_value(*i as i16)
|
564
|
-
}
|
565
|
-
ParquetValue::Null => typed_builder.append_null(),
|
566
|
-
other => {
|
567
|
-
return Err(MagnusError::new(
|
568
|
-
magnus::exception::type_error(),
|
569
|
-
format!("Expected Int16, got {:?}", other),
|
570
|
-
))
|
571
|
-
}
|
572
|
-
}
|
573
|
-
}
|
574
|
-
Ok(())
|
575
|
-
}
|
576
|
-
|
577
|
-
// Fill list values by recursively filling child items
|
578
|
-
fn fill_list_builder(
|
579
|
-
builder: &mut dyn ArrayBuilder,
|
580
|
-
item_type: &ParquetSchemaType,
|
581
|
-
values: &[ParquetValue],
|
582
|
-
) -> Result<(), MagnusError> {
|
583
|
-
// We need to use a more specific type for ListBuilder to help Rust's type inference
|
584
|
-
let lb = builder
|
585
|
-
.as_any_mut()
|
586
|
-
.downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
|
587
|
-
.expect("Builder mismatch: expected ListBuilder");
|
588
|
-
|
589
|
-
for val in values {
|
590
|
-
if let ParquetValue::Null = val {
|
591
|
-
// null list
|
592
|
-
lb.append(false);
|
593
|
-
} else if let ParquetValue::List(list_items) = val {
|
594
|
-
// First fill the child builder with the items
|
595
|
-
let values_builder = lb.values();
|
596
|
-
fill_builder(values_builder, item_type, list_items)?;
|
597
|
-
// Then finalize the list by calling append(true)
|
598
|
-
lb.append(true);
|
599
|
-
} else {
|
600
|
-
return Err(MagnusError::new(
|
601
|
-
magnus::exception::type_error(),
|
602
|
-
format!("Expected ParquetValue::List(...) or Null, got {:?}", val),
|
603
|
-
));
|
604
|
-
}
|
605
|
-
}
|
606
|
-
|
607
|
-
Ok(())
|
608
|
-
}
|
609
|
-
|
610
|
-
// Fill map values by recursively filling key and value items
|
611
|
-
fn fill_map_builder(
|
612
|
-
builder: &mut dyn ArrayBuilder,
|
613
|
-
key_type: &ParquetSchemaType,
|
614
|
-
value_type: &ParquetSchemaType,
|
615
|
-
values: &[ParquetValue],
|
616
|
-
) -> Result<(), MagnusError> {
|
617
|
-
let mb = builder
|
618
|
-
.as_any_mut()
|
619
|
-
.downcast_mut::<MapBuilder<Box<dyn ArrayBuilder>, Box<dyn ArrayBuilder>>>()
|
620
|
-
.expect("Builder mismatch: expected MapBuilder");
|
621
|
-
|
622
|
-
for val in values {
|
623
|
-
match val {
|
624
|
-
ParquetValue::Null => {
|
625
|
-
// null map
|
626
|
-
mb.append(false).map_err(|e| {
|
627
|
-
MagnusError::new(
|
628
|
-
magnus::exception::runtime_error(),
|
629
|
-
format!("Failed to append null to map: {}", e),
|
630
|
-
)
|
631
|
-
})?;
|
632
|
-
}
|
633
|
-
ParquetValue::Map(map_entries) => {
|
634
|
-
// First append all key-value pairs to the child arrays
|
635
|
-
for (k, v) in map_entries {
|
636
|
-
// Note: Arrow expects field names "key" and "value" (singular)
|
637
|
-
fill_builder(mb.keys(), key_type, &[k.clone()])?;
|
638
|
-
fill_builder(mb.values(), value_type, &[v.clone()])?;
|
639
|
-
}
|
640
|
-
// Then finalize the map by calling append(true)
|
641
|
-
mb.append(true).map_err(|e| {
|
642
|
-
MagnusError::new(
|
643
|
-
magnus::exception::runtime_error(),
|
644
|
-
format!("Failed to append map entry: {}", e),
|
645
|
-
)
|
646
|
-
})?;
|
647
|
-
}
|
648
|
-
other => {
|
649
|
-
return Err(MagnusError::new(
|
650
|
-
magnus::exception::type_error(),
|
651
|
-
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
652
|
-
))
|
653
|
-
}
|
654
|
-
}
|
655
|
-
}
|
656
|
-
|
657
|
-
Ok(())
|
658
|
-
}
|
659
|
-
|
660
|
-
// Append an entire slice of ParquetValue into the given Arrow builder.
|
661
|
-
// We do a `match` on the type for each item, recursing for nested list/map.
|
662
|
-
fn fill_builder(
|
663
|
-
builder: &mut dyn ArrayBuilder,
|
664
|
-
type_: &ParquetSchemaType,
|
665
|
-
values: &[ParquetValue],
|
666
|
-
) -> Result<(), MagnusError> {
|
667
|
-
match type_ {
|
668
|
-
// ------------------
|
669
|
-
// PRIMITIVE SCALARS - delegated to specialized helpers
|
670
|
-
// ------------------
|
671
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int8) => fill_int8_builder(builder, values),
|
672
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int16) => fill_int16_builder(builder, values),
|
673
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
|
674
|
-
let typed_builder = builder
|
675
|
-
.as_any_mut()
|
676
|
-
.downcast_mut::<Int32Builder>()
|
677
|
-
.expect("Builder mismatch: expected Int32Builder");
|
678
|
-
for val in values {
|
679
|
-
match val {
|
680
|
-
ParquetValue::Int32(i) => typed_builder.append_value(*i),
|
681
|
-
ParquetValue::Date32(d) => typed_builder.append_value(*d), // if you allow date->int
|
682
|
-
// Handle the case where we have an Int64 in an Int32 field (common with Ruby Integers)
|
683
|
-
ParquetValue::Int64(i) => {
|
684
|
-
if *i < i32::MIN as i64 || *i > i32::MAX as i64 {
|
685
|
-
return Err(MagnusError::new(
|
686
|
-
magnus::exception::range_error(),
|
687
|
-
format!("Integer {} is out of range for Int32", i),
|
688
|
-
));
|
689
|
-
}
|
690
|
-
typed_builder.append_value(*i as i32)
|
691
|
-
}
|
692
|
-
ParquetValue::Null => typed_builder.append_null(),
|
693
|
-
other => {
|
694
|
-
return Err(MagnusError::new(
|
695
|
-
magnus::exception::type_error(),
|
696
|
-
format!("Expected Int32, got {:?}", other),
|
697
|
-
))
|
698
|
-
}
|
699
|
-
}
|
700
|
-
}
|
701
|
-
Ok(())
|
702
|
-
}
|
703
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
|
704
|
-
let typed_builder = builder
|
705
|
-
.as_any_mut()
|
706
|
-
.downcast_mut::<Int64Builder>()
|
707
|
-
.expect("Builder mismatch: expected Int64Builder");
|
708
|
-
for val in values {
|
709
|
-
match val {
|
710
|
-
ParquetValue::Int64(i) => typed_builder.append_value(*i),
|
711
|
-
ParquetValue::Null => typed_builder.append_null(),
|
712
|
-
other => {
|
713
|
-
return Err(MagnusError::new(
|
714
|
-
magnus::exception::type_error(),
|
715
|
-
format!("Expected Int64, got {:?}", other),
|
716
|
-
))
|
717
|
-
}
|
718
|
-
}
|
719
|
-
}
|
720
|
-
Ok(())
|
721
|
-
}
|
722
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
|
723
|
-
let typed_builder = builder
|
724
|
-
.as_any_mut()
|
725
|
-
.downcast_mut::<UInt8Builder>()
|
726
|
-
.expect("Builder mismatch: expected UInt8Builder");
|
727
|
-
for val in values {
|
728
|
-
match val {
|
729
|
-
ParquetValue::UInt8(u) => typed_builder.append_value(*u),
|
730
|
-
// Handle Int64 that could be a UInt8
|
731
|
-
ParquetValue::Int64(i) => {
|
732
|
-
if *i < 0 || *i > u8::MAX as i64 {
|
733
|
-
return Err(MagnusError::new(
|
734
|
-
magnus::exception::range_error(),
|
735
|
-
format!("Integer {} is out of range for UInt8", i),
|
736
|
-
));
|
737
|
-
}
|
738
|
-
typed_builder.append_value(*i as u8)
|
739
|
-
}
|
740
|
-
ParquetValue::Null => typed_builder.append_null(),
|
741
|
-
other => {
|
742
|
-
return Err(MagnusError::new(
|
743
|
-
magnus::exception::type_error(),
|
744
|
-
format!("Expected UInt8, got {:?}", other),
|
745
|
-
))
|
746
|
-
}
|
747
|
-
}
|
748
|
-
}
|
749
|
-
Ok(())
|
750
|
-
}
|
751
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
|
752
|
-
let typed_builder = builder
|
753
|
-
.as_any_mut()
|
754
|
-
.downcast_mut::<UInt16Builder>()
|
755
|
-
.expect("Builder mismatch: expected UInt16Builder");
|
756
|
-
for val in values {
|
757
|
-
match val {
|
758
|
-
ParquetValue::UInt16(u) => typed_builder.append_value(*u),
|
759
|
-
// Handle Int64 that could be a UInt16
|
760
|
-
ParquetValue::Int64(i) => {
|
761
|
-
if *i < 0 || *i > u16::MAX as i64 {
|
762
|
-
return Err(MagnusError::new(
|
763
|
-
magnus::exception::range_error(),
|
764
|
-
format!("Integer {} is out of range for UInt16", i),
|
765
|
-
));
|
766
|
-
}
|
767
|
-
typed_builder.append_value(*i as u16)
|
768
|
-
}
|
769
|
-
ParquetValue::Null => typed_builder.append_null(),
|
770
|
-
other => {
|
771
|
-
return Err(MagnusError::new(
|
772
|
-
magnus::exception::type_error(),
|
773
|
-
format!("Expected UInt16, got {:?}", other),
|
774
|
-
))
|
775
|
-
}
|
776
|
-
}
|
777
|
-
}
|
778
|
-
Ok(())
|
779
|
-
}
|
780
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
|
781
|
-
let typed_builder = builder
|
782
|
-
.as_any_mut()
|
783
|
-
.downcast_mut::<UInt32Builder>()
|
784
|
-
.expect("Builder mismatch: expected UInt32Builder");
|
785
|
-
for val in values {
|
786
|
-
match val {
|
787
|
-
ParquetValue::UInt32(u) => typed_builder.append_value(*u),
|
788
|
-
// Handle Int64 that could be a UInt32
|
789
|
-
ParquetValue::Int64(i) => {
|
790
|
-
if *i < 0 || *i > u32::MAX as i64 {
|
791
|
-
return Err(MagnusError::new(
|
792
|
-
magnus::exception::range_error(),
|
793
|
-
format!("Integer {} is out of range for UInt32", i),
|
794
|
-
));
|
795
|
-
}
|
796
|
-
typed_builder.append_value(*i as u32)
|
797
|
-
}
|
798
|
-
ParquetValue::Null => typed_builder.append_null(),
|
799
|
-
other => {
|
800
|
-
return Err(MagnusError::new(
|
801
|
-
magnus::exception::type_error(),
|
802
|
-
format!("Expected UInt32, got {:?}", other),
|
803
|
-
))
|
804
|
-
}
|
805
|
-
}
|
806
|
-
}
|
807
|
-
Ok(())
|
808
|
-
}
|
809
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
|
810
|
-
let typed_builder = builder
|
811
|
-
.as_any_mut()
|
812
|
-
.downcast_mut::<UInt64Builder>()
|
813
|
-
.expect("Builder mismatch: expected UInt64Builder");
|
814
|
-
for val in values {
|
815
|
-
match val {
|
816
|
-
ParquetValue::UInt64(u) => typed_builder.append_value(*u),
|
817
|
-
// Handle Int64 that could be a UInt64
|
818
|
-
ParquetValue::Int64(i) => {
|
819
|
-
if *i < 0 {
|
820
|
-
return Err(MagnusError::new(
|
821
|
-
magnus::exception::range_error(),
|
822
|
-
format!("Integer {} is out of range for UInt64", i),
|
823
|
-
));
|
824
|
-
}
|
825
|
-
typed_builder.append_value(*i as u64)
|
826
|
-
}
|
827
|
-
ParquetValue::Null => typed_builder.append_null(),
|
828
|
-
other => {
|
829
|
-
return Err(MagnusError::new(
|
830
|
-
magnus::exception::type_error(),
|
831
|
-
format!("Expected UInt64, got {:?}", other),
|
832
|
-
))
|
833
|
-
}
|
834
|
-
}
|
835
|
-
}
|
836
|
-
Ok(())
|
837
|
-
}
|
838
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
|
839
|
-
let typed_builder = builder
|
840
|
-
.as_any_mut()
|
841
|
-
.downcast_mut::<Float32Builder>()
|
842
|
-
.expect("Builder mismatch: expected Float32Builder");
|
843
|
-
for val in values {
|
844
|
-
match val {
|
845
|
-
ParquetValue::Float32(f) => typed_builder.append_value(*f),
|
846
|
-
ParquetValue::Float16(fh) => typed_builder.append_value(*fh),
|
847
|
-
ParquetValue::Null => typed_builder.append_null(),
|
848
|
-
other => {
|
849
|
-
return Err(MagnusError::new(
|
850
|
-
magnus::exception::type_error(),
|
851
|
-
format!("Expected Float32, got {:?}", other),
|
852
|
-
))
|
853
|
-
}
|
854
|
-
}
|
855
|
-
}
|
856
|
-
Ok(())
|
857
|
-
}
|
858
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
|
859
|
-
let typed_builder = builder
|
860
|
-
.as_any_mut()
|
861
|
-
.downcast_mut::<Float64Builder>()
|
862
|
-
.expect("Builder mismatch: expected Float64Builder");
|
863
|
-
for val in values {
|
864
|
-
match val {
|
865
|
-
ParquetValue::Float64(f) => typed_builder.append_value(*f),
|
866
|
-
// If you want to allow f32 => f64, do so:
|
867
|
-
ParquetValue::Float32(flo) => typed_builder.append_value(*flo as f64),
|
868
|
-
ParquetValue::Null => typed_builder.append_null(),
|
869
|
-
other => {
|
870
|
-
return Err(MagnusError::new(
|
871
|
-
magnus::exception::type_error(),
|
872
|
-
format!("Expected Float64, got {:?}", other),
|
873
|
-
))
|
874
|
-
}
|
875
|
-
}
|
876
|
-
}
|
877
|
-
Ok(())
|
878
|
-
}
|
879
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_precision, scale)) => {
|
880
|
-
let typed_builder = builder
|
881
|
-
.as_any_mut()
|
882
|
-
.downcast_mut::<Decimal128Builder>()
|
883
|
-
.expect("Builder mismatch: expected Float64Builder");
|
884
|
-
|
885
|
-
for val in values {
|
886
|
-
match val {
|
887
|
-
ParquetValue::Decimal128(d, _scale) => typed_builder.append_value(*d),
|
888
|
-
ParquetValue::Float64(f) => {
|
889
|
-
// Scale the float to the desired precision and scale
|
890
|
-
let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
|
891
|
-
typed_builder.append_value(scaled_value)
|
892
|
-
}
|
893
|
-
ParquetValue::Float32(flo) => {
|
894
|
-
// Scale the float to the desired precision and scale
|
895
|
-
let scaled_value = (*flo as f64 * 10_f64.powi(*scale as i32)) as i128;
|
896
|
-
typed_builder.append_value(scaled_value)
|
897
|
-
}
|
898
|
-
ParquetValue::Int64(i) => {
|
899
|
-
// Scale the integer to the desired scale
|
900
|
-
let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
|
901
|
-
typed_builder.append_value(scaled_value)
|
902
|
-
}
|
903
|
-
ParquetValue::Int32(i) => {
|
904
|
-
// Scale the integer to the desired scale
|
905
|
-
let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
|
906
|
-
typed_builder.append_value(scaled_value)
|
907
|
-
}
|
908
|
-
ParquetValue::Null => typed_builder.append_null(),
|
909
|
-
other => {
|
910
|
-
return Err(MagnusError::new(
|
911
|
-
magnus::exception::type_error(),
|
912
|
-
format!("Expected Float64, got {:?}", other),
|
913
|
-
))
|
914
|
-
}
|
915
|
-
}
|
916
|
-
}
|
917
|
-
Ok(())
|
918
|
-
}
|
919
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_precision, scale)) => {
|
920
|
-
let typed_builder = builder
|
921
|
-
.as_any_mut()
|
922
|
-
.downcast_mut::<Decimal256Builder>()
|
923
|
-
.expect("Builder mismatch: expected Decimal256Builder for Decimal256");
|
924
|
-
|
925
|
-
for val in values {
|
926
|
-
match val {
|
927
|
-
ParquetValue::Decimal256(d, _scale) => typed_builder.append_value(*d),
|
928
|
-
ParquetValue::Decimal128(d, _scale) => {
|
929
|
-
// Convert i128 to i256
|
930
|
-
typed_builder.append_value(arrow_buffer::i256::from_i128(*d))
|
931
|
-
}
|
932
|
-
ParquetValue::Float64(f) => {
|
933
|
-
// Scale the float to the desired precision and scale
|
934
|
-
// For large values, use BigInt to avoid overflow
|
935
|
-
let scaled = *f * 10_f64.powi(*scale as i32);
|
936
|
-
if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
|
937
|
-
let scaled_value = scaled as i128;
|
938
|
-
typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
|
939
|
-
} else {
|
940
|
-
// Use BigInt for values that don't fit in i128
|
941
|
-
use num::{BigInt, FromPrimitive};
|
942
|
-
let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
|
943
|
-
MagnusError::new(
|
944
|
-
magnus::exception::type_error(),
|
945
|
-
format!("Failed to convert float {} to BigInt", f),
|
946
|
-
)
|
947
|
-
})?;
|
948
|
-
let bytes = bigint.to_signed_bytes_le();
|
949
|
-
if bytes.len() <= 32 {
|
950
|
-
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
951
|
-
[0xff; 32]
|
952
|
-
} else {
|
953
|
-
[0; 32]
|
954
|
-
};
|
955
|
-
buf[..bytes.len()].copy_from_slice(&bytes);
|
956
|
-
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
957
|
-
} else {
|
958
|
-
return Err(MagnusError::new(
|
959
|
-
magnus::exception::type_error(),
|
960
|
-
format!(
|
961
|
-
"Float value {} scaled to {} is too large for Decimal256",
|
962
|
-
f, scaled
|
963
|
-
),
|
964
|
-
));
|
965
|
-
}
|
966
|
-
}
|
967
|
-
}
|
968
|
-
ParquetValue::Float32(flo) => {
|
969
|
-
// Scale the float to the desired precision and scale
|
970
|
-
let scaled = (*flo as f64) * 10_f64.powi(*scale as i32);
|
971
|
-
if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
|
972
|
-
let scaled_value = scaled as i128;
|
973
|
-
typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
|
974
|
-
} else {
|
975
|
-
// Use BigInt for values that don't fit in i128
|
976
|
-
use num::{BigInt, FromPrimitive};
|
977
|
-
let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
|
978
|
-
MagnusError::new(
|
979
|
-
magnus::exception::type_error(),
|
980
|
-
format!("Failed to convert float {} to BigInt", flo),
|
981
|
-
)
|
982
|
-
})?;
|
983
|
-
let bytes = bigint.to_signed_bytes_le();
|
984
|
-
if bytes.len() <= 32 {
|
985
|
-
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
986
|
-
[0xff; 32]
|
987
|
-
} else {
|
988
|
-
[0; 32]
|
989
|
-
};
|
990
|
-
buf[..bytes.len()].copy_from_slice(&bytes);
|
991
|
-
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
992
|
-
} else {
|
993
|
-
return Err(MagnusError::new(
|
994
|
-
magnus::exception::type_error(),
|
995
|
-
format!(
|
996
|
-
"Float value {} scaled is too large for Decimal256",
|
997
|
-
flo
|
998
|
-
),
|
999
|
-
));
|
1000
|
-
}
|
1001
|
-
}
|
1002
|
-
}
|
1003
|
-
ParquetValue::Int64(i) => {
|
1004
|
-
// Scale the integer to the desired scale
|
1005
|
-
let base = arrow_buffer::i256::from_i128(*i as i128);
|
1006
|
-
if *scale <= 38 {
|
1007
|
-
// Can use i128 multiplication for scale <= 38
|
1008
|
-
let scale_factor =
|
1009
|
-
arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
|
1010
|
-
match base.checked_mul(scale_factor) {
|
1011
|
-
Some(scaled) => typed_builder.append_value(scaled),
|
1012
|
-
None => {
|
1013
|
-
return Err(MagnusError::new(
|
1014
|
-
magnus::exception::type_error(),
|
1015
|
-
format!(
|
1016
|
-
"Integer {} scaled by {} overflows Decimal256",
|
1017
|
-
i, scale
|
1018
|
-
),
|
1019
|
-
));
|
1020
|
-
}
|
1021
|
-
}
|
1022
|
-
} else {
|
1023
|
-
// For very large scales, use BigInt
|
1024
|
-
use num::BigInt;
|
1025
|
-
let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
|
1026
|
-
let bytes = bigint.to_signed_bytes_le();
|
1027
|
-
if bytes.len() <= 32 {
|
1028
|
-
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
1029
|
-
[0xff; 32]
|
1030
|
-
} else {
|
1031
|
-
[0; 32]
|
1032
|
-
};
|
1033
|
-
buf[..bytes.len()].copy_from_slice(&bytes);
|
1034
|
-
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
1035
|
-
} else {
|
1036
|
-
return Err(MagnusError::new(
|
1037
|
-
magnus::exception::type_error(),
|
1038
|
-
format!(
|
1039
|
-
"Integer {} scaled by {} is too large for Decimal256",
|
1040
|
-
i, scale
|
1041
|
-
),
|
1042
|
-
));
|
1043
|
-
}
|
1044
|
-
}
|
1045
|
-
}
|
1046
|
-
ParquetValue::Int32(i) => {
|
1047
|
-
// Scale the integer to the desired scale
|
1048
|
-
let base = arrow_buffer::i256::from_i128(*i as i128);
|
1049
|
-
if *scale <= 38 {
|
1050
|
-
// Can use i128 multiplication for scale <= 38
|
1051
|
-
let scale_factor =
|
1052
|
-
arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
|
1053
|
-
match base.checked_mul(scale_factor) {
|
1054
|
-
Some(scaled) => typed_builder.append_value(scaled),
|
1055
|
-
None => {
|
1056
|
-
return Err(MagnusError::new(
|
1057
|
-
magnus::exception::type_error(),
|
1058
|
-
format!(
|
1059
|
-
"Integer {} scaled by {} overflows Decimal256",
|
1060
|
-
i, scale
|
1061
|
-
),
|
1062
|
-
));
|
1063
|
-
}
|
1064
|
-
}
|
1065
|
-
} else {
|
1066
|
-
// For very large scales, use BigInt
|
1067
|
-
use num::BigInt;
|
1068
|
-
let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
|
1069
|
-
let bytes = bigint.to_signed_bytes_le();
|
1070
|
-
if bytes.len() <= 32 {
|
1071
|
-
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
1072
|
-
[0xff; 32]
|
1073
|
-
} else {
|
1074
|
-
[0; 32]
|
1075
|
-
};
|
1076
|
-
buf[..bytes.len()].copy_from_slice(&bytes);
|
1077
|
-
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
1078
|
-
} else {
|
1079
|
-
return Err(MagnusError::new(
|
1080
|
-
magnus::exception::type_error(),
|
1081
|
-
format!(
|
1082
|
-
"Integer {} scaled by {} is too large for Decimal256",
|
1083
|
-
i, scale
|
1084
|
-
),
|
1085
|
-
));
|
1086
|
-
}
|
1087
|
-
}
|
1088
|
-
}
|
1089
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1090
|
-
other => {
|
1091
|
-
return Err(MagnusError::new(
|
1092
|
-
magnus::exception::type_error(),
|
1093
|
-
format!("Expected numeric value for Decimal256, got {:?}", other),
|
1094
|
-
))
|
1095
|
-
}
|
1096
|
-
}
|
1097
|
-
}
|
1098
|
-
Ok(())
|
1099
|
-
}
|
1100
|
-
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
|
1101
|
-
let typed_builder = builder
|
1102
|
-
.as_any_mut()
|
1103
|
-
.downcast_mut::<BooleanBuilder>()
|
1104
|
-
.expect("Builder mismatch: expected BooleanBuilder");
|
1105
|
-
for val in values {
|
1106
|
-
match val {
|
1107
|
-
ParquetValue::Boolean(b) => typed_builder.append_value(*b),
|
1108
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1109
|
-
other => {
|
1110
|
-
return Err(MagnusError::new(
|
1111
|
-
magnus::exception::type_error(),
|
1112
|
-
format!("Expected Boolean, got {:?}", other),
|
1113
|
-
))
|
1114
|
-
}
|
1115
|
-
}
|
1116
|
-
}
|
1117
|
-
Ok(())
|
1118
|
-
}
|
1119
|
-
ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
|
1120
|
-
let typed_builder = builder
|
1121
|
-
.as_any_mut()
|
1122
|
-
.downcast_mut::<Date32Builder>()
|
1123
|
-
.expect("Builder mismatch: expected Date32Builder");
|
1124
|
-
for val in values {
|
1125
|
-
match val {
|
1126
|
-
ParquetValue::Date32(d) => typed_builder.append_value(*d),
|
1127
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1128
|
-
other => {
|
1129
|
-
return Err(MagnusError::new(
|
1130
|
-
magnus::exception::type_error(),
|
1131
|
-
format!("Expected Date32, got {:?}", other),
|
1132
|
-
))
|
1133
|
-
}
|
1134
|
-
}
|
1135
|
-
}
|
1136
|
-
Ok(())
|
1137
|
-
}
|
1138
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
|
1139
|
-
let typed_builder = builder
|
1140
|
-
.as_any_mut()
|
1141
|
-
.downcast_mut::<TimestampMillisecondBuilder>()
|
1142
|
-
.expect("Builder mismatch: expected TimestampMillisecondBuilder");
|
1143
|
-
for val in values {
|
1144
|
-
match val {
|
1145
|
-
ParquetValue::TimestampMillis(ts, _tz) => typed_builder.append_value(*ts),
|
1146
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1147
|
-
other => {
|
1148
|
-
return Err(MagnusError::new(
|
1149
|
-
magnus::exception::type_error(),
|
1150
|
-
format!("Expected TimestampMillis, got {:?}", other),
|
1151
|
-
))
|
1152
|
-
}
|
1153
|
-
}
|
1154
|
-
}
|
1155
|
-
Ok(())
|
1156
|
-
}
|
1157
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
|
1158
|
-
let typed_builder = builder
|
1159
|
-
.as_any_mut()
|
1160
|
-
.downcast_mut::<TimestampMicrosecondBuilder>()
|
1161
|
-
.expect("Builder mismatch: expected TimestampMicrosecondBuilder");
|
1162
|
-
for val in values {
|
1163
|
-
match val {
|
1164
|
-
ParquetValue::TimestampMicros(ts, _tz) => typed_builder.append_value(*ts),
|
1165
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1166
|
-
other => {
|
1167
|
-
return Err(MagnusError::new(
|
1168
|
-
magnus::exception::type_error(),
|
1169
|
-
format!("Expected TimestampMicros, got {:?}", other),
|
1170
|
-
))
|
1171
|
-
}
|
1172
|
-
}
|
1173
|
-
}
|
1174
|
-
Ok(())
|
1175
|
-
}
|
1176
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
|
1177
|
-
let typed_builder = builder
|
1178
|
-
.as_any_mut()
|
1179
|
-
.downcast_mut::<Time32MillisecondBuilder>()
|
1180
|
-
.expect("Builder mismatch: expected Time32MillisecondBuilder");
|
1181
|
-
for val in values {
|
1182
|
-
match val {
|
1183
|
-
ParquetValue::TimeMillis(t) => typed_builder.append_value(*t),
|
1184
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1185
|
-
other => {
|
1186
|
-
return Err(MagnusError::new(
|
1187
|
-
magnus::exception::type_error(),
|
1188
|
-
format!("Expected TimeMillis, got {:?}", other),
|
1189
|
-
))
|
1190
|
-
}
|
1191
|
-
}
|
1192
|
-
}
|
1193
|
-
Ok(())
|
1194
|
-
}
|
1195
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
|
1196
|
-
let typed_builder = builder
|
1197
|
-
.as_any_mut()
|
1198
|
-
.downcast_mut::<Time64MicrosecondBuilder>()
|
1199
|
-
.expect("Builder mismatch: expected Time64MicrosecondBuilder");
|
1200
|
-
for val in values {
|
1201
|
-
match val {
|
1202
|
-
ParquetValue::TimeMicros(t) => typed_builder.append_value(*t),
|
1203
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1204
|
-
other => {
|
1205
|
-
return Err(MagnusError::new(
|
1206
|
-
magnus::exception::type_error(),
|
1207
|
-
format!("Expected TimeMicros, got {:?}", other),
|
1208
|
-
))
|
1209
|
-
}
|
1210
|
-
}
|
1211
|
-
}
|
1212
|
-
Ok(())
|
1213
|
-
}
|
1214
|
-
|
1215
|
-
// ------------------
|
1216
|
-
// NESTED LIST - using helper function
|
1217
|
-
// ------------------
|
1218
|
-
ParquetSchemaType::List(list_field) => {
|
1219
|
-
fill_list_builder(builder, &list_field.item_type, values)
|
1220
|
-
}
|
1221
|
-
|
1222
|
-
// ------------------
|
1223
|
-
// NESTED MAP - using helper function
|
1224
|
-
// ------------------
|
1225
|
-
ParquetSchemaType::Map(map_field) => {
|
1226
|
-
fill_map_builder(builder, &map_field.key_type, &map_field.value_type, values)
|
1227
|
-
}
|
1228
|
-
|
1229
|
-
// ------------------
|
1230
|
-
// OTHER TYPES - keep as is for now
|
1231
|
-
// ------------------
|
1232
|
-
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
1233
|
-
let typed_builder = builder
|
1234
|
-
.as_any_mut()
|
1235
|
-
.downcast_mut::<StringBuilder>()
|
1236
|
-
.expect("Builder mismatch: expected StringBuilder");
|
1237
|
-
for val in values {
|
1238
|
-
match val {
|
1239
|
-
ParquetValue::String(s) => typed_builder.append_value(s),
|
1240
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1241
|
-
other => {
|
1242
|
-
return Err(MagnusError::new(
|
1243
|
-
magnus::exception::type_error(),
|
1244
|
-
format!("Expected String, got {:?}", other),
|
1245
|
-
))
|
1246
|
-
}
|
1247
|
-
}
|
1248
|
-
}
|
1249
|
-
Ok(())
|
1250
|
-
}
|
1251
|
-
ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
|
1252
|
-
let typed_builder = builder
|
1253
|
-
.as_any_mut()
|
1254
|
-
.downcast_mut::<BinaryBuilder>()
|
1255
|
-
.expect("Builder mismatch: expected BinaryBuilder");
|
1256
|
-
for val in values {
|
1257
|
-
match val {
|
1258
|
-
ParquetValue::Bytes(b) => typed_builder.append_value(b),
|
1259
|
-
ParquetValue::Null => typed_builder.append_null(),
|
1260
|
-
other => {
|
1261
|
-
return Err(MagnusError::new(
|
1262
|
-
magnus::exception::type_error(),
|
1263
|
-
format!("Expected Binary, got {:?}", other),
|
1264
|
-
))
|
1265
|
-
}
|
1266
|
-
}
|
1267
|
-
}
|
1268
|
-
Ok(())
|
1269
|
-
}
|
1270
|
-
ParquetSchemaType::Struct(struct_field) => {
|
1271
|
-
let typed_builder = builder
|
1272
|
-
.as_any_mut()
|
1273
|
-
.downcast_mut::<StructBuilder>()
|
1274
|
-
.expect("Builder mismatch: expected StructBuilder");
|
1275
|
-
|
1276
|
-
for val in values {
|
1277
|
-
match val {
|
1278
|
-
ParquetValue::Null => {
|
1279
|
-
// null struct
|
1280
|
-
typed_builder.append(false);
|
1281
|
-
}
|
1282
|
-
ParquetValue::Map(map_data) => {
|
1283
|
-
for (i, field) in struct_field.fields.iter().enumerate() {
|
1284
|
-
let field_key = ParquetValue::String(field.name.clone());
|
1285
|
-
if let Some(field_val) = map_data.get(&field_key) {
|
1286
|
-
match field_val {
|
1287
|
-
ParquetValue::Int8(x) => typed_builder
|
1288
|
-
.field_builder::<Int8Builder>(i)
|
1289
|
-
.ok_or_else(|| {
|
1290
|
-
MagnusError::new(
|
1291
|
-
magnus::exception::type_error(),
|
1292
|
-
"Failed to coerce into Int8Builder",
|
1293
|
-
)
|
1294
|
-
})?
|
1295
|
-
.append_value(*x),
|
1296
|
-
ParquetValue::Int16(x) => typed_builder
|
1297
|
-
.field_builder::<Int16Builder>(i)
|
1298
|
-
.ok_or_else(|| {
|
1299
|
-
MagnusError::new(
|
1300
|
-
magnus::exception::type_error(),
|
1301
|
-
"Failed to coerce into Int16Builder",
|
1302
|
-
)
|
1303
|
-
})?
|
1304
|
-
.append_value(*x),
|
1305
|
-
ParquetValue::Int32(x) => typed_builder
|
1306
|
-
.field_builder::<Int32Builder>(i)
|
1307
|
-
.ok_or_else(|| {
|
1308
|
-
MagnusError::new(
|
1309
|
-
magnus::exception::type_error(),
|
1310
|
-
"Failed to coerce into Int32Builder",
|
1311
|
-
)
|
1312
|
-
})?
|
1313
|
-
.append_value(*x),
|
1314
|
-
ParquetValue::Int64(x) => typed_builder
|
1315
|
-
.field_builder::<Int64Builder>(i)
|
1316
|
-
.ok_or_else(|| {
|
1317
|
-
MagnusError::new(
|
1318
|
-
magnus::exception::type_error(),
|
1319
|
-
"Failed to coerce into Int64Builder",
|
1320
|
-
)
|
1321
|
-
})?
|
1322
|
-
.append_value(*x),
|
1323
|
-
ParquetValue::UInt8(x) => typed_builder
|
1324
|
-
.field_builder::<UInt8Builder>(i)
|
1325
|
-
.ok_or_else(|| {
|
1326
|
-
MagnusError::new(
|
1327
|
-
magnus::exception::type_error(),
|
1328
|
-
"Failed to coerce into UInt8Builder",
|
1329
|
-
)
|
1330
|
-
})?
|
1331
|
-
.append_value(*x),
|
1332
|
-
ParquetValue::UInt16(x) => typed_builder
|
1333
|
-
.field_builder::<UInt16Builder>(i)
|
1334
|
-
.ok_or_else(|| {
|
1335
|
-
MagnusError::new(
|
1336
|
-
magnus::exception::type_error(),
|
1337
|
-
"Failed to coerce into UInt16Builder",
|
1338
|
-
)
|
1339
|
-
})?
|
1340
|
-
.append_value(*x),
|
1341
|
-
ParquetValue::UInt32(x) => typed_builder
|
1342
|
-
.field_builder::<UInt32Builder>(i)
|
1343
|
-
.ok_or_else(|| {
|
1344
|
-
MagnusError::new(
|
1345
|
-
magnus::exception::type_error(),
|
1346
|
-
"Failed to coerce into UInt32Builder",
|
1347
|
-
)
|
1348
|
-
})?
|
1349
|
-
.append_value(*x),
|
1350
|
-
ParquetValue::UInt64(x) => typed_builder
|
1351
|
-
.field_builder::<UInt64Builder>(i)
|
1352
|
-
.ok_or_else(|| {
|
1353
|
-
MagnusError::new(
|
1354
|
-
magnus::exception::type_error(),
|
1355
|
-
"Failed to coerce into UInt64Builder",
|
1356
|
-
)
|
1357
|
-
})?
|
1358
|
-
.append_value(*x),
|
1359
|
-
ParquetValue::Float16(_) => {
|
1360
|
-
return Err(MagnusError::new(
|
1361
|
-
magnus::exception::runtime_error(),
|
1362
|
-
"Float16 not supported",
|
1363
|
-
))
|
1364
|
-
}
|
1365
|
-
ParquetValue::Float32(x) => typed_builder
|
1366
|
-
.field_builder::<Float32Builder>(i)
|
1367
|
-
.ok_or_else(|| {
|
1368
|
-
MagnusError::new(
|
1369
|
-
magnus::exception::type_error(),
|
1370
|
-
"Failed to coerce into Float32Builder",
|
1371
|
-
)
|
1372
|
-
})?
|
1373
|
-
.append_value(*x),
|
1374
|
-
ParquetValue::Float64(x) => typed_builder
|
1375
|
-
.field_builder::<Float64Builder>(i)
|
1376
|
-
.ok_or_else(|| {
|
1377
|
-
MagnusError::new(
|
1378
|
-
magnus::exception::type_error(),
|
1379
|
-
"Failed to coerce into Float64Builder",
|
1380
|
-
)
|
1381
|
-
})?
|
1382
|
-
.append_value(*x),
|
1383
|
-
ParquetValue::Boolean(x) => typed_builder
|
1384
|
-
.field_builder::<BooleanBuilder>(i)
|
1385
|
-
.ok_or_else(|| {
|
1386
|
-
MagnusError::new(
|
1387
|
-
magnus::exception::type_error(),
|
1388
|
-
"Failed to coerce into BooleanBuilder",
|
1389
|
-
)
|
1390
|
-
})?
|
1391
|
-
.append_value(*x),
|
1392
|
-
ParquetValue::String(x) => typed_builder
|
1393
|
-
.field_builder::<StringBuilder>(i)
|
1394
|
-
.ok_or_else(|| {
|
1395
|
-
MagnusError::new(
|
1396
|
-
magnus::exception::type_error(),
|
1397
|
-
"Failed to coerce into StringBuilder",
|
1398
|
-
)
|
1399
|
-
})?
|
1400
|
-
.append_value(x),
|
1401
|
-
ParquetValue::Bytes(bytes) => typed_builder
|
1402
|
-
.field_builder::<BinaryBuilder>(i)
|
1403
|
-
.ok_or_else(|| {
|
1404
|
-
MagnusError::new(
|
1405
|
-
magnus::exception::type_error(),
|
1406
|
-
"Failed to coerce into BinaryBuilder",
|
1407
|
-
)
|
1408
|
-
})?
|
1409
|
-
.append_value(bytes),
|
1410
|
-
ParquetValue::Decimal128(x, _scale) => typed_builder
|
1411
|
-
.field_builder::<Decimal128Builder>(i)
|
1412
|
-
.ok_or_else(|| {
|
1413
|
-
MagnusError::new(
|
1414
|
-
magnus::exception::type_error(),
|
1415
|
-
"Failed to coerce into Decimal128Builder",
|
1416
|
-
)
|
1417
|
-
})?
|
1418
|
-
.append_value(*x),
|
1419
|
-
ParquetValue::Decimal256(x, _scale) => typed_builder
|
1420
|
-
.field_builder::<Decimal256Builder>(i)
|
1421
|
-
.ok_or_else(|| {
|
1422
|
-
MagnusError::new(
|
1423
|
-
magnus::exception::type_error(),
|
1424
|
-
"Failed to coerce into Decimal256Builder",
|
1425
|
-
)
|
1426
|
-
})?
|
1427
|
-
.append_value(*x),
|
1428
|
-
ParquetValue::Date32(x) => typed_builder
|
1429
|
-
.field_builder::<Date32Builder>(i)
|
1430
|
-
.ok_or_else(|| {
|
1431
|
-
MagnusError::new(
|
1432
|
-
magnus::exception::type_error(),
|
1433
|
-
"Failed to coerce into Date32Builder",
|
1434
|
-
)
|
1435
|
-
})?
|
1436
|
-
.append_value(*x),
|
1437
|
-
ParquetValue::Date64(x) => typed_builder
|
1438
|
-
.field_builder::<Date64Builder>(i)
|
1439
|
-
.ok_or_else(|| {
|
1440
|
-
MagnusError::new(
|
1441
|
-
magnus::exception::type_error(),
|
1442
|
-
"Failed to coerce into Date64Builder",
|
1443
|
-
)
|
1444
|
-
})?
|
1445
|
-
.append_value(*x),
|
1446
|
-
ParquetValue::TimestampSecond(x, _tz) => typed_builder
|
1447
|
-
.field_builder::<TimestampSecondBuilder>(i)
|
1448
|
-
.ok_or_else(|| {
|
1449
|
-
MagnusError::new(
|
1450
|
-
magnus::exception::type_error(),
|
1451
|
-
"Failed to coerce into TimestampSecondBuilder",
|
1452
|
-
)
|
1453
|
-
})?
|
1454
|
-
.append_value(*x),
|
1455
|
-
ParquetValue::TimestampMillis(x, _tz) => typed_builder
|
1456
|
-
.field_builder::<TimestampMillisecondBuilder>(i)
|
1457
|
-
.ok_or_else(|| {
|
1458
|
-
MagnusError::new(
|
1459
|
-
magnus::exception::type_error(),
|
1460
|
-
"Failed to coerce into TimestampMillisecondBuilder",
|
1461
|
-
)
|
1462
|
-
})?
|
1463
|
-
.append_value(*x),
|
1464
|
-
ParquetValue::TimestampMicros(x, _tz) => typed_builder
|
1465
|
-
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1466
|
-
.ok_or_else(|| {
|
1467
|
-
MagnusError::new(
|
1468
|
-
magnus::exception::type_error(),
|
1469
|
-
"Failed to coerce into TimestampMicrosecondBuilder",
|
1470
|
-
)
|
1471
|
-
})?
|
1472
|
-
.append_value(*x),
|
1473
|
-
ParquetValue::TimestampNanos(x, _tz) => typed_builder
|
1474
|
-
.field_builder::<TimestampNanosecondBuilder>(i)
|
1475
|
-
.ok_or_else(|| {
|
1476
|
-
MagnusError::new(
|
1477
|
-
magnus::exception::type_error(),
|
1478
|
-
"Failed to coerce into TimestampNanosecondBuilder",
|
1479
|
-
)
|
1480
|
-
})?
|
1481
|
-
.append_value(*x),
|
1482
|
-
ParquetValue::TimeMillis(x) => typed_builder
|
1483
|
-
.field_builder::<Time32MillisecondBuilder>(i)
|
1484
|
-
.ok_or_else(|| {
|
1485
|
-
MagnusError::new(
|
1486
|
-
magnus::exception::type_error(),
|
1487
|
-
"Failed to coerce into Time32MillisecondBuilder",
|
1488
|
-
)
|
1489
|
-
})?
|
1490
|
-
.append_value(*x),
|
1491
|
-
ParquetValue::TimeMicros(x) => typed_builder
|
1492
|
-
.field_builder::<Time64MicrosecondBuilder>(i)
|
1493
|
-
.ok_or_else(|| {
|
1494
|
-
MagnusError::new(
|
1495
|
-
magnus::exception::type_error(),
|
1496
|
-
"Failed to coerce into Time64MicrosecondBuilder",
|
1497
|
-
)
|
1498
|
-
})?
|
1499
|
-
.append_value(*x),
|
1500
|
-
ParquetValue::List(items) => {
|
1501
|
-
let list_builder = typed_builder
|
1502
|
-
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1503
|
-
.ok_or_else(|| {
|
1504
|
-
MagnusError::new(
|
1505
|
-
magnus::exception::type_error(),
|
1506
|
-
"Failed to coerce into ListBuilder",
|
1507
|
-
)
|
1508
|
-
})?;
|
1509
|
-
fill_builder(
|
1510
|
-
list_builder.values(),
|
1511
|
-
&struct_field.fields[i].type_,
|
1512
|
-
items,
|
1513
|
-
)?;
|
1514
|
-
list_builder.append(true);
|
1515
|
-
}
|
1516
|
-
ParquetValue::Map(map_data) => {
|
1517
|
-
let maybe_map_builder = typed_builder
|
1518
|
-
.field_builder::<MapBuilder<
|
1519
|
-
Box<dyn ArrayBuilder>,
|
1520
|
-
Box<dyn ArrayBuilder>,
|
1521
|
-
>>(i);
|
1522
|
-
|
1523
|
-
if let Some(map_builder) = maybe_map_builder {
|
1524
|
-
fill_builder(
|
1525
|
-
map_builder,
|
1526
|
-
&struct_field.fields[i].type_,
|
1527
|
-
&[ParquetValue::Map(map_data.clone())],
|
1528
|
-
)?;
|
1529
|
-
map_builder.append(true).map_err(|e| {
|
1530
|
-
MagnusError::new(
|
1531
|
-
magnus::exception::runtime_error(),
|
1532
|
-
format!("Failed to append map: {}", e),
|
1533
|
-
)
|
1534
|
-
})?;
|
1535
|
-
} else {
|
1536
|
-
let child_struct_builder = typed_builder
|
1537
|
-
.field_builder::<StructBuilder>(i)
|
1538
|
-
.ok_or_else(|| {
|
1539
|
-
MagnusError::new(
|
1540
|
-
magnus::exception::type_error(),
|
1541
|
-
"Failed to coerce into StructBuilder",
|
1542
|
-
)
|
1543
|
-
})?;
|
1544
|
-
fill_builder(
|
1545
|
-
child_struct_builder,
|
1546
|
-
&struct_field.fields[i].type_,
|
1547
|
-
&[ParquetValue::Map(map_data.clone())],
|
1548
|
-
)?;
|
1549
|
-
}
|
1550
|
-
}
|
1551
|
-
ParquetValue::Null => match struct_field.fields[i].type_ {
|
1552
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int8) => typed_builder
|
1553
|
-
.field_builder::<Int8Builder>(i)
|
1554
|
-
.ok_or_else(|| {
|
1555
|
-
MagnusError::new(
|
1556
|
-
magnus::exception::type_error(),
|
1557
|
-
"Failed to coerce into Int8Builder",
|
1558
|
-
)
|
1559
|
-
})?
|
1560
|
-
.append_null(),
|
1561
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int16) => typed_builder
|
1562
|
-
.field_builder::<Int16Builder>(i)
|
1563
|
-
.ok_or_else(|| {
|
1564
|
-
MagnusError::new(
|
1565
|
-
magnus::exception::type_error(),
|
1566
|
-
"Failed to coerce into Int16Builder",
|
1567
|
-
)
|
1568
|
-
})?
|
1569
|
-
.append_null(),
|
1570
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int32) => typed_builder
|
1571
|
-
.field_builder::<Int32Builder>(i)
|
1572
|
-
.ok_or_else(|| {
|
1573
|
-
MagnusError::new(
|
1574
|
-
magnus::exception::type_error(),
|
1575
|
-
"Failed to coerce into Int32Builder",
|
1576
|
-
)
|
1577
|
-
})?
|
1578
|
-
.append_null(),
|
1579
|
-
ParquetSchemaType::Primitive(PrimitiveType::Int64) => typed_builder
|
1580
|
-
.field_builder::<Int64Builder>(i)
|
1581
|
-
.ok_or_else(|| {
|
1582
|
-
MagnusError::new(
|
1583
|
-
magnus::exception::type_error(),
|
1584
|
-
"Failed to coerce into Int64Builder",
|
1585
|
-
)
|
1586
|
-
})?
|
1587
|
-
.append_null(),
|
1588
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt8) => typed_builder
|
1589
|
-
.field_builder::<UInt8Builder>(i)
|
1590
|
-
.ok_or_else(|| {
|
1591
|
-
MagnusError::new(
|
1592
|
-
magnus::exception::type_error(),
|
1593
|
-
"Failed to coerce into UInt8Builder",
|
1594
|
-
)
|
1595
|
-
})?
|
1596
|
-
.append_null(),
|
1597
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt16) => typed_builder
|
1598
|
-
.field_builder::<UInt16Builder>(i)
|
1599
|
-
.ok_or_else(|| {
|
1600
|
-
MagnusError::new(
|
1601
|
-
magnus::exception::type_error(),
|
1602
|
-
"Failed to coerce into UInt16Builder",
|
1603
|
-
)
|
1604
|
-
})?
|
1605
|
-
.append_null(),
|
1606
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt32) => typed_builder
|
1607
|
-
.field_builder::<UInt32Builder>(i)
|
1608
|
-
.ok_or_else(|| {
|
1609
|
-
MagnusError::new(
|
1610
|
-
magnus::exception::type_error(),
|
1611
|
-
"Failed to coerce into UInt32Builder",
|
1612
|
-
)
|
1613
|
-
})?
|
1614
|
-
.append_null(),
|
1615
|
-
ParquetSchemaType::Primitive(PrimitiveType::UInt64) => typed_builder
|
1616
|
-
.field_builder::<UInt64Builder>(i)
|
1617
|
-
.ok_or_else(|| {
|
1618
|
-
MagnusError::new(
|
1619
|
-
magnus::exception::type_error(),
|
1620
|
-
"Failed to coerce into UInt64Builder",
|
1621
|
-
)
|
1622
|
-
})?
|
1623
|
-
.append_null(),
|
1624
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float32) => typed_builder
|
1625
|
-
.field_builder::<Float32Builder>(i)
|
1626
|
-
.ok_or_else(|| {
|
1627
|
-
MagnusError::new(
|
1628
|
-
magnus::exception::type_error(),
|
1629
|
-
"Failed to coerce into Float32Builder",
|
1630
|
-
)
|
1631
|
-
})?
|
1632
|
-
.append_null(),
|
1633
|
-
ParquetSchemaType::Primitive(PrimitiveType::Float64) => typed_builder
|
1634
|
-
.field_builder::<Float64Builder>(i)
|
1635
|
-
.ok_or_else(|| {
|
1636
|
-
MagnusError::new(
|
1637
|
-
magnus::exception::type_error(),
|
1638
|
-
"Failed to coerce into Float64Builder",
|
1639
|
-
)
|
1640
|
-
})?
|
1641
|
-
.append_null(),
|
1642
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_, _)) => typed_builder
|
1643
|
-
.field_builder::<Decimal128Builder>(i)
|
1644
|
-
.ok_or_else(|| {
|
1645
|
-
MagnusError::new(
|
1646
|
-
magnus::exception::type_error(),
|
1647
|
-
"Failed to coerce into Decimal128Builder",
|
1648
|
-
)
|
1649
|
-
})?
|
1650
|
-
.append_null(),
|
1651
|
-
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_, _)) => typed_builder
|
1652
|
-
.field_builder::<Decimal256Builder>(i)
|
1653
|
-
.ok_or_else(|| {
|
1654
|
-
MagnusError::new(
|
1655
|
-
magnus::exception::type_error(),
|
1656
|
-
"Failed to coerce into Decimal256Builder for Decimal256",
|
1657
|
-
)
|
1658
|
-
})?
|
1659
|
-
.append_null(),
|
1660
|
-
ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
|
1661
|
-
.field_builder::<StringBuilder>(i)
|
1662
|
-
.ok_or_else(|| {
|
1663
|
-
MagnusError::new(
|
1664
|
-
magnus::exception::type_error(),
|
1665
|
-
"Failed to coerce into StringBuilder",
|
1666
|
-
)
|
1667
|
-
})?
|
1668
|
-
.append_null(),
|
1669
|
-
ParquetSchemaType::Primitive(PrimitiveType::Binary) => typed_builder
|
1670
|
-
.field_builder::<BinaryBuilder>(i)
|
1671
|
-
.ok_or_else(|| {
|
1672
|
-
MagnusError::new(
|
1673
|
-
magnus::exception::type_error(),
|
1674
|
-
"Failed to coerce into BinaryBuilder",
|
1675
|
-
)
|
1676
|
-
})?
|
1677
|
-
.append_null(),
|
1678
|
-
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => typed_builder
|
1679
|
-
.field_builder::<BooleanBuilder>(i)
|
1680
|
-
.ok_or_else(|| {
|
1681
|
-
MagnusError::new(
|
1682
|
-
magnus::exception::type_error(),
|
1683
|
-
"Failed to coerce into BooleanBuilder",
|
1684
|
-
)
|
1685
|
-
})?
|
1686
|
-
.append_null(),
|
1687
|
-
ParquetSchemaType::Primitive(PrimitiveType::Date32) => typed_builder
|
1688
|
-
.field_builder::<Date32Builder>(i)
|
1689
|
-
.ok_or_else(|| {
|
1690
|
-
MagnusError::new(
|
1691
|
-
magnus::exception::type_error(),
|
1692
|
-
"Failed to coerce into Date32Builder",
|
1693
|
-
)
|
1694
|
-
})?
|
1695
|
-
.append_null(),
|
1696
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => typed_builder
|
1697
|
-
.field_builder::<TimestampMillisecondBuilder>(i)
|
1698
|
-
.ok_or_else(|| {
|
1699
|
-
MagnusError::new(
|
1700
|
-
magnus::exception::type_error(),
|
1701
|
-
"Failed to coerce into TimestampMillisecondBuilder",
|
1702
|
-
)
|
1703
|
-
})?
|
1704
|
-
.append_null(),
|
1705
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => typed_builder
|
1706
|
-
.field_builder::<TimestampMicrosecondBuilder>(i)
|
1707
|
-
.ok_or_else(|| {
|
1708
|
-
MagnusError::new(
|
1709
|
-
magnus::exception::type_error(),
|
1710
|
-
"Failed to coerce into TimestampMicrosecondBuilder",
|
1711
|
-
)
|
1712
|
-
})?
|
1713
|
-
.append_null(),
|
1714
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => typed_builder
|
1715
|
-
.field_builder::<Time32MillisecondBuilder>(i)
|
1716
|
-
.ok_or_else(|| {
|
1717
|
-
MagnusError::new(
|
1718
|
-
magnus::exception::type_error(),
|
1719
|
-
"Failed to coerce into Time32MillisecondBuilder",
|
1720
|
-
)
|
1721
|
-
})?
|
1722
|
-
.append_null(),
|
1723
|
-
ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => typed_builder
|
1724
|
-
.field_builder::<Time64MicrosecondBuilder>(i)
|
1725
|
-
.ok_or_else(|| {
|
1726
|
-
MagnusError::new(
|
1727
|
-
magnus::exception::type_error(),
|
1728
|
-
"Failed to coerce into Time64MicrosecondBuilder",
|
1729
|
-
)
|
1730
|
-
})?
|
1731
|
-
.append_null(),
|
1732
|
-
ParquetSchemaType::List(_) => typed_builder
|
1733
|
-
.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
|
1734
|
-
.ok_or_else(|| {
|
1735
|
-
MagnusError::new(
|
1736
|
-
magnus::exception::type_error(),
|
1737
|
-
"Failed to coerce into ListBuilder",
|
1738
|
-
)
|
1739
|
-
})?
|
1740
|
-
.append(false),
|
1741
|
-
ParquetSchemaType::Map(_) => {
|
1742
|
-
typed_builder
|
1743
|
-
.field_builder::<MapBuilder<
|
1744
|
-
Box<dyn ArrayBuilder>,
|
1745
|
-
Box<dyn ArrayBuilder>,
|
1746
|
-
>>(i)
|
1747
|
-
.ok_or_else(|| {
|
1748
|
-
MagnusError::new(
|
1749
|
-
magnus::exception::type_error(),
|
1750
|
-
"Failed to coerce into MapBuilder",
|
1751
|
-
)
|
1752
|
-
})?
|
1753
|
-
.append(false)
|
1754
|
-
.map_err(|e| {
|
1755
|
-
MagnusError::new(
|
1756
|
-
magnus::exception::runtime_error(),
|
1757
|
-
format!("Failed to append map: {}", e),
|
1758
|
-
)
|
1759
|
-
})?;
|
1760
|
-
}
|
1761
|
-
ParquetSchemaType::Struct(_) => typed_builder
|
1762
|
-
.field_builder::<StructBuilder>(i)
|
1763
|
-
.ok_or_else(|| {
|
1764
|
-
MagnusError::new(
|
1765
|
-
magnus::exception::type_error(),
|
1766
|
-
"Failed to coerce into StructBuilder",
|
1767
|
-
)
|
1768
|
-
})?
|
1769
|
-
.append_null(),
|
1770
|
-
},
|
1771
|
-
}
|
1772
|
-
} else {
|
1773
|
-
return Err(MagnusError::new(
|
1774
|
-
magnus::exception::type_error(),
|
1775
|
-
format!("Field {} not found in map", i),
|
1776
|
-
));
|
1777
|
-
}
|
1778
|
-
}
|
1779
|
-
typed_builder.append(true);
|
1780
|
-
}
|
1781
|
-
other => {
|
1782
|
-
return Err(MagnusError::new(
|
1783
|
-
magnus::exception::type_error(),
|
1784
|
-
format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
|
1785
|
-
));
|
1786
|
-
}
|
1787
|
-
}
|
1788
|
-
}
|
1789
|
-
Ok(())
|
1790
|
-
}
|
1791
|
-
}
|
1792
|
-
}
|
1793
|
-
|
1794
|
-
/// Creates a final Arrow array from a list of ParquetValues and a schema type.
|
1795
|
-
/// This is your "unified" way to handle any nesting level.
|
1796
|
-
pub fn convert_parquet_values_to_arrow(
|
1797
|
-
values: Vec<ParquetValue>,
|
1798
|
-
type_: &ParquetSchemaType,
|
1799
|
-
) -> Result<Arc<dyn Array>, ParquetGemError> {
|
1800
|
-
// Make sure we always have at least capacity 1 to avoid empty builders
|
1801
|
-
let capacity = if values.is_empty() { 1 } else { values.len() };
|
1802
|
-
let mut builder = create_arrow_builder_for_type(type_, Some(capacity))?;
|
1803
|
-
|
1804
|
-
fill_builder(&mut builder, type_, &values)?;
|
1805
|
-
|
1806
|
-
// Finish building the array
|
1807
|
-
let array = builder.finish();
|
1808
|
-
|
1809
|
-
Ok(Arc::new(array))
|
1810
|
-
}
|
1811
|
-
|
1812
|
-
pub fn convert_ruby_array_to_arrow(
|
1813
|
-
ruby: &Ruby,
|
1814
|
-
values: RArray,
|
1815
|
-
type_: &ParquetSchemaType,
|
1816
|
-
) -> Result<Arc<dyn Array>, ParquetGemError> {
|
1817
|
-
let mut parquet_values = Vec::with_capacity(values.len());
|
1818
|
-
for value in values {
|
1819
|
-
if value.is_nil() {
|
1820
|
-
parquet_values.push(ParquetValue::Null);
|
1821
|
-
continue;
|
1822
|
-
}
|
1823
|
-
let parquet_value = ParquetValue::from_value(ruby, value, type_, None)?;
|
1824
|
-
parquet_values.push(parquet_value);
|
1825
|
-
}
|
1826
|
-
convert_parquet_values_to_arrow(parquet_values, type_)
|
1827
|
-
}
|
1828
|
-
|
1829
|
-
pub fn convert_to_time_millis(
|
1830
|
-
ruby: &Ruby,
|
1831
|
-
value: Value,
|
1832
|
-
format: Option<&str>,
|
1833
|
-
) -> Result<i32, MagnusError> {
|
1834
|
-
if value.is_kind_of(ruby.class_time()) {
|
1835
|
-
// Extract time components
|
1836
|
-
let hour = i32::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
|
1837
|
-
let min = i32::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
|
1838
|
-
let sec = i32::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
|
1839
|
-
let usec = i32::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
1840
|
-
|
1841
|
-
// Convert to milliseconds since midnight
|
1842
|
-
Ok(hour * 3600000 + min * 60000 + sec * 1000 + usec / 1000)
|
1843
|
-
} else if value.is_kind_of(ruby.class_string()) {
|
1844
|
-
let s = String::try_convert(value)?;
|
1845
|
-
|
1846
|
-
if let Some(fmt) = format {
|
1847
|
-
// Parse using the provided format
|
1848
|
-
match jiff::civil::Time::strptime(fmt, &s) {
|
1849
|
-
Ok(time) => {
|
1850
|
-
let millis = time.hour() as i32 * 3600000
|
1851
|
-
+ time.minute() as i32 * 60000
|
1852
|
-
+ time.second() as i32 * 1000
|
1853
|
-
+ time.millisecond() as i32;
|
1854
|
-
Ok(millis)
|
1855
|
-
}
|
1856
|
-
Err(e) => Err(MagnusError::new(
|
1857
|
-
magnus::exception::type_error(),
|
1858
|
-
format!(
|
1859
|
-
"Failed to parse '{}' with format '{}' as time: {}",
|
1860
|
-
s, fmt, e
|
1861
|
-
),
|
1862
|
-
)),
|
1863
|
-
}
|
1864
|
-
} else {
|
1865
|
-
// Try to parse as standard time format
|
1866
|
-
match s.parse::<jiff::civil::Time>() {
|
1867
|
-
Ok(time) => {
|
1868
|
-
let millis = time.hour() as i32 * 3600000
|
1869
|
-
+ time.minute() as i32 * 60000
|
1870
|
-
+ time.second() as i32 * 1000
|
1871
|
-
+ time.millisecond() as i32;
|
1872
|
-
Ok(millis)
|
1873
|
-
}
|
1874
|
-
Err(e) => Err(MagnusError::new(
|
1875
|
-
magnus::exception::type_error(),
|
1876
|
-
format!("Failed to parse '{}' as time: {}", s, e),
|
1877
|
-
)),
|
1878
|
-
}
|
1879
|
-
}
|
1880
|
-
} else {
|
1881
|
-
Err(MagnusError::new(
|
1882
|
-
magnus::exception::type_error(),
|
1883
|
-
format!("Cannot convert {} to time_millis", unsafe {
|
1884
|
-
value.classname()
|
1885
|
-
}),
|
1886
|
-
))
|
1887
|
-
}
|
1888
|
-
}
|
1889
|
-
|
1890
|
-
pub fn convert_to_time_micros(
|
1891
|
-
ruby: &Ruby,
|
1892
|
-
value: Value,
|
1893
|
-
format: Option<&str>,
|
1894
|
-
) -> Result<i64, MagnusError> {
|
1895
|
-
if value.is_kind_of(ruby.class_time()) {
|
1896
|
-
// Extract time components
|
1897
|
-
let hour = i64::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
|
1898
|
-
let min = i64::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
|
1899
|
-
let sec = i64::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
|
1900
|
-
let usec = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
|
1901
|
-
|
1902
|
-
// Convert to microseconds since midnight
|
1903
|
-
Ok(hour * 3600000000 + min * 60000000 + sec * 1000000 + usec)
|
1904
|
-
} else if value.is_kind_of(ruby.class_string()) {
|
1905
|
-
let s = String::try_convert(value)?;
|
1906
|
-
|
1907
|
-
if let Some(fmt) = format {
|
1908
|
-
// Parse using the provided format
|
1909
|
-
match jiff::civil::Time::strptime(fmt, &s) {
|
1910
|
-
Ok(time) => {
|
1911
|
-
let micros = time.hour() as i64 * 3600000000
|
1912
|
-
+ time.minute() as i64 * 60000000
|
1913
|
-
+ time.second() as i64 * 1000000
|
1914
|
-
+ time.microsecond() as i64;
|
1915
|
-
Ok(micros)
|
1916
|
-
}
|
1917
|
-
Err(e) => Err(MagnusError::new(
|
1918
|
-
magnus::exception::type_error(),
|
1919
|
-
format!(
|
1920
|
-
"Failed to parse '{}' with format '{}' as time: {}",
|
1921
|
-
s, fmt, e
|
1922
|
-
),
|
1923
|
-
)),
|
1924
|
-
}
|
1925
|
-
} else {
|
1926
|
-
// Try to parse as standard time format
|
1927
|
-
match s.parse::<jiff::civil::Time>() {
|
1928
|
-
Ok(time) => {
|
1929
|
-
let micros = time.hour() as i64 * 3600000000
|
1930
|
-
+ time.minute() as i64 * 60000000
|
1931
|
-
+ time.second() as i64 * 1000000
|
1932
|
-
+ time.microsecond() as i64;
|
1933
|
-
Ok(micros)
|
1934
|
-
}
|
1935
|
-
Err(e) => Err(MagnusError::new(
|
1936
|
-
magnus::exception::type_error(),
|
1937
|
-
format!("Failed to parse '{}' as time: {}", s, e),
|
1938
|
-
)),
|
1939
|
-
}
|
1940
|
-
}
|
1941
|
-
} else {
|
1942
|
-
Err(MagnusError::new(
|
1943
|
-
magnus::exception::type_error(),
|
1944
|
-
format!("Cannot convert {} to time_micros", unsafe {
|
1945
|
-
value.classname()
|
1946
|
-
}),
|
1947
|
-
))
|
1948
|
-
}
|
1949
|
-
}
|