parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -1,1949 +0,0 @@
1
- use std::str::FromStr;
2
- use std::sync::Arc;
3
-
4
- use super::*;
5
- use arrow_array::builder::MapFieldNames;
6
- use arrow_array::builder::*;
7
- use arrow_schema::{DataType, Field, Fields, TimeUnit};
8
- use jiff::tz::{Offset, TimeZone};
9
- use magnus::{RArray, RString, TryConvert};
10
-
11
- pub struct NumericConverter<T> {
12
- _phantom: std::marker::PhantomData<T>,
13
- }
14
-
15
- impl<T> NumericConverter<T>
16
- where
17
- T: TryConvert + FromStr,
18
- <T as FromStr>::Err: std::fmt::Display,
19
- {
20
- pub fn convert_with_string_fallback(ruby: &Ruby, value: Value) -> Result<T, MagnusError> {
21
- if value.is_kind_of(ruby.class_string()) {
22
- let s = String::try_convert(value)?;
23
- s.trim().parse::<T>().map_err(|e| {
24
- MagnusError::new(
25
- magnus::exception::type_error(),
26
- format!("Failed to parse '{}' as numeric: {}", s, e),
27
- )
28
- })
29
- } else {
30
- T::try_convert(value)
31
- }
32
- }
33
- }
34
-
35
- pub fn convert_to_date32(
36
- ruby: &Ruby,
37
- value: Value,
38
- format: Option<&str>,
39
- ) -> Result<i32, MagnusError> {
40
- if value.is_kind_of(ruby.class_string()) {
41
- let s = String::try_convert(value)?;
42
- // Parse string into Date using jiff
43
- let date = if let Some(fmt) = format {
44
- jiff::civil::Date::strptime(fmt, &s).or_else(|e1| {
45
- // Try parsing as DateTime and convert to Date with zero offset
46
- jiff::civil::DateTime::strptime(fmt, &s)
47
- .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
48
- .map(|dt| dt.date())
49
- .map_err(|e2| {
50
- MagnusError::new(
51
- magnus::exception::type_error(),
52
- format!(
53
- "Failed to parse '{}' with format '{}' as date32: {} (and as datetime: {})",
54
- s, fmt, e1, e2
55
- ),
56
- )
57
- })
58
- })?
59
- } else {
60
- s.parse().map_err(|e| {
61
- MagnusError::new(
62
- magnus::exception::type_error(),
63
- format!("Failed to parse '{}' as date32: {}", s, e),
64
- )
65
- })?
66
- };
67
-
68
- let timestamp = date.at(0, 0, 0, 0);
69
-
70
- let x = timestamp
71
- .to_zoned(TimeZone::fixed(Offset::constant(0)))
72
- .map_err(|e| {
73
- MagnusError::new(
74
- magnus::exception::type_error(),
75
- format!("Failed to convert date32 to timestamp: {}", e),
76
- )
77
- })?
78
- .timestamp();
79
-
80
- // Convert to epoch days
81
- Ok((x.as_second() / 86400) as i32)
82
- } else if value.is_kind_of(ruby.class_time()) {
83
- // Convert Time object to epoch days
84
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
85
- Ok(((secs as f64) / 86400.0) as i32)
86
- } else {
87
- Err(MagnusError::new(
88
- magnus::exception::type_error(),
89
- format!("Cannot convert {} to date32", unsafe { value.classname() }),
90
- ))
91
- }
92
- }
93
-
94
- pub fn convert_to_timestamp_millis(
95
- ruby: &Ruby,
96
- value: Value,
97
- format: Option<&str>,
98
- ) -> Result<i64, MagnusError> {
99
- if value.is_kind_of(ruby.class_string()) {
100
- let s = String::try_convert(value)?;
101
- // Parse string into Timestamp using jiff
102
- let timestamp = if let Some(fmt) = format {
103
- jiff::Timestamp::strptime(fmt, &s)
104
- .or_else(|e1| {
105
- // Try parsing as DateTime and convert to Timestamp with zero offset
106
- jiff::civil::DateTime::strptime(fmt, &s)
107
- .and_then(|dt| dt.to_zoned(TimeZone::fixed(Offset::constant(0))))
108
- .map(|dt| dt.timestamp())
109
- .map_err(|e2| {
110
- MagnusError::new(
111
- magnus::exception::type_error(),
112
- format!(
113
- "Failed to parse '{}' with format '{}' as timestamp_millis: {} (and as datetime: {})",
114
- s, fmt, e1, e2
115
- ),
116
- )
117
- })
118
- })?
119
- } else {
120
- s.parse().map_err(|e| {
121
- MagnusError::new(
122
- magnus::exception::type_error(),
123
- format!("Failed to parse '{}' as timestamp_millis: {}", s, e),
124
- )
125
- })?
126
- };
127
- // Convert to milliseconds
128
- Ok(timestamp.as_millisecond())
129
- } else if value.is_kind_of(ruby.class_time()) {
130
- // Convert Time object to milliseconds
131
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
132
- let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
133
- Ok(secs * 1000 + (usecs / 1000))
134
- } else {
135
- Err(MagnusError::new(
136
- magnus::exception::type_error(),
137
- format!("Cannot convert {} to timestamp_millis", unsafe {
138
- value.classname()
139
- }),
140
- ))
141
- }
142
- }
143
-
144
- pub fn convert_to_timestamp_micros(
145
- ruby: &Ruby,
146
- value: Value,
147
- format: Option<&str>,
148
- ) -> Result<i64, MagnusError> {
149
- if value.is_kind_of(ruby.class_string()) {
150
- let s = String::try_convert(value)?;
151
- // Parse string into Timestamp using jiff
152
- let timestamp = if let Some(fmt) = format {
153
- jiff::Timestamp::strptime(fmt, &s).or_else(|e1| {
154
- // Try parsing as DateTime and convert to Timestamp with zero offset
155
- jiff::civil::DateTime::strptime(fmt, &s).and_then(|dt| {
156
- dt.to_zoned(TimeZone::fixed(Offset::constant(0)))
157
- })
158
- .map(|dt| dt.timestamp())
159
- .map_err(|e2| {
160
- MagnusError::new(
161
- magnus::exception::type_error(),
162
- format!(
163
- "Failed to parse '{}' with format '{}' as timestamp_micros: {} (and as datetime: {})",
164
- s, fmt, e1, e2
165
- ),
166
- )
167
- })
168
- })?
169
- } else {
170
- s.parse().map_err(|e| {
171
- MagnusError::new(
172
- magnus::exception::type_error(),
173
- format!("Failed to parse '{}' as timestamp_micros: {}", s, e),
174
- )
175
- })?
176
- };
177
- // Convert to microseconds
178
- Ok(timestamp.as_microsecond())
179
- } else if value.is_kind_of(ruby.class_time()) {
180
- // Convert Time object to microseconds
181
- let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
182
- let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
183
- Ok(secs * 1_000_000 + usecs)
184
- } else {
185
- Err(MagnusError::new(
186
- magnus::exception::type_error(),
187
- format!("Cannot convert {} to timestamp_micros", unsafe {
188
- value.classname()
189
- }),
190
- ))
191
- }
192
- }
193
-
194
- pub fn convert_to_binary(value: Value) -> Result<Vec<u8>, MagnusError> {
195
- Ok(unsafe { value.to_r_string()?.as_slice() }.to_vec())
196
- }
197
-
198
- pub fn convert_to_boolean(ruby: &Ruby, value: Value) -> Result<bool, MagnusError> {
199
- if value.is_kind_of(ruby.class_string()) {
200
- let s = String::try_convert(value)?;
201
- s.trim().parse::<bool>().map_err(|e| {
202
- MagnusError::new(
203
- magnus::exception::type_error(),
204
- format!("Failed to parse '{}' as boolean: {}", s, e),
205
- )
206
- })
207
- } else {
208
- bool::try_convert(value)
209
- }
210
- }
211
-
212
- pub fn convert_to_string(value: Value) -> Result<String, MagnusError> {
213
- String::try_convert(value).or_else(|_| {
214
- if value.respond_to("to_s", false)? {
215
- value.funcall::<_, _, RString>("to_s", ())?.to_string()
216
- } else if value.respond_to("to_str", false)? {
217
- value.funcall::<_, _, RString>("to_str", ())?.to_string()
218
- } else {
219
- Err(MagnusError::new(
220
- magnus::exception::type_error(),
221
- format!("Not able to convert {:?} to String", value),
222
- ))
223
- }
224
- })
225
- }
226
-
227
- /// Converts our custom `ParquetSchemaType` into an Arrow `DataType`.
228
- /// This ensures proper nullability settings for nested types.
229
- /// Converts a ParquetSchemaType to an Arrow DataType
230
- pub fn parquet_schema_type_to_arrow_data_type(
231
- schema_type: &ParquetSchemaType,
232
- ) -> Result<DataType, MagnusError> {
233
- Ok(match schema_type {
234
- ParquetSchemaType::Primitive(primative) => match primative {
235
- PrimitiveType::Int8 => DataType::Int8,
236
- PrimitiveType::Int16 => DataType::Int16,
237
- PrimitiveType::Int32 => DataType::Int32,
238
- PrimitiveType::Int64 => DataType::Int64,
239
- PrimitiveType::UInt8 => DataType::UInt8,
240
- PrimitiveType::UInt16 => DataType::UInt16,
241
- PrimitiveType::UInt32 => DataType::UInt32,
242
- PrimitiveType::UInt64 => DataType::UInt64,
243
- PrimitiveType::Float32 => DataType::Float32,
244
- PrimitiveType::Float64 => DataType::Float64,
245
- PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
246
- PrimitiveType::Decimal256(precision, scale) => DataType::Decimal256(*precision, *scale),
247
- PrimitiveType::String => DataType::Utf8,
248
- PrimitiveType::Binary => DataType::Binary,
249
- PrimitiveType::Boolean => DataType::Boolean,
250
- PrimitiveType::Date32 => DataType::Date32,
251
- PrimitiveType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
252
- PrimitiveType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
253
- PrimitiveType::TimeMillis => DataType::Time32(TimeUnit::Millisecond),
254
- PrimitiveType::TimeMicros => DataType::Time64(TimeUnit::Microsecond),
255
- },
256
- // For a List<T>, create a standard List in Arrow with nullable items
257
- ParquetSchemaType::List(list_field) => {
258
- let child_type = parquet_schema_type_to_arrow_data_type(&list_field.item_type)?;
259
- // For a list, use empty field name to match expectations for schema_dsl test
260
- // This is the critical fix for the schema_dsl test which expects an empty field name
261
- // Use empty field name for all list field items - this is crucial for compatibility
262
- DataType::List(Arc::new(Field::new(
263
- "item",
264
- child_type,
265
- list_field.nullable,
266
- )))
267
- }
268
-
269
- // For a Map<K, V>, ensure entries field is non-nullable and key field is non-nullable
270
- ParquetSchemaType::Map(map_field) => {
271
- let key_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.key_type)?;
272
- let value_arrow_type = parquet_schema_type_to_arrow_data_type(&map_field.value_type)?;
273
- DataType::Map(
274
- Arc::new(Field::new(
275
- "entries",
276
- DataType::Struct(Fields::from(vec![
277
- Field::new("key", key_arrow_type, false), // key must be non-null
278
- Field::new("value", value_arrow_type, true), // value can be null
279
- ])),
280
- /*nullable=*/ false, // crucial: entries must be non-nullable
281
- )),
282
- /*keys_sorted=*/ false,
283
- )
284
- }
285
- ParquetSchemaType::Struct(struct_field) => {
286
- if struct_field.fields.is_empty() {
287
- return Err(MagnusError::new(
288
- magnus::exception::runtime_error(),
289
- "Cannot create a struct with zero subfields (empty struct).",
290
- ));
291
- }
292
-
293
- // Build arrow fields
294
- let mut arrow_fields = Vec::with_capacity(struct_field.fields.len());
295
-
296
- for field in &struct_field.fields {
297
- let field_type = parquet_schema_type_to_arrow_data_type(&field.type_)?;
298
- arrow_fields.push(Field::new(&field.name, field_type, true)); // All fields are nullable by default
299
- }
300
-
301
- DataType::Struct(Fields::from(arrow_fields))
302
- }
303
- })
304
- }
305
-
306
- #[macro_export]
307
- macro_rules! impl_timestamp_array_conversion {
308
- ($column:expr, $array_type:ty, $variant:ident, $tz:expr) => {{
309
- let array = downcast_array::<$array_type>($column);
310
- Ok(ParquetValueVec(if array.is_nullable() {
311
- array
312
- .values()
313
- .iter()
314
- .enumerate()
315
- .map(|(i, x)| {
316
- if array.is_null(i) {
317
- ParquetValue::Null
318
- } else {
319
- ParquetValue::$variant(*x, $tz.clone().map(|s| s.into()))
320
- }
321
- })
322
- .collect()
323
- } else {
324
- array
325
- .values()
326
- .iter()
327
- .map(|x| ParquetValue::$variant(*x, $tz.clone().map(|s| s.into())))
328
- .collect()
329
- }))
330
- }};
331
- }
332
-
333
- // Create the appropriate Arrow builder for a given ParquetSchemaType.
334
- // We return a Box<dyn ArrayBuilder> so we can dynamically downcast.
335
- fn create_arrow_builder_for_type(
336
- type_: &ParquetSchemaType,
337
- capacity: Option<usize>,
338
- ) -> Result<Box<dyn ArrayBuilder>, ParquetGemError> {
339
- let cap = capacity.unwrap_or(1); // Default to at least capacity 1 to avoid empty builders
340
- match type_ {
341
- ParquetSchemaType::Primitive(PrimitiveType::Int8) => {
342
- Ok(Box::new(Int8Builder::with_capacity(cap)))
343
- }
344
- ParquetSchemaType::Primitive(PrimitiveType::Int16) => {
345
- Ok(Box::new(Int16Builder::with_capacity(cap)))
346
- }
347
- ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
348
- Ok(Box::new(Int32Builder::with_capacity(cap)))
349
- }
350
- ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
351
- Ok(Box::new(Int64Builder::with_capacity(cap)))
352
- }
353
- ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
354
- Ok(Box::new(UInt8Builder::with_capacity(cap)))
355
- }
356
- ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
357
- Ok(Box::new(UInt16Builder::with_capacity(cap)))
358
- }
359
- ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
360
- Ok(Box::new(UInt32Builder::with_capacity(cap)))
361
- }
362
- ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
363
- Ok(Box::new(UInt64Builder::with_capacity(cap)))
364
- }
365
- ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
366
- Ok(Box::new(Float32Builder::with_capacity(cap)))
367
- }
368
- ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
369
- Ok(Box::new(Float64Builder::with_capacity(cap)))
370
- }
371
- ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
372
- // Create a Decimal128Builder with specific precision and scale
373
- let builder = Decimal128Builder::with_capacity(cap);
374
-
375
- // Set precision and scale for the decimal and return the new builder
376
- let builder_with_precision = builder
377
- .with_precision_and_scale(*precision, *scale)
378
- .map_err(|e| {
379
- MagnusError::new(
380
- magnus::exception::runtime_error(),
381
- format!("Failed to set precision and scale: {}", e),
382
- )
383
- })?;
384
-
385
- Ok(Box::new(builder_with_precision))
386
- }
387
- ParquetSchemaType::Primitive(PrimitiveType::Decimal256(precision, scale)) => {
388
- // Create a Decimal128Builder since we're truncating Decimal256 to Decimal128
389
- let builder = Decimal256Builder::with_capacity(cap);
390
-
391
- // Set precision and scale for the decimal and return the new builder
392
- let builder_with_precision = builder
393
- .with_precision_and_scale(*precision, *scale)
394
- .map_err(|e| {
395
- MagnusError::new(
396
- magnus::exception::runtime_error(),
397
- format!("Failed to set precision and scale: {}", e),
398
- )
399
- })?;
400
-
401
- Ok(Box::new(builder_with_precision))
402
- }
403
- ParquetSchemaType::Primitive(PrimitiveType::String) => {
404
- Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
405
- }
406
- ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
407
- Ok(Box::new(BinaryBuilder::with_capacity(cap, cap * 32)))
408
- }
409
- ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
410
- Ok(Box::new(BooleanBuilder::with_capacity(cap)))
411
- }
412
- ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
413
- Ok(Box::new(Date32Builder::with_capacity(cap)))
414
- }
415
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
416
- Ok(Box::new(TimestampMillisecondBuilder::with_capacity(cap)))
417
- }
418
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
419
- Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
420
- }
421
- ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
422
- Ok(Box::new(Time32MillisecondBuilder::with_capacity(cap)))
423
- }
424
- ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
425
- Ok(Box::new(Time64MicrosecondBuilder::with_capacity(cap)))
426
- }
427
- ParquetSchemaType::List(list_field) => {
428
- // For a list, we create a ListBuilder whose child builder is determined by item_type.
429
- // Pass through capacity to ensure consistent sizing
430
- let child_builder = create_arrow_builder_for_type(&list_field.item_type, Some(cap))?;
431
-
432
- // Ensure consistent builder capacity for lists
433
- Ok(Box::new(ListBuilder::<Box<dyn ArrayBuilder>>::new(
434
- child_builder,
435
- )))
436
- }
437
- ParquetSchemaType::Map(map_field) => {
438
- // A Map is physically a list<struct<key:..., value:...>> in Arrow.
439
- // Pass through capacity to ensure consistent sizing
440
- let key_builder = create_arrow_builder_for_type(&map_field.key_type, Some(cap))?;
441
- let value_builder = create_arrow_builder_for_type(&map_field.value_type, Some(cap))?;
442
-
443
- // Create a MapBuilder with explicit field names to ensure compatibility
444
- Ok(Box::new(MapBuilder::<
445
- Box<dyn ArrayBuilder>,
446
- Box<dyn ArrayBuilder>,
447
- >::new(
448
- Some(MapFieldNames {
449
- entry: "entries".to_string(),
450
- key: "key".to_string(),
451
- value: "value".to_string(),
452
- }),
453
- key_builder,
454
- value_builder,
455
- )))
456
- }
457
- ParquetSchemaType::Struct(struct_field) => {
458
- // Check for empty struct immediately
459
- if struct_field.fields.is_empty() {
460
- Err(MagnusError::new(
461
- magnus::exception::runtime_error(),
462
- "Cannot build a struct with zero fields - Parquet doesn't support empty structs".to_string(),
463
- ))?;
464
- }
465
-
466
- // Create a child builder for each field in the struct
467
- let mut child_field_builders = Vec::with_capacity(struct_field.fields.len());
468
-
469
- // Get struct data type first to ensure field compatibility
470
- let data_type = parquet_schema_type_to_arrow_data_type(type_)?;
471
-
472
- // Make sure the data type is a struct
473
- let arrow_fields = if let DataType::Struct(ref fields) = data_type {
474
- fields.clone()
475
- } else {
476
- return Err(MagnusError::new(
477
- magnus::exception::type_error(),
478
- "Expected struct data type".to_string(),
479
- ))?;
480
- };
481
-
482
- // Create builders for each child field with consistent capacity
483
- for child in &struct_field.fields {
484
- let sub_builder = create_arrow_builder_for_type(&child.type_, Some(cap))?;
485
- child_field_builders.push(sub_builder);
486
- }
487
-
488
- // Make sure we have the right number of builders
489
- if child_field_builders.len() != arrow_fields.len() {
490
- Err(MagnusError::new(
491
- magnus::exception::runtime_error(),
492
- format!(
493
- "Number of field builders ({}) doesn't match number of arrow fields ({})",
494
- child_field_builders.len(),
495
- arrow_fields.len()
496
- ),
497
- ))?;
498
- }
499
-
500
- // Create the StructBuilder with the fields and child builders
501
- Ok(Box::new(StructBuilder::new(
502
- arrow_fields,
503
- child_field_builders,
504
- )))
505
- }
506
- }
507
- }
508
-
509
- // Fill primitive scalar Int8 values
510
- fn fill_int8_builder(
511
- builder: &mut dyn ArrayBuilder,
512
- values: &[ParquetValue],
513
- ) -> Result<(), MagnusError> {
514
- let typed_builder = builder
515
- .as_any_mut()
516
- .downcast_mut::<Int8Builder>()
517
- .expect("Builder mismatch: expected Int8Builder");
518
- for val in values {
519
- match val {
520
- ParquetValue::Int8(i) => typed_builder.append_value(*i),
521
- // Handle Int64 that could be an Int8
522
- ParquetValue::Int64(i) => {
523
- if *i < i8::MIN as i64 || *i > i8::MAX as i64 {
524
- return Err(MagnusError::new(
525
- magnus::exception::range_error(),
526
- format!("Integer {} is out of range for Int8", i),
527
- ));
528
- }
529
- typed_builder.append_value(*i as i8)
530
- }
531
- ParquetValue::Null => typed_builder.append_null(),
532
- other => {
533
- return Err(MagnusError::new(
534
- magnus::exception::type_error(),
535
- format!("Expected Int8, got {:?}", other),
536
- ))
537
- }
538
- }
539
- }
540
- Ok(())
541
- }
542
-
543
- // Fill primitive scalar Int16 values
544
- fn fill_int16_builder(
545
- builder: &mut dyn ArrayBuilder,
546
- values: &[ParquetValue],
547
- ) -> Result<(), MagnusError> {
548
- let typed_builder = builder
549
- .as_any_mut()
550
- .downcast_mut::<Int16Builder>()
551
- .expect("Builder mismatch: expected Int16Builder");
552
- for val in values {
553
- match val {
554
- ParquetValue::Int16(i) => typed_builder.append_value(*i),
555
- // Handle Int64 that could be an Int16
556
- ParquetValue::Int64(i) => {
557
- if *i < i16::MIN as i64 || *i > i16::MAX as i64 {
558
- return Err(MagnusError::new(
559
- magnus::exception::range_error(),
560
- format!("Integer {} is out of range for Int16", i),
561
- ));
562
- }
563
- typed_builder.append_value(*i as i16)
564
- }
565
- ParquetValue::Null => typed_builder.append_null(),
566
- other => {
567
- return Err(MagnusError::new(
568
- magnus::exception::type_error(),
569
- format!("Expected Int16, got {:?}", other),
570
- ))
571
- }
572
- }
573
- }
574
- Ok(())
575
- }
576
-
577
- // Fill list values by recursively filling child items
578
- fn fill_list_builder(
579
- builder: &mut dyn ArrayBuilder,
580
- item_type: &ParquetSchemaType,
581
- values: &[ParquetValue],
582
- ) -> Result<(), MagnusError> {
583
- // We need to use a more specific type for ListBuilder to help Rust's type inference
584
- let lb = builder
585
- .as_any_mut()
586
- .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>()
587
- .expect("Builder mismatch: expected ListBuilder");
588
-
589
- for val in values {
590
- if let ParquetValue::Null = val {
591
- // null list
592
- lb.append(false);
593
- } else if let ParquetValue::List(list_items) = val {
594
- // First fill the child builder with the items
595
- let values_builder = lb.values();
596
- fill_builder(values_builder, item_type, list_items)?;
597
- // Then finalize the list by calling append(true)
598
- lb.append(true);
599
- } else {
600
- return Err(MagnusError::new(
601
- magnus::exception::type_error(),
602
- format!("Expected ParquetValue::List(...) or Null, got {:?}", val),
603
- ));
604
- }
605
- }
606
-
607
- Ok(())
608
- }
609
-
610
- // Fill map values by recursively filling key and value items
611
- fn fill_map_builder(
612
- builder: &mut dyn ArrayBuilder,
613
- key_type: &ParquetSchemaType,
614
- value_type: &ParquetSchemaType,
615
- values: &[ParquetValue],
616
- ) -> Result<(), MagnusError> {
617
- let mb = builder
618
- .as_any_mut()
619
- .downcast_mut::<MapBuilder<Box<dyn ArrayBuilder>, Box<dyn ArrayBuilder>>>()
620
- .expect("Builder mismatch: expected MapBuilder");
621
-
622
- for val in values {
623
- match val {
624
- ParquetValue::Null => {
625
- // null map
626
- mb.append(false).map_err(|e| {
627
- MagnusError::new(
628
- magnus::exception::runtime_error(),
629
- format!("Failed to append null to map: {}", e),
630
- )
631
- })?;
632
- }
633
- ParquetValue::Map(map_entries) => {
634
- // First append all key-value pairs to the child arrays
635
- for (k, v) in map_entries {
636
- // Note: Arrow expects field names "key" and "value" (singular)
637
- fill_builder(mb.keys(), key_type, &[k.clone()])?;
638
- fill_builder(mb.values(), value_type, &[v.clone()])?;
639
- }
640
- // Then finalize the map by calling append(true)
641
- mb.append(true).map_err(|e| {
642
- MagnusError::new(
643
- magnus::exception::runtime_error(),
644
- format!("Failed to append map entry: {}", e),
645
- )
646
- })?;
647
- }
648
- other => {
649
- return Err(MagnusError::new(
650
- magnus::exception::type_error(),
651
- format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
652
- ))
653
- }
654
- }
655
- }
656
-
657
- Ok(())
658
- }
659
-
660
- // Append an entire slice of ParquetValue into the given Arrow builder.
661
- // We do a `match` on the type for each item, recursing for nested list/map.
662
- fn fill_builder(
663
- builder: &mut dyn ArrayBuilder,
664
- type_: &ParquetSchemaType,
665
- values: &[ParquetValue],
666
- ) -> Result<(), MagnusError> {
667
- match type_ {
668
- // ------------------
669
- // PRIMITIVE SCALARS - delegated to specialized helpers
670
- // ------------------
671
- ParquetSchemaType::Primitive(PrimitiveType::Int8) => fill_int8_builder(builder, values),
672
- ParquetSchemaType::Primitive(PrimitiveType::Int16) => fill_int16_builder(builder, values),
673
- ParquetSchemaType::Primitive(PrimitiveType::Int32) => {
674
- let typed_builder = builder
675
- .as_any_mut()
676
- .downcast_mut::<Int32Builder>()
677
- .expect("Builder mismatch: expected Int32Builder");
678
- for val in values {
679
- match val {
680
- ParquetValue::Int32(i) => typed_builder.append_value(*i),
681
- ParquetValue::Date32(d) => typed_builder.append_value(*d), // if you allow date->int
682
- // Handle the case where we have an Int64 in an Int32 field (common with Ruby Integers)
683
- ParquetValue::Int64(i) => {
684
- if *i < i32::MIN as i64 || *i > i32::MAX as i64 {
685
- return Err(MagnusError::new(
686
- magnus::exception::range_error(),
687
- format!("Integer {} is out of range for Int32", i),
688
- ));
689
- }
690
- typed_builder.append_value(*i as i32)
691
- }
692
- ParquetValue::Null => typed_builder.append_null(),
693
- other => {
694
- return Err(MagnusError::new(
695
- magnus::exception::type_error(),
696
- format!("Expected Int32, got {:?}", other),
697
- ))
698
- }
699
- }
700
- }
701
- Ok(())
702
- }
703
- ParquetSchemaType::Primitive(PrimitiveType::Int64) => {
704
- let typed_builder = builder
705
- .as_any_mut()
706
- .downcast_mut::<Int64Builder>()
707
- .expect("Builder mismatch: expected Int64Builder");
708
- for val in values {
709
- match val {
710
- ParquetValue::Int64(i) => typed_builder.append_value(*i),
711
- ParquetValue::Null => typed_builder.append_null(),
712
- other => {
713
- return Err(MagnusError::new(
714
- magnus::exception::type_error(),
715
- format!("Expected Int64, got {:?}", other),
716
- ))
717
- }
718
- }
719
- }
720
- Ok(())
721
- }
722
- ParquetSchemaType::Primitive(PrimitiveType::UInt8) => {
723
- let typed_builder = builder
724
- .as_any_mut()
725
- .downcast_mut::<UInt8Builder>()
726
- .expect("Builder mismatch: expected UInt8Builder");
727
- for val in values {
728
- match val {
729
- ParquetValue::UInt8(u) => typed_builder.append_value(*u),
730
- // Handle Int64 that could be a UInt8
731
- ParquetValue::Int64(i) => {
732
- if *i < 0 || *i > u8::MAX as i64 {
733
- return Err(MagnusError::new(
734
- magnus::exception::range_error(),
735
- format!("Integer {} is out of range for UInt8", i),
736
- ));
737
- }
738
- typed_builder.append_value(*i as u8)
739
- }
740
- ParquetValue::Null => typed_builder.append_null(),
741
- other => {
742
- return Err(MagnusError::new(
743
- magnus::exception::type_error(),
744
- format!("Expected UInt8, got {:?}", other),
745
- ))
746
- }
747
- }
748
- }
749
- Ok(())
750
- }
751
- ParquetSchemaType::Primitive(PrimitiveType::UInt16) => {
752
- let typed_builder = builder
753
- .as_any_mut()
754
- .downcast_mut::<UInt16Builder>()
755
- .expect("Builder mismatch: expected UInt16Builder");
756
- for val in values {
757
- match val {
758
- ParquetValue::UInt16(u) => typed_builder.append_value(*u),
759
- // Handle Int64 that could be a UInt16
760
- ParquetValue::Int64(i) => {
761
- if *i < 0 || *i > u16::MAX as i64 {
762
- return Err(MagnusError::new(
763
- magnus::exception::range_error(),
764
- format!("Integer {} is out of range for UInt16", i),
765
- ));
766
- }
767
- typed_builder.append_value(*i as u16)
768
- }
769
- ParquetValue::Null => typed_builder.append_null(),
770
- other => {
771
- return Err(MagnusError::new(
772
- magnus::exception::type_error(),
773
- format!("Expected UInt16, got {:?}", other),
774
- ))
775
- }
776
- }
777
- }
778
- Ok(())
779
- }
780
- ParquetSchemaType::Primitive(PrimitiveType::UInt32) => {
781
- let typed_builder = builder
782
- .as_any_mut()
783
- .downcast_mut::<UInt32Builder>()
784
- .expect("Builder mismatch: expected UInt32Builder");
785
- for val in values {
786
- match val {
787
- ParquetValue::UInt32(u) => typed_builder.append_value(*u),
788
- // Handle Int64 that could be a UInt32
789
- ParquetValue::Int64(i) => {
790
- if *i < 0 || *i > u32::MAX as i64 {
791
- return Err(MagnusError::new(
792
- magnus::exception::range_error(),
793
- format!("Integer {} is out of range for UInt32", i),
794
- ));
795
- }
796
- typed_builder.append_value(*i as u32)
797
- }
798
- ParquetValue::Null => typed_builder.append_null(),
799
- other => {
800
- return Err(MagnusError::new(
801
- magnus::exception::type_error(),
802
- format!("Expected UInt32, got {:?}", other),
803
- ))
804
- }
805
- }
806
- }
807
- Ok(())
808
- }
809
- ParquetSchemaType::Primitive(PrimitiveType::UInt64) => {
810
- let typed_builder = builder
811
- .as_any_mut()
812
- .downcast_mut::<UInt64Builder>()
813
- .expect("Builder mismatch: expected UInt64Builder");
814
- for val in values {
815
- match val {
816
- ParquetValue::UInt64(u) => typed_builder.append_value(*u),
817
- // Handle Int64 that could be a UInt64
818
- ParquetValue::Int64(i) => {
819
- if *i < 0 {
820
- return Err(MagnusError::new(
821
- magnus::exception::range_error(),
822
- format!("Integer {} is out of range for UInt64", i),
823
- ));
824
- }
825
- typed_builder.append_value(*i as u64)
826
- }
827
- ParquetValue::Null => typed_builder.append_null(),
828
- other => {
829
- return Err(MagnusError::new(
830
- magnus::exception::type_error(),
831
- format!("Expected UInt64, got {:?}", other),
832
- ))
833
- }
834
- }
835
- }
836
- Ok(())
837
- }
838
- ParquetSchemaType::Primitive(PrimitiveType::Float32) => {
839
- let typed_builder = builder
840
- .as_any_mut()
841
- .downcast_mut::<Float32Builder>()
842
- .expect("Builder mismatch: expected Float32Builder");
843
- for val in values {
844
- match val {
845
- ParquetValue::Float32(f) => typed_builder.append_value(*f),
846
- ParquetValue::Float16(fh) => typed_builder.append_value(*fh),
847
- ParquetValue::Null => typed_builder.append_null(),
848
- other => {
849
- return Err(MagnusError::new(
850
- magnus::exception::type_error(),
851
- format!("Expected Float32, got {:?}", other),
852
- ))
853
- }
854
- }
855
- }
856
- Ok(())
857
- }
858
- ParquetSchemaType::Primitive(PrimitiveType::Float64) => {
859
- let typed_builder = builder
860
- .as_any_mut()
861
- .downcast_mut::<Float64Builder>()
862
- .expect("Builder mismatch: expected Float64Builder");
863
- for val in values {
864
- match val {
865
- ParquetValue::Float64(f) => typed_builder.append_value(*f),
866
- // If you want to allow f32 => f64, do so:
867
- ParquetValue::Float32(flo) => typed_builder.append_value(*flo as f64),
868
- ParquetValue::Null => typed_builder.append_null(),
869
- other => {
870
- return Err(MagnusError::new(
871
- magnus::exception::type_error(),
872
- format!("Expected Float64, got {:?}", other),
873
- ))
874
- }
875
- }
876
- }
877
- Ok(())
878
- }
879
- ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_precision, scale)) => {
880
- let typed_builder = builder
881
- .as_any_mut()
882
- .downcast_mut::<Decimal128Builder>()
883
- .expect("Builder mismatch: expected Float64Builder");
884
-
885
- for val in values {
886
- match val {
887
- ParquetValue::Decimal128(d, _scale) => typed_builder.append_value(*d),
888
- ParquetValue::Float64(f) => {
889
- // Scale the float to the desired precision and scale
890
- let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
891
- typed_builder.append_value(scaled_value)
892
- }
893
- ParquetValue::Float32(flo) => {
894
- // Scale the float to the desired precision and scale
895
- let scaled_value = (*flo as f64 * 10_f64.powi(*scale as i32)) as i128;
896
- typed_builder.append_value(scaled_value)
897
- }
898
- ParquetValue::Int64(i) => {
899
- // Scale the integer to the desired scale
900
- let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
901
- typed_builder.append_value(scaled_value)
902
- }
903
- ParquetValue::Int32(i) => {
904
- // Scale the integer to the desired scale
905
- let scaled_value = (*i as i128) * 10_i128.pow(*scale as u32);
906
- typed_builder.append_value(scaled_value)
907
- }
908
- ParquetValue::Null => typed_builder.append_null(),
909
- other => {
910
- return Err(MagnusError::new(
911
- magnus::exception::type_error(),
912
- format!("Expected Float64, got {:?}", other),
913
- ))
914
- }
915
- }
916
- }
917
- Ok(())
918
- }
919
- ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_precision, scale)) => {
920
- let typed_builder = builder
921
- .as_any_mut()
922
- .downcast_mut::<Decimal256Builder>()
923
- .expect("Builder mismatch: expected Decimal256Builder for Decimal256");
924
-
925
- for val in values {
926
- match val {
927
- ParquetValue::Decimal256(d, _scale) => typed_builder.append_value(*d),
928
- ParquetValue::Decimal128(d, _scale) => {
929
- // Convert i128 to i256
930
- typed_builder.append_value(arrow_buffer::i256::from_i128(*d))
931
- }
932
- ParquetValue::Float64(f) => {
933
- // Scale the float to the desired precision and scale
934
- // For large values, use BigInt to avoid overflow
935
- let scaled = *f * 10_f64.powi(*scale as i32);
936
- if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
937
- let scaled_value = scaled as i128;
938
- typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
939
- } else {
940
- // Use BigInt for values that don't fit in i128
941
- use num::{BigInt, FromPrimitive};
942
- let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
943
- MagnusError::new(
944
- magnus::exception::type_error(),
945
- format!("Failed to convert float {} to BigInt", f),
946
- )
947
- })?;
948
- let bytes = bigint.to_signed_bytes_le();
949
- if bytes.len() <= 32 {
950
- let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
951
- [0xff; 32]
952
- } else {
953
- [0; 32]
954
- };
955
- buf[..bytes.len()].copy_from_slice(&bytes);
956
- typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
957
- } else {
958
- return Err(MagnusError::new(
959
- magnus::exception::type_error(),
960
- format!(
961
- "Float value {} scaled to {} is too large for Decimal256",
962
- f, scaled
963
- ),
964
- ));
965
- }
966
- }
967
- }
968
- ParquetValue::Float32(flo) => {
969
- // Scale the float to the desired precision and scale
970
- let scaled = (*flo as f64) * 10_f64.powi(*scale as i32);
971
- if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
972
- let scaled_value = scaled as i128;
973
- typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
974
- } else {
975
- // Use BigInt for values that don't fit in i128
976
- use num::{BigInt, FromPrimitive};
977
- let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
978
- MagnusError::new(
979
- magnus::exception::type_error(),
980
- format!("Failed to convert float {} to BigInt", flo),
981
- )
982
- })?;
983
- let bytes = bigint.to_signed_bytes_le();
984
- if bytes.len() <= 32 {
985
- let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
986
- [0xff; 32]
987
- } else {
988
- [0; 32]
989
- };
990
- buf[..bytes.len()].copy_from_slice(&bytes);
991
- typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
992
- } else {
993
- return Err(MagnusError::new(
994
- magnus::exception::type_error(),
995
- format!(
996
- "Float value {} scaled is too large for Decimal256",
997
- flo
998
- ),
999
- ));
1000
- }
1001
- }
1002
- }
1003
- ParquetValue::Int64(i) => {
1004
- // Scale the integer to the desired scale
1005
- let base = arrow_buffer::i256::from_i128(*i as i128);
1006
- if *scale <= 38 {
1007
- // Can use i128 multiplication for scale <= 38
1008
- let scale_factor =
1009
- arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1010
- match base.checked_mul(scale_factor) {
1011
- Some(scaled) => typed_builder.append_value(scaled),
1012
- None => {
1013
- return Err(MagnusError::new(
1014
- magnus::exception::type_error(),
1015
- format!(
1016
- "Integer {} scaled by {} overflows Decimal256",
1017
- i, scale
1018
- ),
1019
- ));
1020
- }
1021
- }
1022
- } else {
1023
- // For very large scales, use BigInt
1024
- use num::BigInt;
1025
- let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1026
- let bytes = bigint.to_signed_bytes_le();
1027
- if bytes.len() <= 32 {
1028
- let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1029
- [0xff; 32]
1030
- } else {
1031
- [0; 32]
1032
- };
1033
- buf[..bytes.len()].copy_from_slice(&bytes);
1034
- typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1035
- } else {
1036
- return Err(MagnusError::new(
1037
- magnus::exception::type_error(),
1038
- format!(
1039
- "Integer {} scaled by {} is too large for Decimal256",
1040
- i, scale
1041
- ),
1042
- ));
1043
- }
1044
- }
1045
- }
1046
- ParquetValue::Int32(i) => {
1047
- // Scale the integer to the desired scale
1048
- let base = arrow_buffer::i256::from_i128(*i as i128);
1049
- if *scale <= 38 {
1050
- // Can use i128 multiplication for scale <= 38
1051
- let scale_factor =
1052
- arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
1053
- match base.checked_mul(scale_factor) {
1054
- Some(scaled) => typed_builder.append_value(scaled),
1055
- None => {
1056
- return Err(MagnusError::new(
1057
- magnus::exception::type_error(),
1058
- format!(
1059
- "Integer {} scaled by {} overflows Decimal256",
1060
- i, scale
1061
- ),
1062
- ));
1063
- }
1064
- }
1065
- } else {
1066
- // For very large scales, use BigInt
1067
- use num::BigInt;
1068
- let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
1069
- let bytes = bigint.to_signed_bytes_le();
1070
- if bytes.len() <= 32 {
1071
- let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
1072
- [0xff; 32]
1073
- } else {
1074
- [0; 32]
1075
- };
1076
- buf[..bytes.len()].copy_from_slice(&bytes);
1077
- typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
1078
- } else {
1079
- return Err(MagnusError::new(
1080
- magnus::exception::type_error(),
1081
- format!(
1082
- "Integer {} scaled by {} is too large for Decimal256",
1083
- i, scale
1084
- ),
1085
- ));
1086
- }
1087
- }
1088
- }
1089
- ParquetValue::Null => typed_builder.append_null(),
1090
- other => {
1091
- return Err(MagnusError::new(
1092
- magnus::exception::type_error(),
1093
- format!("Expected numeric value for Decimal256, got {:?}", other),
1094
- ))
1095
- }
1096
- }
1097
- }
1098
- Ok(())
1099
- }
1100
- ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
1101
- let typed_builder = builder
1102
- .as_any_mut()
1103
- .downcast_mut::<BooleanBuilder>()
1104
- .expect("Builder mismatch: expected BooleanBuilder");
1105
- for val in values {
1106
- match val {
1107
- ParquetValue::Boolean(b) => typed_builder.append_value(*b),
1108
- ParquetValue::Null => typed_builder.append_null(),
1109
- other => {
1110
- return Err(MagnusError::new(
1111
- magnus::exception::type_error(),
1112
- format!("Expected Boolean, got {:?}", other),
1113
- ))
1114
- }
1115
- }
1116
- }
1117
- Ok(())
1118
- }
1119
- ParquetSchemaType::Primitive(PrimitiveType::Date32) => {
1120
- let typed_builder = builder
1121
- .as_any_mut()
1122
- .downcast_mut::<Date32Builder>()
1123
- .expect("Builder mismatch: expected Date32Builder");
1124
- for val in values {
1125
- match val {
1126
- ParquetValue::Date32(d) => typed_builder.append_value(*d),
1127
- ParquetValue::Null => typed_builder.append_null(),
1128
- other => {
1129
- return Err(MagnusError::new(
1130
- magnus::exception::type_error(),
1131
- format!("Expected Date32, got {:?}", other),
1132
- ))
1133
- }
1134
- }
1135
- }
1136
- Ok(())
1137
- }
1138
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => {
1139
- let typed_builder = builder
1140
- .as_any_mut()
1141
- .downcast_mut::<TimestampMillisecondBuilder>()
1142
- .expect("Builder mismatch: expected TimestampMillisecondBuilder");
1143
- for val in values {
1144
- match val {
1145
- ParquetValue::TimestampMillis(ts, _tz) => typed_builder.append_value(*ts),
1146
- ParquetValue::Null => typed_builder.append_null(),
1147
- other => {
1148
- return Err(MagnusError::new(
1149
- magnus::exception::type_error(),
1150
- format!("Expected TimestampMillis, got {:?}", other),
1151
- ))
1152
- }
1153
- }
1154
- }
1155
- Ok(())
1156
- }
1157
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
1158
- let typed_builder = builder
1159
- .as_any_mut()
1160
- .downcast_mut::<TimestampMicrosecondBuilder>()
1161
- .expect("Builder mismatch: expected TimestampMicrosecondBuilder");
1162
- for val in values {
1163
- match val {
1164
- ParquetValue::TimestampMicros(ts, _tz) => typed_builder.append_value(*ts),
1165
- ParquetValue::Null => typed_builder.append_null(),
1166
- other => {
1167
- return Err(MagnusError::new(
1168
- magnus::exception::type_error(),
1169
- format!("Expected TimestampMicros, got {:?}", other),
1170
- ))
1171
- }
1172
- }
1173
- }
1174
- Ok(())
1175
- }
1176
- ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
1177
- let typed_builder = builder
1178
- .as_any_mut()
1179
- .downcast_mut::<Time32MillisecondBuilder>()
1180
- .expect("Builder mismatch: expected Time32MillisecondBuilder");
1181
- for val in values {
1182
- match val {
1183
- ParquetValue::TimeMillis(t) => typed_builder.append_value(*t),
1184
- ParquetValue::Null => typed_builder.append_null(),
1185
- other => {
1186
- return Err(MagnusError::new(
1187
- magnus::exception::type_error(),
1188
- format!("Expected TimeMillis, got {:?}", other),
1189
- ))
1190
- }
1191
- }
1192
- }
1193
- Ok(())
1194
- }
1195
- ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
1196
- let typed_builder = builder
1197
- .as_any_mut()
1198
- .downcast_mut::<Time64MicrosecondBuilder>()
1199
- .expect("Builder mismatch: expected Time64MicrosecondBuilder");
1200
- for val in values {
1201
- match val {
1202
- ParquetValue::TimeMicros(t) => typed_builder.append_value(*t),
1203
- ParquetValue::Null => typed_builder.append_null(),
1204
- other => {
1205
- return Err(MagnusError::new(
1206
- magnus::exception::type_error(),
1207
- format!("Expected TimeMicros, got {:?}", other),
1208
- ))
1209
- }
1210
- }
1211
- }
1212
- Ok(())
1213
- }
1214
-
1215
- // ------------------
1216
- // NESTED LIST - using helper function
1217
- // ------------------
1218
- ParquetSchemaType::List(list_field) => {
1219
- fill_list_builder(builder, &list_field.item_type, values)
1220
- }
1221
-
1222
- // ------------------
1223
- // NESTED MAP - using helper function
1224
- // ------------------
1225
- ParquetSchemaType::Map(map_field) => {
1226
- fill_map_builder(builder, &map_field.key_type, &map_field.value_type, values)
1227
- }
1228
-
1229
- // ------------------
1230
- // OTHER TYPES - keep as is for now
1231
- // ------------------
1232
- ParquetSchemaType::Primitive(PrimitiveType::String) => {
1233
- let typed_builder = builder
1234
- .as_any_mut()
1235
- .downcast_mut::<StringBuilder>()
1236
- .expect("Builder mismatch: expected StringBuilder");
1237
- for val in values {
1238
- match val {
1239
- ParquetValue::String(s) => typed_builder.append_value(s),
1240
- ParquetValue::Null => typed_builder.append_null(),
1241
- other => {
1242
- return Err(MagnusError::new(
1243
- magnus::exception::type_error(),
1244
- format!("Expected String, got {:?}", other),
1245
- ))
1246
- }
1247
- }
1248
- }
1249
- Ok(())
1250
- }
1251
- ParquetSchemaType::Primitive(PrimitiveType::Binary) => {
1252
- let typed_builder = builder
1253
- .as_any_mut()
1254
- .downcast_mut::<BinaryBuilder>()
1255
- .expect("Builder mismatch: expected BinaryBuilder");
1256
- for val in values {
1257
- match val {
1258
- ParquetValue::Bytes(b) => typed_builder.append_value(b),
1259
- ParquetValue::Null => typed_builder.append_null(),
1260
- other => {
1261
- return Err(MagnusError::new(
1262
- magnus::exception::type_error(),
1263
- format!("Expected Binary, got {:?}", other),
1264
- ))
1265
- }
1266
- }
1267
- }
1268
- Ok(())
1269
- }
1270
- ParquetSchemaType::Struct(struct_field) => {
1271
- let typed_builder = builder
1272
- .as_any_mut()
1273
- .downcast_mut::<StructBuilder>()
1274
- .expect("Builder mismatch: expected StructBuilder");
1275
-
1276
- for val in values {
1277
- match val {
1278
- ParquetValue::Null => {
1279
- // null struct
1280
- typed_builder.append(false);
1281
- }
1282
- ParquetValue::Map(map_data) => {
1283
- for (i, field) in struct_field.fields.iter().enumerate() {
1284
- let field_key = ParquetValue::String(field.name.clone());
1285
- if let Some(field_val) = map_data.get(&field_key) {
1286
- match field_val {
1287
- ParquetValue::Int8(x) => typed_builder
1288
- .field_builder::<Int8Builder>(i)
1289
- .ok_or_else(|| {
1290
- MagnusError::new(
1291
- magnus::exception::type_error(),
1292
- "Failed to coerce into Int8Builder",
1293
- )
1294
- })?
1295
- .append_value(*x),
1296
- ParquetValue::Int16(x) => typed_builder
1297
- .field_builder::<Int16Builder>(i)
1298
- .ok_or_else(|| {
1299
- MagnusError::new(
1300
- magnus::exception::type_error(),
1301
- "Failed to coerce into Int16Builder",
1302
- )
1303
- })?
1304
- .append_value(*x),
1305
- ParquetValue::Int32(x) => typed_builder
1306
- .field_builder::<Int32Builder>(i)
1307
- .ok_or_else(|| {
1308
- MagnusError::new(
1309
- magnus::exception::type_error(),
1310
- "Failed to coerce into Int32Builder",
1311
- )
1312
- })?
1313
- .append_value(*x),
1314
- ParquetValue::Int64(x) => typed_builder
1315
- .field_builder::<Int64Builder>(i)
1316
- .ok_or_else(|| {
1317
- MagnusError::new(
1318
- magnus::exception::type_error(),
1319
- "Failed to coerce into Int64Builder",
1320
- )
1321
- })?
1322
- .append_value(*x),
1323
- ParquetValue::UInt8(x) => typed_builder
1324
- .field_builder::<UInt8Builder>(i)
1325
- .ok_or_else(|| {
1326
- MagnusError::new(
1327
- magnus::exception::type_error(),
1328
- "Failed to coerce into UInt8Builder",
1329
- )
1330
- })?
1331
- .append_value(*x),
1332
- ParquetValue::UInt16(x) => typed_builder
1333
- .field_builder::<UInt16Builder>(i)
1334
- .ok_or_else(|| {
1335
- MagnusError::new(
1336
- magnus::exception::type_error(),
1337
- "Failed to coerce into UInt16Builder",
1338
- )
1339
- })?
1340
- .append_value(*x),
1341
- ParquetValue::UInt32(x) => typed_builder
1342
- .field_builder::<UInt32Builder>(i)
1343
- .ok_or_else(|| {
1344
- MagnusError::new(
1345
- magnus::exception::type_error(),
1346
- "Failed to coerce into UInt32Builder",
1347
- )
1348
- })?
1349
- .append_value(*x),
1350
- ParquetValue::UInt64(x) => typed_builder
1351
- .field_builder::<UInt64Builder>(i)
1352
- .ok_or_else(|| {
1353
- MagnusError::new(
1354
- magnus::exception::type_error(),
1355
- "Failed to coerce into UInt64Builder",
1356
- )
1357
- })?
1358
- .append_value(*x),
1359
- ParquetValue::Float16(_) => {
1360
- return Err(MagnusError::new(
1361
- magnus::exception::runtime_error(),
1362
- "Float16 not supported",
1363
- ))
1364
- }
1365
- ParquetValue::Float32(x) => typed_builder
1366
- .field_builder::<Float32Builder>(i)
1367
- .ok_or_else(|| {
1368
- MagnusError::new(
1369
- magnus::exception::type_error(),
1370
- "Failed to coerce into Float32Builder",
1371
- )
1372
- })?
1373
- .append_value(*x),
1374
- ParquetValue::Float64(x) => typed_builder
1375
- .field_builder::<Float64Builder>(i)
1376
- .ok_or_else(|| {
1377
- MagnusError::new(
1378
- magnus::exception::type_error(),
1379
- "Failed to coerce into Float64Builder",
1380
- )
1381
- })?
1382
- .append_value(*x),
1383
- ParquetValue::Boolean(x) => typed_builder
1384
- .field_builder::<BooleanBuilder>(i)
1385
- .ok_or_else(|| {
1386
- MagnusError::new(
1387
- magnus::exception::type_error(),
1388
- "Failed to coerce into BooleanBuilder",
1389
- )
1390
- })?
1391
- .append_value(*x),
1392
- ParquetValue::String(x) => typed_builder
1393
- .field_builder::<StringBuilder>(i)
1394
- .ok_or_else(|| {
1395
- MagnusError::new(
1396
- magnus::exception::type_error(),
1397
- "Failed to coerce into StringBuilder",
1398
- )
1399
- })?
1400
- .append_value(x),
1401
- ParquetValue::Bytes(bytes) => typed_builder
1402
- .field_builder::<BinaryBuilder>(i)
1403
- .ok_or_else(|| {
1404
- MagnusError::new(
1405
- magnus::exception::type_error(),
1406
- "Failed to coerce into BinaryBuilder",
1407
- )
1408
- })?
1409
- .append_value(bytes),
1410
- ParquetValue::Decimal128(x, _scale) => typed_builder
1411
- .field_builder::<Decimal128Builder>(i)
1412
- .ok_or_else(|| {
1413
- MagnusError::new(
1414
- magnus::exception::type_error(),
1415
- "Failed to coerce into Decimal128Builder",
1416
- )
1417
- })?
1418
- .append_value(*x),
1419
- ParquetValue::Decimal256(x, _scale) => typed_builder
1420
- .field_builder::<Decimal256Builder>(i)
1421
- .ok_or_else(|| {
1422
- MagnusError::new(
1423
- magnus::exception::type_error(),
1424
- "Failed to coerce into Decimal256Builder",
1425
- )
1426
- })?
1427
- .append_value(*x),
1428
- ParquetValue::Date32(x) => typed_builder
1429
- .field_builder::<Date32Builder>(i)
1430
- .ok_or_else(|| {
1431
- MagnusError::new(
1432
- magnus::exception::type_error(),
1433
- "Failed to coerce into Date32Builder",
1434
- )
1435
- })?
1436
- .append_value(*x),
1437
- ParquetValue::Date64(x) => typed_builder
1438
- .field_builder::<Date64Builder>(i)
1439
- .ok_or_else(|| {
1440
- MagnusError::new(
1441
- magnus::exception::type_error(),
1442
- "Failed to coerce into Date64Builder",
1443
- )
1444
- })?
1445
- .append_value(*x),
1446
- ParquetValue::TimestampSecond(x, _tz) => typed_builder
1447
- .field_builder::<TimestampSecondBuilder>(i)
1448
- .ok_or_else(|| {
1449
- MagnusError::new(
1450
- magnus::exception::type_error(),
1451
- "Failed to coerce into TimestampSecondBuilder",
1452
- )
1453
- })?
1454
- .append_value(*x),
1455
- ParquetValue::TimestampMillis(x, _tz) => typed_builder
1456
- .field_builder::<TimestampMillisecondBuilder>(i)
1457
- .ok_or_else(|| {
1458
- MagnusError::new(
1459
- magnus::exception::type_error(),
1460
- "Failed to coerce into TimestampMillisecondBuilder",
1461
- )
1462
- })?
1463
- .append_value(*x),
1464
- ParquetValue::TimestampMicros(x, _tz) => typed_builder
1465
- .field_builder::<TimestampMicrosecondBuilder>(i)
1466
- .ok_or_else(|| {
1467
- MagnusError::new(
1468
- magnus::exception::type_error(),
1469
- "Failed to coerce into TimestampMicrosecondBuilder",
1470
- )
1471
- })?
1472
- .append_value(*x),
1473
- ParquetValue::TimestampNanos(x, _tz) => typed_builder
1474
- .field_builder::<TimestampNanosecondBuilder>(i)
1475
- .ok_or_else(|| {
1476
- MagnusError::new(
1477
- magnus::exception::type_error(),
1478
- "Failed to coerce into TimestampNanosecondBuilder",
1479
- )
1480
- })?
1481
- .append_value(*x),
1482
- ParquetValue::TimeMillis(x) => typed_builder
1483
- .field_builder::<Time32MillisecondBuilder>(i)
1484
- .ok_or_else(|| {
1485
- MagnusError::new(
1486
- magnus::exception::type_error(),
1487
- "Failed to coerce into Time32MillisecondBuilder",
1488
- )
1489
- })?
1490
- .append_value(*x),
1491
- ParquetValue::TimeMicros(x) => typed_builder
1492
- .field_builder::<Time64MicrosecondBuilder>(i)
1493
- .ok_or_else(|| {
1494
- MagnusError::new(
1495
- magnus::exception::type_error(),
1496
- "Failed to coerce into Time64MicrosecondBuilder",
1497
- )
1498
- })?
1499
- .append_value(*x),
1500
- ParquetValue::List(items) => {
1501
- let list_builder = typed_builder
1502
- .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
1503
- .ok_or_else(|| {
1504
- MagnusError::new(
1505
- magnus::exception::type_error(),
1506
- "Failed to coerce into ListBuilder",
1507
- )
1508
- })?;
1509
- fill_builder(
1510
- list_builder.values(),
1511
- &struct_field.fields[i].type_,
1512
- items,
1513
- )?;
1514
- list_builder.append(true);
1515
- }
1516
- ParquetValue::Map(map_data) => {
1517
- let maybe_map_builder = typed_builder
1518
- .field_builder::<MapBuilder<
1519
- Box<dyn ArrayBuilder>,
1520
- Box<dyn ArrayBuilder>,
1521
- >>(i);
1522
-
1523
- if let Some(map_builder) = maybe_map_builder {
1524
- fill_builder(
1525
- map_builder,
1526
- &struct_field.fields[i].type_,
1527
- &[ParquetValue::Map(map_data.clone())],
1528
- )?;
1529
- map_builder.append(true).map_err(|e| {
1530
- MagnusError::new(
1531
- magnus::exception::runtime_error(),
1532
- format!("Failed to append map: {}", e),
1533
- )
1534
- })?;
1535
- } else {
1536
- let child_struct_builder = typed_builder
1537
- .field_builder::<StructBuilder>(i)
1538
- .ok_or_else(|| {
1539
- MagnusError::new(
1540
- magnus::exception::type_error(),
1541
- "Failed to coerce into StructBuilder",
1542
- )
1543
- })?;
1544
- fill_builder(
1545
- child_struct_builder,
1546
- &struct_field.fields[i].type_,
1547
- &[ParquetValue::Map(map_data.clone())],
1548
- )?;
1549
- }
1550
- }
1551
- ParquetValue::Null => match struct_field.fields[i].type_ {
1552
- ParquetSchemaType::Primitive(PrimitiveType::Int8) => typed_builder
1553
- .field_builder::<Int8Builder>(i)
1554
- .ok_or_else(|| {
1555
- MagnusError::new(
1556
- magnus::exception::type_error(),
1557
- "Failed to coerce into Int8Builder",
1558
- )
1559
- })?
1560
- .append_null(),
1561
- ParquetSchemaType::Primitive(PrimitiveType::Int16) => typed_builder
1562
- .field_builder::<Int16Builder>(i)
1563
- .ok_or_else(|| {
1564
- MagnusError::new(
1565
- magnus::exception::type_error(),
1566
- "Failed to coerce into Int16Builder",
1567
- )
1568
- })?
1569
- .append_null(),
1570
- ParquetSchemaType::Primitive(PrimitiveType::Int32) => typed_builder
1571
- .field_builder::<Int32Builder>(i)
1572
- .ok_or_else(|| {
1573
- MagnusError::new(
1574
- magnus::exception::type_error(),
1575
- "Failed to coerce into Int32Builder",
1576
- )
1577
- })?
1578
- .append_null(),
1579
- ParquetSchemaType::Primitive(PrimitiveType::Int64) => typed_builder
1580
- .field_builder::<Int64Builder>(i)
1581
- .ok_or_else(|| {
1582
- MagnusError::new(
1583
- magnus::exception::type_error(),
1584
- "Failed to coerce into Int64Builder",
1585
- )
1586
- })?
1587
- .append_null(),
1588
- ParquetSchemaType::Primitive(PrimitiveType::UInt8) => typed_builder
1589
- .field_builder::<UInt8Builder>(i)
1590
- .ok_or_else(|| {
1591
- MagnusError::new(
1592
- magnus::exception::type_error(),
1593
- "Failed to coerce into UInt8Builder",
1594
- )
1595
- })?
1596
- .append_null(),
1597
- ParquetSchemaType::Primitive(PrimitiveType::UInt16) => typed_builder
1598
- .field_builder::<UInt16Builder>(i)
1599
- .ok_or_else(|| {
1600
- MagnusError::new(
1601
- magnus::exception::type_error(),
1602
- "Failed to coerce into UInt16Builder",
1603
- )
1604
- })?
1605
- .append_null(),
1606
- ParquetSchemaType::Primitive(PrimitiveType::UInt32) => typed_builder
1607
- .field_builder::<UInt32Builder>(i)
1608
- .ok_or_else(|| {
1609
- MagnusError::new(
1610
- magnus::exception::type_error(),
1611
- "Failed to coerce into UInt32Builder",
1612
- )
1613
- })?
1614
- .append_null(),
1615
- ParquetSchemaType::Primitive(PrimitiveType::UInt64) => typed_builder
1616
- .field_builder::<UInt64Builder>(i)
1617
- .ok_or_else(|| {
1618
- MagnusError::new(
1619
- magnus::exception::type_error(),
1620
- "Failed to coerce into UInt64Builder",
1621
- )
1622
- })?
1623
- .append_null(),
1624
- ParquetSchemaType::Primitive(PrimitiveType::Float32) => typed_builder
1625
- .field_builder::<Float32Builder>(i)
1626
- .ok_or_else(|| {
1627
- MagnusError::new(
1628
- magnus::exception::type_error(),
1629
- "Failed to coerce into Float32Builder",
1630
- )
1631
- })?
1632
- .append_null(),
1633
- ParquetSchemaType::Primitive(PrimitiveType::Float64) => typed_builder
1634
- .field_builder::<Float64Builder>(i)
1635
- .ok_or_else(|| {
1636
- MagnusError::new(
1637
- magnus::exception::type_error(),
1638
- "Failed to coerce into Float64Builder",
1639
- )
1640
- })?
1641
- .append_null(),
1642
- ParquetSchemaType::Primitive(PrimitiveType::Decimal128(_, _)) => typed_builder
1643
- .field_builder::<Decimal128Builder>(i)
1644
- .ok_or_else(|| {
1645
- MagnusError::new(
1646
- magnus::exception::type_error(),
1647
- "Failed to coerce into Decimal128Builder",
1648
- )
1649
- })?
1650
- .append_null(),
1651
- ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_, _)) => typed_builder
1652
- .field_builder::<Decimal256Builder>(i)
1653
- .ok_or_else(|| {
1654
- MagnusError::new(
1655
- magnus::exception::type_error(),
1656
- "Failed to coerce into Decimal256Builder for Decimal256",
1657
- )
1658
- })?
1659
- .append_null(),
1660
- ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
1661
- .field_builder::<StringBuilder>(i)
1662
- .ok_or_else(|| {
1663
- MagnusError::new(
1664
- magnus::exception::type_error(),
1665
- "Failed to coerce into StringBuilder",
1666
- )
1667
- })?
1668
- .append_null(),
1669
- ParquetSchemaType::Primitive(PrimitiveType::Binary) => typed_builder
1670
- .field_builder::<BinaryBuilder>(i)
1671
- .ok_or_else(|| {
1672
- MagnusError::new(
1673
- magnus::exception::type_error(),
1674
- "Failed to coerce into BinaryBuilder",
1675
- )
1676
- })?
1677
- .append_null(),
1678
- ParquetSchemaType::Primitive(PrimitiveType::Boolean) => typed_builder
1679
- .field_builder::<BooleanBuilder>(i)
1680
- .ok_or_else(|| {
1681
- MagnusError::new(
1682
- magnus::exception::type_error(),
1683
- "Failed to coerce into BooleanBuilder",
1684
- )
1685
- })?
1686
- .append_null(),
1687
- ParquetSchemaType::Primitive(PrimitiveType::Date32) => typed_builder
1688
- .field_builder::<Date32Builder>(i)
1689
- .ok_or_else(|| {
1690
- MagnusError::new(
1691
- magnus::exception::type_error(),
1692
- "Failed to coerce into Date32Builder",
1693
- )
1694
- })?
1695
- .append_null(),
1696
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis) => typed_builder
1697
- .field_builder::<TimestampMillisecondBuilder>(i)
1698
- .ok_or_else(|| {
1699
- MagnusError::new(
1700
- magnus::exception::type_error(),
1701
- "Failed to coerce into TimestampMillisecondBuilder",
1702
- )
1703
- })?
1704
- .append_null(),
1705
- ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => typed_builder
1706
- .field_builder::<TimestampMicrosecondBuilder>(i)
1707
- .ok_or_else(|| {
1708
- MagnusError::new(
1709
- magnus::exception::type_error(),
1710
- "Failed to coerce into TimestampMicrosecondBuilder",
1711
- )
1712
- })?
1713
- .append_null(),
1714
- ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => typed_builder
1715
- .field_builder::<Time32MillisecondBuilder>(i)
1716
- .ok_or_else(|| {
1717
- MagnusError::new(
1718
- magnus::exception::type_error(),
1719
- "Failed to coerce into Time32MillisecondBuilder",
1720
- )
1721
- })?
1722
- .append_null(),
1723
- ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => typed_builder
1724
- .field_builder::<Time64MicrosecondBuilder>(i)
1725
- .ok_or_else(|| {
1726
- MagnusError::new(
1727
- magnus::exception::type_error(),
1728
- "Failed to coerce into Time64MicrosecondBuilder",
1729
- )
1730
- })?
1731
- .append_null(),
1732
- ParquetSchemaType::List(_) => typed_builder
1733
- .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
1734
- .ok_or_else(|| {
1735
- MagnusError::new(
1736
- magnus::exception::type_error(),
1737
- "Failed to coerce into ListBuilder",
1738
- )
1739
- })?
1740
- .append(false),
1741
- ParquetSchemaType::Map(_) => {
1742
- typed_builder
1743
- .field_builder::<MapBuilder<
1744
- Box<dyn ArrayBuilder>,
1745
- Box<dyn ArrayBuilder>,
1746
- >>(i)
1747
- .ok_or_else(|| {
1748
- MagnusError::new(
1749
- magnus::exception::type_error(),
1750
- "Failed to coerce into MapBuilder",
1751
- )
1752
- })?
1753
- .append(false)
1754
- .map_err(|e| {
1755
- MagnusError::new(
1756
- magnus::exception::runtime_error(),
1757
- format!("Failed to append map: {}", e),
1758
- )
1759
- })?;
1760
- }
1761
- ParquetSchemaType::Struct(_) => typed_builder
1762
- .field_builder::<StructBuilder>(i)
1763
- .ok_or_else(|| {
1764
- MagnusError::new(
1765
- magnus::exception::type_error(),
1766
- "Failed to coerce into StructBuilder",
1767
- )
1768
- })?
1769
- .append_null(),
1770
- },
1771
- }
1772
- } else {
1773
- return Err(MagnusError::new(
1774
- magnus::exception::type_error(),
1775
- format!("Field {} not found in map", i),
1776
- ));
1777
- }
1778
- }
1779
- typed_builder.append(true);
1780
- }
1781
- other => {
1782
- return Err(MagnusError::new(
1783
- magnus::exception::type_error(),
1784
- format!("Expected ParquetValue::Map(...) or Null, got {:?}", other),
1785
- ));
1786
- }
1787
- }
1788
- }
1789
- Ok(())
1790
- }
1791
- }
1792
- }
1793
-
1794
- /// Creates a final Arrow array from a list of ParquetValues and a schema type.
1795
- /// This is your "unified" way to handle any nesting level.
1796
- pub fn convert_parquet_values_to_arrow(
1797
- values: Vec<ParquetValue>,
1798
- type_: &ParquetSchemaType,
1799
- ) -> Result<Arc<dyn Array>, ParquetGemError> {
1800
- // Make sure we always have at least capacity 1 to avoid empty builders
1801
- let capacity = if values.is_empty() { 1 } else { values.len() };
1802
- let mut builder = create_arrow_builder_for_type(type_, Some(capacity))?;
1803
-
1804
- fill_builder(&mut builder, type_, &values)?;
1805
-
1806
- // Finish building the array
1807
- let array = builder.finish();
1808
-
1809
- Ok(Arc::new(array))
1810
- }
1811
-
1812
- pub fn convert_ruby_array_to_arrow(
1813
- ruby: &Ruby,
1814
- values: RArray,
1815
- type_: &ParquetSchemaType,
1816
- ) -> Result<Arc<dyn Array>, ParquetGemError> {
1817
- let mut parquet_values = Vec::with_capacity(values.len());
1818
- for value in values {
1819
- if value.is_nil() {
1820
- parquet_values.push(ParquetValue::Null);
1821
- continue;
1822
- }
1823
- let parquet_value = ParquetValue::from_value(ruby, value, type_, None)?;
1824
- parquet_values.push(parquet_value);
1825
- }
1826
- convert_parquet_values_to_arrow(parquet_values, type_)
1827
- }
1828
-
1829
- pub fn convert_to_time_millis(
1830
- ruby: &Ruby,
1831
- value: Value,
1832
- format: Option<&str>,
1833
- ) -> Result<i32, MagnusError> {
1834
- if value.is_kind_of(ruby.class_time()) {
1835
- // Extract time components
1836
- let hour = i32::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
1837
- let min = i32::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
1838
- let sec = i32::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
1839
- let usec = i32::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
1840
-
1841
- // Convert to milliseconds since midnight
1842
- Ok(hour * 3600000 + min * 60000 + sec * 1000 + usec / 1000)
1843
- } else if value.is_kind_of(ruby.class_string()) {
1844
- let s = String::try_convert(value)?;
1845
-
1846
- if let Some(fmt) = format {
1847
- // Parse using the provided format
1848
- match jiff::civil::Time::strptime(fmt, &s) {
1849
- Ok(time) => {
1850
- let millis = time.hour() as i32 * 3600000
1851
- + time.minute() as i32 * 60000
1852
- + time.second() as i32 * 1000
1853
- + time.millisecond() as i32;
1854
- Ok(millis)
1855
- }
1856
- Err(e) => Err(MagnusError::new(
1857
- magnus::exception::type_error(),
1858
- format!(
1859
- "Failed to parse '{}' with format '{}' as time: {}",
1860
- s, fmt, e
1861
- ),
1862
- )),
1863
- }
1864
- } else {
1865
- // Try to parse as standard time format
1866
- match s.parse::<jiff::civil::Time>() {
1867
- Ok(time) => {
1868
- let millis = time.hour() as i32 * 3600000
1869
- + time.minute() as i32 * 60000
1870
- + time.second() as i32 * 1000
1871
- + time.millisecond() as i32;
1872
- Ok(millis)
1873
- }
1874
- Err(e) => Err(MagnusError::new(
1875
- magnus::exception::type_error(),
1876
- format!("Failed to parse '{}' as time: {}", s, e),
1877
- )),
1878
- }
1879
- }
1880
- } else {
1881
- Err(MagnusError::new(
1882
- magnus::exception::type_error(),
1883
- format!("Cannot convert {} to time_millis", unsafe {
1884
- value.classname()
1885
- }),
1886
- ))
1887
- }
1888
- }
1889
-
1890
- pub fn convert_to_time_micros(
1891
- ruby: &Ruby,
1892
- value: Value,
1893
- format: Option<&str>,
1894
- ) -> Result<i64, MagnusError> {
1895
- if value.is_kind_of(ruby.class_time()) {
1896
- // Extract time components
1897
- let hour = i64::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
1898
- let min = i64::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
1899
- let sec = i64::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
1900
- let usec = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
1901
-
1902
- // Convert to microseconds since midnight
1903
- Ok(hour * 3600000000 + min * 60000000 + sec * 1000000 + usec)
1904
- } else if value.is_kind_of(ruby.class_string()) {
1905
- let s = String::try_convert(value)?;
1906
-
1907
- if let Some(fmt) = format {
1908
- // Parse using the provided format
1909
- match jiff::civil::Time::strptime(fmt, &s) {
1910
- Ok(time) => {
1911
- let micros = time.hour() as i64 * 3600000000
1912
- + time.minute() as i64 * 60000000
1913
- + time.second() as i64 * 1000000
1914
- + time.microsecond() as i64;
1915
- Ok(micros)
1916
- }
1917
- Err(e) => Err(MagnusError::new(
1918
- magnus::exception::type_error(),
1919
- format!(
1920
- "Failed to parse '{}' with format '{}' as time: {}",
1921
- s, fmt, e
1922
- ),
1923
- )),
1924
- }
1925
- } else {
1926
- // Try to parse as standard time format
1927
- match s.parse::<jiff::civil::Time>() {
1928
- Ok(time) => {
1929
- let micros = time.hour() as i64 * 3600000000
1930
- + time.minute() as i64 * 60000000
1931
- + time.second() as i64 * 1000000
1932
- + time.microsecond() as i64;
1933
- Ok(micros)
1934
- }
1935
- Err(e) => Err(MagnusError::new(
1936
- magnus::exception::type_error(),
1937
- format!("Failed to parse '{}' as time: {}", s, e),
1938
- )),
1939
- }
1940
- }
1941
- } else {
1942
- Err(MagnusError::new(
1943
- magnus::exception::type_error(),
1944
- format!("Cannot convert {} to time_micros", unsafe {
1945
- value.classname()
1946
- }),
1947
- ))
1948
- }
1949
- }