parquet 0.5.12 → 0.6.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +8 -5
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -603
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
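The headline change is structural: the old monolithic `ext/parquet` conversion modules are removed and replaced by a Ruby-agnostic `parquet-core` crate plus a `parquet-ruby-adapter` crate. The new test file reproduced below exercises parquet-core's Arrow conversion layer. As a minimal sketch (not part of the diff, written only with the calls and types that appear in those tests), the roundtrip it covers looks like:

// Minimal sketch, not part of the diff: the ParquetValue <-> Arrow roundtrip
// that the parquet-core tests below exercise, using only calls that appear
// in those tests.
use arrow_schema::{DataType, Field};
use parquet_core::arrow_conversion::{arrow_to_parquet_value, parquet_values_to_arrow_array};
use parquet_core::*;

#[test]
fn roundtrip_sketch() {
    // Values on the parquet-core side of the boundary...
    let values = vec![ParquetValue::Int32(1), ParquetValue::Null];
    let field = Field::new("example", DataType::Int32, true);

    // ...become a typed Arrow array...
    let array = parquet_values_to_arrow_array(values, &field).unwrap();
    assert_eq!(array.len(), 2);

    // ...and individual rows convert back to ParquetValue.
    match arrow_to_parquet_value(array.as_ref(), 0).unwrap() {
        ParquetValue::Int32(v) => assert_eq!(v, 1),
        _ => panic!("Unexpected value"),
    }
}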
data/ext/parquet-core/tests/arrow_conversion_tests.rs
@@ -0,0 +1,423 @@
use arrow_array::*;
use arrow_schema::{DataType, Field, TimeUnit};
use bytes::Bytes;
use num::BigInt;
use ordered_float::OrderedFloat;
use parquet_core::arrow_conversion::{arrow_to_parquet_value, parquet_values_to_arrow_array};
use parquet_core::*;
use std::sync::Arc;

#[test]
fn test_float16_conversion() {
    let values = vec![
        ParquetValue::Float16(OrderedFloat(1.0f32)),
        ParquetValue::Float16(OrderedFloat(-2.5f32)),
        ParquetValue::Float16(OrderedFloat(0.0f32)),
        ParquetValue::Null,
    ];

    // Test upcast to Float32
    let field = Field::new("test", DataType::Float32, true);
    let array = parquet_values_to_arrow_array(values.clone(), &field).unwrap();
    assert_eq!(array.len(), 4);

    let float_array = array.as_any().downcast_ref::<Float32Array>().unwrap();
    assert_eq!(float_array.value(0), 1.0);
    assert_eq!(float_array.value(1), -2.5);
    assert_eq!(float_array.value(2), 0.0);
    assert!(float_array.is_null(3));

    // Test upcast to Float64
    let field = Field::new("test", DataType::Float64, true);
    let array = parquet_values_to_arrow_array(values, &field).unwrap();
    let float_array = array.as_any().downcast_ref::<Float64Array>().unwrap();
    assert_eq!(float_array.value(0), 1.0);
    assert_eq!(float_array.value(1), -2.5);
    assert_eq!(float_array.value(2), 0.0);
}

#[test]
fn test_fixed_size_binary_conversion() {
    let uuid_bytes = Bytes::from(vec![
        0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88,
    ]);

    let values = vec![
        ParquetValue::Bytes(uuid_bytes.clone()),
        ParquetValue::Bytes(Bytes::from(vec![0u8; 16])),
        ParquetValue::Null,
    ];

    let field = Field::new("uuid", DataType::FixedSizeBinary(16), true);
    let array = parquet_values_to_arrow_array(values, &field).unwrap();

    let fixed_array = array
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();
    assert_eq!(fixed_array.value(0), uuid_bytes.as_ref());
    assert_eq!(fixed_array.value(1), vec![0u8; 16]);
    assert!(fixed_array.is_null(2));
}

#[test]
fn test_fixed_size_binary_wrong_size_error() {
    let values = vec![
        ParquetValue::Bytes(Bytes::from(vec![1, 2, 3])), // Wrong size
    ];

    let field = Field::new("test", DataType::FixedSizeBinary(16), true);
    let result = parquet_values_to_arrow_array(values, &field);

    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("Fixed size binary expected 16 bytes, got 3"));
}

#[test]
fn test_decimal256_large_values() {
    // Test very large Decimal256 values
    let large_positive = BigInt::parse_bytes(
        b"99999999999999999999999999999999999999999999999999999999999999999999999999",
        10,
    )
    .unwrap();
    let large_negative = -large_positive.clone();

    let values = vec![
        ParquetValue::Decimal256(large_positive.clone(), 0),
        ParquetValue::Decimal256(large_negative.clone(), 0),
        ParquetValue::Decimal256(BigInt::from(0), 0),
        ParquetValue::Null,
    ];

    let field = Field::new("test", DataType::Decimal256(76, 0), true);
    let array = parquet_values_to_arrow_array(values, &field).unwrap();

    // Verify roundtrip
    for i in 0..4 {
        let value = arrow_to_parquet_value(array.as_ref(), i).unwrap();
        match (i, value) {
            (0, ParquetValue::Decimal256(v, _)) => assert_eq!(v, large_positive.clone()),
            (1, ParquetValue::Decimal256(v, _)) => assert_eq!(v, large_negative.clone()),
            (2, ParquetValue::Decimal256(v, _)) => assert_eq!(v, BigInt::from(0)),
            (3, ParquetValue::Null) => {}
            _ => panic!("Unexpected value"),
        }
    }
}

#[test]
fn test_decimal256_too_large_error() {
    // Create a value that's too large for 256 bits
    let too_large = BigInt::from(2).pow(256);

    let values = vec![ParquetValue::Decimal256(too_large, 0)];

    let field = Field::new("test", DataType::Decimal256(76, 0), true);
    let result = parquet_values_to_arrow_array(values, &field);

    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("Decimal256 value too large"));
}

#[test]
fn test_time_type_conversions() {
    // Test TimeMillis
    let values_millis = vec![
        ParquetValue::TimeMillis(12345),
        ParquetValue::TimeMillis(0),
        ParquetValue::TimeMillis(86399999), // Last millisecond of day
        ParquetValue::Null,
    ];

    let field = Field::new("time", DataType::Time32(TimeUnit::Millisecond), true);
    let array = parquet_values_to_arrow_array(values_millis, &field).unwrap();
    assert_eq!(array.len(), 4);

    // Test TimeMicros
    let values_micros = vec![
        ParquetValue::TimeMicros(12345678),
        ParquetValue::TimeMicros(0),
        ParquetValue::TimeMicros(86399999999), // Last microsecond of day
        ParquetValue::Null,
    ];

    let field = Field::new("time", DataType::Time64(TimeUnit::Microsecond), true);
    let array = parquet_values_to_arrow_array(values_micros, &field).unwrap();
    assert_eq!(array.len(), 4);
}

#[test]
fn test_timestamp_with_timezone() {
    let tz = Some(Arc::from("America/New_York"));

    let values = vec![
        ParquetValue::TimestampMillis(1234567890123, tz.clone()),
        ParquetValue::TimestampMillis(0, tz.clone()),
        ParquetValue::Null,
    ];

    let field = Field::new(
        "ts",
        DataType::Timestamp(TimeUnit::Millisecond, Some("America/New_York".into())),
        true,
    );
    let array = parquet_values_to_arrow_array(values, &field).unwrap();

    // Verify roundtrip preserves timezone
    for i in 0..3 {
        let value = arrow_to_parquet_value(array.as_ref(), i).unwrap();
        match value {
            ParquetValue::TimestampMillis(_, Some(tz)) => {
                assert_eq!(tz.as_ref(), "America/New_York");
            }
            ParquetValue::Null => assert_eq!(i, 2),
            _ => panic!("Unexpected value"),
        }
    }
}

#[test]
fn test_nested_list_of_lists() {
    // Create a list of lists: [[1, 2], [3], [], null, [4, 5, 6]]
    let inner_lists = vec![
        ParquetValue::List(vec![ParquetValue::Int32(1), ParquetValue::Int32(2)]),
        ParquetValue::List(vec![ParquetValue::Int32(3)]),
        ParquetValue::List(vec![]),
        ParquetValue::Null,
        ParquetValue::List(vec![
            ParquetValue::Int32(4),
            ParquetValue::Int32(5),
            ParquetValue::Int32(6),
        ]),
    ];

    let values = vec![ParquetValue::List(inner_lists)];

    let inner_field = Field::new("item", DataType::Int32, false);
    let list_field = Field::new("inner_list", DataType::List(Arc::new(inner_field)), true);
    let outer_field = Field::new("outer_list", DataType::List(Arc::new(list_field)), false);

    let array = parquet_values_to_arrow_array(values, &outer_field).unwrap();
    assert_eq!(array.len(), 1);

    // Verify roundtrip
    let value = arrow_to_parquet_value(array.as_ref(), 0).unwrap();
    match value {
        ParquetValue::List(items) => assert_eq!(items.len(), 5),
        _ => panic!("Expected list"),
    }
}

#[test]
fn test_map_with_null_values() {
    let map_entries = vec![
        (
            ParquetValue::String(Arc::from("key1")),
            ParquetValue::Int32(100),
        ),
        (ParquetValue::String(Arc::from("key2")), ParquetValue::Null),
        (
            ParquetValue::String(Arc::from("key3")),
            ParquetValue::Int32(300),
        ),
    ];

    let values = vec![ParquetValue::Map(map_entries), ParquetValue::Null];

    let key_field = Field::new("key", DataType::Utf8, false);
    let value_field = Field::new("value", DataType::Int32, true);
    let entries_field = Field::new(
        "entries",
        DataType::Struct(vec![key_field, value_field].into()),
        false,
    );
    let map_field = Field::new("map", DataType::Map(Arc::new(entries_field), false), true);

    let array = parquet_values_to_arrow_array(values, &map_field).unwrap();
    assert_eq!(array.len(), 2);

    // Verify the map was created correctly
    let map_array = array.as_any().downcast_ref::<MapArray>().unwrap();
    assert!(!map_array.is_null(0));
    assert!(map_array.is_null(1));
}

#[test]
fn test_struct_with_missing_fields() {
    use indexmap::IndexMap;

    // Create a struct with some fields missing
    let mut record1 = IndexMap::new();
    record1.insert(
        Arc::from("field1"),
        ParquetValue::String(Arc::from("value1")),
    );
    // field2 is missing
    record1.insert(Arc::from("field3"), ParquetValue::Int32(42));

    let mut record2 = IndexMap::new();
    record2.insert(
        Arc::from("field1"),
        ParquetValue::String(Arc::from("value2")),
    );
    record2.insert(Arc::from("field2"), ParquetValue::Boolean(true));
    record2.insert(Arc::from("field3"), ParquetValue::Int32(99));

    let values = vec![
        ParquetValue::Record(record1),
        ParquetValue::Record(record2),
        ParquetValue::Null,
    ];

    let fields = vec![
        Field::new("field1", DataType::Utf8, false),
        Field::new("field2", DataType::Boolean, true), // nullable to handle missing
        Field::new("field3", DataType::Int32, false),
    ];

    let struct_field = Field::new("struct", DataType::Struct(fields.into()), true);
    let array = parquet_values_to_arrow_array(values, &struct_field).unwrap();

    let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(struct_array.len(), 3);

    // Verify field2 is null for first record
    let field2_array = struct_array
        .column(1)
        .as_any()
        .downcast_ref::<BooleanArray>()
        .unwrap();
    assert!(field2_array.is_null(0));
    assert!(!field2_array.is_null(1));
}

#[test]
fn test_type_mismatch_errors() {
    // Test various type mismatches

    // Boolean field expecting String value
    let values = vec![ParquetValue::String(Arc::from("not a boolean"))];
    let field = Field::new("test", DataType::Boolean, false);
    let result = parquet_values_to_arrow_array(values, &field);
    assert!(result.is_err());
    let error_msg = result.unwrap_err().to_string();
    assert!(
        error_msg.contains("Expected Boolean") && error_msg.contains("String"),
        "Error message was: {}",
        error_msg
    );

    // Int32 field expecting Float value
    let values = vec![ParquetValue::Float32(OrderedFloat(3.14))];
    let field = Field::new("test", DataType::Int32, false);
    let result = parquet_values_to_arrow_array(values, &field);
    assert!(result.is_err());
    let error_msg = result.unwrap_err().to_string();
    assert!(
        error_msg.contains("Expected Int32") && error_msg.contains("Float32"),
        "Error message was: {}",
        error_msg
    );

    // List field expecting non-list value
    let values = vec![ParquetValue::Int32(42)];
    let item_field = Field::new("item", DataType::Int32, false);
    let list_field = Field::new("list", DataType::List(Arc::new(item_field)), false);
    let result = parquet_values_to_arrow_array(values, &list_field);
    assert!(result.is_err());
    let error_msg = result.unwrap_err().to_string();
    assert!(
        error_msg.contains("Expected List") && error_msg.contains("Int32"),
        "Error message was: {}",
        error_msg
    );
}

#[test]
fn test_unsupported_arrow_types() {
    // Test arrow_to_parquet_value with unsupported types
    // Create a simple union type
    let type_ids = arrow_buffer::ScalarBuffer::from(vec![0i8, 0, 0]);
    let fields = vec![Arc::new(Field::new("int", DataType::Int32, false))];
    let union_fields = arrow_schema::UnionFields::new(vec![0], fields);

    let array = arrow_array::UnionArray::try_new(
        union_fields,
        type_ids,
        None,
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
    )
    .unwrap();

    let result = arrow_to_parquet_value(&array, 0);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("Unsupported data type for conversion"));
}

#[test]
fn test_integer_overflow_prevention() {
    // Test that we can't upcast a value that would overflow
    let values = vec![ParquetValue::Int64(i64::MAX), ParquetValue::Int64(i64::MIN)];

    // These should work fine in Int64
    let field = Field::new("test", DataType::Int64, false);
    let array = parquet_values_to_arrow_array(values, &field).unwrap();
    let int_array = array.as_any().downcast_ref::<Int64Array>().unwrap();
    assert_eq!(int_array.value(0), i64::MAX);
    assert_eq!(int_array.value(1), i64::MIN);
}

#[test]
fn test_empty_collections() {
    // Test empty list
    let values = vec![ParquetValue::List(vec![])];
    let field = Field::new(
        "list",
        DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
        false,
    );
    let array = parquet_values_to_arrow_array(values.clone(), &field).unwrap();
    let list_array = array.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(list_array.value(0).len(), 0);

    // Test empty map
    let values = vec![ParquetValue::Map(vec![])];
    let key_field = Field::new("key", DataType::Utf8, false);
    let value_field = Field::new("value", DataType::Int32, true);
    let entries_field = Field::new(
        "entries",
        DataType::Struct(vec![key_field, value_field].into()),
        false,
    );
    let map_field = Field::new("map", DataType::Map(Arc::new(entries_field), false), false);
    let array = parquet_values_to_arrow_array(values, &map_field).unwrap();
    let map_array = array.as_any().downcast_ref::<MapArray>().unwrap();
    assert_eq!(map_array.value(0).len(), 0);

    // Test empty struct (all fields null)
    use indexmap::IndexMap;
    let empty_record = IndexMap::new();
    let values = vec![ParquetValue::Record(empty_record)];
    let fields = vec![
        Field::new("field1", DataType::Utf8, true),
        Field::new("field2", DataType::Int32, true),
    ];
    let struct_field = Field::new("struct", DataType::Struct(fields.into()), false);
    let array = parquet_values_to_arrow_array(values, &struct_field).unwrap();
    let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();

    // All fields should be null
    assert!(struct_array.column(0).is_null(0));
    assert!(struct_array.column(1).is_null(0));
}