parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
@@ -0,0 +1,484 @@
1
+ use arrow_array::*;
2
+ use arrow_schema::{DataType, Field, TimeUnit};
3
+ use bytes::Bytes;
4
+ use num::BigInt;
5
+ use ordered_float::OrderedFloat;
6
+ use parquet::schema::types::Type;
7
+ use parquet_core::arrow_conversion::{arrow_to_parquet_value, parquet_values_to_arrow_array};
8
+ use parquet_core::*;
9
+ use std::sync::Arc as StdArc;
10
+ use triomphe::Arc;
11
+
12
/// Float16 values are converted into Float32 and Float64 Arrow columns.
///
/// Both target types get the same checks: row count, the three numeric
/// values, and the trailing null slot.
#[test]
fn test_float16_conversion() {
    let values = vec![
        ParquetValue::Float16(OrderedFloat(1.0f32)),
        ParquetValue::Float16(OrderedFloat(-2.5f32)),
        ParquetValue::Float16(OrderedFloat(0.0f32)),
        ParquetValue::Null,
    ];

    // Test upcast to Float32
    let field = Field::new("test", DataType::Float32, true);
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();
    assert_eq!(array.len(), 4);

    let float_array = array.as_any().downcast_ref::<Float32Array>().unwrap();
    assert_eq!(float_array.value(0), 1.0);
    assert_eq!(float_array.value(1), -2.5);
    assert_eq!(float_array.value(2), 0.0);
    assert!(float_array.is_null(3));

    // Test upcast to Float64
    let field = Field::new("test", DataType::Float64, true);
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();
    // Mirror the Float32 checks so a dropped row or lost null is caught here too.
    assert_eq!(array.len(), 4);

    let float_array = array.as_any().downcast_ref::<Float64Array>().unwrap();
    assert_eq!(float_array.value(0), 1.0);
    assert_eq!(float_array.value(1), -2.5);
    assert_eq!(float_array.value(2), 0.0);
    assert!(float_array.is_null(3));
}
40
+
41
/// Two 16-byte payloads and a null are converted into a
/// `FixedSizeBinary(16)` column and read back.
#[test]
fn test_fixed_size_binary_conversion() {
    let field = Field::new("uuid", DataType::FixedSizeBinary(16), true);

    let uuid_bytes = Bytes::from(vec![
        0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88,
    ]);
    let zero_bytes = Bytes::from(vec![0u8; 16]);

    let values = vec![
        ParquetValue::Bytes(uuid_bytes.clone()),
        ParquetValue::Bytes(zero_bytes),
        ParquetValue::Null,
    ];

    let converted = parquet_values_to_arrow_array(&values, &field).unwrap();
    let fixed_array = converted
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();

    // Row 0: the UUID-like payload; row 1: all zeroes; row 2: null.
    assert_eq!(fixed_array.value(0), uuid_bytes.as_ref());
    assert_eq!(fixed_array.value(1), vec![0u8; 16]);
    assert!(fixed_array.is_null(2));
}
65
+
66
/// A 3-byte payload offered to a `FixedSizeBinary(16)` field must be
/// rejected with the size-mismatch message.
#[test]
fn test_fixed_size_binary_wrong_size_error() {
    let field = Field::new("test", DataType::FixedSizeBinary(16), true);

    // Deliberately the wrong length for the field above.
    let short_payload = Bytes::from(vec![1, 2, 3]);
    let values = vec![ParquetValue::Bytes(short_payload)];

    let result = parquet_values_to_arrow_array(&values, &field);
    assert!(result.is_err());

    let message = result.unwrap_err().to_string();
    assert!(message.contains("Fixed size binary expected 16 bytes, got 3"));
}
81
+
82
/// Very large positive/negative Decimal256 values, zero, and null are
/// converted to an Arrow `Decimal256(76, 0)` column and read back through
/// `arrow_to_parquet_value`.
#[test]
fn test_decimal256_large_values() {
    // Test very large Decimal256 values
    let large_positive = BigInt::parse_bytes(
        b"99999999999999999999999999999999999999999999999999999999999999999999999999",
        10,
    )
    .unwrap();
    let large_negative = -large_positive.clone();

    let values = vec![
        ParquetValue::Decimal256(large_positive.clone(), 0),
        ParquetValue::Decimal256(large_negative.clone(), 0),
        ParquetValue::Decimal256(BigInt::from(0), 0),
        ParquetValue::Null,
    ];

    let field = Field::new("test", DataType::Decimal256(76, 0), true);
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();
    // Guard the loop below: every input row must have produced an output row.
    assert_eq!(array.len(), values.len());

    // Dummy parquet type for the read-back call. It does not depend on the
    // row index, so it is built once instead of once per iteration.
    let parquet_type =
        Type::primitive_type_builder("test", parquet::basic::Type::FIXED_LEN_BYTE_ARRAY)
            .with_length(32)
            .with_precision(76)
            .with_scale(0)
            .with_logical_type(Some(parquet::basic::LogicalType::Decimal {
                scale: 0,
                precision: 76,
            }))
            .build()
            .unwrap();

    // Verify roundtrip
    for i in 0..array.len() {
        let value = arrow_to_parquet_value(&field, &parquet_type, array.as_ref(), i).unwrap();
        match (i, value) {
            // `assert_eq!` only borrows its operands, so no clones are needed.
            (0, ParquetValue::Decimal256(v, _)) => assert_eq!(v, large_positive),
            (1, ParquetValue::Decimal256(v, _)) => assert_eq!(v, large_negative),
            (2, ParquetValue::Decimal256(v, _)) => assert_eq!(v, BigInt::from(0)),
            (3, ParquetValue::Null) => {}
            _ => panic!("Unexpected value"),
        }
    }
}
126
+
127
/// 2^256 is offered to a `Decimal256(76, 0)` field; the conversion must fail
/// with the precision-overflow message.
#[test]
fn test_decimal256_precision_overflow_error() {
    let field = Field::new("test", DataType::Decimal256(76, 0), true);

    let too_large = BigInt::from(2).pow(256);
    let values = vec![ParquetValue::Decimal256(too_large, 0)];

    let result = parquet_values_to_arrow_array(&values, &field);
    assert!(result.is_err());

    let message = result.unwrap_err().to_string();
    assert!(message.contains("Decimal precision overflow"));
}
142
+
143
/// Time-of-day values at millisecond, microsecond, and nanosecond resolution
/// are converted into the matching Arrow time columns.
///
/// Each section checks the row count, the stored values, and the null slot,
/// not just the array length.
#[test]
fn test_time_type_conversions() {
    // Test TimeMillis
    let values_millis = vec![
        ParquetValue::TimeMillis(12345),
        ParquetValue::TimeMillis(0),
        ParquetValue::TimeMillis(86399999), // Last millisecond of day
        ParquetValue::Null,
    ];

    let field = Field::new("time", DataType::Time32(TimeUnit::Millisecond), true);
    let array = parquet_values_to_arrow_array(&values_millis, &field).unwrap();
    assert_eq!(array.len(), 4);

    let millis_array = array
        .as_any()
        .downcast_ref::<Time32MillisecondArray>()
        .unwrap();
    assert_eq!(millis_array.value(0), 12345);
    assert_eq!(millis_array.value(1), 0);
    assert_eq!(millis_array.value(2), 86399999);
    assert!(millis_array.is_null(3));

    // Test TimeMicros
    let values_micros = vec![
        ParquetValue::TimeMicros(12345678),
        ParquetValue::TimeMicros(0),
        ParquetValue::TimeMicros(86399999999), // Last microsecond of day
        ParquetValue::Null,
    ];

    let field = Field::new("time", DataType::Time64(TimeUnit::Microsecond), true);
    let array = parquet_values_to_arrow_array(&values_micros, &field).unwrap();
    assert_eq!(array.len(), 4);

    let micros_array = array
        .as_any()
        .downcast_ref::<Time64MicrosecondArray>()
        .unwrap();
    assert_eq!(micros_array.value(0), 12345678);
    assert_eq!(micros_array.value(1), 0);
    assert_eq!(micros_array.value(2), 86399999999);
    assert!(micros_array.is_null(3));

    // Test TimeNanos
    let values_nanos = vec![
        ParquetValue::TimeNanos(123456789),
        ParquetValue::TimeNanos(0),
        ParquetValue::TimeNanos(86399999999999), // Last nanosecond of day
        ParquetValue::Null,
    ];

    let field = Field::new("time", DataType::Time64(TimeUnit::Nanosecond), true);
    let array = parquet_values_to_arrow_array(&values_nanos, &field).unwrap();
    assert_eq!(array.len(), 4);

    let nanos_array = array
        .as_any()
        .downcast_ref::<Time64NanosecondArray>()
        .unwrap();
    assert_eq!(nanos_array.value(0), 123456789);
    assert_eq!(nanos_array.value(1), 0);
    assert_eq!(nanos_array.value(2), 86399999999999);
    assert!(nanos_array.is_null(3));
}
180
+
181
/// Millisecond timestamps tagged with "America/New_York" are converted to an
/// Arrow timestamp column and read back through `arrow_to_parquet_value`.
///
/// Each row is checked against its index: rows 0 and 1 must come back as
/// timestamps with the original value and timezone, row 2 must come back null.
#[test]
fn test_timestamp_with_timezone() {
    let tz = Some(Arc::from("America/New_York"));

    let values = vec![
        ParquetValue::TimestampMillis(1234567890123, tz.clone()),
        ParquetValue::TimestampMillis(0, tz.clone()),
        ParquetValue::Null,
    ];

    let field = Field::new(
        "ts",
        DataType::Timestamp(TimeUnit::Millisecond, Some("America/New_York".into())),
        true,
    );
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();
    assert_eq!(array.len(), 3);

    // Dummy parquet type for the read-back call. It does not depend on the
    // row index, so it is built once instead of once per iteration.
    let parquet_type = Type::primitive_type_builder("test", parquet::basic::Type::INT64)
        .with_logical_type(Some(parquet::basic::LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: parquet::basic::TimeUnit::MILLIS,
        }))
        .build()
        .unwrap();

    // Verify roundtrip preserves value, timezone, and null position
    for i in 0..3 {
        let value = arrow_to_parquet_value(&field, &parquet_type, array.as_ref(), i).unwrap();
        match (i, value) {
            (0 | 1, ParquetValue::TimestampMillis(millis, Some(tz))) => {
                let expected_millis = if i == 0 { 1234567890123 } else { 0 };
                assert_eq!(millis, expected_millis);
                assert_eq!(tz.as_ref(), "America/New_York");
            }
            (2, ParquetValue::Null) => {}
            // Covers a timestamp in the null row, a null in a value row,
            // and a timestamp that lost its timezone.
            _ => panic!("Unexpected value"),
        }
    }
}
218
+
219
/// A single row holding `[[1, 2], [3], [], null, [4, 5, 6]]` is converted to
/// an Arrow list-of-lists column; reading row 0 back must yield a list with
/// five entries.
#[test]
fn test_nested_list_of_lists() {
    // Arrow schema: non-null outer list -> nullable inner list -> non-null Int32.
    let outer_field = Field::new(
        "outer_list",
        DataType::List(StdArc::new(Field::new(
            "inner_list",
            DataType::List(StdArc::new(Field::new("item", DataType::Int32, false))),
            true,
        ))),
        false,
    );

    // Small builder for one inner list of Int32 values.
    let int_list = |items: &[i32]| {
        ParquetValue::List(items.iter().map(|&n| ParquetValue::Int32(n)).collect())
    };

    let values = vec![ParquetValue::List(vec![
        int_list(&[1, 2]),
        int_list(&[3]),
        int_list(&[]),
        ParquetValue::Null,
        int_list(&[4, 5, 6]),
    ])];

    let array = parquet_values_to_arrow_array(&values, &outer_field).unwrap();
    assert_eq!(array.len(), 1);

    // Dummy parquet type for the read-back call: group(outer) -> group(inner) -> INT32.
    let item_type = Type::primitive_type_builder("item", parquet::basic::Type::INT32)
        .build()
        .unwrap();
    let inner_group = Type::group_type_builder("inner_list")
        .with_fields(vec![StdArc::new(item_type)])
        .build()
        .unwrap();
    let parquet_type = Type::group_type_builder("outer_list")
        .with_fields(vec![StdArc::new(inner_group)])
        .build()
        .unwrap();

    // Verify roundtrip
    let roundtripped =
        arrow_to_parquet_value(&outer_field, &parquet_type, array.as_ref(), 0).unwrap();
    if let ParquetValue::List(items) = roundtripped {
        assert_eq!(items.len(), 5);
    } else {
        panic!("Expected list");
    }
}
262
+
263
/// A map with one null value, followed by a null map row, is converted to an
/// Arrow `MapArray`; row 0 must be valid and row 1 must be null.
#[test]
fn test_map_with_null_values() {
    // Arrow schema: nullable map of (non-null Utf8 key, nullable Int32 value).
    let entries_field = Field::new(
        "entries",
        DataType::Struct(
            vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Int32, true),
            ]
            .into(),
        ),
        false,
    );
    let map_field = Field::new(
        "map",
        DataType::Map(StdArc::new(entries_field), false),
        true,
    );

    // Pairs a string key with an arbitrary value.
    let entry = |key: &str, value: ParquetValue| (ParquetValue::String(Arc::from(key)), value);

    let values = vec![
        ParquetValue::Map(vec![
            entry("key1", ParquetValue::Int32(100)),
            entry("key2", ParquetValue::Null),
            entry("key3", ParquetValue::Int32(300)),
        ]),
        ParquetValue::Null,
    ];

    let array = parquet_values_to_arrow_array(&values, &map_field).unwrap();
    assert_eq!(array.len(), 2);

    // Verify the map was created correctly
    let map_array = array.as_any().downcast_ref::<MapArray>().unwrap();
    assert!(!map_array.is_null(0));
    assert!(map_array.is_null(1));
}
300
+
301
/// A record that omits a nullable field is converted to an Arrow struct
/// column; the omitted field must come out null for that row and non-null
/// for the row that supplied it.
#[test]
fn test_struct_with_missing_fields() {
    use indexmap::IndexMap;

    let struct_field = Field::new(
        "struct",
        DataType::Struct(
            vec![
                Field::new("field1", DataType::Utf8, false),
                Field::new("field2", DataType::Boolean, true), // nullable to handle missing
                Field::new("field3", DataType::Int32, false),
            ]
            .into(),
        ),
        true,
    );

    // First record: field2 is intentionally left out.
    let partial_record = {
        let mut rec = IndexMap::new();
        rec.insert(
            Arc::from("field1"),
            ParquetValue::String(Arc::from("value1")),
        );
        rec.insert(Arc::from("field3"), ParquetValue::Int32(42));
        rec
    };

    // Second record: all three fields present.
    let complete_record = {
        let mut rec = IndexMap::new();
        rec.insert(
            Arc::from("field1"),
            ParquetValue::String(Arc::from("value2")),
        );
        rec.insert(Arc::from("field2"), ParquetValue::Boolean(true));
        rec.insert(Arc::from("field3"), ParquetValue::Int32(99));
        rec
    };

    let values = vec![
        ParquetValue::Record(partial_record),
        ParquetValue::Record(complete_record),
        ParquetValue::Null,
    ];

    let array = parquet_values_to_arrow_array(&values, &struct_field).unwrap();
    let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(struct_array.len(), 3);

    // Column 1 is field2: null for the first record, set for the second.
    let field2_column = struct_array.column(1);
    let field2_array = field2_column
        .as_any()
        .downcast_ref::<BooleanArray>()
        .unwrap();
    assert!(field2_array.is_null(0));
    assert!(!field2_array.is_null(1));
}
349
+
350
/// Values whose variant does not match the target Arrow type must be
/// rejected, and the error text must name both the expected type and the
/// variant that was supplied.
#[test]
fn test_type_mismatch_errors() {
    // Shared check: the message must contain both fragments.
    let assert_mentions = |error_msg: String, expected: &str, actual: &str| {
        assert!(
            error_msg.contains(expected) && error_msg.contains(actual),
            "Error message was: {}",
            error_msg
        );
    };

    // Case 1: a String value offered to a Boolean field.
    let bool_field = Field::new("test", DataType::Boolean, false);
    let string_values = vec![ParquetValue::String(Arc::from("not a boolean"))];
    let result = parquet_values_to_arrow_array(&string_values, &bool_field);
    assert!(result.is_err());
    assert_mentions(result.unwrap_err().to_string(), "Expected Boolean", "String");

    // Case 2: a Float32 value offered to an Int32 field.
    let int_field = Field::new("test", DataType::Int32, false);
    let float_values = vec![ParquetValue::Float32(OrderedFloat(3.14))];
    let result = parquet_values_to_arrow_array(&float_values, &int_field);
    assert!(result.is_err());
    assert_mentions(result.unwrap_err().to_string(), "Expected Int32", "Float32");

    // Case 3: a scalar Int32 value offered to a List field.
    let list_field = Field::new(
        "list",
        DataType::List(StdArc::new(Field::new("item", DataType::Int32, false))),
        false,
    );
    let scalar_values = vec![ParquetValue::Int32(42)];
    let result = parquet_values_to_arrow_array(&scalar_values, &list_field);
    assert!(result.is_err());
    assert_mentions(result.unwrap_err().to_string(), "Expected List", "Int32");
}
391
+
392
/// `arrow_to_parquet_value` is handed a `UnionArray`; the call must fail
/// with the unsupported-data-type message.
#[test]
fn test_unsupported_arrow_types() {
    // Build a minimal union array with a single Int32 variant (type id 0).
    let variant_fields = vec![StdArc::new(Field::new("int", DataType::Int32, false))];
    let union_fields = arrow_schema::UnionFields::try_new(vec![0], variant_fields).unwrap();
    let type_ids = arrow_buffer::ScalarBuffer::from(vec![0i8, 0, 0]);
    let children = vec![StdArc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef];

    let array = arrow_array::UnionArray::try_new(union_fields, type_ids, None, children).unwrap();

    // Dummy field and parquet type for the call.
    let arrow_field = Field::new("int", DataType::Int32, false);
    let parquet_type = Type::primitive_type_builder("int", parquet::basic::Type::INT32)
        .build()
        .unwrap();

    let result = arrow_to_parquet_value(&arrow_field, &parquet_type, &array, 0);
    assert!(result.is_err());

    let message = result.unwrap_err().to_string();
    assert!(message.contains("Unsupported data type for conversion"));
}
424
+
425
/// The extreme `i64` values are converted into an Arrow Int64 column and
/// must read back unchanged.
#[test]
fn test_integer_overflow_prevention() {
    let field = Field::new("test", DataType::Int64, false);

    // Both ends of the i64 range, in the order they are stored.
    let extremes = [i64::MAX, i64::MIN];
    let values: Vec<ParquetValue> = extremes
        .iter()
        .map(|&bound| ParquetValue::Int64(bound))
        .collect();

    let array = parquet_values_to_arrow_array(&values, &field).unwrap();
    let int_array = array.as_any().downcast_ref::<Int64Array>().unwrap();

    for (row, expected) in extremes.iter().enumerate() {
        assert_eq!(int_array.value(row), *expected);
    }
}
437
+
438
/// Empty list, empty map, and empty record inputs are each converted to
/// their Arrow column type and inspected at row 0.
#[test]
fn test_empty_collections() {
    use indexmap::IndexMap;

    // --- Empty list: row 0 must be a zero-length list. ---
    let list_field = Field::new(
        "list",
        DataType::List(StdArc::new(Field::new("item", DataType::Int32, true))),
        false,
    );
    let list_values = vec![ParquetValue::List(vec![])];
    let array = parquet_values_to_arrow_array(&list_values, &list_field).unwrap();
    let list_array = array.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(list_array.value(0).len(), 0);

    // --- Empty map: row 0 must hold zero entries. ---
    let entries_field = Field::new(
        "entries",
        DataType::Struct(
            vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Int32, true),
            ]
            .into(),
        ),
        false,
    );
    let map_field = Field::new(
        "map",
        DataType::Map(StdArc::new(entries_field), false),
        false,
    );
    let map_values = vec![ParquetValue::Map(vec![])];
    let array = parquet_values_to_arrow_array(&map_values, &map_field).unwrap();
    let map_array = array.as_any().downcast_ref::<MapArray>().unwrap();
    assert_eq!(map_array.value(0).len(), 0);

    // --- Empty record: every (nullable) child column must be null at row 0. ---
    let struct_field = Field::new(
        "struct",
        DataType::Struct(
            vec![
                Field::new("field1", DataType::Utf8, true),
                Field::new("field2", DataType::Int32, true),
            ]
            .into(),
        ),
        false,
    );
    let record_values = vec![ParquetValue::Record(IndexMap::new())];
    let array = parquet_values_to_arrow_array(&record_values, &struct_field).unwrap();
    let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();

    // All fields should be null
    assert!(struct_array.column(0).is_null(0));
    assert!(struct_array.column(1).is_null(0));
}