parquet 0.5.12 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +8 -5
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -603
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -0,0 +1,437 @@
|
|
1
|
+
use bytes::Bytes;
|
2
|
+
use indexmap::IndexMap;
|
3
|
+
use parquet_core::*;
|
4
|
+
use std::sync::Arc;
|
5
|
+
|
6
|
+
#[test]
fn test_binary_data_basic() {
    // Round-trip a mix of binary payloads — empty, small, the full byte range,
    // all-zero, and arbitrary bytes — and verify each comes back byte-for-byte.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "id".to_string(),
                    primitive_type: PrimitiveType::Int32,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Primitive {
                    name: "data".to_string(),
                    primitive_type: PrimitiveType::Binary,
                    nullable: false,
                    format: None,
                },
            ],
        })
        .build()
        .unwrap();

    // (id, payload) pairs covering the interesting binary shapes.
    let payloads: Vec<(i32, Vec<u8>)> = vec![
        (1, vec![]),                       // empty binary data
        (2, vec![0x00, 0x01, 0x02, 0x03]), // small binary data
        (3, (0u8..=255u8).collect()),      // every possible byte value
        (4, vec![0x00, 0x00, 0x00, 0x00]), // embedded NUL bytes
        (5, vec![0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE]), // arbitrary bytes
    ];
    let test_data: Vec<Vec<ParquetValue>> = payloads
        .into_iter()
        .map(|(id, bytes)| {
            vec![
                ParquetValue::Int32(id),
                ParquetValue::Bytes(Bytes::from(bytes)),
            ]
        })
        .collect();

    // Write all rows into an in-memory buffer.
    let mut sink = Vec::new();
    {
        let mut writer = Writer::new(&mut sink, schema).unwrap();
        writer.write_rows(test_data.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read the file back and compare row-by-row.
    let reader = Reader::new(Bytes::from(sink));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(round_tripped.len(), test_data.len());

    // Binary data must be preserved exactly.
    for (expected, actual) in test_data.iter().zip(round_tripped.iter()) {
        assert_eq!(expected, actual);
    }
}
|
85
|
+
|
86
|
+
#[test]
fn test_large_binary_data() {
    // Verify blobs from 1 KB up to 1 MB survive a write/read round trip intact.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![SchemaNode::Primitive {
                name: "blob".to_string(),
                primitive_type: PrimitiveType::Binary,
                nullable: false,
                format: None,
            }],
        })
        .build()
        .unwrap();

    // 1 KB, 10 KB, 100 KB, 1 MB.
    for size in [1024, 10 * 1024, 100 * 1024, 1024 * 1024] {
        // Deterministic repeating byte pattern so any corruption is detectable.
        let blob: Bytes = (0..size).map(|i| (i % 256) as u8).collect();
        let rows = vec![vec![ParquetValue::Bytes(blob.clone())]];

        let mut sink = Vec::new();
        {
            let mut writer = Writer::new(&mut sink, schema.clone()).unwrap();
            writer.write_rows(rows).unwrap();
            writer.close().unwrap();
        }

        // Read back and verify both length and content.
        let reader = Reader::new(Bytes::from(sink));
        let round_tripped: Vec<_> = reader
            .read_rows()
            .unwrap()
            .collect::<Result<Vec<_>>>()
            .unwrap();

        assert_eq!(round_tripped.len(), 1);

        match &round_tripped[0][0] {
            ParquetValue::Bytes(data) => {
                assert_eq!(data.len(), size);
                assert_eq!(data, &blob);
            }
            _ => panic!("Expected binary value"),
        }
    }
}
|
143
|
+
|
144
|
+
#[test]
fn test_nullable_binary() {
    // A nullable binary column: nulls, empty buffers, and real payloads must
    // all round-trip unchanged — in particular, empty binary must stay
    // distinct from null.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![SchemaNode::Primitive {
                name: "optional_data".to_string(),
                primitive_type: PrimitiveType::Binary,
                nullable: true,
                format: None,
            }],
        })
        .build()
        .unwrap();

    // Alternate real payloads with nulls, including an empty (non-null) buffer.
    let rows: Vec<Vec<ParquetValue>> = vec![
        vec![ParquetValue::Bytes(Bytes::from(vec![1, 2, 3]))],
        vec![ParquetValue::Null],
        vec![ParquetValue::Bytes(Bytes::from(vec![]))],
        vec![ParquetValue::Null],
        vec![ParquetValue::Bytes(Bytes::from(vec![255, 254, 253]))],
    ];

    let mut sink = Vec::new();
    {
        let mut writer = Writer::new(&mut sink, schema).unwrap();
        writer.write_rows(rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read back and verify nulls and empty binary are handled correctly.
    let reader = Reader::new(Bytes::from(sink));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(round_tripped.len(), rows.len());

    for (expected, actual) in rows.iter().zip(round_tripped.iter()) {
        assert_eq!(expected, actual);
    }
}
|
193
|
+
|
194
|
+
#[test]
fn test_fixed_size_binary() {
    // Fixed-width payloads (16-byte UUID-like, 32-byte hash-like) stored as
    // variable-length Binary; ideally the schema would use FixedBytes(16)/(32)
    // if/when the type system supports it.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "uuid".to_string(),
                    primitive_type: PrimitiveType::Binary, // ideally FixedBytes(16)
                    nullable: false,
                    format: None,
                },
                SchemaNode::Primitive {
                    name: "hash".to_string(),
                    primitive_type: PrimitiveType::Binary, // ideally FixedBytes(32)
                    nullable: false,
                    format: None,
                },
            ],
        })
        .build()
        .unwrap();

    // Exact byte patterns, hoisted to named locals for readability.
    let uuid_a = vec![
        0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
        0xde, 0xf0,
    ];
    let hash_a = vec![
        0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd,
        0xee, 0xff, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb,
        0xcc, 0xdd, 0xee, 0xff,
    ];
    let uuid_b = vec![
        0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d,
        0x1e, 0x0f,
    ];
    let hash_b = vec![
        0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22,
        0x11, 0x00, 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44,
        0x33, 0x22, 0x11, 0x00,
    ];

    let rows = vec![
        vec![
            ParquetValue::Bytes(Bytes::from(uuid_a)),
            ParquetValue::Bytes(Bytes::from(hash_a)),
        ],
        vec![
            ParquetValue::Bytes(Bytes::from(uuid_b)),
            ParquetValue::Bytes(Bytes::from(hash_b)),
        ],
    ];

    let mut sink = Vec::new();
    {
        let mut writer = Writer::new(&mut sink, schema).unwrap();
        writer.write_rows(rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read back and verify.
    let reader = Reader::new(Bytes::from(sink));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(round_tripped.len(), rows.len());

    for (expected, actual) in rows.iter().zip(round_tripped.iter()) {
        assert_eq!(expected, actual);
    }
}
|
271
|
+
|
272
|
+
#[test]
fn test_binary_string_interoperability() {
    // String and Binary columns in the same row must stay distinct types,
    // whether or not the binary payload happens to be valid UTF-8.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "text".to_string(),
                    primitive_type: PrimitiveType::String,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Primitive {
                    name: "binary".to_string(),
                    primitive_type: PrimitiveType::Binary,
                    nullable: false,
                    format: None,
                },
            ],
        })
        .build()
        .unwrap();

    let test_string = "Hello, 世界! 🦀";
    let test_bytes = test_string.as_bytes().to_vec();

    let rows = vec![
        // Multi-byte UTF-8 text alongside its own byte representation.
        vec![
            ParquetValue::String(Arc::from(test_string)),
            ParquetValue::Bytes(test_bytes.into()),
        ],
        // ASCII text alongside bytes that are NOT valid UTF-8.
        vec![
            ParquetValue::String(Arc::from("Regular ASCII text")),
            ParquetValue::Bytes(Bytes::from(vec![0xff, 0xfe, 0xfd])),
        ],
        // Empty string vs empty binary — both must survive as-is.
        vec![
            ParquetValue::String(Arc::from("")),
            ParquetValue::Bytes(Bytes::from(vec![])),
        ],
    ];

    let mut sink = Vec::new();
    {
        let mut writer = Writer::new(&mut sink, schema).unwrap();
        writer.write_rows(rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read back and verify string and binary are kept separate.
    let reader = Reader::new(Bytes::from(sink));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(round_tripped.len(), rows.len());

    for (expected, actual) in rows.iter().zip(round_tripped.iter()) {
        assert_eq!(expected, actual);
    }
}
|
339
|
+
|
340
|
+
#[test]
fn test_binary_in_complex_types() {
    // Binary values nested inside a list column and a struct column must
    // round-trip, including an empty buffer in a list and a null struct field.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::List {
                    name: "binary_list".to_string(),
                    nullable: false,
                    item: Box::new(SchemaNode::Primitive {
                        name: "item".to_string(),
                        primitive_type: PrimitiveType::Binary,
                        nullable: false,
                        format: None,
                    }),
                },
                SchemaNode::Struct {
                    name: "binary_struct".to_string(),
                    nullable: false,
                    fields: vec![
                        SchemaNode::Primitive {
                            name: "data1".to_string(),
                            primitive_type: PrimitiveType::Binary,
                            nullable: false,
                            format: None,
                        },
                        SchemaNode::Primitive {
                            name: "data2".to_string(),
                            primitive_type: PrimitiveType::Binary,
                            nullable: true,
                            format: None,
                        },
                    ],
                },
            ],
        })
        .build()
        .unwrap();

    // Helper: build a Record value from (key, value) pairs, preserving
    // insertion order (IndexMap keeps keys in the order inserted).
    fn record(pairs: Vec<(&str, ParquetValue)>) -> ParquetValue {
        let mut map = IndexMap::new();
        for (key, value) in pairs {
            map.insert(Arc::from(key), value);
        }
        ParquetValue::Record(map)
    }

    let rows = vec![
        vec![
            ParquetValue::List(vec![
                ParquetValue::Bytes(Bytes::from(vec![1, 2, 3])),
                ParquetValue::Bytes(Bytes::from(vec![4, 5, 6])),
                ParquetValue::Bytes(Bytes::from(vec![7, 8, 9])),
            ]),
            record(vec![
                ("data1", ParquetValue::Bytes(Bytes::from(vec![0xAA, 0xBB]))),
                ("data2", ParquetValue::Bytes(Bytes::from(vec![0xCC, 0xDD]))),
            ]),
        ],
        vec![
            // Single-element list holding an empty buffer.
            ParquetValue::List(vec![ParquetValue::Bytes(Bytes::from(vec![]))]),
            // Struct with the nullable field actually null.
            record(vec![
                ("data1", ParquetValue::Bytes(Bytes::from(vec![0xFF]))),
                ("data2", ParquetValue::Null),
            ]),
        ],
    ];

    let mut sink = Vec::new();
    {
        let mut writer = Writer::new(&mut sink, schema).unwrap();
        writer.write_rows(rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read back and verify.
    let reader = Reader::new(Bytes::from(sink));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(round_tripped.len(), rows.len());

    for (expected, actual) in rows.iter().zip(round_tripped.iter()) {
        assert_eq!(expected, actual);
    }
}
|