parquet 0.5.12 → 0.6.0
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +8 -5
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -603
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs
@@ -0,0 +1,547 @@
+use bytes::Bytes;
+use indexmap::IndexMap;
+use parquet_core::*;
+use std::sync::Arc;
+
+mod test_helpers;
+
+// ====== Schema Construction Errors ======
+
+#[test]
+fn test_schema_builder_error_cases() {
+    // Test building without root node
+    let result = SchemaBuilder::new().build();
+    assert!(result.is_err());
+    assert_eq!(result.unwrap_err(), "Schema must have a root node");
+}
+
+#[test]
+fn test_empty_struct_unsupported() {
+    // Test that empty structs are not supported by Parquet
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![
+                SchemaNode::List {
+                    name: "empty_list".to_string(),
+                    nullable: false,
+                    item: Box::new(SchemaNode::Primitive {
+                        name: "item".to_string(),
+                        primitive_type: PrimitiveType::Int32,
+                        nullable: false,
+                        format: None,
+                    }),
+                },
+                SchemaNode::Map {
+                    name: "empty_map".to_string(),
+                    nullable: false,
+                    key: Box::new(SchemaNode::Primitive {
+                        name: "key".to_string(),
+                        primitive_type: PrimitiveType::String,
+                        nullable: false,
+                        format: None,
+                    }),
+                    value: Box::new(SchemaNode::Primitive {
+                        name: "value".to_string(),
+                        primitive_type: PrimitiveType::Int32,
+                        nullable: false,
+                        format: None,
+                    }),
+                },
+                SchemaNode::Struct {
+                    name: "empty_struct".to_string(),
+                    nullable: false,
+                    fields: vec![], // Empty struct - not supported by Parquet
+                },
+            ],
+        })
+        .build()
+        .unwrap();
+
+    // Try to create a writer - this should fail due to empty struct
+    let mut buffer = Vec::new();
+    let result = Writer::new(&mut buffer, schema.clone());
+
+    // Expect an error about empty structs
+    assert!(result.is_err());
+    match result {
+        Err(ParquetError::Parquet(e)) => {
+            assert!(e.to_string().contains("empty struct"));
+        }
+        _ => panic!("Expected Parquet error about empty structs"),
+    }
+}
+
+// ====== Field Count Validation Errors ======
+
+#[test]
+fn test_field_count_mismatch() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![
+                SchemaNode::Primitive {
+                    name: "field1".to_string(),
+                    primitive_type: PrimitiveType::Int32,
+                    nullable: false,
+                    format: None,
+                },
+                SchemaNode::Primitive {
+                    name: "field2".to_string(),
+                    primitive_type: PrimitiveType::String,
+                    nullable: false,
+                    format: None,
+                },
+                SchemaNode::Primitive {
+                    name: "field3".to_string(),
+                    primitive_type: PrimitiveType::Float64,
+                    nullable: false,
+                    format: None,
+                },
+            ],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+
+    // Test using Writer
+    {
+        let mut writer = Writer::new(&mut buffer, schema.clone()).unwrap();
+
+        // Test row with too few fields
+        let result = writer.write_rows(vec![vec![
+            ParquetValue::Int32(1),
+            ParquetValue::String(Arc::from("test")),
+            // Missing third field
+        ]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Row has 2 values") && err_msg.contains("schema has 3 fields"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+
+    // Test using WriterBuilder
+    {
+        buffer.clear();
+        let mut writer = WriterBuilder::new()
+            .build(&mut buffer, schema.clone())
+            .unwrap();
+
+        // Test row with too few fields
+        let result = writer.write_row(vec![
+            ParquetValue::Int32(42),
+            // Missing second and third fields
+        ]);
+
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("Row has 1 values but schema has 3 fields"));
+
+        // Test row with too many fields
+        let result = writer.write_row(vec![
+            ParquetValue::Int32(42),
+            ParquetValue::String(Arc::from("test")),
+            ParquetValue::Float64(ordered_float::OrderedFloat(3.14)),
+            ParquetValue::Boolean(true), // Extra field
+        ]);
+
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("Row has 4 values but schema has 3 fields"));
+    }
+}
+
+// ====== Type Mismatch Errors ======
+
+#[test]
+fn test_type_mismatch() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![
+                SchemaNode::Primitive {
+                    name: "int_field".to_string(),
+                    primitive_type: PrimitiveType::Int32,
+                    nullable: false,
+                    format: None,
+                },
+                SchemaNode::Primitive {
+                    name: "string_field".to_string(),
+                    primitive_type: PrimitiveType::String,
+                    nullable: false,
+                    format: None,
+                },
+            ],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+        // Try to write wrong types
+        let result = writer.write_rows(vec![vec![
+            ParquetValue::String(Arc::from("not an int")), // Wrong type for int_field
+            ParquetValue::Int32(123), // Wrong type for string_field
+        ]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Type mismatch") && err_msg.contains("expected Int32"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+}
+
+// ====== Null Validation Errors ======
+
+#[test]
+fn test_null_in_non_nullable_field() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::Primitive {
+                name: "required_field".to_string(),
+                primitive_type: PrimitiveType::Int32,
+                nullable: false,
+                format: None,
+            }],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+        // Try to write null to non-nullable field
+        let result = writer.write_rows(vec![vec![ParquetValue::Null]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("null value") && err_msg.contains("non-nullable"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+}
+
+// ====== Complex Type Validation Errors ======
+
+#[test]
+fn test_invalid_struct_fields() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::Struct {
+                name: "nested".to_string(),
+                nullable: false,
+                fields: vec![
+                    SchemaNode::Primitive {
+                        name: "field1".to_string(),
+                        primitive_type: PrimitiveType::Int32,
+                        nullable: false,
+                        format: None,
+                    },
+                    SchemaNode::Primitive {
+                        name: "field2".to_string(),
+                        primitive_type: PrimitiveType::String,
+                        nullable: false,
+                        format: None,
+                    },
+                ],
+            }],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+        // Try to write struct with missing fields
+        let mut incomplete_struct = IndexMap::new();
+        incomplete_struct.insert(Arc::from("field1"), ParquetValue::Int32(42));
+        // field2 is missing
+
+        let result = writer.write_rows(vec![vec![ParquetValue::Record(incomplete_struct)]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Required field") && err_msg.contains("field2"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+}
+
+#[test]
+fn test_invalid_list_element_type() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::List {
+                name: "int_list".to_string(),
+                nullable: false,
+                item: Box::new(SchemaNode::Primitive {
+                    name: "item".to_string(),
+                    primitive_type: PrimitiveType::Int32,
+                    nullable: false,
+                    format: None,
+                }),
+            }],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+        // Try to write list with wrong element type
+        let result = writer.write_rows(vec![vec![ParquetValue::List(vec![ParquetValue::String(
+            Arc::from("not an int"),
+        )])]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Type mismatch") && err_msg.contains("expected Int32"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+}
+
+#[test]
+fn test_invalid_map_key_value_types() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::Map {
+                name: "string_int_map".to_string(),
+                nullable: false,
+                key: Box::new(SchemaNode::Primitive {
+                    name: "key".to_string(),
+                    primitive_type: PrimitiveType::String,
+                    nullable: false,
+                    format: None,
+                }),
+                value: Box::new(SchemaNode::Primitive {
+                    name: "value".to_string(),
+                    primitive_type: PrimitiveType::Int32,
+                    nullable: false,
+                    format: None,
+                }),
+            }],
+        })
+        .build()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+        // Try to write map with wrong key type
+        let result = writer.write_rows(vec![vec![ParquetValue::Map(vec![(
+            ParquetValue::Int32(42), // Wrong key type
+            ParquetValue::Int32(100),
+        )])]]);
+
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Type mismatch") && err_msg.contains("expected Utf8"),
+            "Error message was: {}",
+            err_msg
+        );
+    }
+}
+
+// ====== Unsupported Features ======
+
+#[test]
+fn test_map_with_struct_values_unsupported() {
+    // Test that maps with struct values are not yet supported
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::Map {
+                name: "map_field".to_string(),
+                nullable: false,
+                key: Box::new(SchemaNode::Primitive {
+                    name: "key".to_string(),
+                    primitive_type: PrimitiveType::String,
+                    nullable: false,
+                    format: None,
+                }),
+                value: Box::new(SchemaNode::Struct {
+                    name: "value_struct".to_string(),
+                    nullable: false,
+                    fields: vec![SchemaNode::Primitive {
+                        name: "field".to_string(),
+                        primitive_type: PrimitiveType::Int32,
+                        nullable: false,
+                        format: None,
+                    }],
+                }),
+            }],
+        })
+        .build()
+        .unwrap();
+
+    // Try to write a map with struct values
+    let row = vec![ParquetValue::Map(vec![(
+        ParquetValue::String(Arc::from("key1")),
+        ParquetValue::Record({
+            let mut map = IndexMap::new();
+            map.insert(Arc::from("field"), ParquetValue::Int32(42));
+            map
+        }),
+    )])];
+
+    let mut buffer = Vec::new();
+    let mut writer = Writer::new(&mut buffer, schema).unwrap();
+    let result = writer.write_rows(vec![row]);
+
+    // Check if this is still a limitation
+    if result.is_err() {
+        match result {
+            Err(ParquetError::Conversion(msg)) => {
+                assert!(msg.contains("Maps with struct values are not yet supported"));
+            }
+            _ => panic!("Expected Conversion error about maps with struct values"),
+        }
+    } else {
+        // If it succeeds, then the limitation has been fixed!
+        // Let's verify we can read it back
+        writer.close().unwrap();
+
+        let bytes = Bytes::from(buffer);
+        let reader = Reader::new(bytes);
+
+        let read_rows: Vec<_> = reader
+            .read_rows()
+            .unwrap()
+            .collect::<Result<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(read_rows.len(), 1);
+        // Maps with struct values now work!
+    }
+}
+
+// ====== Writer State Errors ======
+
+#[test]
+fn test_writer_multiple_close() {
+    let schema = SchemaBuilder::new()
+        .with_root(SchemaNode::Struct {
+            name: "root".to_string(),
+            nullable: false,
+            fields: vec![SchemaNode::Primitive {
+                name: "value".to_string(),
+                primitive_type: PrimitiveType::Int32,
+                nullable: false,
+                format: None,
+            }],
+        })
+        .build()
+        .unwrap();
+
+    // Test that we can't write after moving the writer into close()
+    let mut buffer = Vec::new();
+    let mut writer = Writer::new(&mut buffer, schema).unwrap();
+
+    // Write some data
+    writer
+        .write_rows(vec![vec![ParquetValue::Int32(1)]])
+        .unwrap();
+
+    // Close consumes the writer, so we can't use it afterwards
+    writer.close().unwrap();
+
+    // The writer has been consumed by close(), so we can't access it anymore
+    // This is enforced at compile time by Rust's ownership system
+}
+
+// ====== Invalid Collection Schemas ======
+
+#[test]
+fn test_invalid_collection_schemas() {
+    let test_cases = vec![
+        (
+            "list_without_item",
+            SchemaNode::List {
+                name: "invalid_list".to_string(),
+                nullable: false,
+                item: Box::new(SchemaNode::Struct {
+                    name: "empty".to_string(),
+                    nullable: false,
+                    fields: vec![],
+                }),
+            },
+        ),
+        (
+            "map_without_value",
+            SchemaNode::Map {
+                name: "invalid_map".to_string(),
+                nullable: false,
+                key: Box::new(SchemaNode::Primitive {
+                    name: "key".to_string(),
+                    primitive_type: PrimitiveType::String,
+                    nullable: false,
+                    format: None,
+                }),
+                value: Box::new(SchemaNode::Struct {
+                    name: "empty".to_string(),
+                    nullable: false,
+                    fields: vec![],
+                }),
+            },
+        ),
+    ];
+
+    for (name, invalid_node) in test_cases {
+        let result = SchemaBuilder::new()
+            .with_root(SchemaNode::Struct {
+                name: "root".to_string(),
+                nullable: false,
+                fields: vec![invalid_node],
+            })
+            .build();
+
+        // Document the behavior for invalid collection schemas
+        match result {
+            Ok(_) => {
+                // Some invalid schemas might be allowed at build time
+                // but fail at write time
+            }
+            Err(e) => {
+                println!("Schema validation for {}: {}", name, e);
+            }
+        }
+    }
+}
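For orientation, the test hunk above exercises the `parquet-core` crate added in this release (SchemaBuilder, Writer, Reader, ParquetValue). Below is a minimal write/read round-trip sketch assembled only from calls that appear in that hunk; the `id` field, its value, and the `roundtrip_sketch` function name are illustrative placeholders, not code taken from the gem.

```rust
use bytes::Bytes;
use parquet_core::*;

// A minimal round-trip sketch using the API shown in the test hunk above.
// The "id" field and its value are illustrative only.
fn roundtrip_sketch() -> Result<()> {
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![SchemaNode::Primitive {
                name: "id".to_string(),
                primitive_type: PrimitiveType::Int32,
                nullable: false,
                format: None,
            }],
        })
        .build()
        .expect("schema with a root node should build");

    // Write one row into an in-memory buffer, then close to finalize the file.
    let mut buffer = Vec::new();
    let mut writer = Writer::new(&mut buffer, schema)?;
    writer.write_rows(vec![vec![ParquetValue::Int32(1)]])?;
    writer.close()?;

    // Read the rows back from the same bytes.
    let reader = Reader::new(Bytes::from(buffer));
    let rows: Vec<_> = reader.read_rows()?.collect::<Result<Vec<_>>>()?;
    assert_eq!(rows.len(), 1);
    Ok(())
}
```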