parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
@@ -0,0 +1,787 @@
1
+ use arrow_schema::{DataType, Field};
2
+ use bytes::Bytes;
3
+ use indexmap::IndexMap;
4
+ use num::BigInt;
5
+ use parquet_core::arrow_conversion::parquet_values_to_arrow_array;
6
+ use parquet_core::traits::SchemaInspector;
7
+ use parquet_core::*;
8
+ use std::collections::hash_map::DefaultHasher;
9
+ use std::hash::{Hash, Hasher};
10
+ use triomphe::Arc;
11
+
12
+ fn hash_value(value: &ParquetValue) -> u64 {
13
+ let mut hasher = DefaultHasher::new();
14
+ value.hash(&mut hasher);
15
+ hasher.finish()
16
+ }
17
+
18
+ fn single_field_schema(field: SchemaNode) -> Schema {
19
+ SchemaBuilder::new()
20
+ .with_root(SchemaNode::Struct {
21
+ name: "root".to_string(),
22
+ nullable: false,
23
+ fields: vec![field],
24
+ })
25
+ .build()
26
+ .unwrap()
27
+ }
28
+
29
/// Two records holding the same fields, inserted in opposite orders, must
/// compare equal and produce the same hash.
#[test]
fn equal_records_have_equal_hashes_independent_of_insertion_order() {
    let mut id_first = IndexMap::new();
    id_first.insert(Arc::from("id"), ParquetValue::Int64(1));
    id_first.insert(Arc::from("name"), ParquetValue::String(Arc::from("Ada")));

    let mut name_first = IndexMap::new();
    name_first.insert(Arc::from("name"), ParquetValue::String(Arc::from("Ada")));
    name_first.insert(Arc::from("id"), ParquetValue::Int64(1));

    let lhs = ParquetValue::Record(id_first);
    let rhs = ParquetValue::Record(name_first);

    assert_eq!(lhs, rhs);
    assert_eq!(hash_value(&lhs), hash_value(&rhs));
}
44
+
45
/// Insertion order must not affect equality or hashing at any nesting depth:
/// both the outer record and the inner `address` record are built in
/// opposite field orders on the two sides.
#[test]
fn equal_nested_records_have_equal_hashes_independent_of_insertion_order() {
    /// Builds the `address` record; `order_swapped` selects whether `city`
    /// or `zip` is inserted first.
    fn inner(order_swapped: bool) -> ParquetValue {
        let city = (Arc::from("city"), ParquetValue::String(Arc::from("Paris")));
        let zip = (Arc::from("zip"), ParquetValue::Int64(75001));
        let ordered = if order_swapped {
            [city, zip]
        } else {
            [zip, city]
        };

        let mut map = IndexMap::new();
        for (key, value) in ordered {
            map.insert(key, value);
        }
        ParquetValue::Record(map)
    }

    let mut id_first = IndexMap::new();
    id_first.insert(Arc::from("id"), ParquetValue::Int64(1));
    id_first.insert(Arc::from("address"), inner(false));

    let mut address_first = IndexMap::new();
    address_first.insert(Arc::from("address"), inner(true));
    address_first.insert(Arc::from("id"), ParquetValue::Int64(1));

    let lhs = ParquetValue::Record(id_first);
    let rhs = ParquetValue::Record(address_first);

    assert_eq!(lhs, rhs);
    assert_eq!(hash_value(&lhs), hash_value(&rhs));
}
75
+
76
/// Writing a list that contains `Null` into a list whose item node is
/// declared non-nullable must fail with a schema error naming the position.
#[test]
fn writer_rejects_null_list_items_when_item_schema_is_not_nullable() {
    let item = SchemaNode::Primitive {
        name: "item".to_string(),
        primitive_type: PrimitiveType::Int64,
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(SchemaNode::List {
        name: "values".to_string(),
        nullable: false,
        item: Box::new(item),
    });

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let row = vec![ParquetValue::List(vec![ParquetValue::Null])];
    let message = writer.write_rows(vec![row]).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Found null value for non-nullable field at row[0][0]"
    );
}
107
+
108
/// Writing a map entry whose value is `Null` into a map whose value node is
/// declared non-nullable must fail with a schema error naming the position.
#[test]
fn writer_rejects_null_map_values_when_value_schema_is_not_nullable() {
    let key = Box::new(SchemaNode::Primitive {
        name: "key".to_string(),
        primitive_type: PrimitiveType::String,
        nullable: false,
        format: None,
    });
    let value = Box::new(SchemaNode::Primitive {
        name: "value".to_string(),
        primitive_type: PrimitiveType::Int64,
        nullable: false,
        format: None,
    });
    let schema = single_field_schema(SchemaNode::Map {
        name: "lookup".to_string(),
        nullable: false,
        key,
        value,
    });

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let entry = (ParquetValue::String(Arc::from("a")), ParquetValue::Null);
    let rows = vec![vec![ParquetValue::Map(vec![entry])]];
    let message = writer.write_rows(rows).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Found null value for non-nullable field at row[0].value[0]"
    );
}
148
+
149
+ fn nested_projection_schema() -> Schema {
150
+ SchemaBuilder::new()
151
+ .with_root(SchemaNode::Struct {
152
+ name: "root".to_string(),
153
+ nullable: false,
154
+ fields: vec![
155
+ SchemaNode::Primitive {
156
+ name: "id".to_string(),
157
+ primitive_type: PrimitiveType::Int64,
158
+ nullable: false,
159
+ format: None,
160
+ },
161
+ SchemaNode::Struct {
162
+ name: "profile".to_string(),
163
+ nullable: false,
164
+ fields: vec![SchemaNode::Primitive {
165
+ name: "name".to_string(),
166
+ primitive_type: PrimitiveType::String,
167
+ nullable: false,
168
+ format: None,
169
+ }],
170
+ },
171
+ ],
172
+ })
173
+ .build()
174
+ .unwrap()
175
+ }
176
+
177
+ fn profile(name: &str) -> ParquetValue {
178
+ let mut fields = IndexMap::new();
179
+ fields.insert(Arc::from("name"), ParquetValue::String(Arc::from(name)));
180
+ ParquetValue::Record(fields)
181
+ }
182
+
183
+ fn nested_projection_file() -> Vec<u8> {
184
+ let rows = vec![
185
+ vec![ParquetValue::Int64(1), profile("Ada")],
186
+ vec![ParquetValue::Int64(2), profile("Grace")],
187
+ ];
188
+
189
+ let mut buffer = Vec::new();
190
+ {
191
+ let mut writer = Writer::new(&mut buffer, nested_projection_schema()).unwrap();
192
+ writer.write_rows(rows).unwrap();
193
+ writer.close().unwrap();
194
+ }
195
+ buffer
196
+ }
197
+
198
/// Row-wise reading with a projection on `profile` must yield only the
/// nested record for each row.
#[test]
fn row_projection_decodes_nested_field_with_matching_parquet_field() {
    let projection = ["profile".to_string()];
    let reader = Reader::new(Bytes::from(nested_projection_file()));

    let row_iter = reader.read_rows_with_projection(&projection).unwrap();
    let rows = row_iter.collect::<Result<Vec<_>>>().unwrap();

    let expected = vec![vec![profile("Ada")], vec![profile("Grace")]];
    assert_eq!(rows, expected);
}
209
+
210
/// Column-wise reading with a projection on `profile` must yield a single
/// batch whose only column is `profile`, holding both nested records.
#[test]
fn column_projection_decodes_nested_field_with_matching_parquet_field() {
    let projection = ["profile".to_string()];
    let reader = Reader::new(Bytes::from(nested_projection_file()));

    let batches = reader
        .read_columns_with_projection(&projection, None)
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    // Compare only the column payload of each batch.
    let mut columns = Vec::new();
    for batch in batches {
        columns.push(batch.columns);
    }

    let expected_column = ("profile".to_string(), vec![profile("Ada"), profile("Grace")]);
    assert_eq!(columns, vec![vec![expected_column]]);
}
231
+
232
/// A `Decimal128` value with scale 4 written to a `Decimal128(10, 2)` column
/// must be rejected with a scale-mismatch schema error.
#[test]
fn writer_rejects_decimal128_scale_that_disagrees_with_schema() {
    let amount = SchemaNode::Primitive {
        name: "amount".to_string(),
        primitive_type: PrimitiveType::Decimal128(10, 2),
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(amount);

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let rows = vec![vec![ParquetValue::Decimal128(12345, 4)]];
    let message = writer.write_rows(rows).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Decimal scale mismatch at row[0]: schema scale 2, value scale 4"
    );
}
259
+
260
/// A six-digit `Decimal128` value written to a `Decimal128(5, 2)` column
/// must be rejected with a precision-overflow schema error.
#[test]
fn writer_rejects_decimal128_precision_overflow() {
    let amount = SchemaNode::Primitive {
        name: "amount".to_string(),
        primitive_type: PrimitiveType::Decimal128(5, 2),
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(amount);

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let rows = vec![vec![ParquetValue::Decimal128(100000, 2)]];
    let message = writer.write_rows(rows).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Decimal precision overflow at row[0]: schema precision 5, value has 6 digits"
    );
}
287
+
288
/// A one-byte value for a `FixedLenByteArray(2)` column must be rejected by
/// `write_row` itself, and the writer must still accept a valid row and
/// close cleanly afterwards.
#[test]
fn write_row_rejects_wrong_length_fixed_size_binary_without_poisoning() {
    let payload = SchemaNode::Primitive {
        name: "payload".to_string(),
        primitive_type: PrimitiveType::FixedLenByteArray(2),
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(payload);

    let mut sink = Vec::new();
    let mut writer = WriterBuilder::new()
        .with_batch_size(2)
        .build(&mut sink, schema)
        .unwrap();

    // The bad value must fail at `write_row`, before it is buffered, so it
    // cannot break a later flush.
    let too_short = vec![ParquetValue::Bytes(Bytes::from_static(b"x"))];
    let message = writer.write_row(too_short).unwrap_err().to_string();
    assert_eq!(
        message,
        "Schema error: Fixed size binary expected 2 bytes, got 1 at row[0]"
    );

    // After the rejection the writer remains usable.
    let exact_length = vec![ParquetValue::Bytes(Bytes::from_static(b"ab"))];
    writer.write_row(exact_length).unwrap();
    writer.close().unwrap();
}
319
+
320
/// Column-wise writing of `Null` into a non-nullable Int64 column must fail
/// with a schema error that names the column and index.
#[test]
fn column_write_rejects_required_null_with_schema_error() {
    let id = SchemaNode::Primitive {
        name: "id".to_string(),
        primitive_type: PrimitiveType::Int64,
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(id);

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let columns = vec![("id".to_string(), vec![ParquetValue::Null])];
    let message = writer.write_columns(columns).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Found null value for non-nullable field at column 'id'[0]"
    );
}
340
+
341
/// Column-wise writing of an empty record into a struct with a required
/// `name` field must fail with a schema error naming the missing field.
#[test]
fn column_write_rejects_missing_required_struct_field_with_schema_error() {
    let name = SchemaNode::Primitive {
        name: "name".to_string(),
        primitive_type: PrimitiveType::String,
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(SchemaNode::Struct {
        name: "profile".to_string(),
        nullable: false,
        fields: vec![name],
    });

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let empty_record = ParquetValue::Record(IndexMap::new());
    let columns = vec![("profile".to_string(), vec![empty_record])];
    let message = writer.write_columns(columns).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Required field 'name' is missing in struct at column 'profile'[0]"
    );
}
368
+
369
/// Column-wise writing of a scale-4 `Decimal128` into a `Decimal128(10, 2)`
/// column must fail with a scale-mismatch schema error naming the column.
#[test]
fn column_write_rejects_decimal128_scale_with_schema_error() {
    let amount = SchemaNode::Primitive {
        name: "amount".to_string(),
        primitive_type: PrimitiveType::Decimal128(10, 2),
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(amount);

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let columns = vec![(
        "amount".to_string(),
        vec![ParquetValue::Decimal128(12345, 4)],
    )];
    let message = writer.write_columns(columns).unwrap_err().to_string();

    assert_eq!(
        message,
        "Schema error: Decimal scale mismatch at column 'amount'[0]: schema scale 2, value scale 4"
    );
}
392
+
393
/// Converting a scale-4 `Decimal128` value for an Arrow field typed
/// `Decimal128(10, 2)` must fail with a conversion error.
#[test]
fn arrow_decimal_conversion_rejects_scale_that_disagrees_with_array_type() {
    let values = vec![ParquetValue::Decimal128(12345, 4)];
    let field = Field::new("amount", DataType::Decimal128(10, 2), false);

    let message = parquet_values_to_arrow_array(&values, &field)
        .unwrap_err()
        .to_string();

    assert_eq!(
        message,
        "Conversion error: Decimal scale mismatch at value[0]: array scale 2, value scale 4"
    );
}
404
+
405
/// A six-digit `Decimal256` value written to a `Decimal256(5, 2)` column
/// must be rejected with a precision-overflow schema error.
#[test]
fn writer_rejects_decimal256_precision_overflow() {
    let amount = SchemaNode::Primitive {
        name: "amount".to_string(),
        primitive_type: PrimitiveType::Decimal256(5, 2),
        nullable: false,
        format: None,
    };
    let schema = single_field_schema(amount);

    let mut sink = Vec::new();
    let mut writer = Writer::new(&mut sink, schema).unwrap();

    let oversized = ParquetValue::Decimal256(BigInt::from(100000), 2);
    let message = writer
        .write_rows(vec![vec![oversized]])
        .unwrap_err()
        .to_string();

    assert_eq!(
        message,
        "Schema error: Decimal precision overflow at row[0]: schema precision 5, value has 6 digits"
    );
}
435
+
436
/// When the Arrow field declares a `UTC` timezone, the converted array must
/// report that timezone even though the value carries `+09:00`.
#[test]
fn timestamp_array_uses_field_timezone_instead_of_value_timezone() {
    let utc_millis = DataType::Timestamp(
        arrow_schema::TimeUnit::Millisecond,
        Some(std::sync::Arc::from("UTC")),
    );
    let field = Field::new("created_at", utc_millis.clone(), false);

    let values = vec![ParquetValue::TimestampMillis(0, Some(Arc::from("+09:00")))];
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();

    assert_eq!(array.data_type(), &utc_millis);
}
458
+
459
/// When the Arrow field declares no timezone, the converted array must also
/// have none, even though the value carries `+09:00`.
#[test]
fn timestamp_array_without_field_timezone_ignores_value_timezone() {
    let naive_millis = DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None);
    let field = Field::new("created_at", naive_millis.clone(), false);

    let values = vec![ParquetValue::TimestampMillis(0, Some(Arc::from("+09:00")))];
    let array = parquet_values_to_arrow_array(&values, &field).unwrap();

    assert_eq!(array.data_type(), &naive_millis);
}
475
+
476
/// Building a schema that contains a nested struct with no fields must fail
/// with an error naming the offending path.
#[test]
fn schema_builder_rejects_empty_nested_structs() {
    let empty = SchemaNode::Struct {
        name: "empty".to_string(),
        nullable: false,
        fields: vec![],
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![empty],
    };

    let error = SchemaBuilder::new().with_root(root).build().unwrap_err();

    assert_eq!(
        error,
        "Struct field 'root.empty' must contain at least one field"
    );
}
496
+
497
/// Building a schema whose map key node is nullable must fail with an error
/// naming the key's path.
#[test]
fn schema_builder_rejects_nullable_map_keys() {
    let nullable_key = SchemaNode::Primitive {
        name: "key".to_string(),
        primitive_type: PrimitiveType::String,
        nullable: true,
        format: None,
    };
    let value = SchemaNode::Primitive {
        name: "value".to_string(),
        primitive_type: PrimitiveType::Int64,
        nullable: false,
        format: None,
    };
    let lookup = SchemaNode::Map {
        name: "lookup".to_string(),
        nullable: false,
        key: Box::new(nullable_key),
        value: Box::new(value),
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![lookup],
    };

    let error = SchemaBuilder::new().with_root(root).build().unwrap_err();

    assert_eq!(error, "Map key field 'root.lookup.key' must be required");
}
525
+
526
/// Building a schema with a `FixedLenByteArray(0)` field must fail with an
/// error naming the field's path.
#[test]
fn schema_builder_rejects_invalid_fixed_size_binary_lengths() {
    let zero_length = SchemaNode::Primitive {
        name: "payload".to_string(),
        primitive_type: PrimitiveType::FixedLenByteArray(0),
        nullable: false,
        format: None,
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![zero_length],
    };

    let error = SchemaBuilder::new().with_root(root).build().unwrap_err();

    assert_eq!(
        error,
        "FixedLenByteArray field 'root.payload' must have a positive length"
    );
}
547
+
548
/// Building a schema must fail for a `Decimal128` whose scale exceeds its
/// precision and for a `Decimal256` whose precision is 77.
#[test]
fn schema_builder_rejects_invalid_decimal_definitions() {
    // Builds a one-column schema around `primitive_type` and returns the
    // builder's error; panics if the build unexpectedly succeeds.
    let build_error = |primitive_type: PrimitiveType| {
        let amount = SchemaNode::Primitive {
            name: "amount".to_string(),
            primitive_type,
            nullable: false,
            format: None,
        };
        let root = SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![amount],
        };
        SchemaBuilder::new().with_root(root).build().unwrap_err()
    };

    let scale_error = build_error(PrimitiveType::Decimal128(4, 5));
    assert_eq!(
        scale_error,
        "Decimal128 field 'root.amount' scale 5 cannot exceed precision 4"
    );

    let precision_error = build_error(PrimitiveType::Decimal256(77, 0));
    assert_eq!(
        precision_error,
        "Decimal256 field 'root.amount' precision 77 exceeds maximum precision 76"
    );
}
588
+
589
/// Building a schema that tags a `FixedLenByteArray(15)` field with the
/// `uuid` format must fail with an error naming the field's path.
#[test]
fn schema_builder_rejects_uuid_format_on_non_uuid_storage() {
    let misdeclared_uuid = SchemaNode::Primitive {
        name: "id".to_string(),
        primitive_type: PrimitiveType::FixedLenByteArray(15),
        nullable: false,
        format: Some("uuid".to_string()),
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![misdeclared_uuid],
    };

    let error = SchemaBuilder::new().with_root(root).build().unwrap_err();

    assert_eq!(error, "UUID field 'root.id' must use FixedLenByteArray(16)");
}
607
+
608
/// `PrimitiveType::TimeNanos` must report that it requires a format.
#[test]
fn time_nanos_requires_format_metadata() {
    assert!(PrimitiveType::TimeNanos.requires_format());
}
612
+
613
/// Adding context to an invalid-argument error must prefix the message while
/// keeping the error in the `InvalidArgument` variant.
#[test]
fn error_context_preserves_error_category() {
    let failed: std::result::Result<(), _> = Err(ParquetError::invalid_argument("bad input"));
    let error = failed.context("During file read").unwrap_err();

    let rendered = error.to_string();
    assert_eq!(rendered, "Invalid argument: During file read: bad input");
    assert!(matches!(error, ParquetError::InvalidArgument(_)));
}
625
+
626
/// Requesting columns `c` then `a` must return the row's values in schema
/// order (`a` before `c`), with the unrequested `b` omitted.
#[test]
fn projected_rows_return_requested_columns_in_schema_order() {
    // All three columns are required primitives that differ only in name and type.
    let column = |name: &str, primitive_type: PrimitiveType| SchemaNode::Primitive {
        name: name.to_string(),
        primitive_type,
        nullable: false,
        format: None,
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![
            column("a", PrimitiveType::Int64),
            column("b", PrimitiveType::String),
            column("c", PrimitiveType::Boolean),
        ],
    };
    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    let mut encoded = Vec::new();
    {
        // Inner scope: release the writer's borrow of `encoded` before reading it back.
        let mut writer = Writer::new(&mut encoded, schema).unwrap();
        let row = vec![
            ParquetValue::Int64(1),
            ParquetValue::String(Arc::from("one")),
            ParquetValue::Boolean(true),
        ];
        writer.write_row(row).unwrap();
        writer.close().unwrap();
    }

    let projection = ["c".to_string(), "a".to_string()];
    let rows = Reader::new(Bytes::from(encoded))
        .read_rows_with_projection(&projection)
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    let expected_row = vec![ParquetValue::Int64(1), ParquetValue::Boolean(true)];
    assert_eq!(rows, vec![expected_row]);
}
680
+
681
/// Every path returned by `all_field_paths` must resolve through
/// `get_field_by_path` to a node with the expected name, and nested paths
/// given without the `root.` prefix must resolve as well.
#[test]
fn all_schema_inspector_paths_resolve_back_to_fields() {
    let primitive = |name: &str, primitive_type: PrimitiveType, nullable: bool| {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable,
            format: None,
        }
    };

    let id = primitive("id", PrimitiveType::Int64, false);
    let address = SchemaNode::Struct {
        name: "address".to_string(),
        nullable: true,
        fields: vec![primitive("city", PrimitiveType::String, true)],
    };
    let tags = SchemaNode::List {
        name: "tags".to_string(),
        nullable: true,
        item: Box::new(primitive("element", PrimitiveType::String, false)),
    };
    let attributes = SchemaNode::Map {
        name: "attributes".to_string(),
        nullable: true,
        key: Box::new(primitive("attribute_key", PrimitiveType::String, false)),
        value: Box::new(primitive("attribute_value", PrimitiveType::Int64, true)),
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![id, address, tags, attributes],
    };
    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    // Pair each reported path with the name of the node it resolves to.
    let mut resolved = Vec::new();
    for path in schema.all_field_paths() {
        let field_name = schema
            .get_field_by_path(&path)
            .map(|field| field.name().to_string());
        resolved.push((path, field_name));
    }

    let expected: Vec<(String, Option<String>)> = [
        ("root", "root"),
        ("root.id", "id"),
        ("root.address", "address"),
        ("root.address.city", "city"),
        ("root.tags", "tags"),
        ("root.tags.element", "element"),
        ("root.attributes", "attributes"),
        ("root.attributes.attribute_key", "attribute_key"),
        ("root.attributes.attribute_value", "attribute_value"),
    ]
    .iter()
    .map(|&(path, name)| (path.to_string(), Some(name.to_string())))
    .collect();
    assert_eq!(resolved, expected);

    // The same nested nodes are reachable without the leading `root.` segment.
    for (path, leaf) in [
        ("address.city", "city"),
        ("tags.element", "element"),
        ("attributes.attribute_value", "attribute_value"),
    ] {
        assert_eq!(
            schema.get_field_by_path(path).map(SchemaNode::name),
            Some(leaf)
        );
    }
}