parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -0,0 +1,821 @@
1
+ use bytes::Bytes;
2
+ use indexmap::IndexMap;
3
+ use num::BigInt;
4
+ use ordered_float::OrderedFloat;
5
+ use parquet_core::*;
6
+ use std::sync::Arc;
7
+
8
#[test]
fn test_write_and_read_lists() {
    // Schema: a required int32 `id`, a nullable list of required strings,
    // and a required list of nullable doubles.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "id".to_string(),
                    primitive_type: PrimitiveType::Int32,
                    nullable: false,
                    format: None,
                },
                SchemaNode::List {
                    name: "tags".to_string(),
                    nullable: true,
                    item: Box::new(SchemaNode::Primitive {
                        name: "tag".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: false,
                        format: None,
                    }),
                },
                SchemaNode::List {
                    name: "scores".to_string(),
                    nullable: false,
                    item: Box::new(SchemaNode::Primitive {
                        name: "score".to_string(),
                        primitive_type: PrimitiveType::Float64,
                        nullable: true,
                        format: None,
                    }),
                },
            ],
        })
        .build()
        .unwrap();

    // Cover the interesting list shapes: populated (with a null element),
    // empty, a null list, and singletons.
    let input_rows = vec![
        vec![
            ParquetValue::Int32(1),
            ParquetValue::List(vec![
                ParquetValue::String(Arc::from("rust")),
                ParquetValue::String(Arc::from("parquet")),
                ParquetValue::String(Arc::from("ffi")),
            ]),
            ParquetValue::List(vec![
                ParquetValue::Float64(OrderedFloat(95.5)),
                ParquetValue::Float64(OrderedFloat(87.3)),
                ParquetValue::Null,
            ]),
        ],
        vec![
            ParquetValue::Int32(2),
            ParquetValue::List(vec![]),
            ParquetValue::List(vec![]),
        ],
        vec![
            ParquetValue::Int32(3),
            ParquetValue::Null,
            ParquetValue::List(vec![ParquetValue::Float64(OrderedFloat(100.0))]),
        ],
        vec![
            ParquetValue::Int32(4),
            ParquetValue::List(vec![ParquetValue::String(Arc::from("single"))]),
            ParquetValue::List(vec![ParquetValue::Float64(OrderedFloat(42.0))]),
        ],
    ];

    // Serialize into an in-memory buffer; the writer's borrow of `buf`
    // ends with the inner scope.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Deserialize and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}
106
+
107
#[test]
fn test_write_and_read_maps() {
    // Schema: a required int64 `user_id` plus a nullable map from required
    // string keys to nullable string values.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "user_id".to_string(),
                    primitive_type: PrimitiveType::Int64,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Map {
                    name: "attributes".to_string(),
                    nullable: true,
                    key: Box::new(SchemaNode::Primitive {
                        name: "key".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: false,
                        format: None,
                    }),
                    value: Box::new(SchemaNode::Primitive {
                        name: "value".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: true,
                        format: None,
                    }),
                },
            ],
        })
        .build()
        .unwrap();

    // Map shapes under test: multiple entries (one with a null value),
    // an empty map, a null map, and a single-entry map.
    let input_rows = vec![
        vec![
            ParquetValue::Int64(1001),
            ParquetValue::Map(vec![
                (
                    ParquetValue::String(Arc::from("name")),
                    ParquetValue::String(Arc::from("Alice")),
                ),
                (
                    ParquetValue::String(Arc::from("role")),
                    ParquetValue::String(Arc::from("admin")),
                ),
                (
                    ParquetValue::String(Arc::from("department")),
                    ParquetValue::Null,
                ),
            ]),
        ],
        vec![ParquetValue::Int64(1002), ParquetValue::Map(vec![])],
        vec![ParquetValue::Int64(1003), ParquetValue::Null],
        vec![
            ParquetValue::Int64(1004),
            ParquetValue::Map(vec![(
                ParquetValue::String(Arc::from("status")),
                ParquetValue::String(Arc::from("active")),
            )]),
        ],
    ];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}
198
+
199
#[test]
fn test_write_and_read_nested_structs() {
    // Schema: a required id plus a nullable two-level nested struct
    // (address -> coordinates). All leaf fields are nullable so a row may
    // stand in for any of them with Null.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "id".to_string(),
                    primitive_type: PrimitiveType::Int32,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Struct {
                    name: "address".to_string(),
                    nullable: true,
                    fields: vec![
                        SchemaNode::Primitive {
                            name: "street".to_string(),
                            primitive_type: PrimitiveType::String,
                            nullable: true,
                            format: None,
                        },
                        SchemaNode::Primitive {
                            name: "city".to_string(),
                            primitive_type: PrimitiveType::String,
                            nullable: true,
                            format: None,
                        },
                        SchemaNode::Struct {
                            name: "coordinates".to_string(),
                            nullable: true,
                            fields: vec![
                                SchemaNode::Primitive {
                                    name: "latitude".to_string(),
                                    primitive_type: PrimitiveType::Float64,
                                    nullable: true,
                                    format: None,
                                },
                                SchemaNode::Primitive {
                                    name: "longitude".to_string(),
                                    primitive_type: PrimitiveType::Float64,
                                    nullable: true,
                                    format: None,
                                },
                            ],
                        },
                    ],
                },
            ],
        })
        .build()
        .unwrap();

    let input_rows = vec![
        // Fully populated nested record.
        vec![
            ParquetValue::Int32(1),
            ParquetValue::Record({
                let mut address = IndexMap::new();
                address.insert(
                    Arc::from("street"),
                    ParquetValue::String(Arc::from("123 Main St")),
                );
                address.insert(
                    Arc::from("city"),
                    ParquetValue::String(Arc::from("Seattle")),
                );
                address.insert(
                    Arc::from("coordinates"),
                    ParquetValue::Record({
                        let mut geo = IndexMap::new();
                        geo.insert(
                            Arc::from("latitude"),
                            ParquetValue::Float64(OrderedFloat(47.6062)),
                        );
                        geo.insert(
                            Arc::from("longitude"),
                            ParquetValue::Float64(OrderedFloat(-122.3321)),
                        );
                        geo
                    }),
                );
                address
            }),
        ],
        // Whole address omitted.
        vec![ParquetValue::Int32(2), ParquetValue::Null],
        // Address present, but the inner coordinates struct is null
        // (legal because the nested fields are nullable).
        vec![
            ParquetValue::Int32(3),
            ParquetValue::Record({
                let mut address = IndexMap::new();
                address.insert(
                    Arc::from("street"),
                    ParquetValue::String(Arc::from("456 Oak Ave")),
                );
                address.insert(
                    Arc::from("city"),
                    ParquetValue::String(Arc::from("Portland")),
                );
                address.insert(Arc::from("coordinates"), ParquetValue::Null);
                address
            }),
        ],
    ];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());

    // Row 0: fully populated, must match exactly.
    assert_eq!(input_rows[0], round_tripped[0]);

    // Row 1: a null struct must read back as Null, not as an empty record.
    assert_eq!(round_tripped[1][0], ParquetValue::Int32(2));
    match &round_tripped[1][1] {
        ParquetValue::Null => {}
        _ => panic!("Expected second row address to be Null"),
    }

    // Row 2: outer record survives, and the nested null struct stays Null
    // (not a record full of null fields).
    match &round_tripped[2][1] {
        ParquetValue::Record(record) => {
            assert_eq!(
                record.get("street"),
                Some(&ParquetValue::String(Arc::from("456 Oak Ave")))
            );
            assert_eq!(
                record.get("city"),
                Some(&ParquetValue::String(Arc::from("Portland")))
            );
            match record.get("coordinates") {
                Some(ParquetValue::Null) => {}
                _ => panic!("Expected coordinates to be Null"),
            }
        }
        _ => panic!("Expected third row address to be a Record"),
    }
}
363
+
364
#[test]
fn test_complex_list_of_structs() {
    // Schema: an order id plus a required list of item structs
    // (product_id, quantity, and a Decimal128(10, 2) price).
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "order_id".to_string(),
                    primitive_type: PrimitiveType::Int64,
                    nullable: false,
                    format: None,
                },
                SchemaNode::List {
                    name: "items".to_string(),
                    nullable: false,
                    item: Box::new(SchemaNode::Struct {
                        name: "item".to_string(),
                        nullable: false,
                        fields: vec![
                            SchemaNode::Primitive {
                                name: "product_id".to_string(),
                                primitive_type: PrimitiveType::Int32,
                                nullable: false,
                                format: None,
                            },
                            SchemaNode::Primitive {
                                name: "quantity".to_string(),
                                primitive_type: PrimitiveType::Int32,
                                nullable: false,
                                format: None,
                            },
                            SchemaNode::Primitive {
                                name: "price".to_string(),
                                primitive_type: PrimitiveType::Decimal128(10, 2),
                                nullable: false,
                                format: None,
                            },
                        ],
                    }),
                },
            ],
        })
        .build()
        .unwrap();

    // Builds one item record; prices are unscaled values at scale 2 (cents).
    let make_item = |product_id, quantity, price_cents| {
        ParquetValue::Record({
            let mut item = IndexMap::new();
            item.insert(Arc::from("product_id"), ParquetValue::Int32(product_id));
            item.insert(Arc::from("quantity"), ParquetValue::Int32(quantity));
            item.insert(Arc::from("price"), ParquetValue::Decimal128(price_cents, 2));
            item
        })
    };

    // Orders with two items, one item, and no items at all.
    let input_rows = vec![
        vec![
            ParquetValue::Int64(100001),
            ParquetValue::List(vec![make_item(1, 2, 1999), make_item(2, 1, 4995)]),
        ],
        vec![
            ParquetValue::Int64(100002),
            ParquetValue::List(vec![make_item(3, 5, 999)]),
        ],
        vec![ParquetValue::Int64(100003), ParquetValue::List(vec![])],
    ];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}
470
+
471
#[test]
fn test_map_with_complex_values() {
    // Schema: a session id plus a required map whose values are metric
    // structs (float value, string unit, millisecond timestamp).
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "session_id".to_string(),
                    primitive_type: PrimitiveType::String,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Map {
                    name: "metrics".to_string(),
                    nullable: false,
                    key: Box::new(SchemaNode::Primitive {
                        name: "metric_name".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: false,
                        format: None,
                    }),
                    value: Box::new(SchemaNode::Struct {
                        name: "metric_data".to_string(),
                        nullable: false,
                        fields: vec![
                            SchemaNode::Primitive {
                                name: "value".to_string(),
                                primitive_type: PrimitiveType::Float64,
                                nullable: false,
                                format: None,
                            },
                            SchemaNode::Primitive {
                                name: "unit".to_string(),
                                primitive_type: PrimitiveType::String,
                                nullable: false,
                                format: None,
                            },
                            SchemaNode::Primitive {
                                name: "timestamp".to_string(),
                                primitive_type: PrimitiveType::TimestampMillis(None),
                                nullable: false,
                                format: None,
                            },
                        ],
                    }),
                },
            ],
        })
        .build()
        .unwrap();

    // Builds one metric_data record for the map's value side.
    let metric = |value: f64, unit: &str, ts_millis| {
        ParquetValue::Record({
            let mut data = IndexMap::new();
            data.insert(
                Arc::from("value"),
                ParquetValue::Float64(OrderedFloat(value)),
            );
            data.insert(Arc::from("unit"), ParquetValue::String(Arc::from(unit)));
            data.insert(
                Arc::from("timestamp"),
                ParquetValue::TimestampMillis(ts_millis, None),
            );
            data
        })
    };

    let input_rows = vec![vec![
        ParquetValue::String(Arc::from("session-123")),
        ParquetValue::Map(vec![
            (
                ParquetValue::String(Arc::from("cpu_usage")),
                metric(85.5, "percent", 1640000000000),
            ),
            (
                ParquetValue::String(Arc::from("memory_usage")),
                metric(1024.0, "MB", 1640000001000),
            ),
        ]),
    ]];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}
588
+
589
#[test]
fn test_deeply_nested_structures() {
    // Deep nesting: struct -> list -> struct -> list -> struct.
    // (Maps with struct values are exercised elsewhere; this sticks to
    // lists and records.)
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "doc_id".to_string(),
                    primitive_type: PrimitiveType::String,
                    nullable: false,
                    format: None,
                },
                SchemaNode::Struct {
                    name: "content".to_string(),
                    nullable: false,
                    fields: vec![SchemaNode::List {
                        name: "sections".to_string(),
                        nullable: false,
                        item: Box::new(SchemaNode::Struct {
                            name: "section".to_string(),
                            nullable: false,
                            fields: vec![
                                SchemaNode::Primitive {
                                    name: "title".to_string(),
                                    primitive_type: PrimitiveType::String,
                                    nullable: false,
                                    format: None,
                                },
                                SchemaNode::List {
                                    name: "paragraphs".to_string(),
                                    nullable: false,
                                    item: Box::new(SchemaNode::Struct {
                                        name: "paragraph".to_string(),
                                        nullable: false,
                                        fields: vec![
                                            SchemaNode::Primitive {
                                                name: "text".to_string(),
                                                primitive_type: PrimitiveType::String,
                                                nullable: false,
                                                format: None,
                                            },
                                            SchemaNode::Primitive {
                                                name: "score".to_string(),
                                                primitive_type: PrimitiveType::Float32,
                                                nullable: true,
                                                format: None,
                                            },
                                        ],
                                    }),
                                },
                            ],
                        }),
                    }],
                },
            ],
        })
        .build()
        .unwrap();

    // Builds a paragraph record; `score` is passed through so callers can
    // supply Float32 or Null.
    let paragraph = |text: &str, score: ParquetValue| {
        ParquetValue::Record({
            let mut para = IndexMap::new();
            para.insert(Arc::from("text"), ParquetValue::String(Arc::from(text)));
            para.insert(Arc::from("score"), score);
            para
        })
    };

    // Builds a section record holding a list of paragraphs.
    let section = |title: &str, paragraphs: Vec<ParquetValue>| {
        ParquetValue::Record({
            let mut sec = IndexMap::new();
            sec.insert(Arc::from("title"), ParquetValue::String(Arc::from(title)));
            sec.insert(Arc::from("paragraphs"), ParquetValue::List(paragraphs));
            sec
        })
    };

    let input_rows = vec![vec![
        ParquetValue::String(Arc::from("doc-001")),
        ParquetValue::Record({
            let mut content = IndexMap::new();
            content.insert(
                Arc::from("sections"),
                ParquetValue::List(vec![
                    section(
                        "Introduction",
                        vec![
                            paragraph(
                                "Welcome to this document.",
                                ParquetValue::Float32(OrderedFloat(0.95)),
                            ),
                            paragraph("This is the second paragraph.", ParquetValue::Null),
                        ],
                    ),
                    section(
                        "Conclusion",
                        vec![paragraph(
                            "In summary...",
                            ParquetValue::Float32(OrderedFloat(0.88)),
                        )],
                    ),
                ]),
            );
            content
        }),
    ]];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}
748
+
749
#[test]
fn test_decimal256_complex_type() {
    // Decimal256 values inside a complex (list) structure.
    // Schema: a required id plus a required list of nullable
    // Decimal256(precision 50, scale 10) values.
    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                SchemaNode::Primitive {
                    name: "id".to_string(),
                    primitive_type: PrimitiveType::Int32,
                    nullable: false,
                    format: None,
                },
                SchemaNode::List {
                    name: "large_values".to_string(),
                    nullable: false,
                    item: Box::new(SchemaNode::Primitive {
                        name: "value".to_string(),
                        primitive_type: PrimitiveType::Decimal256(50, 10),
                        nullable: true,
                        format: None,
                    }),
                },
            ],
        })
        .build()
        .unwrap();

    // 30-digit magnitudes (both signs) exceed i128, plus a null element
    // and a zero value; scale is 10 throughout.
    let input_rows = vec![
        vec![
            ParquetValue::Int32(1),
            ParquetValue::List(vec![
                ParquetValue::Decimal256(
                    BigInt::parse_bytes(b"123456789012345678901234567890", 10).unwrap(),
                    10,
                ),
                ParquetValue::Decimal256(
                    BigInt::parse_bytes(b"-987654321098765432109876543210", 10).unwrap(),
                    10,
                ),
                ParquetValue::Null,
            ]),
        ],
        vec![
            ParquetValue::Int32(2),
            ParquetValue::List(vec![ParquetValue::Decimal256(BigInt::from(0), 10)]),
        ],
    ];

    // Serialize to an in-memory buffer.
    let mut buf = Vec::new();
    {
        let mut writer = Writer::new(&mut buf, schema.clone()).unwrap();
        writer.write_rows(input_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read everything back and confirm a lossless round trip.
    let reader = Reader::new(Bytes::from(buf));
    let round_tripped: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(input_rows.len(), round_tripped.len());
    assert_eq!(input_rows, round_tripped);
}