parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
@@ -0,0 +1,542 @@
1
+ use bytes::Bytes;
2
+ use parquet_core::*;
3
+
4
+ mod test_helpers;
5
+
6
+ // ====== Schema Builder Tests ======
7
+
/// A builder that never received a root node must refuse to build and report
/// the expected error message.
#[test]
fn test_schema_builder_error_cases() {
    let outcome = SchemaBuilder::new().build();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert_eq!(message, "Schema must have a root node");
}
15
+
/// `SchemaBuilder::new()` and `SchemaBuilder::default()` should behave alike:
/// with no root configured, both fail to build and yield equal errors.
#[test]
fn test_schema_builder_default() {
    let from_new = SchemaBuilder::new().build();
    let from_default = SchemaBuilder::default().build();

    assert!(from_new.is_err());
    assert!(from_default.is_err());
    assert_eq!(from_new.unwrap_err(), from_default.unwrap_err());
}
29
+
/// Two schemas built independently from identical node definitions must
/// compare equal.
#[test]
fn test_schema_equality() {
    // Each call produces a fresh schema: root { id: Int64 (required), name: String (nullable) }.
    let build_schema = || {
        let id = SchemaNode::Primitive {
            name: "id".to_string(),
            primitive_type: PrimitiveType::Int64,
            nullable: false,
            format: None,
        };
        let name = SchemaNode::Primitive {
            name: "name".to_string(),
            primitive_type: PrimitiveType::String,
            nullable: true,
            format: None,
        };
        let root = SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![id, name],
        };
        SchemaBuilder::new().with_root(root).build().unwrap()
    };

    let schema1 = build_schema();
    let schema2 = build_schema();

    assert_eq!(schema1, schema2);
}
78
+
/// Schemas that differ only in the primitive type of a single column must
/// not compare equal.
#[test]
fn test_schema_inequality() {
    // Builds root { id: <id_type> (required) }; only the column type varies.
    let schema_with_id_type = |id_type: PrimitiveType| {
        let id = SchemaNode::Primitive {
            name: "id".to_string(),
            primitive_type: id_type,
            nullable: false,
            format: None,
        };
        let root = SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![id],
        };
        SchemaBuilder::new().with_root(root).build().unwrap()
    };

    let schema1 = schema_with_id_type(PrimitiveType::Int64);
    let schema2 = schema_with_id_type(PrimitiveType::Int32);

    assert_ne!(schema1, schema2);
}
111
+
112
+ // ====== Complex Schema Construction Tests ======
113
+
/// Builds a root struct containing a nullable map whose values are lists of
/// nullable structs, and checks that the builder accepts the nesting.
#[test]
fn test_deeply_nested_schema_construction() {
    // Shape: root { map: map<key: String, list<inner { value: String }>> }
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![SchemaNode::Map {
            name: "map".to_string(),
            nullable: true,
            key: Box::new(SchemaNode::Primitive {
                name: "key".to_string(),
                primitive_type: PrimitiveType::String,
                nullable: false,
                format: None,
            }),
            value: Box::new(SchemaNode::List {
                name: "list".to_string(),
                nullable: false,
                item: Box::new(SchemaNode::Struct {
                    name: "inner".to_string(),
                    nullable: true,
                    fields: vec![SchemaNode::Primitive {
                        name: "value".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: false,
                        format: None,
                    }],
                }),
            }),
        }],
    };

    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    assert_eq!(schema.root.name(), "root");
}
156
+
/// Builds one schema that uses every node kind (primitive, list, map and
/// struct) and checks the resulting root's name and nullability.
#[test]
fn test_complex_schema_with_all_node_types() {
    // Every primitive in this schema has no format, so only these three vary.
    let primitive = |name: &str, primitive_type: PrimitiveType, nullable: bool| {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable,
            format: None,
        }
    };

    let id = primitive("id", PrimitiveType::Int64, false);

    let tags = SchemaNode::List {
        name: "tags".to_string(),
        nullable: true,
        item: Box::new(primitive("tag", PrimitiveType::String, false)),
    };

    let metadata = SchemaNode::Map {
        name: "metadata".to_string(),
        nullable: false,
        key: Box::new(primitive("key", PrimitiveType::String, false)),
        value: Box::new(primitive("value", PrimitiveType::String, true)),
    };

    let nested = SchemaNode::Struct {
        name: "nested".to_string(),
        nullable: true,
        fields: vec![
            primitive("field1", PrimitiveType::Float64, false),
            primitive("field2", PrimitiveType::Boolean, true),
        ],
    };

    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![id, tags, metadata, nested],
    };

    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    assert_eq!(schema.root.name(), "root");
    assert!(!schema.root.is_nullable());
}
222
+
223
+ // ====== Primitive Type Tests ======
224
+
/// Checks `type_name()` for every primitive type listed below, and
/// `requires_format()` for a sample of types on each side of the answer.
#[test]
fn test_primitive_type_names_and_format_requirements() {
    // Expected `type_name()` output, one entry per primitive type.
    let expected_names = [
        (PrimitiveType::Int8, "Int8"),
        (PrimitiveType::Int16, "Int16"),
        (PrimitiveType::Int32, "Int32"),
        (PrimitiveType::Int64, "Int64"),
        (PrimitiveType::UInt8, "UInt8"),
        (PrimitiveType::UInt16, "UInt16"),
        (PrimitiveType::UInt32, "UInt32"),
        (PrimitiveType::UInt64, "UInt64"),
        (PrimitiveType::Float32, "Float32"),
        (PrimitiveType::Float64, "Float64"),
        (PrimitiveType::Decimal128(10, 2), "Decimal128"),
        (PrimitiveType::Decimal256(20, 4), "Decimal256"),
        (PrimitiveType::Boolean, "Boolean"),
        (PrimitiveType::String, "String"),
        (PrimitiveType::Binary, "Binary"),
        (PrimitiveType::Date32, "Date32"),
        (PrimitiveType::Date64, "Date64"),
        (PrimitiveType::TimestampSecond(None), "TimestampSecond"),
        (PrimitiveType::TimestampMillis(None), "TimestampMillis"),
        (PrimitiveType::TimestampMicros(None), "TimestampMicros"),
        (PrimitiveType::TimestampNanos(None), "TimestampNanos"),
        (PrimitiveType::TimeMillis, "TimeMillis"),
        (PrimitiveType::TimeMicros, "TimeMicros"),
        (PrimitiveType::TimeNanos, "TimeNanos"),
        (PrimitiveType::FixedLenByteArray(16), "FixedLenByteArray"),
    ];
    for (primitive, expected) in expected_names {
        assert_eq!(primitive.type_name(), expected);
    }

    // Types that must NOT require a format string.
    let format_free = [
        PrimitiveType::Int32,
        PrimitiveType::String,
        PrimitiveType::Binary,
        PrimitiveType::Decimal128(10, 2),
        PrimitiveType::FixedLenByteArray(16),
    ];
    for primitive in format_free {
        assert!(!primitive.requires_format());
    }

    // Date, timestamp and time types must require a format string.
    let format_required = [
        PrimitiveType::Date32,
        PrimitiveType::Date64,
        PrimitiveType::TimestampSecond(None),
        PrimitiveType::TimestampMillis(None),
        PrimitiveType::TimestampMicros(None),
        PrimitiveType::TimestampNanos(None),
        PrimitiveType::TimeMillis,
        PrimitiveType::TimeMicros,
        PrimitiveType::TimeNanos,
    ];
    for primitive in format_required {
        assert!(primitive.requires_format());
    }
}
286
+
/// `repetition()` should report `Optional` for nullable nodes and `Required`
/// for non-nullable ones, across primitive, struct, list and map nodes.
#[test]
fn test_repetition_from_nullability() {
    // String primitives without a format; only name and nullability vary.
    let string_node = |name: &str, nullable: bool| SchemaNode::Primitive {
        name: name.to_string(),
        primitive_type: PrimitiveType::String,
        nullable,
        format: None,
    };

    let nullable_node = string_node("nullable", true);
    let required_node = string_node("required", false);

    let nullable_struct = SchemaNode::Struct {
        name: "struct".to_string(),
        nullable: true,
        fields: vec![],
    };
    let nullable_list = SchemaNode::List {
        name: "list".to_string(),
        nullable: true,
        item: Box::new(required_node.clone()),
    };
    let nullable_map = SchemaNode::Map {
        name: "map".to_string(),
        nullable: true,
        key: Box::new(required_node.clone()),
        value: Box::new(nullable_node.clone()),
    };

    // Checked in the same order as the nodes were introduced above.
    let expectations = [
        (&nullable_node, Repetition::Optional),
        (&required_node, Repetition::Required),
        (&nullable_struct, Repetition::Optional),
        (&nullable_list, Repetition::Optional),
        (&nullable_map, Repetition::Optional),
    ];
    for (node, expected) in expectations {
        assert_eq!(node.repetition(), expected);
    }
}
327
+
328
+ // ====== Empty File Handling Test ======
329
+
/// Writes a file with a schema but zero rows, then reads it back and expects
/// an empty row set rather than an error.
#[test]
fn test_empty_file_handling() {
    let id_column = SchemaNode::Primitive {
        name: "id".to_string(),
        primitive_type: PrimitiveType::Int64,
        nullable: false,
        format: None,
    };
    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields: vec![id_column],
    };
    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    // Open the writer and close it straight away, without writing any rows.
    let mut buffer = Vec::new();
    Writer::new(&mut buffer, schema).unwrap().close().unwrap();

    // Read the row-less file back.
    let reader = Reader::new(Bytes::from(buffer));
    let row_iter = reader.read_rows().unwrap();
    let rows: Vec<_> = row_iter.collect::<Result<Vec<_>>>().unwrap();

    assert!(rows.is_empty());
}
365
+
366
+ // ====== All Primitive Types Test ======
367
+ // Using the comprehensive version from schema_builder_tests.rs which includes all types
368
+
/// Builds a schema with one required column per primitive type and checks
/// that a `Writer` can be created for it.
#[test]
fn test_all_primitive_types_in_schema() {
    // (column name, primitive type, optional format string)
    let columns: Vec<(&str, PrimitiveType, Option<&str>)> = vec![
        ("int8", PrimitiveType::Int8, None),
        ("int16", PrimitiveType::Int16, None),
        ("int32", PrimitiveType::Int32, None),
        ("int64", PrimitiveType::Int64, None),
        ("uint8", PrimitiveType::UInt8, None),
        ("uint16", PrimitiveType::UInt16, None),
        ("uint32", PrimitiveType::UInt32, None),
        ("uint64", PrimitiveType::UInt64, None),
        ("float32", PrimitiveType::Float32, None),
        ("float64", PrimitiveType::Float64, None),
        ("decimal128", PrimitiveType::Decimal128(38, 10), None),
        ("decimal256", PrimitiveType::Decimal256(76, 20), None),
        ("boolean", PrimitiveType::Boolean, None),
        ("string", PrimitiveType::String, None),
        ("binary", PrimitiveType::Binary, None),
        ("date32", PrimitiveType::Date32, Some("date")),
        ("date64", PrimitiveType::Date64, Some("date")),
        (
            "timestamp_second",
            PrimitiveType::TimestampSecond(None),
            Some("timestamp"),
        ),
        (
            "timestamp_millis",
            PrimitiveType::TimestampMillis(None),
            Some("timestamp"),
        ),
        (
            "timestamp_micros",
            PrimitiveType::TimestampMicros(None),
            Some("timestamp"),
        ),
        (
            "timestamp_nanos",
            PrimitiveType::TimestampNanos(None),
            Some("timestamp"),
        ),
        ("time_millis", PrimitiveType::TimeMillis, Some("time")),
        ("time_micros", PrimitiveType::TimeMicros, Some("time")),
        ("time_nanos", PrimitiveType::TimeNanos, Some("time")),
        (
            "fixed_len_byte_array",
            PrimitiveType::FixedLenByteArray(16),
            None,
        ),
    ];

    // Every column is required (non-nullable); only name, type and format vary.
    let fields: Vec<SchemaNode> = columns
        .into_iter()
        .map(|(name, primitive_type, format)| SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable: false,
            format: format.map(String::from),
        })
        .collect();

    let root = SchemaNode::Struct {
        name: "root".to_string(),
        nullable: false,
        fields,
    };
    let schema = SchemaBuilder::new().with_root(root).build().unwrap();

    assert_eq!(schema.root.name(), "root");

    // The schema must also be accepted when constructing a writer.
    let mut buffer = Vec::new();
    let writer_result = Writer::new(&mut buffer, schema);
    assert!(
        writer_result.is_ok(),
        "Should be able to create writer with all primitive types"
    );
}