parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
@@ -0,0 +1,279 @@
1
+ use bytes::Bytes;
2
+ use num::BigInt;
3
+ use ordered_float::OrderedFloat;
4
+ use parquet_core::*;
5
+ use triomphe::Arc;
6
+
7
#[test]
fn test_all_primitive_types_roundtrip() {
    // Writes one row holding a value for every primitive column declared
    // below, reads it back, and expects the exact same row.

    // Every column in this test is a non-nullable primitive with no format,
    // so a single local constructor describes all of them.
    fn required(name: &str, primitive_type: PrimitiveType) -> SchemaNode {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable: false,
            format: None,
        }
    }

    // Column order here must line up with the value order in `expected_rows`.
    let columns = vec![
        required("bool_val", PrimitiveType::Boolean),
        required("int8_val", PrimitiveType::Int8),
        required("int16_val", PrimitiveType::Int16),
        required("int32_val", PrimitiveType::Int32),
        required("int64_val", PrimitiveType::Int64),
        required("uint8_val", PrimitiveType::UInt8),
        required("uint16_val", PrimitiveType::UInt16),
        required("uint32_val", PrimitiveType::UInt32),
        required("uint64_val", PrimitiveType::UInt64),
        required("float32_val", PrimitiveType::Float32),
        required("float64_val", PrimitiveType::Float64),
        required("string_val", PrimitiveType::String),
        required("binary_val", PrimitiveType::Binary),
        required("date32_val", PrimitiveType::Date32),
        required("date64_val", PrimitiveType::Date64),
        required("time_millis_val", PrimitiveType::TimeMillis),
        required("time_micros_val", PrimitiveType::TimeMicros),
        required("timestamp_millis_val", PrimitiveType::TimestampMillis(None)),
        required("decimal128_val", PrimitiveType::Decimal128(10, 2)),
        required("decimal256_val", PrimitiveType::Decimal256(50, 10)),
    ];

    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: columns,
        })
        .build()
        .unwrap();

    // A 40-digit integer used as the unscaled Decimal256 value.
    let wide_decimal =
        BigInt::parse_bytes(b"1234567890123456789012345678901234567890", 10).unwrap();

    let expected_rows = vec![vec![
        ParquetValue::Boolean(true),
        ParquetValue::Int8(42),
        ParquetValue::Int16(1000),
        ParquetValue::Int32(100000),
        ParquetValue::Int64(1000000000),
        ParquetValue::UInt8(200),
        ParquetValue::UInt16(50000),
        ParquetValue::UInt32(3000000000),
        ParquetValue::UInt64(10000000000),
        ParquetValue::Float32(OrderedFloat(std::f32::consts::PI)),
        ParquetValue::Float64(OrderedFloat(std::f64::consts::E)),
        ParquetValue::String(Arc::from("Test string 🦀")),
        ParquetValue::Bytes(Bytes::from(vec![0xDE, 0xAD, 0xBE, 0xEF])),
        ParquetValue::Date32(19000),
        ParquetValue::Date64(1640995200000),
        ParquetValue::TimeMillis(43200000),
        ParquetValue::TimeMicros(43200000000),
        ParquetValue::TimestampMillis(1640995200000, None),
        ParquetValue::Decimal128(12345, 2),
        ParquetValue::Decimal256(wide_decimal, 10),
    ]];

    // Write phase: the writer borrows the output buffer, so it is confined
    // to its own scope before the buffer is handed to the reader.
    let mut encoded = Vec::new();
    {
        let mut writer = Writer::new(&mut encoded, schema).unwrap();
        writer.write_rows(expected_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read phase: collect every row, failing the test on the first error.
    let reader = Reader::new(Bytes::from(encoded));
    let decoded_rows: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    // Same row count first, then row-by-row equality.
    assert_eq!(expected_rows.len(), decoded_rows.len());
    expected_rows
        .iter()
        .zip(decoded_rows.iter())
        .for_each(|(expected, actual)| assert_eq!(expected, actual));
}
188
+
189
#[test]
fn test_empty_collections_roundtrip() {
    // Writes one row of empty values (string, binary, list, map) followed by
    // one row of non-empty values, and expects both rows back unchanged.

    // All leaf nodes in this schema are non-nullable primitives without a
    // format, including the list item and the map key/value.
    fn required(name: &str, primitive_type: PrimitiveType) -> SchemaNode {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable: false,
            format: None,
        }
    }

    let list_column = SchemaNode::List {
        name: "empty_list".to_string(),
        nullable: false,
        item: Box::new(required("item", PrimitiveType::Int32)),
    };

    let map_column = SchemaNode::Map {
        name: "empty_map".to_string(),
        nullable: false,
        key: Box::new(required("key", PrimitiveType::String)),
        value: Box::new(required("value", PrimitiveType::Int32)),
    };

    let schema = SchemaBuilder::new()
        .with_root(SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                required("empty_string", PrimitiveType::String),
                required("empty_binary", PrimitiveType::Binary),
                list_column,
                map_column,
            ],
        })
        .build()
        .unwrap();

    // Row whose every column holds an empty value.
    let all_empty = vec![
        ParquetValue::String(Arc::from("")),
        ParquetValue::Bytes(Bytes::from(vec![])),
        ParquetValue::List(vec![]),
        ParquetValue::Map(vec![]),
    ];

    // Row with one element / a few bytes in each column, written after the
    // empty row.
    let populated = vec![
        ParquetValue::String(Arc::from("not empty")),
        ParquetValue::Bytes(Bytes::from(vec![1, 2, 3])),
        ParquetValue::List(vec![ParquetValue::Int32(42)]),
        ParquetValue::Map(vec![(
            ParquetValue::String(Arc::from("key")),
            ParquetValue::Int32(100),
        )]),
    ];

    let expected_rows = vec![all_empty, populated];

    // Write phase: keep the writer's borrow of the buffer in its own scope.
    let mut encoded = Vec::new();
    {
        let mut writer = Writer::new(&mut encoded, schema).unwrap();
        writer.write_rows(expected_rows.clone()).unwrap();
        writer.close().unwrap();
    }

    // Read phase: collect every row, failing the test on the first error.
    let reader = Reader::new(Bytes::from(encoded));
    let decoded_rows: Vec<_> = reader
        .read_rows()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    // Same row count first, then row-by-row equality.
    assert_eq!(expected_rows.len(), decoded_rows.len());
    expected_rows
        .iter()
        .zip(decoded_rows.iter())
        .for_each(|(expected, actual)| assert_eq!(expected, actual));
}