parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -0,0 +1,283 @@
1
+ use std::sync::Arc;
2
+
3
+ /// Core schema representation for Parquet files
4
+ #[derive(Debug, Clone, PartialEq)]
5
+ pub struct Schema {
6
+ pub root: SchemaNode,
7
+ }
8
+
9
+ /// Represents a node in the Parquet schema tree
10
+ #[derive(Debug, Clone, PartialEq)]
11
+ pub enum SchemaNode {
12
+ /// A struct with named fields
13
+ Struct {
14
+ name: String,
15
+ nullable: bool,
16
+ fields: Vec<SchemaNode>,
17
+ },
18
+ /// A list containing items of a single type
19
+ List {
20
+ name: String,
21
+ nullable: bool,
22
+ item: Box<SchemaNode>,
23
+ },
24
+ /// A map with key-value pairs
25
+ Map {
26
+ name: String,
27
+ nullable: bool,
28
+ key: Box<SchemaNode>,
29
+ value: Box<SchemaNode>,
30
+ },
31
+ /// A primitive/leaf type
32
+ Primitive {
33
+ name: String,
34
+ primitive_type: PrimitiveType,
35
+ nullable: bool,
36
+ format: Option<String>,
37
+ },
38
+ }
39
+
40
/// Leaf data types that a [`SchemaNode::Primitive`] can hold.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PrimitiveType {
    // --- Signed integers ---
    /// 8-bit signed integer.
    Int8,
    /// 16-bit signed integer.
    Int16,
    /// 32-bit signed integer.
    Int32,
    /// 64-bit signed integer.
    Int64,

    // --- Unsigned integers ---
    /// 8-bit unsigned integer.
    UInt8,
    /// 16-bit unsigned integer.
    UInt16,
    /// 32-bit unsigned integer.
    UInt32,
    /// 64-bit unsigned integer.
    UInt64,

    // --- Floating point ---
    /// 32-bit floating point number.
    Float32,
    /// 64-bit floating point number.
    Float64,

    // --- Decimals, carried as (precision, scale) ---
    /// 128-bit decimal with the given precision and scale.
    Decimal128(u8, i8),
    /// 256-bit decimal with the given precision and scale.
    Decimal256(u8, i8),

    // --- Other basic types ---
    /// Boolean value.
    Boolean,
    /// String value.
    String,
    /// Binary value.
    Binary,

    // --- Date/Time types ---
    // NOTE(review): the `Option<Arc<str>>` payload on the timestamp variants
    // is presumably a timezone name — confirm against the conversion code.
    /// Date variant named `Date32`.
    Date32,
    /// Date variant named `Date64`.
    Date64,
    /// Timestamp with second resolution.
    TimestampSecond(Option<Arc<str>>),
    /// Timestamp with millisecond resolution.
    TimestampMillis(Option<Arc<str>>),
    /// Timestamp with microsecond resolution.
    TimestampMicros(Option<Arc<str>>),
    /// Timestamp with nanosecond resolution.
    TimestampNanos(Option<Arc<str>>),
    /// Time of day with millisecond resolution.
    TimeMillis,
    /// Time of day with microsecond resolution.
    TimeMicros,

    // --- Fixed-length byte array ---
    /// Fixed-length byte array; the `i32` is presumably the length in bytes
    /// (TODO confirm).
    FixedLenByteArray(i32),
}
79
+
80
/// How many values a field may carry in Parquet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Repetition {
    /// Exactly one value must be present.
    Required,
    /// Zero or one value may be present.
    Optional,
    /// Any number of values (zero or more) may be present.
    Repeated,
}
90
+
91
+ impl SchemaNode {
92
+ /// Get the name of this schema node
93
+ pub fn name(&self) -> &str {
94
+ match self {
95
+ SchemaNode::Struct { name, .. } => name,
96
+ SchemaNode::List { name, .. } => name,
97
+ SchemaNode::Map { name, .. } => name,
98
+ SchemaNode::Primitive { name, .. } => name,
99
+ }
100
+ }
101
+
102
+ /// Check if this node is nullable
103
+ pub fn is_nullable(&self) -> bool {
104
+ match self {
105
+ SchemaNode::Struct { nullable, .. } => *nullable,
106
+ SchemaNode::List { nullable, .. } => *nullable,
107
+ SchemaNode::Map { nullable, .. } => *nullable,
108
+ SchemaNode::Primitive { nullable, .. } => *nullable,
109
+ }
110
+ }
111
+
112
+ /// Get the repetition level based on nullability
113
+ pub fn repetition(&self) -> Repetition {
114
+ if self.is_nullable() {
115
+ Repetition::Optional
116
+ } else {
117
+ Repetition::Required
118
+ }
119
+ }
120
+ }
121
+
122
+ impl PrimitiveType {
123
+ /// Get the logical type name for display
124
+ pub fn type_name(&self) -> &'static str {
125
+ match self {
126
+ PrimitiveType::Int8 => "Int8",
127
+ PrimitiveType::Int16 => "Int16",
128
+ PrimitiveType::Int32 => "Int32",
129
+ PrimitiveType::Int64 => "Int64",
130
+ PrimitiveType::UInt8 => "UInt8",
131
+ PrimitiveType::UInt16 => "UInt16",
132
+ PrimitiveType::UInt32 => "UInt32",
133
+ PrimitiveType::UInt64 => "UInt64",
134
+ PrimitiveType::Float32 => "Float32",
135
+ PrimitiveType::Float64 => "Float64",
136
+ PrimitiveType::Decimal128(_, _) => "Decimal128",
137
+ PrimitiveType::Decimal256(_, _) => "Decimal256",
138
+ PrimitiveType::Boolean => "Boolean",
139
+ PrimitiveType::String => "String",
140
+ PrimitiveType::Binary => "Binary",
141
+ PrimitiveType::Date32 => "Date32",
142
+ PrimitiveType::Date64 => "Date64",
143
+ PrimitiveType::TimestampSecond(_) => "TimestampSecond",
144
+ PrimitiveType::TimestampMillis(_) => "TimestampMillis",
145
+ PrimitiveType::TimestampMicros(_) => "TimestampMicros",
146
+ PrimitiveType::TimestampNanos(_) => "TimestampNanos",
147
+ PrimitiveType::TimeMillis => "TimeMillis",
148
+ PrimitiveType::TimeMicros => "TimeMicros",
149
+ PrimitiveType::FixedLenByteArray(_) => "FixedLenByteArray",
150
+ }
151
+ }
152
+
153
+ /// Check if this type requires a format specifier
154
+ pub fn requires_format(&self) -> bool {
155
+ matches!(
156
+ self,
157
+ PrimitiveType::Date32
158
+ | PrimitiveType::Date64
159
+ | PrimitiveType::TimestampSecond(_)
160
+ | PrimitiveType::TimestampMillis(_)
161
+ | PrimitiveType::TimestampMicros(_)
162
+ | PrimitiveType::TimestampNanos(_)
163
+ | PrimitiveType::TimeMillis
164
+ | PrimitiveType::TimeMicros
165
+ )
166
+ }
167
+ }
168
+
169
+ /// Builder for creating schemas
170
+ pub struct SchemaBuilder {
171
+ root: Option<SchemaNode>,
172
+ }
173
+
174
+ impl SchemaBuilder {
175
+ pub fn new() -> Self {
176
+ Self { root: None }
177
+ }
178
+
179
+ pub fn with_root(mut self, root: SchemaNode) -> Self {
180
+ self.root = Some(root);
181
+ self
182
+ }
183
+
184
+ pub fn build(self) -> Result<Schema, &'static str> {
185
+ match self.root {
186
+ Some(root) => Ok(Schema { root }),
187
+ None => Err("Schema must have a root node"),
188
+ }
189
+ }
190
+ }
191
+
192
+ impl Default for SchemaBuilder {
193
+ fn default() -> Self {
194
+ Self::new()
195
+ }
196
+ }
197
+
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a primitive leaf node without a format string.
    fn primitive(name: &str, primitive_type: PrimitiveType, nullable: bool) -> SchemaNode {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable,
            format: None,
        }
    }

    /// A struct root with two leaves builds into a schema exposing that root.
    #[test]
    fn test_schema_creation() {
        let root = SchemaNode::Struct {
            name: "root".to_string(),
            nullable: false,
            fields: vec![
                primitive("id", PrimitiveType::Int64, false),
                primitive("name", PrimitiveType::String, true),
            ],
        };
        let schema = SchemaBuilder::new().with_root(root).build().unwrap();

        assert_eq!(schema.root.name(), "root");
        assert!(!schema.root.is_nullable());
    }

    /// Spot-checks `type_name` and `requires_format` on a few primitives.
    #[test]
    fn test_primitive_types() {
        assert_eq!(PrimitiveType::Decimal128(10, 2).type_name(), "Decimal128");
        assert!(PrimitiveType::TimestampMicros(None).requires_format());
        assert!(!PrimitiveType::Int32.requires_format());
    }

    /// A nullable list reports its name, nullability and optional repetition.
    #[test]
    fn test_nested_schema() {
        let list_node = SchemaNode::List {
            name: "items".to_string(),
            nullable: true,
            item: Box::new(primitive("item", PrimitiveType::String, false)),
        };

        assert_eq!(list_node.name(), "items");
        assert!(list_node.is_nullable());
        assert_eq!(list_node.repetition(), Repetition::Optional);
    }

    /// A non-nullable map reports its name and required repetition.
    #[test]
    fn test_map_schema() {
        let map_node = SchemaNode::Map {
            name: "metadata".to_string(),
            nullable: false,
            key: Box::new(primitive("key", PrimitiveType::String, false)),
            value: Box::new(primitive("value", PrimitiveType::String, true)),
        };

        assert_eq!(map_node.name(), "metadata");
        assert!(!map_node.is_nullable());
        assert_eq!(map_node.repetition(), Repetition::Required);
    }
}
@@ -0,0 +1,308 @@
1
+ //! Test utilities for parquet-core
2
+
3
#[cfg(test)]
pub mod test {
    use crate::{ParquetValue, PrimitiveType, Schema, SchemaBuilder, SchemaNode};
    use indexmap::IndexMap;
    use ordered_float::OrderedFloat;
    use std::sync::Arc;

    /// Create a simple schema for testing.
    ///
    /// The root is a non-nullable struct with four primitive fields:
    /// `id` (Int64, required), `name` (String), `age` (Int32) and
    /// `salary` (Float64), the last three nullable.
    pub fn sample_schema() -> Schema {
        SchemaBuilder::new()
            .with_root(SchemaNode::Struct {
                name: "root".to_string(),
                nullable: false,
                fields: vec![
                    SchemaNode::Primitive {
                        name: "id".to_string(),
                        primitive_type: PrimitiveType::Int64,
                        nullable: false,
                        format: None,
                    },
                    SchemaNode::Primitive {
                        name: "name".to_string(),
                        primitive_type: PrimitiveType::String,
                        nullable: true,
                        format: None,
                    },
                    SchemaNode::Primitive {
                        name: "age".to_string(),
                        primitive_type: PrimitiveType::Int32,
                        nullable: true,
                        format: None,
                    },
                    SchemaNode::Primitive {
                        name: "salary".to_string(),
                        primitive_type: PrimitiveType::Float64,
                        nullable: true,
                        format: None,
                    },
                ],
            })
            .build()
            .unwrap()
    }

    /// Create a complex schema with nested types.
    ///
    /// The root struct holds a required `id` (Int64), a nullable `person`
    /// struct (`name`: String, `age`: nullable Int32) and a nullable `scores`
    /// list of non-nullable Float32 items.
    pub fn complex_schema() -> Schema {
        SchemaBuilder::new()
            .with_root(SchemaNode::Struct {
                name: "root".to_string(),
                nullable: false,
                fields: vec![
                    SchemaNode::Primitive {
                        name: "id".to_string(),
                        primitive_type: PrimitiveType::Int64,
                        nullable: false,
                        format: None,
                    },
                    SchemaNode::Struct {
                        name: "person".to_string(),
                        nullable: true,
                        fields: vec![
                            SchemaNode::Primitive {
                                name: "name".to_string(),
                                primitive_type: PrimitiveType::String,
                                nullable: false,
                                format: None,
                            },
                            SchemaNode::Primitive {
                                name: "age".to_string(),
                                primitive_type: PrimitiveType::Int32,
                                nullable: true,
                                format: None,
                            },
                        ],
                    },
                    SchemaNode::List {
                        name: "scores".to_string(),
                        nullable: true,
                        item: Box::new(SchemaNode::Primitive {
                            name: "item".to_string(),
                            primitive_type: PrimitiveType::Float32,
                            nullable: false,
                            format: None,
                        }),
                    },
                ],
            })
            .build()
            .unwrap()
    }

    /// Create sample row values matching the simple schema
    /// (`id`, `name`, `age`, `salary`, in that order).
    pub fn sample_values() -> Vec<ParquetValue> {
        vec![
            ParquetValue::Int64(1),
            ParquetValue::String(Arc::from("Alice")),
            ParquetValue::Int32(30),
            ParquetValue::Float64(OrderedFloat(75000.0)),
        ]
    }

    /// Create `count` sample rows shaped like [`sample_values`].
    ///
    /// Row `i` has id `i`, name `"Person{i}"`, age `20 + i % 50` and salary
    /// `50000 + i * 1000`.
    pub fn sample_rows(count: usize) -> Vec<Vec<ParquetValue>> {
        (0..count)
            .map(|i| {
                vec![
                    ParquetValue::Int64(i as i64),
                    ParquetValue::String(Arc::from(format!("Person{}", i))),
                    // `20 + i % 50` is at most 69, so the narrowing cast cannot truncate.
                    ParquetValue::Int32((20 + i % 50) as i32),
                    ParquetValue::Float64(OrderedFloat(50000.0 + (i as f64 * 1000.0))),
                ]
            })
            .collect()
    }

    /// Create sample values with nulls in the `name` and `salary` positions.
    pub fn sample_values_with_nulls() -> Vec<ParquetValue> {
        vec![
            ParquetValue::Int64(2),
            ParquetValue::Null,
            ParquetValue::Int32(25),
            ParquetValue::Null,
        ]
    }

    /// Create complex values matching the complex schema: an id, a `person`
    /// record and a list of three Float32 scores.
    pub fn complex_values() -> Vec<ParquetValue> {
        let mut person = IndexMap::new();
        person.insert(Arc::from("name"), ParquetValue::String(Arc::from("Bob")));
        person.insert(Arc::from("age"), ParquetValue::Int32(35));

        vec![
            ParquetValue::Int64(1),
            ParquetValue::Record(person),
            ParquetValue::List(vec![
                ParquetValue::Float32(OrderedFloat(90.5)),
                ParquetValue::Float32(OrderedFloat(87.3)),
                ParquetValue::Float32(OrderedFloat(92.1)),
            ]),
        ]
    }

    /// Test data pairing primitive types with one representative value each.
    pub fn all_primitive_values() -> Vec<(PrimitiveType, ParquetValue)> {
        vec![
            (PrimitiveType::Boolean, ParquetValue::Boolean(true)),
            (PrimitiveType::Int8, ParquetValue::Int8(42)),
            (PrimitiveType::Int16, ParquetValue::Int16(1000)),
            (PrimitiveType::Int32, ParquetValue::Int32(100000)),
            (PrimitiveType::Int64, ParquetValue::Int64(1000000000)),
            (PrimitiveType::UInt8, ParquetValue::UInt8(200)),
            (PrimitiveType::UInt16, ParquetValue::UInt16(50000)),
            (PrimitiveType::UInt32, ParquetValue::UInt32(3000000000)),
            (PrimitiveType::UInt64, ParquetValue::UInt64(10000000000)),
            (
                PrimitiveType::Float32,
                ParquetValue::Float32(OrderedFloat(3.75)),
            ),
            (
                PrimitiveType::Float64,
                ParquetValue::Float64(OrderedFloat(2.625)),
            ),
            (
                PrimitiveType::String,
                ParquetValue::String(Arc::from("test string")),
            ),
            (
                PrimitiveType::Binary,
                ParquetValue::Bytes(bytes::Bytes::from(vec![0x01, 0x02, 0x03])),
            ),
            (PrimitiveType::Date32, ParquetValue::Date32(18628)), // 2021-01-01
            (
                PrimitiveType::TimeMillis,
                ParquetValue::TimeMillis(43200000),
            ), // 12:00:00
            (
                PrimitiveType::TimeMicros,
                ParquetValue::TimeMicros(43200000000),
            ), // 12:00:00
            (
                PrimitiveType::TimestampMillis(None),
                ParquetValue::TimestampMillis(1609459200000, None),
            ), // 2021-01-01 00:00:00
            (
                PrimitiveType::TimestampMicros(None),
                ParquetValue::TimestampMicros(1609459200000000, None),
            ), // 2021-01-01 00:00:00
            (
                PrimitiveType::Decimal128(10, 2),
                ParquetValue::Decimal128(12345, 2),
            ), // 123.45
        ]
    }

    /// Create a unique temporary file path for testing.
    ///
    /// The path lives in the platform's temporary directory (which honours
    /// `TMPDIR` on Unix and works on Windows) instead of a hard-coded `/tmp`.
    /// Only the path is produced; no file is created.
    pub fn temp_file_path() -> String {
        std::env::temp_dir()
            .join(format!("parquet_test_{}.parquet", uuid::Uuid::new_v4()))
            .to_string_lossy()
            .into_owned()
    }

    /// Compare two ParquetValues for equality, handling floating point comparison.
    ///
    /// * Floats are equal when they are exactly equal, both NaN, or closer
    ///   than the type's `EPSILON`.
    /// * Lists and maps are compared element by element in iteration order.
    /// * Records are compared by key lookup, so field order does not matter.
    /// * Everything else falls back to `==`.
    pub fn values_equal(a: &ParquetValue, b: &ParquetValue) -> bool {
        match (a, b) {
            (ParquetValue::Float32(OrderedFloat(x)), ParquetValue::Float32(OrderedFloat(y))) => {
                // The exact and NaN checks come first: `inf - inf` and anything
                // involving NaN yield NaN, which would fail the epsilon test even
                // though `OrderedFloat` itself treats those values as equal.
                x == y || (x.is_nan() && y.is_nan()) || (x - y).abs() < f32::EPSILON
            }
            (ParquetValue::Float64(OrderedFloat(x)), ParquetValue::Float64(OrderedFloat(y))) => {
                x == y || (x.is_nan() && y.is_nan()) || (x - y).abs() < f64::EPSILON
            }
            (ParquetValue::List(a), ParquetValue::List(b)) => {
                a.len() == b.len() && a.iter().zip(b.iter()).all(|(a, b)| values_equal(a, b))
            }
            (ParquetValue::Map(a), ParquetValue::Map(b)) => {
                a.len() == b.len()
                    && a.iter()
                        .zip(b.iter())
                        .all(|((k1, v1), (k2, v2))| values_equal(k1, k2) && values_equal(v1, v2))
            }
            (ParquetValue::Record(a), ParquetValue::Record(b)) => {
                a.len() == b.len()
                    && a.iter()
                        .all(|(k, v)| b.get(k).map_or(false, |v2| values_equal(v, v2)))
            }
            _ => a == b,
        }
    }

    /// Assert that two slices of ParquetValues are equal.
    ///
    /// # Panics
    ///
    /// Panics when the lengths differ or when any pair of values at the same
    /// index fails [`values_equal`]; the message names the offending index.
    pub fn assert_values_equal(expected: &[ParquetValue], actual: &[ParquetValue]) {
        assert_eq!(
            expected.len(),
            actual.len(),
            "Value vectors have different lengths: expected {}, got {}",
            expected.len(),
            actual.len()
        );

        for (i, (e, a)) in expected.iter().zip(actual.iter()).enumerate() {
            assert!(
                values_equal(e, a),
                "Values at index {} are not equal:\nExpected: {:?}\nActual: {:?}",
                i,
                e,
                a
            );
        }
    }
}
250
+
251
#[cfg(test)]
mod test_utils_tests {
    use super::test::*;
    use crate::{ParquetValue, SchemaNode};
    use ordered_float::OrderedFloat;

    /// The sample schema is a struct named `root` with the four expected fields.
    #[test]
    fn test_sample_schema() {
        let schema = sample_schema();
        assert_eq!(schema.root.name(), "root");

        match &schema.root {
            SchemaNode::Struct { fields, .. } => {
                // Checks both the number of fields and their order in one go.
                let names: Vec<&str> = fields.iter().map(|field| field.name()).collect();
                assert_eq!(names, ["id", "name", "age", "salary"]);
            }
            _ => panic!("Expected struct schema"),
        }
    }

    /// The sample row has four values, starting with id 1 and name "Alice".
    #[test]
    fn test_sample_values() {
        let row = sample_values();
        assert_eq!(row.len(), 4);
        assert!(matches!(row[0], ParquetValue::Int64(1)));
        assert!(matches!(&row[1], ParquetValue::String(s) if s.as_ref() == "Alice"));
    }

    /// `values_equal` handles exact, floating point and list comparisons.
    #[test]
    fn test_values_equal() {
        let int = ParquetValue::Int32;
        let float = |v: f32| ParquetValue::Float32(OrderedFloat(v));
        let one_two = || ParquetValue::List(vec![int(1), int(2)]);

        // Test exact equality
        assert!(values_equal(&int(42), &int(42)));

        // Test floating point equality
        assert!(values_equal(&float(1.0), &float(1.0 + f32::EPSILON / 2.0)));

        // Test list equality
        assert!(values_equal(&one_two(), &one_two()));

        // Test inequality
        assert!(!values_equal(&int(42), &int(43)));
    }
}
@@ -0,0 +1,5 @@
1
+ //! Traits for abstracting Parquet operations
2
+
3
+ pub mod schema;
4
+
5
+ pub use schema::SchemaInspector;