parquet-tyfoom 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +1854 -0
- data/Cargo.toml +3 -0
- data/Gemfile +21 -0
- data/LICENSE +21 -0
- data/README.md +428 -0
- data/Rakefile +43 -0
- data/ext/parquet/Cargo.toml +39 -0
- data/ext/parquet/build.rs +5 -0
- data/ext/parquet/extconf.rb +4 -0
- data/ext/parquet/src/adapter_ffi.rs +297 -0
- data/ext/parquet/src/allocator.rs +13 -0
- data/ext/parquet/src/lib.rs +24 -0
- data/ext/parquet-core/Cargo.toml +24 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
- data/ext/parquet-core/src/error.rs +189 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +368 -0
- data/ext/parquet-core/src/schema.rs +452 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +190 -0
- data/ext/parquet-core/src/value.rs +220 -0
- data/ext/parquet-core/src/writer.rs +1241 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +431 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/review_regressions.rs +787 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
- data/ext/parquet-ruby-adapter/src/error.rs +141 -0
- data/ext/parquet-ruby-adapter/src/io.rs +432 -0
- data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
- data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
- data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +98 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
- data/lib/parquet/schema.rb +262 -0
- data/lib/parquet/version.rb +3 -0
- data/lib/parquet.rb +11 -0
- data/lib/parquet.rbi +181 -0
- metadata +165 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
//! Test utilities for parquet-core
|
|
2
|
+
|
|
3
|
+
#[cfg(test)]
|
|
4
|
+
pub mod test {
|
|
5
|
+
use crate::{ParquetValue, PrimitiveType, Schema, SchemaBuilder, SchemaNode};
|
|
6
|
+
use indexmap::IndexMap;
|
|
7
|
+
use ordered_float::OrderedFloat;
|
|
8
|
+
use triomphe::Arc;
|
|
9
|
+
|
|
10
|
+
/// Create a simple schema for testing
|
|
11
|
+
pub fn sample_schema() -> Schema {
|
|
12
|
+
SchemaBuilder::new()
|
|
13
|
+
.with_root(SchemaNode::Struct {
|
|
14
|
+
name: "root".to_string(),
|
|
15
|
+
nullable: false,
|
|
16
|
+
fields: vec![
|
|
17
|
+
SchemaNode::Primitive {
|
|
18
|
+
name: "id".to_string(),
|
|
19
|
+
primitive_type: PrimitiveType::Int64,
|
|
20
|
+
nullable: false,
|
|
21
|
+
format: None,
|
|
22
|
+
},
|
|
23
|
+
SchemaNode::Primitive {
|
|
24
|
+
name: "name".to_string(),
|
|
25
|
+
primitive_type: PrimitiveType::String,
|
|
26
|
+
nullable: true,
|
|
27
|
+
format: None,
|
|
28
|
+
},
|
|
29
|
+
SchemaNode::Primitive {
|
|
30
|
+
name: "age".to_string(),
|
|
31
|
+
primitive_type: PrimitiveType::Int32,
|
|
32
|
+
nullable: true,
|
|
33
|
+
format: None,
|
|
34
|
+
},
|
|
35
|
+
SchemaNode::Primitive {
|
|
36
|
+
name: "salary".to_string(),
|
|
37
|
+
primitive_type: PrimitiveType::Float64,
|
|
38
|
+
nullable: true,
|
|
39
|
+
format: None,
|
|
40
|
+
},
|
|
41
|
+
],
|
|
42
|
+
})
|
|
43
|
+
.build()
|
|
44
|
+
.unwrap()
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/// Create a complex schema with nested types
|
|
48
|
+
pub fn complex_schema() -> Schema {
|
|
49
|
+
SchemaBuilder::new()
|
|
50
|
+
.with_root(SchemaNode::Struct {
|
|
51
|
+
name: "root".to_string(),
|
|
52
|
+
nullable: false,
|
|
53
|
+
fields: vec![
|
|
54
|
+
SchemaNode::Primitive {
|
|
55
|
+
name: "id".to_string(),
|
|
56
|
+
primitive_type: PrimitiveType::Int64,
|
|
57
|
+
nullable: false,
|
|
58
|
+
format: None,
|
|
59
|
+
},
|
|
60
|
+
SchemaNode::Struct {
|
|
61
|
+
name: "person".to_string(),
|
|
62
|
+
nullable: true,
|
|
63
|
+
fields: vec![
|
|
64
|
+
SchemaNode::Primitive {
|
|
65
|
+
name: "name".to_string(),
|
|
66
|
+
primitive_type: PrimitiveType::String,
|
|
67
|
+
nullable: false,
|
|
68
|
+
format: None,
|
|
69
|
+
},
|
|
70
|
+
SchemaNode::Primitive {
|
|
71
|
+
name: "age".to_string(),
|
|
72
|
+
primitive_type: PrimitiveType::Int32,
|
|
73
|
+
nullable: true,
|
|
74
|
+
format: None,
|
|
75
|
+
},
|
|
76
|
+
],
|
|
77
|
+
},
|
|
78
|
+
SchemaNode::List {
|
|
79
|
+
name: "scores".to_string(),
|
|
80
|
+
nullable: true,
|
|
81
|
+
item: Box::new(SchemaNode::Primitive {
|
|
82
|
+
name: "item".to_string(),
|
|
83
|
+
primitive_type: PrimitiveType::Float32,
|
|
84
|
+
nullable: false,
|
|
85
|
+
format: None,
|
|
86
|
+
}),
|
|
87
|
+
},
|
|
88
|
+
],
|
|
89
|
+
})
|
|
90
|
+
.build()
|
|
91
|
+
.unwrap()
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/// Create sample row values matching the simple schema
|
|
95
|
+
pub fn sample_values() -> Vec<ParquetValue> {
|
|
96
|
+
vec![
|
|
97
|
+
ParquetValue::Int64(1),
|
|
98
|
+
ParquetValue::String(Arc::from("Alice")),
|
|
99
|
+
ParquetValue::Int32(30),
|
|
100
|
+
ParquetValue::Float64(OrderedFloat(75000.0)),
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/// Create multiple sample rows
|
|
105
|
+
pub fn sample_rows(count: usize) -> Vec<Vec<ParquetValue>> {
|
|
106
|
+
(0..count)
|
|
107
|
+
.map(|i| {
|
|
108
|
+
vec![
|
|
109
|
+
ParquetValue::Int64(i as i64),
|
|
110
|
+
ParquetValue::String(Arc::from(format!("Person{}", i))),
|
|
111
|
+
ParquetValue::Int32((20 + i % 50) as i32),
|
|
112
|
+
ParquetValue::Float64(OrderedFloat(50000.0 + (i as f64 * 1000.0))),
|
|
113
|
+
]
|
|
114
|
+
})
|
|
115
|
+
.collect()
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/// Create sample values with nulls
|
|
119
|
+
pub fn sample_values_with_nulls() -> Vec<ParquetValue> {
|
|
120
|
+
vec![
|
|
121
|
+
ParquetValue::Int64(2),
|
|
122
|
+
ParquetValue::Null,
|
|
123
|
+
ParquetValue::Int32(25),
|
|
124
|
+
ParquetValue::Null,
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/// Create complex values matching the complex schema
|
|
129
|
+
pub fn complex_values() -> Vec<ParquetValue> {
|
|
130
|
+
let mut person = IndexMap::new();
|
|
131
|
+
person.insert(Arc::from("name"), ParquetValue::String(Arc::from("Bob")));
|
|
132
|
+
person.insert(Arc::from("age"), ParquetValue::Int32(35));
|
|
133
|
+
|
|
134
|
+
vec![
|
|
135
|
+
ParquetValue::Int64(1),
|
|
136
|
+
ParquetValue::Record(person),
|
|
137
|
+
ParquetValue::List(vec![
|
|
138
|
+
ParquetValue::Float32(OrderedFloat(90.5)),
|
|
139
|
+
ParquetValue::Float32(OrderedFloat(87.3)),
|
|
140
|
+
ParquetValue::Float32(OrderedFloat(92.1)),
|
|
141
|
+
]),
|
|
142
|
+
]
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/// Test data for all primitive types
|
|
146
|
+
pub fn all_primitive_values() -> Vec<(PrimitiveType, ParquetValue)> {
|
|
147
|
+
vec![
|
|
148
|
+
(PrimitiveType::Boolean, ParquetValue::Boolean(true)),
|
|
149
|
+
(PrimitiveType::Int8, ParquetValue::Int8(42)),
|
|
150
|
+
(PrimitiveType::Int16, ParquetValue::Int16(1000)),
|
|
151
|
+
(PrimitiveType::Int32, ParquetValue::Int32(100000)),
|
|
152
|
+
(PrimitiveType::Int64, ParquetValue::Int64(1000000000)),
|
|
153
|
+
(PrimitiveType::UInt8, ParquetValue::UInt8(200)),
|
|
154
|
+
(PrimitiveType::UInt16, ParquetValue::UInt16(50000)),
|
|
155
|
+
(PrimitiveType::UInt32, ParquetValue::UInt32(3000000000)),
|
|
156
|
+
(PrimitiveType::UInt64, ParquetValue::UInt64(10000000000)),
|
|
157
|
+
(
|
|
158
|
+
PrimitiveType::Float32,
|
|
159
|
+
ParquetValue::Float32(OrderedFloat(3.75)),
|
|
160
|
+
),
|
|
161
|
+
(
|
|
162
|
+
PrimitiveType::Float64,
|
|
163
|
+
ParquetValue::Float64(OrderedFloat(2.625)),
|
|
164
|
+
),
|
|
165
|
+
(
|
|
166
|
+
PrimitiveType::String,
|
|
167
|
+
ParquetValue::String(Arc::from("test string")),
|
|
168
|
+
),
|
|
169
|
+
(
|
|
170
|
+
PrimitiveType::Binary,
|
|
171
|
+
ParquetValue::Bytes(bytes::Bytes::from(vec![0x01, 0x02, 0x03])),
|
|
172
|
+
),
|
|
173
|
+
(PrimitiveType::Date32, ParquetValue::Date32(18628)), // 2021-01-01
|
|
174
|
+
(
|
|
175
|
+
PrimitiveType::TimeMillis,
|
|
176
|
+
ParquetValue::TimeMillis(43200000),
|
|
177
|
+
), // 12:00:00
|
|
178
|
+
(
|
|
179
|
+
PrimitiveType::TimeMicros,
|
|
180
|
+
ParquetValue::TimeMicros(43200000000),
|
|
181
|
+
), // 12:00:00
|
|
182
|
+
(
|
|
183
|
+
PrimitiveType::TimestampMillis(None),
|
|
184
|
+
ParquetValue::TimestampMillis(1609459200000, None),
|
|
185
|
+
), // 2021-01-01 00:00:00
|
|
186
|
+
(
|
|
187
|
+
PrimitiveType::TimestampMicros(None),
|
|
188
|
+
ParquetValue::TimestampMicros(1609459200000000, None),
|
|
189
|
+
), // 2021-01-01 00:00:00
|
|
190
|
+
(
|
|
191
|
+
PrimitiveType::Decimal128(10, 2),
|
|
192
|
+
ParquetValue::Decimal128(12345, 2),
|
|
193
|
+
), // 123.45
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
/// Create a temporary file path for testing
|
|
198
|
+
pub fn temp_file_path() -> String {
|
|
199
|
+
format!("/tmp/parquet_test_{}.parquet", uuid::Uuid::new_v4())
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/// Compare two ParquetValues for equality, handling floating point comparison
|
|
203
|
+
pub fn values_equal(a: &ParquetValue, b: &ParquetValue) -> bool {
|
|
204
|
+
match (a, b) {
|
|
205
|
+
(ParquetValue::Float32(OrderedFloat(a)), ParquetValue::Float32(OrderedFloat(b))) => {
|
|
206
|
+
(a - b).abs() < f32::EPSILON
|
|
207
|
+
}
|
|
208
|
+
(ParquetValue::Float64(OrderedFloat(a)), ParquetValue::Float64(OrderedFloat(b))) => {
|
|
209
|
+
(a - b).abs() < f64::EPSILON
|
|
210
|
+
}
|
|
211
|
+
(ParquetValue::List(a), ParquetValue::List(b)) => {
|
|
212
|
+
a.len() == b.len() && a.iter().zip(b.iter()).all(|(a, b)| values_equal(a, b))
|
|
213
|
+
}
|
|
214
|
+
(ParquetValue::Map(a), ParquetValue::Map(b)) => {
|
|
215
|
+
a.len() == b.len()
|
|
216
|
+
&& a.iter()
|
|
217
|
+
.zip(b.iter())
|
|
218
|
+
.all(|((k1, v1), (k2, v2))| values_equal(k1, k2) && values_equal(v1, v2))
|
|
219
|
+
}
|
|
220
|
+
(ParquetValue::Record(a), ParquetValue::Record(b)) => {
|
|
221
|
+
a.len() == b.len()
|
|
222
|
+
&& a.iter()
|
|
223
|
+
.all(|(k, v)| b.get(k).map_or(false, |v2| values_equal(v, v2)))
|
|
224
|
+
}
|
|
225
|
+
_ => a == b,
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
/// Assert that two vectors of ParquetValues are equal
|
|
230
|
+
pub fn assert_values_equal(expected: &[ParquetValue], actual: &[ParquetValue]) {
|
|
231
|
+
assert_eq!(
|
|
232
|
+
expected.len(),
|
|
233
|
+
actual.len(),
|
|
234
|
+
"Value vectors have different lengths: expected {}, got {}",
|
|
235
|
+
expected.len(),
|
|
236
|
+
actual.len()
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
for (i, (e, a)) in expected.iter().zip(actual.iter()).enumerate() {
|
|
240
|
+
assert!(
|
|
241
|
+
values_equal(e, a),
|
|
242
|
+
"Values at index {} are not equal:\nExpected: {:?}\nActual: {:?}",
|
|
243
|
+
i,
|
|
244
|
+
e,
|
|
245
|
+
a
|
|
246
|
+
);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
#[cfg(test)]
mod test_utils_tests {
    use super::test::*;

    /// The sample schema is a root struct exposing the four expected columns in order.
    #[test]
    fn test_sample_schema() {
        let schema = sample_schema();
        assert_eq!(schema.root.name(), "root");

        let fields = match &schema.root {
            crate::SchemaNode::Struct { fields, .. } => fields,
            _ => panic!("Expected struct schema"),
        };

        assert_eq!(fields.len(), 4);
        for (index, expected_name) in ["id", "name", "age", "salary"].iter().enumerate() {
            assert_eq!(fields[index].name(), *expected_name);
        }
    }

    /// The sample row has four values and starts with the id and the name.
    #[test]
    fn test_sample_values() {
        let values = sample_values();
        assert_eq!(values.len(), 4);

        let id_matches = matches!(values[0], crate::ParquetValue::Int64(1));
        assert!(id_matches);

        let name_matches =
            matches!(&values[1], crate::ParquetValue::String(s) if s.as_ref() == "Alice");
        assert!(name_matches);
    }

    /// `values_equal` handles exact, approximate, nested and unequal inputs.
    #[test]
    fn test_values_equal() {
        use crate::ParquetValue;
        use ordered_float::OrderedFloat;

        // Test exact equality
        let forty_two = ParquetValue::Int32(42);
        assert!(values_equal(&forty_two, &ParquetValue::Int32(42)));

        // Test floating point equality
        let one = ParquetValue::Float32(OrderedFloat(1.0));
        let nearly_one = ParquetValue::Float32(OrderedFloat(1.0 + f32::EPSILON / 2.0));
        assert!(values_equal(&one, &nearly_one));

        // Test list equality
        let left = ParquetValue::List(vec![ParquetValue::Int32(1), ParquetValue::Int32(2)]);
        let right = ParquetValue::List(vec![ParquetValue::Int32(1), ParquetValue::Int32(2)]);
        assert!(values_equal(&left, &right));

        // Test inequality
        let forty_three = ParquetValue::Int32(43);
        assert!(!values_equal(&forty_two, &forty_three));
    }
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
use crate::SchemaNode;
|
|
2
|
+
|
|
3
|
+
/// Trait for schema introspection
|
|
4
|
+
///
|
|
5
|
+
/// This trait provides methods for examining and querying schemas
|
|
6
|
+
/// without modifying them.
|
|
7
|
+
pub trait SchemaInspector {
|
|
8
|
+
/// Get the total number of fields (including nested)
|
|
9
|
+
fn field_count(&self) -> usize;
|
|
10
|
+
|
|
11
|
+
/// Get field by path (e.g., "address.city")
|
|
12
|
+
fn get_field_by_path(&self, path: &str) -> Option<&SchemaNode>;
|
|
13
|
+
|
|
14
|
+
/// Check if schema contains a specific field
|
|
15
|
+
fn has_field(&self, name: &str) -> bool;
|
|
16
|
+
|
|
17
|
+
/// Get all field paths in the schema
|
|
18
|
+
fn all_field_paths(&self) -> Vec<String>;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
impl SchemaInspector for crate::Schema {
|
|
22
|
+
fn field_count(&self) -> usize {
|
|
23
|
+
count_fields(&self.root)
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
fn get_field_by_path(&self, path: &str) -> Option<&SchemaNode> {
|
|
27
|
+
get_field_by_path(&self.root, path)
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
fn has_field(&self, name: &str) -> bool {
|
|
31
|
+
self.get_field_by_path(name).is_some()
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
fn all_field_paths(&self) -> Vec<String> {
|
|
35
|
+
let mut paths = Vec::new();
|
|
36
|
+
collect_field_paths(&self.root, String::new(), &mut paths);
|
|
37
|
+
paths
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Helper functions for schema inspection
|
|
42
|
+
fn count_fields(node: &SchemaNode) -> usize {
|
|
43
|
+
match node {
|
|
44
|
+
SchemaNode::Struct { fields, .. } => 1 + fields.iter().map(count_fields).sum::<usize>(),
|
|
45
|
+
SchemaNode::List { item, .. } => 1 + count_fields(item),
|
|
46
|
+
SchemaNode::Map { key, value, .. } => 1 + count_fields(key) + count_fields(value),
|
|
47
|
+
SchemaNode::Primitive { .. } => 1,
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
fn get_field_by_path<'a>(node: &'a SchemaNode, path: &str) -> Option<&'a SchemaNode> {
|
|
52
|
+
if path.is_empty() {
|
|
53
|
+
return None;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
let mut parts: Vec<&str> = path.split('.').collect();
|
|
57
|
+
// Strip a leading segment equal to the root's own name (so a path may carry
|
|
58
|
+
// the root name as a prefix or omit it), but not when the root actually has a
|
|
59
|
+
// child of that name — there the segment refers to the child, not the root.
|
|
60
|
+
if parts.first().copied() == Some(node.name()) && !has_child_named(node, node.name()) {
|
|
61
|
+
parts.remove(0);
|
|
62
|
+
}
|
|
63
|
+
get_field_by_path_parts(node, &parts)
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
fn has_child_named(node: &SchemaNode, name: &str) -> bool {
|
|
67
|
+
matches!(node, SchemaNode::Struct { fields, .. } if fields.iter().any(|f| f.name() == name))
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
fn get_field_by_path_parts<'a>(node: &'a SchemaNode, parts: &[&str]) -> Option<&'a SchemaNode> {
|
|
71
|
+
if parts.is_empty() {
|
|
72
|
+
return Some(node);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
let first = parts[0];
|
|
76
|
+
let rest = &parts[1..];
|
|
77
|
+
|
|
78
|
+
match node {
|
|
79
|
+
SchemaNode::Struct { fields, .. } => fields
|
|
80
|
+
.iter()
|
|
81
|
+
.find(|f| f.name() == first)
|
|
82
|
+
.and_then(|f| get_field_by_path_parts(f, rest)),
|
|
83
|
+
SchemaNode::List { item, .. } if first == "item" || first == item.name() => {
|
|
84
|
+
get_field_by_path_parts(item, rest)
|
|
85
|
+
}
|
|
86
|
+
SchemaNode::Map { key, value, .. } => match first {
|
|
87
|
+
name if name == "key" || name == key.name() => get_field_by_path_parts(key, rest),
|
|
88
|
+
name if name == "value" || name == value.name() => get_field_by_path_parts(value, rest),
|
|
89
|
+
_ => None,
|
|
90
|
+
},
|
|
91
|
+
_ => None,
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
fn collect_field_paths(node: &SchemaNode, prefix: String, paths: &mut Vec<String>) {
|
|
96
|
+
let current_path = if prefix.is_empty() {
|
|
97
|
+
node.name().to_string()
|
|
98
|
+
} else {
|
|
99
|
+
format!("{}.{}", prefix, node.name())
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
paths.push(current_path.clone());
|
|
103
|
+
|
|
104
|
+
match node {
|
|
105
|
+
SchemaNode::Struct { fields, .. } => {
|
|
106
|
+
for field in fields {
|
|
107
|
+
collect_field_paths(field, current_path.clone(), paths);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
SchemaNode::List { item, .. } => {
|
|
111
|
+
collect_field_paths(item, current_path, paths);
|
|
112
|
+
}
|
|
113
|
+
SchemaNode::Map { key, value, .. } => {
|
|
114
|
+
collect_field_paths(key, current_path.clone(), paths);
|
|
115
|
+
collect_field_paths(value, current_path, paths);
|
|
116
|
+
}
|
|
117
|
+
SchemaNode::Primitive { .. } => {}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{PrimitiveType, SchemaBuilder as CoreSchemaBuilder};

    /// Shorthand for a primitive node without a format.
    fn primitive(name: &str, primitive_type: PrimitiveType, nullable: bool) -> SchemaNode {
        SchemaNode::Primitive {
            name: name.to_string(),
            primitive_type,
            nullable,
            format: None,
        }
    }

    /// Shorthand for a struct node with the given children.
    fn structure(name: &str, nullable: bool, fields: Vec<SchemaNode>) -> SchemaNode {
        SchemaNode::Struct {
            name: name.to_string(),
            nullable,
            fields,
        }
    }

    #[test]
    fn test_schema_inspector() {
        let address = structure(
            "address",
            true,
            vec![primitive("city", PrimitiveType::String, true)],
        );
        let root = structure(
            "root",
            false,
            vec![primitive("id", PrimitiveType::Int64, false), address],
        );
        let schema = CoreSchemaBuilder::new().with_root(root).build().unwrap();

        // Test field count
        assert_eq!(schema.field_count(), 4); // root, id, address, city

        // Test field lookup
        assert!(schema.has_field("id"));
        assert!(schema.has_field("address"));
        assert!(schema.has_field("address.city"));
        assert!(!schema.has_field("missing"));

        // Test get field by path
        let city_name = schema
            .get_field_by_path("address.city")
            .map(|node| node.name());
        assert_eq!(city_name, Some("city"));
    }

    #[test]
    fn leading_root_segment_resolves_to_child_when_root_has_such_a_child() {
        // When the root struct has a child sharing the root's own name, a leading
        // "root" segment must refer to that child, not be stripped as the root.
        let root = structure(
            "root",
            false,
            vec![primitive("root", PrimitiveType::Int64, false)],
        );
        let schema = CoreSchemaBuilder::new().with_root(root).build().unwrap();

        let resolved = schema.get_field_by_path("root").unwrap();
        let is_primitive = matches!(resolved, SchemaNode::Primitive { .. });
        assert!(is_primitive);
        assert_eq!(resolved.name(), "root");
    }
}
|