parquet 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +228 -4
- data/ext/parquet/src/reader/mod.rs +2 -1
- data/ext/parquet/src/reader/parquet_column_reader.rs +15 -127
- data/ext/parquet/src/reader/parquet_row_reader.rs +14 -134
- data/ext/parquet/src/reader/unified/mod.rs +328 -0
- data/ext/parquet/src/types/parquet_value.rs +90 -16
- data/ext/parquet/src/types/record_types.rs +28 -4
- data/ext/parquet/src/types/type_conversion.rs +13 -11
- data/ext/parquet/src/types/writer_types.rs +38 -19
- data/lib/parquet/schema.rb +21 -9
- data/lib/parquet/version.rb +1 -1
- metadata +2 -1
@@ -101,31 +101,48 @@ impl FromStr for ParquetSchemaType<'_> {
|
|
101
101
|
// Check if it's a decimal type with precision and scale
|
102
102
|
if let Some(decimal_params) = s.strip_prefix("decimal(").and_then(|s| s.strip_suffix(")")) {
|
103
103
|
let parts: Vec<&str> = decimal_params.split(',').collect();
|
104
|
-
|
104
|
+
|
105
|
+
// Handle both single parameter (precision only) and two parameters (precision and scale)
|
106
|
+
if parts.len() == 1 {
|
107
|
+
// Only precision provided, scale defaults to 0
|
108
|
+
let precision = parts[0].trim().parse::<u8>().map_err(|_| {
|
109
|
+
MagnusError::new(
|
110
|
+
magnus::exception::runtime_error(),
|
111
|
+
format!("Invalid precision value in decimal type: {}", parts[0]),
|
112
|
+
)
|
113
|
+
})?;
|
114
|
+
|
115
|
+
return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
|
116
|
+
precision, 0,
|
117
|
+
)));
|
118
|
+
} else if parts.len() == 2 {
|
119
|
+
// Both precision and scale provided
|
120
|
+
let precision = parts[0].trim().parse::<u8>().map_err(|_| {
|
121
|
+
MagnusError::new(
|
122
|
+
magnus::exception::runtime_error(),
|
123
|
+
format!("Invalid precision value in decimal type: {}", parts[0]),
|
124
|
+
)
|
125
|
+
})?;
|
126
|
+
|
127
|
+
let scale = parts[1].trim().parse::<i8>().map_err(|_| {
|
128
|
+
MagnusError::new(
|
129
|
+
magnus::exception::runtime_error(),
|
130
|
+
format!("Invalid scale value in decimal type: {}", parts[1]),
|
131
|
+
)
|
132
|
+
})?;
|
133
|
+
|
134
|
+
return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
|
135
|
+
precision, scale,
|
136
|
+
)));
|
137
|
+
} else {
|
105
138
|
return Err(MagnusError::new(
|
106
139
|
magnus::exception::runtime_error(),
|
107
140
|
format!(
|
108
|
-
"Invalid decimal format. Expected 'decimal(precision,scale)', got '{}'",
|
141
|
+
"Invalid decimal format. Expected 'decimal(precision)' or 'decimal(precision,scale)', got '{}'",
|
109
142
|
s
|
110
143
|
),
|
111
144
|
));
|
112
145
|
}
|
113
|
-
|
114
|
-
let precision = parts[0].trim().parse::<u8>().map_err(|_| {
|
115
|
-
MagnusError::new(
|
116
|
-
magnus::exception::runtime_error(),
|
117
|
-
format!("Invalid precision value in decimal type: {}", parts[0]),
|
118
|
-
)
|
119
|
-
})?;
|
120
|
-
|
121
|
-
let scale = parts[1].trim().parse::<i8>().map_err(|_| {
|
122
|
-
MagnusError::new(
|
123
|
-
magnus::exception::runtime_error(),
|
124
|
-
format!("Invalid scale value in decimal type: {}", parts[1]),
|
125
|
-
)
|
126
|
-
})?;
|
127
|
-
|
128
|
-
return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)));
|
129
146
|
}
|
130
147
|
|
131
148
|
// Handle primitive types
|
@@ -146,7 +163,9 @@ impl FromStr for ParquetSchemaType<'_> {
|
|
146
163
|
"date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
|
147
164
|
"timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
|
148
165
|
"timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
|
149
|
-
"decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
|
166
|
+
"decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
|
167
|
+
38, 0,
|
168
|
+
))),
|
150
169
|
"list" => Ok(ParquetSchemaType::List(Box::new(ListField {
|
151
170
|
item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
|
152
171
|
format: None,
|
data/lib/parquet/schema.rb
CHANGED
@@ -58,7 +58,7 @@ module Parquet
|
|
58
58
|
# - `key:, value:` if type == :map
|
59
59
|
# - `key_nullable:, value_nullable:` controls nullability of map keys/values (default: true)
|
60
60
|
# - `format:` if you want to store some format string
|
61
|
-
# - `precision:, scale:` if type == :decimal (precision defaults to
|
61
|
+
# - `precision:, scale:` if type == :decimal (precision defaults to 38, scale to 0)
|
62
62
|
# - `nullable:` default to true if not specified
|
63
63
|
def field(name, type, nullable: true, **kwargs, &block)
|
64
64
|
field_hash = { name: name.to_s, type: type, nullable: !!nullable }
|
@@ -77,12 +77,18 @@ module Parquet
|
|
77
77
|
raise ArgumentError, "list field `#{name}` requires `item:` type" unless item_type
|
78
78
|
# Pass item_nullable if provided, otherwise use true as default
|
79
79
|
item_nullable = kwargs[:item_nullable].nil? ? true : !!kwargs[:item_nullable]
|
80
|
-
|
80
|
+
|
81
81
|
# Pass precision and scale if type is decimal
|
82
82
|
if item_type == :decimal
|
83
83
|
precision = kwargs[:precision]
|
84
84
|
scale = kwargs[:scale]
|
85
|
-
field_hash[:item] = wrap_subtype(
|
85
|
+
field_hash[:item] = wrap_subtype(
|
86
|
+
item_type,
|
87
|
+
nullable: item_nullable,
|
88
|
+
precision: precision,
|
89
|
+
scale: scale,
|
90
|
+
&block
|
91
|
+
)
|
86
92
|
else
|
87
93
|
field_hash[:item] = wrap_subtype(item_type, nullable: item_nullable, &block)
|
88
94
|
end
|
@@ -94,14 +100,20 @@ module Parquet
|
|
94
100
|
# Pass key_nullable and value_nullable if provided, otherwise use true as default
|
95
101
|
key_nullable = kwargs[:key_nullable].nil? ? true : !!kwargs[:key_nullable]
|
96
102
|
value_nullable = kwargs[:value_nullable].nil? ? true : !!kwargs[:value_nullable]
|
97
|
-
|
103
|
+
|
98
104
|
field_hash[:key] = wrap_subtype(key_type, nullable: key_nullable)
|
99
|
-
|
105
|
+
|
100
106
|
# Pass precision and scale if value type is decimal
|
101
107
|
if value_type == :decimal
|
102
108
|
precision = kwargs[:precision]
|
103
109
|
scale = kwargs[:scale]
|
104
|
-
field_hash[:value] = wrap_subtype(
|
110
|
+
field_hash[:value] = wrap_subtype(
|
111
|
+
value_type,
|
112
|
+
nullable: value_nullable,
|
113
|
+
precision: precision,
|
114
|
+
scale: scale,
|
115
|
+
&block
|
116
|
+
)
|
105
117
|
else
|
106
118
|
field_hash[:value] = wrap_subtype(value_type, nullable: value_nullable, &block)
|
107
119
|
end
|
@@ -111,7 +123,7 @@ module Parquet
|
|
111
123
|
# 2. When only precision is provided, scale defaults to 0
|
112
124
|
# 3. When only scale is provided, use maximum precision (38)
|
113
125
|
# 4. When both are provided, use the provided values
|
114
|
-
|
126
|
+
|
115
127
|
if kwargs[:precision].nil? && kwargs[:scale].nil?
|
116
128
|
# No precision or scale provided - use maximum precision
|
117
129
|
field_hash[:precision] = 38
|
@@ -192,7 +204,7 @@ module Parquet
|
|
192
204
|
elsif t == :decimal
|
193
205
|
# Handle decimal type with precision and scale
|
194
206
|
result = { type: t, nullable: nullable, name: "item" }
|
195
|
-
|
207
|
+
|
196
208
|
# Follow the same rules as in field() method:
|
197
209
|
# 1. When neither precision nor scale is provided, use maximum precision (38)
|
198
210
|
# 2. When only precision is provided, scale defaults to 0
|
@@ -215,7 +227,7 @@ module Parquet
|
|
215
227
|
result[:precision] = precision
|
216
228
|
result[:scale] = scale
|
217
229
|
end
|
218
|
-
|
230
|
+
|
219
231
|
result
|
220
232
|
else
|
221
233
|
# e.g. :int32 => { type: :int32, nullable: true }
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- ext/parquet/src/reader/mod.rs
|
67
67
|
- ext/parquet/src/reader/parquet_column_reader.rs
|
68
68
|
- ext/parquet/src/reader/parquet_row_reader.rs
|
69
|
+
- ext/parquet/src/reader/unified/mod.rs
|
69
70
|
- ext/parquet/src/ruby_reader.rs
|
70
71
|
- ext/parquet/src/types/core_types.rs
|
71
72
|
- ext/parquet/src/types/mod.rs
|