parquet 0.5.8 → 0.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +3 -0
- data/ext/parquet/Cargo.toml +2 -0
- data/ext/parquet/build.rs +1 -1
- data/ext/parquet/src/lib.rs +3 -0
- data/ext/parquet/src/reader/arrow_reader.rs +579 -0
- data/ext/parquet/src/reader/common.rs +65 -11
- data/ext/parquet/src/reader/format_detector.rs +69 -0
- data/ext/parquet/src/reader/mod.rs +7 -2
- data/ext/parquet/src/reader/unified/mod.rs +82 -14
- data/ext/parquet/src/types/core_types.rs +1 -0
- data/ext/parquet/src/types/mod.rs +12 -6
- data/ext/parquet/src/types/parquet_value.rs +290 -73
- data/ext/parquet/src/types/record_types.rs +172 -26
- data/ext/parquet/src/types/schema_node.rs +11 -5
- data/ext/parquet/src/types/type_conversion.rs +216 -0
- data/ext/parquet/src/types/writer_types.rs +50 -0
- data/ext/parquet/src/writer/mod.rs +3 -0
- data/ext/parquet/src/writer/write_columns.rs +3 -0
- data/ext/parquet/src/writer/write_rows.rs +1 -0
- data/lib/parquet/version.rb +1 -1
- metadata +4 -2
@@ -22,6 +22,19 @@ pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -
|
|
22
22
|
}
|
23
23
|
}
|
24
24
|
|
25
|
+
/// Format i256 decimal value with appropriate scale for BigDecimal conversion
|
26
|
+
/// Uses bytes conversion to preserve full precision
|
27
|
+
pub fn format_i256_decimal_with_scale(
|
28
|
+
value: arrow_buffer::i256,
|
29
|
+
scale: i8,
|
30
|
+
) -> Result<String, ParquetGemError> {
|
31
|
+
// Convert i256 to big-endian bytes
|
32
|
+
let bytes = value.to_be_bytes();
|
33
|
+
|
34
|
+
// Use the existing bytes_to_decimal function which handles full precision
|
35
|
+
bytes_to_decimal(&bytes, scale as i32)
|
36
|
+
}
|
37
|
+
|
25
38
|
/// Format decimal value with appropriate scale for BigDecimal conversion
|
26
39
|
/// Handles positive and negative scales correctly for i32 scale
|
27
40
|
pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
|
@@ -34,6 +47,164 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
|
|
34
47
|
}
|
35
48
|
}
|
36
49
|
|
50
|
+
/// Convert arbitrary-length big-endian byte array to decimal string
|
51
|
+
/// Supports byte arrays from 1 to 32 bytes in length
|
52
|
+
fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
|
53
|
+
match bytes.len() {
|
54
|
+
0 => Err(ParquetGemError::InvalidDecimal(
|
55
|
+
"Empty byte array for decimal".to_string(),
|
56
|
+
)),
|
57
|
+
1 => {
|
58
|
+
// For 1 byte, use i8
|
59
|
+
let value = bytes[0] as i8;
|
60
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
61
|
+
}
|
62
|
+
2 => {
|
63
|
+
// For 2 bytes, use i16
|
64
|
+
let mut value: i16 = 0;
|
65
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
66
|
+
|
67
|
+
for &byte in bytes {
|
68
|
+
value = (value << 8) | (byte as i16);
|
69
|
+
}
|
70
|
+
|
71
|
+
// Sign extend if negative
|
72
|
+
if is_negative {
|
73
|
+
let shift = 16 - (bytes.len() * 8);
|
74
|
+
value = (value << shift) >> shift;
|
75
|
+
}
|
76
|
+
|
77
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
78
|
+
}
|
79
|
+
3..=4 => {
|
80
|
+
// For 3-4 bytes, use i32
|
81
|
+
let mut value: i32 = 0;
|
82
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
83
|
+
|
84
|
+
for &byte in bytes {
|
85
|
+
value = (value << 8) | (byte as i32);
|
86
|
+
}
|
87
|
+
|
88
|
+
// Sign extend if negative
|
89
|
+
if is_negative {
|
90
|
+
let shift = 32 - (bytes.len() * 8);
|
91
|
+
value = (value << shift) >> shift;
|
92
|
+
}
|
93
|
+
|
94
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
95
|
+
}
|
96
|
+
5..=8 => {
|
97
|
+
// For 5-8 bytes, use i64
|
98
|
+
let mut value: i64 = 0;
|
99
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
100
|
+
|
101
|
+
for &byte in bytes {
|
102
|
+
value = (value << 8) | (byte as i64);
|
103
|
+
}
|
104
|
+
|
105
|
+
// Sign extend if negative
|
106
|
+
if is_negative {
|
107
|
+
let shift = 64 - (bytes.len() * 8);
|
108
|
+
value = (value << shift) >> shift;
|
109
|
+
}
|
110
|
+
|
111
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
112
|
+
}
|
113
|
+
9..=16 => {
|
114
|
+
// For 9-16 bytes, use i128
|
115
|
+
let mut value: i128 = 0;
|
116
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
117
|
+
|
118
|
+
for &byte in bytes {
|
119
|
+
value = (value << 8) | (byte as i128);
|
120
|
+
}
|
121
|
+
|
122
|
+
// Sign extend if negative
|
123
|
+
if is_negative {
|
124
|
+
let shift = 128 - (bytes.len() * 8);
|
125
|
+
value = (value << shift) >> shift;
|
126
|
+
}
|
127
|
+
|
128
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
129
|
+
}
|
130
|
+
17..=32 => {
|
131
|
+
// For 17-32 bytes, we need arbitrary precision handling
|
132
|
+
// Check if the number is negative (MSB of first byte)
|
133
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
134
|
+
|
135
|
+
if is_negative {
|
136
|
+
// For negative numbers, we need to compute two's complement
|
137
|
+
// First, invert all bits
|
138
|
+
let mut inverted = Vec::with_capacity(bytes.len());
|
139
|
+
for &byte in bytes {
|
140
|
+
inverted.push(!byte);
|
141
|
+
}
|
142
|
+
|
143
|
+
// Then add 1
|
144
|
+
let mut carry = 1u8;
|
145
|
+
for i in (0..inverted.len()).rev() {
|
146
|
+
let (sum, new_carry) = inverted[i].overflowing_add(carry);
|
147
|
+
inverted[i] = sum;
|
148
|
+
carry = if new_carry { 1 } else { 0 };
|
149
|
+
}
|
150
|
+
|
151
|
+
// Convert to decimal string
|
152
|
+
let mut result = String::new();
|
153
|
+
let mut remainder = inverted;
|
154
|
+
|
155
|
+
// Repeatedly divide by 10 to get decimal digits
|
156
|
+
while !remainder.iter().all(|&b| b == 0) {
|
157
|
+
let mut carry = 0u16;
|
158
|
+
for i in 0..remainder.len() {
|
159
|
+
let temp = (carry << 8) | (remainder[i] as u16);
|
160
|
+
remainder[i] = (temp / 10) as u8;
|
161
|
+
carry = temp % 10;
|
162
|
+
}
|
163
|
+
result.push_str(&carry.to_string());
|
164
|
+
}
|
165
|
+
|
166
|
+
// The digits are in reverse order
|
167
|
+
if result.is_empty() {
|
168
|
+
result = "0".to_string();
|
169
|
+
} else {
|
170
|
+
result = result.chars().rev().collect();
|
171
|
+
}
|
172
|
+
|
173
|
+
// Add negative sign and format with scale
|
174
|
+
Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
|
175
|
+
} else {
|
176
|
+
// For positive numbers, direct conversion
|
177
|
+
let mut result = String::new();
|
178
|
+
let mut remainder = bytes.to_vec();
|
179
|
+
|
180
|
+
// Repeatedly divide by 10 to get decimal digits
|
181
|
+
while !remainder.iter().all(|&b| b == 0) {
|
182
|
+
let mut carry = 0u16;
|
183
|
+
for i in 0..remainder.len() {
|
184
|
+
let temp = (carry << 8) | (remainder[i] as u16);
|
185
|
+
remainder[i] = (temp / 10) as u8;
|
186
|
+
carry = temp % 10;
|
187
|
+
}
|
188
|
+
result.push_str(&carry.to_string());
|
189
|
+
}
|
190
|
+
|
191
|
+
// The digits are in reverse order
|
192
|
+
if result.is_empty() {
|
193
|
+
result = "0".to_string();
|
194
|
+
} else {
|
195
|
+
result = result.chars().rev().collect();
|
196
|
+
}
|
197
|
+
|
198
|
+
Ok(format_decimal_with_i32_scale(result, scale))
|
199
|
+
}
|
200
|
+
}
|
201
|
+
_ => Err(ParquetGemError::InvalidDecimal(format!(
|
202
|
+
"Unsupported decimal byte array size: {} (maximum 32 bytes)",
|
203
|
+
bytes.len()
|
204
|
+
))),
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
37
208
|
#[derive(Debug)]
|
38
209
|
pub enum RowRecord<S: BuildHasher + Default> {
|
39
210
|
Vec(Vec<ParquetField>),
|
@@ -282,32 +453,7 @@ impl TryIntoValue for ParquetField {
|
|
282
453
|
format_decimal_with_i32_scale(unscaled, scale)
|
283
454
|
}
|
284
455
|
Decimal::Bytes { value, scale, .. } => {
|
285
|
-
|
286
|
-
4 => {
|
287
|
-
// value is a byte array containing the bytes for an i32 value in big endian order
|
288
|
-
let casted = value.as_bytes()[..4].try_into()?;
|
289
|
-
let unscaled = i32::from_be_bytes(casted);
|
290
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
291
|
-
}
|
292
|
-
8 => {
|
293
|
-
// value is a byte array containing the bytes for an i64 value in big endian order
|
294
|
-
let casted = value.as_bytes()[..8].try_into()?;
|
295
|
-
let unscaled = i64::from_be_bytes(casted);
|
296
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
297
|
-
}
|
298
|
-
16 => {
|
299
|
-
// value is a byte array containing the bytes for an i128 value in big endian order
|
300
|
-
let casted = value.as_bytes()[..16].try_into()?;
|
301
|
-
let unscaled = i128::from_be_bytes(casted);
|
302
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
303
|
-
}
|
304
|
-
_ => {
|
305
|
-
unimplemented!(
|
306
|
-
"Unsupported decimal byte array size: {}",
|
307
|
-
value.len()
|
308
|
-
);
|
309
|
-
}
|
310
|
-
}
|
456
|
+
bytes_to_decimal(value.as_bytes(), scale)?
|
311
457
|
}
|
312
458
|
};
|
313
459
|
|
@@ -185,17 +185,18 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
|
|
185
185
|
// 2. When precision only - use scale 0
|
186
186
|
// 3. When scale only - use max precision (38)
|
187
187
|
let (precision, scale) = match (precision_val, scale_val) {
|
188
|
-
(None, None) => (38, 0),
|
188
|
+
(None, None) => (38, 0), // Maximum accuracy, scale 0
|
189
189
|
(Some(p), None) => {
|
190
190
|
// Precision provided, scale defaults to 0
|
191
191
|
let prec = u8::try_convert(p).map_err(|_| {
|
192
192
|
MagnusError::new(
|
193
193
|
ruby.exception_type_error(),
|
194
|
-
"Invalid precision value for decimal type, expected a positive integer"
|
194
|
+
"Invalid precision value for decimal type, expected a positive integer"
|
195
|
+
.to_string(),
|
195
196
|
)
|
196
197
|
})?;
|
197
198
|
(prec, 0)
|
198
|
-
}
|
199
|
+
}
|
199
200
|
(None, Some(s)) => {
|
200
201
|
// Scale provided, precision set to maximum (38)
|
201
202
|
let scl = i8::try_convert(s).map_err(|_| {
|
@@ -205,13 +206,14 @@ pub fn parse_schema_node(ruby: &Ruby, node_value: Value) -> Result<SchemaNode, M
|
|
205
206
|
)
|
206
207
|
})?;
|
207
208
|
(38, scl)
|
208
|
-
}
|
209
|
+
}
|
209
210
|
(Some(p), Some(s)) => {
|
210
211
|
// Both provided
|
211
212
|
let prec = u8::try_convert(p).map_err(|_| {
|
212
213
|
MagnusError::new(
|
213
214
|
ruby.exception_type_error(),
|
214
|
-
"Invalid precision value for decimal type, expected a positive integer"
|
215
|
+
"Invalid precision value for decimal type, expected a positive integer"
|
216
|
+
.to_string(),
|
215
217
|
)
|
216
218
|
})?;
|
217
219
|
let scl = i8::try_convert(s).map_err(|_| {
|
@@ -294,6 +296,7 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
|
|
294
296
|
"timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
|
295
297
|
"timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
|
296
298
|
"decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
|
299
|
+
"decimal256" => Some(PrimitiveType::Decimal256(38, 0)), // Maximum precision, scale 0
|
297
300
|
_ => None,
|
298
301
|
}
|
299
302
|
}
|
@@ -321,6 +324,9 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
|
|
321
324
|
PrimitiveType::Decimal128(precision, scale) => {
|
322
325
|
ArrowDataType::Decimal128(*precision, *scale)
|
323
326
|
}
|
327
|
+
PrimitiveType::Decimal256(precision, scale) => {
|
328
|
+
ArrowDataType::Decimal256(*precision, *scale)
|
329
|
+
}
|
324
330
|
PrimitiveType::Boolean => ArrowDataType::Boolean,
|
325
331
|
PrimitiveType::String => ArrowDataType::Utf8,
|
326
332
|
PrimitiveType::Binary => ArrowDataType::Binary,
|
@@ -243,6 +243,7 @@ pub fn parquet_schema_type_to_arrow_data_type(
|
|
243
243
|
PrimitiveType::Float32 => DataType::Float32,
|
244
244
|
PrimitiveType::Float64 => DataType::Float64,
|
245
245
|
PrimitiveType::Decimal128(precision, scale) => DataType::Decimal128(*precision, *scale),
|
246
|
+
PrimitiveType::Decimal256(precision, scale) => DataType::Decimal256(*precision, *scale),
|
246
247
|
PrimitiveType::String => DataType::Utf8,
|
247
248
|
PrimitiveType::Binary => DataType::Binary,
|
248
249
|
PrimitiveType::Boolean => DataType::Boolean,
|
@@ -381,6 +382,22 @@ fn create_arrow_builder_for_type(
|
|
381
382
|
|
382
383
|
Ok(Box::new(builder_with_precision))
|
383
384
|
}
|
385
|
+
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(precision, scale)) => {
|
386
|
+
// Create a Decimal256Builder for the Decimal256 column
|
387
|
+
let builder = Decimal256Builder::with_capacity(cap);
|
388
|
+
|
389
|
+
// Set precision and scale for the decimal and return the new builder
|
390
|
+
let builder_with_precision = builder
|
391
|
+
.with_precision_and_scale(*precision, *scale)
|
392
|
+
.map_err(|e| {
|
393
|
+
MagnusError::new(
|
394
|
+
magnus::exception::runtime_error(),
|
395
|
+
format!("Failed to set precision and scale: {}", e),
|
396
|
+
)
|
397
|
+
})?;
|
398
|
+
|
399
|
+
Ok(Box::new(builder_with_precision))
|
400
|
+
}
|
384
401
|
ParquetSchemaType::Primitive(PrimitiveType::String) => {
|
385
402
|
Ok(Box::new(StringBuilder::with_capacity(cap, cap * 32)))
|
386
403
|
}
|
@@ -891,6 +908,187 @@ fn fill_builder(
|
|
891
908
|
}
|
892
909
|
Ok(())
|
893
910
|
}
|
911
|
+
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_precision, scale)) => {
|
912
|
+
let typed_builder = builder
|
913
|
+
.as_any_mut()
|
914
|
+
.downcast_mut::<Decimal256Builder>()
|
915
|
+
.expect("Builder mismatch: expected Decimal256Builder for Decimal256");
|
916
|
+
|
917
|
+
for val in values {
|
918
|
+
match val {
|
919
|
+
ParquetValue::Decimal256(d, _scale) => typed_builder.append_value(*d),
|
920
|
+
ParquetValue::Decimal128(d, _scale) => {
|
921
|
+
// Convert i128 to i256
|
922
|
+
typed_builder.append_value(arrow_buffer::i256::from_i128(*d))
|
923
|
+
}
|
924
|
+
ParquetValue::Float64(f) => {
|
925
|
+
// Scale the float to the desired precision and scale
|
926
|
+
// For large values, use BigInt to avoid overflow
|
927
|
+
let scaled = *f * 10_f64.powi(*scale as i32);
|
928
|
+
if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
|
929
|
+
let scaled_value = scaled as i128;
|
930
|
+
typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
|
931
|
+
} else {
|
932
|
+
// Use BigInt for values that don't fit in i128
|
933
|
+
use num::{BigInt, FromPrimitive};
|
934
|
+
let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
|
935
|
+
MagnusError::new(
|
936
|
+
magnus::exception::type_error(),
|
937
|
+
format!("Failed to convert float {} to BigInt", f),
|
938
|
+
)
|
939
|
+
})?;
|
940
|
+
let bytes = bigint.to_signed_bytes_le();
|
941
|
+
if bytes.len() <= 32 {
|
942
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
943
|
+
[0xff; 32]
|
944
|
+
} else {
|
945
|
+
[0; 32]
|
946
|
+
};
|
947
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
948
|
+
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
949
|
+
} else {
|
950
|
+
return Err(MagnusError::new(
|
951
|
+
magnus::exception::type_error(),
|
952
|
+
format!(
|
953
|
+
"Float value {} scaled to {} is too large for Decimal256",
|
954
|
+
f, scaled
|
955
|
+
),
|
956
|
+
));
|
957
|
+
}
|
958
|
+
}
|
959
|
+
}
|
960
|
+
ParquetValue::Float32(flo) => {
|
961
|
+
// Scale the float to the desired precision and scale
|
962
|
+
let scaled = (*flo as f64) * 10_f64.powi(*scale as i32);
|
963
|
+
if scaled >= i128::MIN as f64 && scaled <= i128::MAX as f64 {
|
964
|
+
let scaled_value = scaled as i128;
|
965
|
+
typed_builder.append_value(arrow_buffer::i256::from_i128(scaled_value))
|
966
|
+
} else {
|
967
|
+
// Use BigInt for values that don't fit in i128
|
968
|
+
use num::{BigInt, FromPrimitive};
|
969
|
+
let bigint = BigInt::from_f64(scaled).ok_or_else(|| {
|
970
|
+
MagnusError::new(
|
971
|
+
magnus::exception::type_error(),
|
972
|
+
format!("Failed to convert float {} to BigInt", flo),
|
973
|
+
)
|
974
|
+
})?;
|
975
|
+
let bytes = bigint.to_signed_bytes_le();
|
976
|
+
if bytes.len() <= 32 {
|
977
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
978
|
+
[0xff; 32]
|
979
|
+
} else {
|
980
|
+
[0; 32]
|
981
|
+
};
|
982
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
983
|
+
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
984
|
+
} else {
|
985
|
+
return Err(MagnusError::new(
|
986
|
+
magnus::exception::type_error(),
|
987
|
+
format!(
|
988
|
+
"Float value {} scaled is too large for Decimal256",
|
989
|
+
flo
|
990
|
+
),
|
991
|
+
));
|
992
|
+
}
|
993
|
+
}
|
994
|
+
}
|
995
|
+
ParquetValue::Int64(i) => {
|
996
|
+
// Scale the integer to the desired scale
|
997
|
+
let base = arrow_buffer::i256::from_i128(*i as i128);
|
998
|
+
if *scale <= 38 {
|
999
|
+
// Can use i128 multiplication for scale <= 38
|
1000
|
+
let scale_factor =
|
1001
|
+
arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
|
1002
|
+
match base.checked_mul(scale_factor) {
|
1003
|
+
Some(scaled) => typed_builder.append_value(scaled),
|
1004
|
+
None => {
|
1005
|
+
return Err(MagnusError::new(
|
1006
|
+
magnus::exception::type_error(),
|
1007
|
+
format!(
|
1008
|
+
"Integer {} scaled by {} overflows Decimal256",
|
1009
|
+
i, scale
|
1010
|
+
),
|
1011
|
+
));
|
1012
|
+
}
|
1013
|
+
}
|
1014
|
+
} else {
|
1015
|
+
// For very large scales, use BigInt
|
1016
|
+
use num::BigInt;
|
1017
|
+
let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
|
1018
|
+
let bytes = bigint.to_signed_bytes_le();
|
1019
|
+
if bytes.len() <= 32 {
|
1020
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
1021
|
+
[0xff; 32]
|
1022
|
+
} else {
|
1023
|
+
[0; 32]
|
1024
|
+
};
|
1025
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
1026
|
+
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
1027
|
+
} else {
|
1028
|
+
return Err(MagnusError::new(
|
1029
|
+
magnus::exception::type_error(),
|
1030
|
+
format!(
|
1031
|
+
"Integer {} scaled by {} is too large for Decimal256",
|
1032
|
+
i, scale
|
1033
|
+
),
|
1034
|
+
));
|
1035
|
+
}
|
1036
|
+
}
|
1037
|
+
}
|
1038
|
+
ParquetValue::Int32(i) => {
|
1039
|
+
// Scale the integer to the desired scale
|
1040
|
+
let base = arrow_buffer::i256::from_i128(*i as i128);
|
1041
|
+
if *scale <= 38 {
|
1042
|
+
// Can use i128 multiplication for scale <= 38
|
1043
|
+
let scale_factor =
|
1044
|
+
arrow_buffer::i256::from_i128(10_i128.pow(*scale as u32));
|
1045
|
+
match base.checked_mul(scale_factor) {
|
1046
|
+
Some(scaled) => typed_builder.append_value(scaled),
|
1047
|
+
None => {
|
1048
|
+
return Err(MagnusError::new(
|
1049
|
+
magnus::exception::type_error(),
|
1050
|
+
format!(
|
1051
|
+
"Integer {} scaled by {} overflows Decimal256",
|
1052
|
+
i, scale
|
1053
|
+
),
|
1054
|
+
));
|
1055
|
+
}
|
1056
|
+
}
|
1057
|
+
} else {
|
1058
|
+
// For very large scales, use BigInt
|
1059
|
+
use num::BigInt;
|
1060
|
+
let bigint = BigInt::from(*i) * BigInt::from(10).pow(*scale as u32);
|
1061
|
+
let bytes = bigint.to_signed_bytes_le();
|
1062
|
+
if bytes.len() <= 32 {
|
1063
|
+
let mut buf = if bigint.sign() == num::bigint::Sign::Minus {
|
1064
|
+
[0xff; 32]
|
1065
|
+
} else {
|
1066
|
+
[0; 32]
|
1067
|
+
};
|
1068
|
+
buf[..bytes.len()].copy_from_slice(&bytes);
|
1069
|
+
typed_builder.append_value(arrow_buffer::i256::from_le_bytes(buf))
|
1070
|
+
} else {
|
1071
|
+
return Err(MagnusError::new(
|
1072
|
+
magnus::exception::type_error(),
|
1073
|
+
format!(
|
1074
|
+
"Integer {} scaled by {} is too large for Decimal256",
|
1075
|
+
i, scale
|
1076
|
+
),
|
1077
|
+
));
|
1078
|
+
}
|
1079
|
+
}
|
1080
|
+
}
|
1081
|
+
ParquetValue::Null => typed_builder.append_null(),
|
1082
|
+
other => {
|
1083
|
+
return Err(MagnusError::new(
|
1084
|
+
magnus::exception::type_error(),
|
1085
|
+
format!("Expected numeric value for Decimal256, got {:?}", other),
|
1086
|
+
))
|
1087
|
+
}
|
1088
|
+
}
|
1089
|
+
}
|
1090
|
+
Ok(())
|
1091
|
+
}
|
894
1092
|
ParquetSchemaType::Primitive(PrimitiveType::Boolean) => {
|
895
1093
|
let typed_builder = builder
|
896
1094
|
.as_any_mut()
|
@@ -1172,6 +1370,15 @@ fn fill_builder(
|
|
1172
1370
|
)
|
1173
1371
|
})?
|
1174
1372
|
.append_value(*x),
|
1373
|
+
ParquetValue::Decimal256(x, _scale) => typed_builder
|
1374
|
+
.field_builder::<Decimal256Builder>(i)
|
1375
|
+
.ok_or_else(|| {
|
1376
|
+
MagnusError::new(
|
1377
|
+
magnus::exception::type_error(),
|
1378
|
+
"Failed to coerce into Decimal256Builder",
|
1379
|
+
)
|
1380
|
+
})?
|
1381
|
+
.append_value(*x),
|
1175
1382
|
ParquetValue::Date32(x) => typed_builder
|
1176
1383
|
.field_builder::<Date32Builder>(i)
|
1177
1384
|
.ok_or_else(|| {
|
@@ -1377,6 +1584,15 @@ fn fill_builder(
|
|
1377
1584
|
)
|
1378
1585
|
})?
|
1379
1586
|
.append_null(),
|
1587
|
+
ParquetSchemaType::Primitive(PrimitiveType::Decimal256(_, _)) => typed_builder
|
1588
|
+
.field_builder::<Decimal256Builder>(i)
|
1589
|
+
.ok_or_else(|| {
|
1590
|
+
MagnusError::new(
|
1591
|
+
magnus::exception::type_error(),
|
1592
|
+
"Failed to coerce into Decimal256Builder for Decimal256",
|
1593
|
+
)
|
1594
|
+
})?
|
1595
|
+
.append_null(),
|
1380
1596
|
ParquetSchemaType::Primitive(PrimitiveType::String) => typed_builder
|
1381
1597
|
.field_builder::<StringBuilder>(i)
|
1382
1598
|
.ok_or_else(|| {
|
@@ -145,6 +145,53 @@ impl FromStr for ParquetSchemaType<'_> {
|
|
145
145
|
}
|
146
146
|
}
|
147
147
|
|
148
|
+
// Check if it's a decimal256 type with precision and scale
|
149
|
+
if let Some(decimal_params) = s.strip_prefix("decimal256(").and_then(|s| s.strip_suffix(")")) {
|
150
|
+
let parts: Vec<&str> = decimal_params.split(',').collect();
|
151
|
+
|
152
|
+
// Handle both single parameter (precision only) and two parameters (precision and scale)
|
153
|
+
if parts.len() == 1 {
|
154
|
+
// Only precision provided, scale defaults to 0
|
155
|
+
let precision = parts[0].trim().parse::<u8>().map_err(|_| {
|
156
|
+
MagnusError::new(
|
157
|
+
magnus::exception::runtime_error(),
|
158
|
+
format!("Invalid precision value in decimal256 type: {}", parts[0]),
|
159
|
+
)
|
160
|
+
})?;
|
161
|
+
|
162
|
+
return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
|
163
|
+
precision, 0,
|
164
|
+
)));
|
165
|
+
} else if parts.len() == 2 {
|
166
|
+
// Both precision and scale provided
|
167
|
+
let precision = parts[0].trim().parse::<u8>().map_err(|_| {
|
168
|
+
MagnusError::new(
|
169
|
+
magnus::exception::runtime_error(),
|
170
|
+
format!("Invalid precision value in decimal256 type: {}", parts[0]),
|
171
|
+
)
|
172
|
+
})?;
|
173
|
+
|
174
|
+
let scale = parts[1].trim().parse::<i8>().map_err(|_| {
|
175
|
+
MagnusError::new(
|
176
|
+
magnus::exception::runtime_error(),
|
177
|
+
format!("Invalid scale value in decimal256 type: {}", parts[1]),
|
178
|
+
)
|
179
|
+
})?;
|
180
|
+
|
181
|
+
return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
|
182
|
+
precision, scale,
|
183
|
+
)));
|
184
|
+
} else {
|
185
|
+
return Err(MagnusError::new(
|
186
|
+
magnus::exception::runtime_error(),
|
187
|
+
format!(
|
188
|
+
"Invalid decimal256 format. Expected 'decimal256(precision)' or 'decimal256(precision,scale)', got '{}'",
|
189
|
+
s
|
190
|
+
),
|
191
|
+
));
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
148
195
|
// Handle primitive types
|
149
196
|
match s {
|
150
197
|
"int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
|
@@ -166,6 +213,9 @@ impl FromStr for ParquetSchemaType<'_> {
|
|
166
213
|
"decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
|
167
214
|
38, 0,
|
168
215
|
))),
|
216
|
+
"decimal256" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
|
217
|
+
38, 0,
|
218
|
+
))),
|
169
219
|
"list" => Ok(ParquetSchemaType::List(Box::new(ListField {
|
170
220
|
item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
|
171
221
|
format: None,
|
@@ -197,6 +197,9 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
|
|
197
197
|
DataType::Decimal128(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal128(
|
198
198
|
*precision, *scale,
|
199
199
|
))),
|
200
|
+
DataType::Decimal256(precision, scale) => Ok(PST::Primitive(PrimitiveType::Decimal256(
|
201
|
+
*precision, *scale,
|
202
|
+
))),
|
200
203
|
DataType::Date32 => Ok(PST::Primitive(PrimitiveType::Date32)),
|
201
204
|
DataType::Date64 => {
|
202
205
|
// Our code typically uses Date32 or Timestamp for 64. But Arrow has Date64
|
@@ -170,6 +170,9 @@ fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemEr
|
|
170
170
|
PrimitiveType::TimestampMicros => {
|
171
171
|
PST::Primitive(PrimitiveType::TimestampMicros)
|
172
172
|
}
|
173
|
+
PrimitiveType::Decimal256(precision, scale) => {
|
174
|
+
PST::Primitive(PrimitiveType::Decimal256(precision, scale))
|
175
|
+
}
|
173
176
|
},
|
174
177
|
SchemaNode::List { .. }
|
175
178
|
| SchemaNode::Map { .. }
|
@@ -258,6 +258,7 @@ pub fn estimate_value_size(
|
|
258
258
|
| PST::Primitive(PrimitiveType::Float64) => Ok(8),
|
259
259
|
PST::Primitive(PrimitiveType::Boolean) => Ok(1),
|
260
260
|
PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
|
261
|
+
PST::Primitive(PrimitiveType::Decimal256(_, _)) => Ok(32),
|
261
262
|
PST::Primitive(PrimitiveType::Date32)
|
262
263
|
| PST::Primitive(PrimitiveType::TimestampMillis)
|
263
264
|
| PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.5.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -62,7 +62,9 @@ files:
|
|
62
62
|
- ext/parquet/src/header_cache.rs
|
63
63
|
- ext/parquet/src/lib.rs
|
64
64
|
- ext/parquet/src/logger.rs
|
65
|
+
- ext/parquet/src/reader/arrow_reader.rs
|
65
66
|
- ext/parquet/src/reader/common.rs
|
67
|
+
- ext/parquet/src/reader/format_detector.rs
|
66
68
|
- ext/parquet/src/reader/mod.rs
|
67
69
|
- ext/parquet/src/reader/parquet_column_reader.rs
|
68
70
|
- ext/parquet/src/reader/parquet_row_reader.rs
|