parquet 0.5.8 → 0.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/types/mod.rs +1 -2
- data/ext/parquet/src/types/record_types.rs +88 -26
- data/lib/parquet/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1a1f7c250b960dbe334145a537e865889fbc759e7b8bfbafcbadc77689972cd
|
4
|
+
data.tar.gz: 22116ec0b9fe89f0ad08a3674267bd00b141170b889091b476f3aab0d6be88a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef8485d03247dd0d31993a774117669c1aaef5b875e7cb5c6f4c53e237a72fb81113ea35251426a21ea1ba24f8ae568bd2c3a158e6a45ce2416a308251d0f467
|
7
|
+
data.tar.gz: 672f38dfbf703dae996283fba8d137529e3089f569797df87feaac32fb0f956ea7c4d7ae57032715d1a21bd5bfa4dd728c2a3fe80174fb5fc0abdef51c73110a
|
data/ext/parquet/src/types/mod.rs
CHANGED
@@ -32,7 +32,6 @@ use arrow_schema::{DataType, TimeUnit};
|
|
32
32
|
use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
|
33
33
|
use parquet::data_type::Decimal;
|
34
34
|
use parquet::record::Field;
|
35
|
-
use std::array::TryFromSliceError;
|
36
35
|
use std::{collections::HashMap, hash::BuildHasher, sync::Arc};
|
37
36
|
|
38
37
|
use crate::header_cache::StringCacheKey;
|
@@ -60,7 +59,7 @@ pub enum ParquetGemError {
|
|
60
59
|
#[error("Jiff error: {0}")]
|
61
60
|
Jiff(#[from] jiff::Error),
|
62
61
|
#[error("Failed to cast slice to array: {0}")]
|
63
|
-
InvalidDecimal(#[from] TryFromSliceError),
|
62
|
+
InvalidDecimal(String),
|
64
63
|
#[error("Failed to parse UUID: {0}")]
|
65
64
|
UuidError(#[from] uuid::Error),
|
66
65
|
}
|
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -34,6 +34,93 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
|
|
34
34
|
}
|
35
35
|
}
|
36
36
|
|
37
|
+
/// Convert arbitrary-length big-endian byte array to decimal string
|
38
|
+
/// Supports byte arrays from 1 to 16 bytes in length
|
39
|
+
fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
|
40
|
+
match bytes.len() {
|
41
|
+
0 => Err(ParquetGemError::InvalidDecimal(
|
42
|
+
"Empty byte array for decimal".to_string(),
|
43
|
+
)),
|
44
|
+
1 => {
|
45
|
+
// For 1 byte, use i8
|
46
|
+
let value = bytes[0] as i8;
|
47
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
48
|
+
}
|
49
|
+
2 => {
|
50
|
+
// For 2 bytes, use i16
|
51
|
+
let mut value: i16 = 0;
|
52
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
53
|
+
|
54
|
+
for &byte in bytes {
|
55
|
+
value = (value << 8) | (byte as i16);
|
56
|
+
}
|
57
|
+
|
58
|
+
// Sign extend if negative
|
59
|
+
if is_negative {
|
60
|
+
let shift = 16 - (bytes.len() * 8);
|
61
|
+
value = (value << shift) >> shift;
|
62
|
+
}
|
63
|
+
|
64
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
65
|
+
}
|
66
|
+
3..=4 => {
|
67
|
+
// For 3-4 bytes, use i32
|
68
|
+
let mut value: i32 = 0;
|
69
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
70
|
+
|
71
|
+
for &byte in bytes {
|
72
|
+
value = (value << 8) | (byte as i32);
|
73
|
+
}
|
74
|
+
|
75
|
+
// Sign extend if negative
|
76
|
+
if is_negative {
|
77
|
+
let shift = 32 - (bytes.len() * 8);
|
78
|
+
value = (value << shift) >> shift;
|
79
|
+
}
|
80
|
+
|
81
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
82
|
+
}
|
83
|
+
5..=8 => {
|
84
|
+
// For 5-8 bytes, use i64
|
85
|
+
let mut value: i64 = 0;
|
86
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
87
|
+
|
88
|
+
for &byte in bytes {
|
89
|
+
value = (value << 8) | (byte as i64);
|
90
|
+
}
|
91
|
+
|
92
|
+
// Sign extend if negative
|
93
|
+
if is_negative {
|
94
|
+
let shift = 64 - (bytes.len() * 8);
|
95
|
+
value = (value << shift) >> shift;
|
96
|
+
}
|
97
|
+
|
98
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
99
|
+
}
|
100
|
+
9..=16 => {
|
101
|
+
// For 9-16 bytes, use i128
|
102
|
+
let mut value: i128 = 0;
|
103
|
+
let is_negative = bytes[0] & 0x80 != 0;
|
104
|
+
|
105
|
+
for &byte in bytes {
|
106
|
+
value = (value << 8) | (byte as i128);
|
107
|
+
}
|
108
|
+
|
109
|
+
// Sign extend if negative
|
110
|
+
if is_negative {
|
111
|
+
let shift = 128 - (bytes.len() * 8);
|
112
|
+
value = (value << shift) >> shift;
|
113
|
+
}
|
114
|
+
|
115
|
+
Ok(format_decimal_with_i32_scale(value, scale))
|
116
|
+
}
|
117
|
+
_ => Err(ParquetGemError::InvalidDecimal(format!(
|
118
|
+
"Unsupported decimal byte array size: {}",
|
119
|
+
bytes.len()
|
120
|
+
))),
|
121
|
+
}
|
122
|
+
}
|
123
|
+
|
37
124
|
#[derive(Debug)]
|
38
125
|
pub enum RowRecord<S: BuildHasher + Default> {
|
39
126
|
Vec(Vec<ParquetField>),
|
@@ -282,32 +369,7 @@ impl TryIntoValue for ParquetField {
|
|
282
369
|
format_decimal_with_i32_scale(unscaled, scale)
|
283
370
|
}
|
284
371
|
Decimal::Bytes { value, scale, .. } => {
|
285
|
-
match value.len() {
|
286
|
-
4 => {
|
287
|
-
// value is a byte array containing the bytes for an i32 value in big endian order
|
288
|
-
let casted = value.as_bytes()[..4].try_into()?;
|
289
|
-
let unscaled = i32::from_be_bytes(casted);
|
290
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
291
|
-
}
|
292
|
-
8 => {
|
293
|
-
// value is a byte array containing the bytes for an i64 value in big endian order
|
294
|
-
let casted = value.as_bytes()[..8].try_into()?;
|
295
|
-
let unscaled = i64::from_be_bytes(casted);
|
296
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
297
|
-
}
|
298
|
-
16 => {
|
299
|
-
// value is a byte array containing the bytes for an i128 value in big endian order
|
300
|
-
let casted = value.as_bytes()[..16].try_into()?;
|
301
|
-
let unscaled = i128::from_be_bytes(casted);
|
302
|
-
format_decimal_with_i32_scale(unscaled, scale)
|
303
|
-
}
|
304
|
-
_ => {
|
305
|
-
unimplemented!(
|
306
|
-
"Unsupported decimal byte array size: {}",
|
307
|
-
value.len()
|
308
|
-
);
|
309
|
-
}
|
310
|
-
}
|
372
|
+
bytes_to_decimal(value.as_bytes(), scale)?
|
311
373
|
}
|
312
374
|
};
|
313
375
|
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.5.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|