parquet 0.5.8 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c01534e1b898294b050175a11a0f4ca09ca088c24b6f496e470185427c855ff4
4
- data.tar.gz: 0d9888f96914433267bce8406fdf9064e7bcf94c8840c82ca980385798ac1203
3
+ metadata.gz: f1a1f7c250b960dbe334145a537e865889fbc759e7b8bfbafcbadc77689972cd
4
+ data.tar.gz: 22116ec0b9fe89f0ad08a3674267bd00b141170b889091b476f3aab0d6be88a8
5
5
  SHA512:
6
- metadata.gz: 011b1d7425843ee32fb7494825f5d32f3d7be2d06082c639622ac8b00865b3ea5f13057c03024690b14950c235b0882d1ae1a7dbd0c00756f32f099ed8c64289
7
- data.tar.gz: 5ff4cd63dc21844a68a26bd2ae5e428b64abf6a8165de314ddc674af27b5e18e3d4303d02ba1c2631cb35498a93d2466e99aae0d267e8000380ef26105f10310
6
+ metadata.gz: ef8485d03247dd0d31993a774117669c1aaef5b875e7cb5c6f4c53e237a72fb81113ea35251426a21ea1ba24f8ae568bd2c3a158e6a45ce2416a308251d0f467
7
+ data.tar.gz: 672f38dfbf703dae996283fba8d137529e3089f569797df87feaac32fb0f956ea7c4d7ae57032715d1a21bd5bfa4dd728c2a3fe80174fb5fc0abdef51c73110a
@@ -32,7 +32,6 @@ use arrow_schema::{DataType, TimeUnit};
32
32
  use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
33
33
  use parquet::data_type::Decimal;
34
34
  use parquet::record::Field;
35
- use std::array::TryFromSliceError;
36
35
  use std::{collections::HashMap, hash::BuildHasher, sync::Arc};
37
36
 
38
37
  use crate::header_cache::StringCacheKey;
@@ -60,7 +59,7 @@ pub enum ParquetGemError {
60
59
  #[error("Jiff error: {0}")]
61
60
  Jiff(#[from] jiff::Error),
62
61
  #[error("Failed to cast slice to array: {0}")]
63
- InvalidDecimal(#[from] TryFromSliceError),
62
+ InvalidDecimal(String),
64
63
  #[error("Failed to parse UUID: {0}")]
65
64
  UuidError(#[from] uuid::Error),
66
65
  }
@@ -34,6 +34,93 @@ pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32)
34
34
  }
35
35
  }
36
36
 
37
+ /// Convert arbitrary-length big-endian byte array to decimal string
38
+ /// Supports byte arrays from 1 to 16 bytes in length
39
+ fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
40
+ match bytes.len() {
41
+ 0 => Err(ParquetGemError::InvalidDecimal(
42
+ "Empty byte array for decimal".to_string(),
43
+ )),
44
+ 1 => {
45
+ // For 1 byte, use i8
46
+ let value = bytes[0] as i8;
47
+ Ok(format_decimal_with_i32_scale(value, scale))
48
+ }
49
+ 2 => {
50
+ // For 2 bytes, use i16
51
+ let mut value: i16 = 0;
52
+ let is_negative = bytes[0] & 0x80 != 0;
53
+
54
+ for &byte in bytes {
55
+ value = (value << 8) | (byte as i16);
56
+ }
57
+
58
+ // Sign extend if negative
59
+ if is_negative {
60
+ let shift = 16 - (bytes.len() * 8);
61
+ value = (value << shift) >> shift;
62
+ }
63
+
64
+ Ok(format_decimal_with_i32_scale(value, scale))
65
+ }
66
+ 3..=4 => {
67
+ // For 3-4 bytes, use i32
68
+ let mut value: i32 = 0;
69
+ let is_negative = bytes[0] & 0x80 != 0;
70
+
71
+ for &byte in bytes {
72
+ value = (value << 8) | (byte as i32);
73
+ }
74
+
75
+ // Sign extend if negative
76
+ if is_negative {
77
+ let shift = 32 - (bytes.len() * 8);
78
+ value = (value << shift) >> shift;
79
+ }
80
+
81
+ Ok(format_decimal_with_i32_scale(value, scale))
82
+ }
83
+ 5..=8 => {
84
+ // For 5-8 bytes, use i64
85
+ let mut value: i64 = 0;
86
+ let is_negative = bytes[0] & 0x80 != 0;
87
+
88
+ for &byte in bytes {
89
+ value = (value << 8) | (byte as i64);
90
+ }
91
+
92
+ // Sign extend if negative
93
+ if is_negative {
94
+ let shift = 64 - (bytes.len() * 8);
95
+ value = (value << shift) >> shift;
96
+ }
97
+
98
+ Ok(format_decimal_with_i32_scale(value, scale))
99
+ }
100
+ 9..=16 => {
101
+ // For 9-16 bytes, use i128
102
+ let mut value: i128 = 0;
103
+ let is_negative = bytes[0] & 0x80 != 0;
104
+
105
+ for &byte in bytes {
106
+ value = (value << 8) | (byte as i128);
107
+ }
108
+
109
+ // Sign extend if negative
110
+ if is_negative {
111
+ let shift = 128 - (bytes.len() * 8);
112
+ value = (value << shift) >> shift;
113
+ }
114
+
115
+ Ok(format_decimal_with_i32_scale(value, scale))
116
+ }
117
+ _ => Err(ParquetGemError::InvalidDecimal(format!(
118
+ "Unsupported decimal byte array size: {}",
119
+ bytes.len()
120
+ ))),
121
+ }
122
+ }
123
+
37
124
  #[derive(Debug)]
38
125
  pub enum RowRecord<S: BuildHasher + Default> {
39
126
  Vec(Vec<ParquetField>),
@@ -282,32 +369,7 @@ impl TryIntoValue for ParquetField {
282
369
  format_decimal_with_i32_scale(unscaled, scale)
283
370
  }
284
371
  Decimal::Bytes { value, scale, .. } => {
285
- match value.len() {
286
- 4 => {
287
- // value is a byte array containing the bytes for an i32 value in big endian order
288
- let casted = value.as_bytes()[..4].try_into()?;
289
- let unscaled = i32::from_be_bytes(casted);
290
- format_decimal_with_i32_scale(unscaled, scale)
291
- }
292
- 8 => {
293
- // value is a byte array containing the bytes for an i64 value in big endian order
294
- let casted = value.as_bytes()[..8].try_into()?;
295
- let unscaled = i64::from_be_bytes(casted);
296
- format_decimal_with_i32_scale(unscaled, scale)
297
- }
298
- 16 => {
299
- // value is a byte array containing the bytes for an i128 value in big endian order
300
- let casted = value.as_bytes()[..16].try_into()?;
301
- let unscaled = i128::from_be_bytes(casted);
302
- format_decimal_with_i32_scale(unscaled, scale)
303
- }
304
- _ => {
305
- unimplemented!(
306
- "Unsupported decimal byte array size: {}",
307
- value.len()
308
- );
309
- }
310
- }
372
+ bytes_to_decimal(value.as_bytes(), scale)?
311
373
  }
312
374
  };
313
375
 
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.8"
2
+ VERSION = "0.5.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.8
4
+ version: 0.5.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-26 00:00:00.000000000 Z
11
+ date: 2025-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys