parquet 0.6.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dfd19103b2414e7feeaa6d1ec3c9a9c25ce42cf5c8362baa37e3b9d8d5245f82
4
- data.tar.gz: c5c1170dbdc3635577738a568688c36adc9670710f4b0d570fae29294e337754
3
+ metadata.gz: c1f3c1598e0557dfbf9ea851624342aa4e04865b3d84c4125617b17e7e3f016a
4
+ data.tar.gz: 00aeec0e5d3db34d6d405492d69ca1f5ba0a6398796fcdce1a7f8676784d3fe9
5
5
  SHA512:
6
- metadata.gz: c9bf72b4e708c750ab7ae30afd97aef7f456a4249904fe3eb74f916557e28ca1a53bc262a6492db38c38162ab3e3f684e30f0c70dabbaf8f8f4145ef4d9af259
7
- data.tar.gz: 164c5b0569d3d13242bcff7c09d66edf67b279d8289f97def043000a508b4333dd1387a4f47517be09023cd270cf3b6dfd57fdf658ac1b52e25f3f5b2b5ca30c
6
+ metadata.gz: 03b488f3cc83e31d8cd9bc61f67c3c7234837c2a0f7f2262b563fdc5094148f66072d3a748a37cda8fc74de8d37f63457d0c5cc63b7f83e6993e0c8f4d504462
7
+ data.tar.gz: 6a11d5c74536784fb96421d3c46da9608e30b77e42adbe60eb43b6fad96750d75b194b7243383bf512f32b0a9af3c10ddc27652bffdcf54b546245a2faa53ac5
@@ -208,12 +208,14 @@ pub fn arrow_to_parquet_value(
208
208
 
209
209
  let key_field = map_value
210
210
  .fields()
211
- .iter().find(|f| f.name() == "key")
211
+ .iter()
212
+ .find(|f| f.name() == "key")
212
213
  .ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
213
214
 
214
215
  let value_field = map_value
215
216
  .fields()
216
- .iter().find(|f| f.name() == "value")
217
+ .iter()
218
+ .find(|f| f.name() == "value")
217
219
  .ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
218
220
 
219
221
  let mut map_vec = Vec::with_capacity(keys.len());
@@ -4,13 +4,14 @@ use indexmap::IndexMap;
4
4
  use magnus::r_hash::ForEach;
5
5
  use magnus::value::ReprValue;
6
6
  use magnus::{
7
- Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol, TryConvert,
8
- Value,
7
+ kwargs, Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
8
+ TryConvert, Value,
9
9
  };
10
10
  use ordered_float::OrderedFloat;
11
11
  use parquet_core::{ParquetError, ParquetValue, Result};
12
12
  use std::cell::RefCell;
13
13
  use std::sync::Arc;
14
+ use uuid::Uuid;
14
15
 
15
16
  /// Ruby value converter
16
17
  ///
@@ -1394,11 +1395,64 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1394
1395
  ParquetValue::UInt16(i) => Ok((i as u64).into_value_with(&ruby)),
1395
1396
  ParquetValue::UInt32(i) => Ok((i as u64).into_value_with(&ruby)),
1396
1397
  ParquetValue::UInt64(i) => Ok(i.into_value_with(&ruby)),
1397
- ParquetValue::Float16(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
1398
- ParquetValue::Float32(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
1398
+ ParquetValue::Float16(OrderedFloat(f)) => {
1399
+ let cleaned = {
1400
+ // Fast-path the specials.
1401
+ if f.is_nan() || f.is_infinite() {
1402
+ f as f64
1403
+ } else if f == 0.0 {
1404
+ // Keep the IEEE-754 sign bit for −0.0.
1405
+ if f.is_sign_negative() {
1406
+ -0.0
1407
+ } else {
1408
+ 0.0
1409
+ }
1410
+ } else {
1411
+ // `to_string` gives the shortest exact, round-trippable decimal.
1412
+ // Parsing it back to `f64` cannot fail, but fall back defensively.
1413
+ match f.to_string().parse::<f64>() {
1414
+ Ok(v) => v,
1415
+ Err(e) => {
1416
+ dbg!(e);
1417
+ f as f64
1418
+ } // extremely unlikely
1419
+ }
1420
+ }
1421
+ };
1422
+ Ok(cleaned.into_value_with(&ruby))
1423
+ }
1424
+ ParquetValue::Float32(OrderedFloat(f)) => {
1425
+ let cleaned = {
1426
+ // Fast-path the specials.
1427
+ if f.is_nan() || f.is_infinite() {
1428
+ f as f64
1429
+ } else if f == 0.0 {
1430
+ // Keep the IEEE-754 sign bit for −0.0.
1431
+ if f.is_sign_negative() {
1432
+ -0.0
1433
+ } else {
1434
+ 0.0
1435
+ }
1436
+ } else {
1437
+ // `to_string` gives the shortest exact, round-trippable decimal.
1438
+ // Parsing it back to `f64` cannot fail, but fall back defensively.
1439
+ match f.to_string().parse::<f64>() {
1440
+ Ok(v) => v,
1441
+ Err(e) => {
1442
+ dbg!(e);
1443
+ f as f64
1444
+ } // extremely unlikely
1445
+ }
1446
+ }
1447
+ };
1448
+ Ok(cleaned.into_value_with(&ruby))
1449
+ }
1399
1450
  ParquetValue::Float64(OrderedFloat(f)) => Ok(f.into_value_with(&ruby)),
1400
1451
  ParquetValue::String(s) => Ok(s.into_value_with(&ruby)),
1401
- ParquetValue::Uuid(u) => Ok(u.to_string().into_value_with(&ruby)),
1452
+ ParquetValue::Uuid(u) => Ok(u
1453
+ .hyphenated()
1454
+ .encode_lower(&mut Uuid::encode_buffer())
1455
+ .into_value_with(&ruby)),
1402
1456
  ParquetValue::Bytes(b) => Ok(ruby.enc_str_new(&b, ruby.ascii8bit_encoding()).as_value()),
1403
1457
  ParquetValue::Date32(days) => {
1404
1458
  // Convert days since epoch to Date object
@@ -1489,10 +1543,26 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1489
1543
  .funcall("utc", (year, month, day, hours, minutes, seconds, us))
1490
1544
  .map_err(|e| ParquetError::Conversion(e.to_string()))
1491
1545
  }
1546
+ ParquetValue::TimeNanos(nanos) => {
1547
+ let time_class = ruby.class_time();
1548
+ let secs = nanos / 1_000_000_000;
1549
+ let nsec = nanos % 1_000_000_000;
1550
+ time_class
1551
+ .funcall(
1552
+ "at",
1553
+ (
1554
+ secs,
1555
+ nsec,
1556
+ Symbol::new("nanosecond"),
1557
+ kwargs!("in" => "UTC"),
1558
+ ),
1559
+ )
1560
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1561
+ }
1492
1562
  ParquetValue::TimestampSecond(secs, tz) => {
1493
1563
  let time_class = ruby.class_time();
1494
1564
  let time = time_class
1495
- .funcall::<_, _, Value>("at", (secs,))
1565
+ .funcall::<_, _, Value>("at", (secs, kwargs!("in" => "UTC")))
1496
1566
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1497
1567
  apply_timezone(time, &tz)
1498
1568
  }
@@ -1501,7 +1571,7 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1501
1571
  let secs = millis / 1000;
1502
1572
  let usec = (millis % 1000) * 1000; // Convert millisecond remainder to microseconds
1503
1573
  let time = time_class
1504
- .funcall::<_, _, Value>("at", (secs, usec))
1574
+ .funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
1505
1575
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1506
1576
  apply_timezone(time, &tz)
1507
1577
  }
@@ -1510,25 +1580,25 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1510
1580
  let secs = micros / 1_000_000;
1511
1581
  let usec = micros % 1_000_000; // Already in microseconds
1512
1582
  let time = time_class
1513
- .funcall::<_, _, Value>("at", (secs, usec))
1583
+ .funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
1514
1584
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1515
1585
  apply_timezone(time, &tz)
1516
1586
  }
1517
- ParquetValue::TimeNanos(nanos) => {
1518
- let time_class = ruby.class_time();
1519
- let secs = nanos / 1_000_000_000;
1520
- let nsec = nanos % 1_000_000_000;
1521
- time_class
1522
- .funcall("at", (secs, nsec, Symbol::new("nanosecond")))
1523
- .map_err(|e| ParquetError::Conversion(e.to_string()))
1524
- }
1525
1587
  ParquetValue::TimestampNanos(nanos, tz) => {
1526
1588
  let time_class = ruby.class_time();
1527
1589
  let secs = nanos / 1_000_000_000;
1528
1590
  let nsec = nanos % 1_000_000_000;
1529
1591
  // Use the nanosecond form of Time.at
1530
1592
  let time = time_class
1531
- .funcall::<_, _, Value>("at", (secs, nsec, Symbol::new("nanosecond")))
1593
+ .funcall::<_, _, Value>(
1594
+ "at",
1595
+ (
1596
+ secs,
1597
+ nsec,
1598
+ Symbol::new("nanosecond"),
1599
+ kwargs!("in" => "UTC"),
1600
+ ),
1601
+ )
1532
1602
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1533
1603
  apply_timezone(time, &tz)
1534
1604
  }
@@ -115,9 +115,111 @@ impl TryIntoValue for RubyParquetMetaData {
115
115
  .map_err(|e| {
116
116
  RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
117
117
  })?;
118
+
118
119
  if let Some(logical_type) = basic_info.logical_type() {
120
+ let logical_type_value = match logical_type {
121
+ parquet::basic::LogicalType::Decimal { scale, precision } => {
122
+ let logical_hash = handle.hash_new();
123
+ logical_hash.aset("type", "Decimal").map_err(|e| {
124
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
125
+ })?;
126
+ logical_hash.aset("scale", scale).map_err(|e| {
127
+ RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
128
+ })?;
129
+ logical_hash.aset("precision", precision).map_err(|e| {
130
+ RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
131
+ })?;
132
+ logical_hash.as_value()
133
+ }
134
+ parquet::basic::LogicalType::Time {
135
+ is_adjusted_to_u_t_c,
136
+ unit,
137
+ } => {
138
+ let logical_hash = handle.hash_new();
139
+ logical_hash.aset("type", "Time").map_err(|e| {
140
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
141
+ })?;
142
+ logical_hash
143
+ .aset(
144
+ "is_adjusted_to_utc",
145
+ is_adjusted_to_u_t_c.to_string().as_str(),
146
+ )
147
+ .map_err(|e| {
148
+ RubyAdapterError::metadata(format!(
149
+ "Failed to set is_adjusted_to_u_t_c: {}",
150
+ e
151
+ ))
152
+ })?;
153
+
154
+ let unit_str = match unit {
155
+ parquet::basic::TimeUnit::MILLIS(_) => "millis",
156
+ parquet::basic::TimeUnit::MICROS(_) => "micros",
157
+ parquet::basic::TimeUnit::NANOS(_) => "nanos",
158
+ };
159
+ logical_hash.aset("unit", unit_str).map_err(|e| {
160
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
161
+ })?;
162
+ logical_hash.as_value()
163
+ }
164
+ parquet::basic::LogicalType::Timestamp {
165
+ is_adjusted_to_u_t_c,
166
+ unit,
167
+ } => {
168
+ let logical_hash = handle.hash_new();
169
+ logical_hash.aset("type", "Timestamp").map_err(|e| {
170
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
171
+ })?;
172
+ logical_hash
173
+ .aset("is_adjusted_to_utc", is_adjusted_to_u_t_c)
174
+ .map_err(|e| {
175
+ RubyAdapterError::metadata(format!(
176
+ "Failed to set is_adjusted_to_u_t_c: {}",
177
+ e
178
+ ))
179
+ })?;
180
+ let unit_str = match unit {
181
+ parquet::basic::TimeUnit::MILLIS(_) => "millis",
182
+ parquet::basic::TimeUnit::MICROS(_) => "micros",
183
+ parquet::basic::TimeUnit::NANOS(_) => "nanos",
184
+ };
185
+ logical_hash.aset("unit", unit_str).map_err(|e| {
186
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
187
+ })?;
188
+ logical_hash.as_value()
189
+ }
190
+ parquet::basic::LogicalType::Integer {
191
+ bit_width,
192
+ is_signed,
193
+ } => {
194
+ let logical_hash = handle.hash_new();
195
+ logical_hash.aset("type", "Integer").map_err(|e| {
196
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
197
+ })?;
198
+ logical_hash.aset("bit_width", bit_width).map_err(|e| {
199
+ RubyAdapterError::metadata(format!("Failed to set bit_width: {}", e))
200
+ })?;
201
+ logical_hash
202
+ .aset("is_signed", is_signed.to_string().as_str())
203
+ .map_err(|e| {
204
+ RubyAdapterError::metadata(format!(
205
+ "Failed to set is_signed: {}",
206
+ e
207
+ ))
208
+ })?;
209
+ logical_hash.as_value()
210
+ }
211
+ _ => {
212
+ let logical_hash = handle.hash_new();
213
+ logical_hash
214
+ .aset("type", format!("{:?}", logical_type))
215
+ .map_err(|e| {
216
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
217
+ })?;
218
+ logical_hash.as_value()
219
+ }
220
+ };
119
221
  field_hash
120
- .aset("logical_type", format!("{:?}", logical_type))
222
+ .aset("logical_type", logical_type_value)
121
223
  .map_err(|e| {
122
224
  RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
123
225
  })?;
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.6.2"
2
+ VERSION = "0.7.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-07-04 00:00:00.000000000 Z
11
+ date: 2025-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys