parquet 0.6.2 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1f3c1598e0557dfbf9ea851624342aa4e04865b3d84c4125617b17e7e3f016a
|
4
|
+
data.tar.gz: 00aeec0e5d3db34d6d405492d69ca1f5ba0a6398796fcdce1a7f8676784d3fe9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03b488f3cc83e31d8cd9bc61f67c3c7234837c2a0f7f2262b563fdc5094148f66072d3a748a37cda8fc74de8d37f63457d0c5cc63b7f83e6993e0c8f4d504462
|
7
|
+
data.tar.gz: 6a11d5c74536784fb96421d3c46da9608e30b77e42adbe60eb43b6fad96750d75b194b7243383bf512f32b0a9af3c10ddc27652bffdcf54b546245a2faa53ac5
|
@@ -208,12 +208,14 @@ pub fn arrow_to_parquet_value(
|
|
208
208
|
|
209
209
|
let key_field = map_value
|
210
210
|
.fields()
|
211
|
-
.iter()
|
211
|
+
.iter()
|
212
|
+
.find(|f| f.name() == "key")
|
212
213
|
.ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
|
213
214
|
|
214
215
|
let value_field = map_value
|
215
216
|
.fields()
|
216
|
-
.iter()
|
217
|
+
.iter()
|
218
|
+
.find(|f| f.name() == "value")
|
217
219
|
.ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
|
218
220
|
|
219
221
|
let mut map_vec = Vec::with_capacity(keys.len());
|
@@ -4,13 +4,14 @@ use indexmap::IndexMap;
|
|
4
4
|
use magnus::r_hash::ForEach;
|
5
5
|
use magnus::value::ReprValue;
|
6
6
|
use magnus::{
|
7
|
-
Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
|
8
|
-
Value,
|
7
|
+
kwargs, Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
|
8
|
+
TryConvert, Value,
|
9
9
|
};
|
10
10
|
use ordered_float::OrderedFloat;
|
11
11
|
use parquet_core::{ParquetError, ParquetValue, Result};
|
12
12
|
use std::cell::RefCell;
|
13
13
|
use std::sync::Arc;
|
14
|
+
use uuid::Uuid;
|
14
15
|
|
15
16
|
/// Ruby value converter
|
16
17
|
///
|
@@ -1394,11 +1395,64 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1394
1395
|
ParquetValue::UInt16(i) => Ok((i as u64).into_value_with(&ruby)),
|
1395
1396
|
ParquetValue::UInt32(i) => Ok((i as u64).into_value_with(&ruby)),
|
1396
1397
|
ParquetValue::UInt64(i) => Ok(i.into_value_with(&ruby)),
|
1397
|
-
ParquetValue::Float16(OrderedFloat(f)) =>
|
1398
|
-
|
1398
|
+
ParquetValue::Float16(OrderedFloat(f)) => {
|
1399
|
+
let cleaned = {
|
1400
|
+
// Fast-path the specials.
|
1401
|
+
if f.is_nan() || f.is_infinite() {
|
1402
|
+
f as f64
|
1403
|
+
} else if f == 0.0 {
|
1404
|
+
// Keep the IEEE-754 sign bit for −0.0.
|
1405
|
+
if f.is_sign_negative() {
|
1406
|
+
-0.0
|
1407
|
+
} else {
|
1408
|
+
0.0
|
1409
|
+
}
|
1410
|
+
} else {
|
1411
|
+
// `to_string` gives the shortest exact, round-trippable decimal.
|
1412
|
+
// Parsing it back to `f64` cannot fail, but fall back defensively.
|
1413
|
+
match f.to_string().parse::<f64>() {
|
1414
|
+
Ok(v) => v,
|
1415
|
+
Err(e) => {
|
1416
|
+
dbg!(e);
|
1417
|
+
f as f64
|
1418
|
+
} // extremely unlikely
|
1419
|
+
}
|
1420
|
+
}
|
1421
|
+
};
|
1422
|
+
Ok(cleaned.into_value_with(&ruby))
|
1423
|
+
}
|
1424
|
+
ParquetValue::Float32(OrderedFloat(f)) => {
|
1425
|
+
let cleaned = {
|
1426
|
+
// Fast-path the specials.
|
1427
|
+
if f.is_nan() || f.is_infinite() {
|
1428
|
+
f as f64
|
1429
|
+
} else if f == 0.0 {
|
1430
|
+
// Keep the IEEE-754 sign bit for −0.0.
|
1431
|
+
if f.is_sign_negative() {
|
1432
|
+
-0.0
|
1433
|
+
} else {
|
1434
|
+
0.0
|
1435
|
+
}
|
1436
|
+
} else {
|
1437
|
+
// `to_string` gives the shortest exact, round-trippable decimal.
|
1438
|
+
// Parsing it back to `f64` cannot fail, but fall back defensively.
|
1439
|
+
match f.to_string().parse::<f64>() {
|
1440
|
+
Ok(v) => v,
|
1441
|
+
Err(e) => {
|
1442
|
+
dbg!(e);
|
1443
|
+
f as f64
|
1444
|
+
} // extremely unlikely
|
1445
|
+
}
|
1446
|
+
}
|
1447
|
+
};
|
1448
|
+
Ok(cleaned.into_value_with(&ruby))
|
1449
|
+
}
|
1399
1450
|
ParquetValue::Float64(OrderedFloat(f)) => Ok(f.into_value_with(&ruby)),
|
1400
1451
|
ParquetValue::String(s) => Ok(s.into_value_with(&ruby)),
|
1401
|
-
ParquetValue::Uuid(u) => Ok(u
|
1452
|
+
ParquetValue::Uuid(u) => Ok(u
|
1453
|
+
.hyphenated()
|
1454
|
+
.encode_lower(&mut Uuid::encode_buffer())
|
1455
|
+
.into_value_with(&ruby)),
|
1402
1456
|
ParquetValue::Bytes(b) => Ok(ruby.enc_str_new(&b, ruby.ascii8bit_encoding()).as_value()),
|
1403
1457
|
ParquetValue::Date32(days) => {
|
1404
1458
|
// Convert days since epoch to Date object
|
@@ -1489,10 +1543,26 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1489
1543
|
.funcall("utc", (year, month, day, hours, minutes, seconds, us))
|
1490
1544
|
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1491
1545
|
}
|
1546
|
+
ParquetValue::TimeNanos(nanos) => {
|
1547
|
+
let time_class = ruby.class_time();
|
1548
|
+
let secs = nanos / 1_000_000_000;
|
1549
|
+
let nsec = nanos % 1_000_000_000;
|
1550
|
+
time_class
|
1551
|
+
.funcall(
|
1552
|
+
"at",
|
1553
|
+
(
|
1554
|
+
secs,
|
1555
|
+
nsec,
|
1556
|
+
Symbol::new("nanosecond"),
|
1557
|
+
kwargs!("in" => "UTC"),
|
1558
|
+
),
|
1559
|
+
)
|
1560
|
+
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1561
|
+
}
|
1492
1562
|
ParquetValue::TimestampSecond(secs, tz) => {
|
1493
1563
|
let time_class = ruby.class_time();
|
1494
1564
|
let time = time_class
|
1495
|
-
.funcall::<_, _, Value>("at", (secs,))
|
1565
|
+
.funcall::<_, _, Value>("at", (secs, kwargs!("in" => "UTC")))
|
1496
1566
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1497
1567
|
apply_timezone(time, &tz)
|
1498
1568
|
}
|
@@ -1501,7 +1571,7 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1501
1571
|
let secs = millis / 1000;
|
1502
1572
|
let usec = (millis % 1000) * 1000; // Convert millisecond remainder to microseconds
|
1503
1573
|
let time = time_class
|
1504
|
-
.funcall::<_, _, Value>("at", (secs, usec))
|
1574
|
+
.funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
|
1505
1575
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1506
1576
|
apply_timezone(time, &tz)
|
1507
1577
|
}
|
@@ -1510,25 +1580,25 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1510
1580
|
let secs = micros / 1_000_000;
|
1511
1581
|
let usec = micros % 1_000_000; // Already in microseconds
|
1512
1582
|
let time = time_class
|
1513
|
-
.funcall::<_, _, Value>("at", (secs, usec))
|
1583
|
+
.funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
|
1514
1584
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1515
1585
|
apply_timezone(time, &tz)
|
1516
1586
|
}
|
1517
|
-
ParquetValue::TimeNanos(nanos) => {
|
1518
|
-
let time_class = ruby.class_time();
|
1519
|
-
let secs = nanos / 1_000_000_000;
|
1520
|
-
let nsec = nanos % 1_000_000_000;
|
1521
|
-
time_class
|
1522
|
-
.funcall("at", (secs, nsec, Symbol::new("nanosecond")))
|
1523
|
-
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1524
|
-
}
|
1525
1587
|
ParquetValue::TimestampNanos(nanos, tz) => {
|
1526
1588
|
let time_class = ruby.class_time();
|
1527
1589
|
let secs = nanos / 1_000_000_000;
|
1528
1590
|
let nsec = nanos % 1_000_000_000;
|
1529
1591
|
// Use the nanosecond form of Time.at
|
1530
1592
|
let time = time_class
|
1531
|
-
.funcall::<_, _, Value>(
|
1593
|
+
.funcall::<_, _, Value>(
|
1594
|
+
"at",
|
1595
|
+
(
|
1596
|
+
secs,
|
1597
|
+
nsec,
|
1598
|
+
Symbol::new("nanosecond"),
|
1599
|
+
kwargs!("in" => "UTC"),
|
1600
|
+
),
|
1601
|
+
)
|
1532
1602
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1533
1603
|
apply_timezone(time, &tz)
|
1534
1604
|
}
|
@@ -115,9 +115,111 @@ impl TryIntoValue for RubyParquetMetaData {
|
|
115
115
|
.map_err(|e| {
|
116
116
|
RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
|
117
117
|
})?;
|
118
|
+
|
118
119
|
if let Some(logical_type) = basic_info.logical_type() {
|
120
|
+
let logical_type_value = match logical_type {
|
121
|
+
parquet::basic::LogicalType::Decimal { scale, precision } => {
|
122
|
+
let logical_hash = handle.hash_new();
|
123
|
+
logical_hash.aset("type", "Decimal").map_err(|e| {
|
124
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
125
|
+
})?;
|
126
|
+
logical_hash.aset("scale", scale).map_err(|e| {
|
127
|
+
RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
|
128
|
+
})?;
|
129
|
+
logical_hash.aset("precision", precision).map_err(|e| {
|
130
|
+
RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
|
131
|
+
})?;
|
132
|
+
logical_hash.as_value()
|
133
|
+
}
|
134
|
+
parquet::basic::LogicalType::Time {
|
135
|
+
is_adjusted_to_u_t_c,
|
136
|
+
unit,
|
137
|
+
} => {
|
138
|
+
let logical_hash = handle.hash_new();
|
139
|
+
logical_hash.aset("type", "Time").map_err(|e| {
|
140
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
141
|
+
})?;
|
142
|
+
logical_hash
|
143
|
+
.aset(
|
144
|
+
"is_adjusted_to_utc",
|
145
|
+
is_adjusted_to_u_t_c.to_string().as_str(),
|
146
|
+
)
|
147
|
+
.map_err(|e| {
|
148
|
+
RubyAdapterError::metadata(format!(
|
149
|
+
"Failed to set is_adjusted_to_u_t_c: {}",
|
150
|
+
e
|
151
|
+
))
|
152
|
+
})?;
|
153
|
+
|
154
|
+
let unit_str = match unit {
|
155
|
+
parquet::basic::TimeUnit::MILLIS(_) => "millis",
|
156
|
+
parquet::basic::TimeUnit::MICROS(_) => "micros",
|
157
|
+
parquet::basic::TimeUnit::NANOS(_) => "nanos",
|
158
|
+
};
|
159
|
+
logical_hash.aset("unit", unit_str).map_err(|e| {
|
160
|
+
RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
|
161
|
+
})?;
|
162
|
+
logical_hash.as_value()
|
163
|
+
}
|
164
|
+
parquet::basic::LogicalType::Timestamp {
|
165
|
+
is_adjusted_to_u_t_c,
|
166
|
+
unit,
|
167
|
+
} => {
|
168
|
+
let logical_hash = handle.hash_new();
|
169
|
+
logical_hash.aset("type", "Timestamp").map_err(|e| {
|
170
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
171
|
+
})?;
|
172
|
+
logical_hash
|
173
|
+
.aset("is_adjusted_to_utc", is_adjusted_to_u_t_c)
|
174
|
+
.map_err(|e| {
|
175
|
+
RubyAdapterError::metadata(format!(
|
176
|
+
"Failed to set is_adjusted_to_u_t_c: {}",
|
177
|
+
e
|
178
|
+
))
|
179
|
+
})?;
|
180
|
+
let unit_str = match unit {
|
181
|
+
parquet::basic::TimeUnit::MILLIS(_) => "millis",
|
182
|
+
parquet::basic::TimeUnit::MICROS(_) => "micros",
|
183
|
+
parquet::basic::TimeUnit::NANOS(_) => "nanos",
|
184
|
+
};
|
185
|
+
logical_hash.aset("unit", unit_str).map_err(|e| {
|
186
|
+
RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
|
187
|
+
})?;
|
188
|
+
logical_hash.as_value()
|
189
|
+
}
|
190
|
+
parquet::basic::LogicalType::Integer {
|
191
|
+
bit_width,
|
192
|
+
is_signed,
|
193
|
+
} => {
|
194
|
+
let logical_hash = handle.hash_new();
|
195
|
+
logical_hash.aset("type", "Integer").map_err(|e| {
|
196
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
197
|
+
})?;
|
198
|
+
logical_hash.aset("bit_width", bit_width).map_err(|e| {
|
199
|
+
RubyAdapterError::metadata(format!("Failed to set bit_width: {}", e))
|
200
|
+
})?;
|
201
|
+
logical_hash
|
202
|
+
.aset("is_signed", is_signed.to_string().as_str())
|
203
|
+
.map_err(|e| {
|
204
|
+
RubyAdapterError::metadata(format!(
|
205
|
+
"Failed to set is_signed: {}",
|
206
|
+
e
|
207
|
+
))
|
208
|
+
})?;
|
209
|
+
logical_hash.as_value()
|
210
|
+
}
|
211
|
+
_ => {
|
212
|
+
let logical_hash = handle.hash_new();
|
213
|
+
logical_hash
|
214
|
+
.aset("type", format!("{:?}", logical_type))
|
215
|
+
.map_err(|e| {
|
216
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
217
|
+
})?;
|
218
|
+
logical_hash.as_value()
|
219
|
+
}
|
220
|
+
};
|
119
221
|
field_hash
|
120
|
-
.aset("logical_type",
|
222
|
+
.aset("logical_type", logical_type_value)
|
121
223
|
.map_err(|e| {
|
122
224
|
RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
|
123
225
|
})?;
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-07-
|
11
|
+
date: 2025-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|