parquet 0.6.2 → 0.7.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de6b7f5c61eb1e796e7066790e2c1e0ada9ba4519140cee4e2378cd402db2586
|
4
|
+
data.tar.gz: 5b1dc2e442b1be17af82dd3a431b6f3a66254410229055cdbd8713aa1c009be2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7635247bc9627cdafe79ee9be1072c13b0f8ec11549506f9a8b6170d9b095883ede0f8a8165d0340572d89e1501c1d5f144c20f963ab960171dcb5813f15022c
|
7
|
+
data.tar.gz: abb59172a54c8d63ca39f24bdda4c64b98a60015622fc7f4a7a2a4c42ff03c3327f134de8ec66e006eb38a8cb38da90824a987b44f6e2fcc2af1c01bd4d85ee1
|
@@ -208,12 +208,14 @@ pub fn arrow_to_parquet_value(
|
|
208
208
|
|
209
209
|
let key_field = map_value
|
210
210
|
.fields()
|
211
|
-
.iter()
|
211
|
+
.iter()
|
212
|
+
.find(|f| f.name() == "key")
|
212
213
|
.ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
|
213
214
|
|
214
215
|
let value_field = map_value
|
215
216
|
.fields()
|
216
|
-
.iter()
|
217
|
+
.iter()
|
218
|
+
.find(|f| f.name() == "value")
|
217
219
|
.ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
|
218
220
|
|
219
221
|
let mut map_vec = Vec::with_capacity(keys.len());
|
@@ -4,13 +4,14 @@ use indexmap::IndexMap;
|
|
4
4
|
use magnus::r_hash::ForEach;
|
5
5
|
use magnus::value::ReprValue;
|
6
6
|
use magnus::{
|
7
|
-
Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
|
8
|
-
Value,
|
7
|
+
kwargs, Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
|
8
|
+
TryConvert, Value,
|
9
9
|
};
|
10
10
|
use ordered_float::OrderedFloat;
|
11
11
|
use parquet_core::{ParquetError, ParquetValue, Result};
|
12
12
|
use std::cell::RefCell;
|
13
13
|
use std::sync::Arc;
|
14
|
+
use uuid::Uuid;
|
14
15
|
|
15
16
|
/// Ruby value converter
|
16
17
|
///
|
@@ -1398,7 +1399,10 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1398
1399
|
ParquetValue::Float32(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
|
1399
1400
|
ParquetValue::Float64(OrderedFloat(f)) => Ok(f.into_value_with(&ruby)),
|
1400
1401
|
ParquetValue::String(s) => Ok(s.into_value_with(&ruby)),
|
1401
|
-
ParquetValue::Uuid(u) => Ok(u
|
1402
|
+
ParquetValue::Uuid(u) => Ok(u
|
1403
|
+
.hyphenated()
|
1404
|
+
.encode_lower(&mut Uuid::encode_buffer())
|
1405
|
+
.into_value_with(&ruby)),
|
1402
1406
|
ParquetValue::Bytes(b) => Ok(ruby.enc_str_new(&b, ruby.ascii8bit_encoding()).as_value()),
|
1403
1407
|
ParquetValue::Date32(days) => {
|
1404
1408
|
// Convert days since epoch to Date object
|
@@ -1489,10 +1493,26 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1489
1493
|
.funcall("utc", (year, month, day, hours, minutes, seconds, us))
|
1490
1494
|
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1491
1495
|
}
|
1496
|
+
ParquetValue::TimeNanos(nanos) => {
|
1497
|
+
let time_class = ruby.class_time();
|
1498
|
+
let secs = nanos / 1_000_000_000;
|
1499
|
+
let nsec = nanos % 1_000_000_000;
|
1500
|
+
time_class
|
1501
|
+
.funcall(
|
1502
|
+
"at",
|
1503
|
+
(
|
1504
|
+
secs,
|
1505
|
+
nsec,
|
1506
|
+
Symbol::new("nanosecond"),
|
1507
|
+
kwargs!("in" => "UTC"),
|
1508
|
+
),
|
1509
|
+
)
|
1510
|
+
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1511
|
+
}
|
1492
1512
|
ParquetValue::TimestampSecond(secs, tz) => {
|
1493
1513
|
let time_class = ruby.class_time();
|
1494
1514
|
let time = time_class
|
1495
|
-
.funcall::<_, _, Value>("at", (secs,))
|
1515
|
+
.funcall::<_, _, Value>("at", (secs, kwargs!("in" => "UTC")))
|
1496
1516
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1497
1517
|
apply_timezone(time, &tz)
|
1498
1518
|
}
|
@@ -1501,7 +1521,7 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1501
1521
|
let secs = millis / 1000;
|
1502
1522
|
let usec = (millis % 1000) * 1000; // Convert millisecond remainder to microseconds
|
1503
1523
|
let time = time_class
|
1504
|
-
.funcall::<_, _, Value>("at", (secs, usec))
|
1524
|
+
.funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
|
1505
1525
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1506
1526
|
apply_timezone(time, &tz)
|
1507
1527
|
}
|
@@ -1510,25 +1530,25 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
|
|
1510
1530
|
let secs = micros / 1_000_000;
|
1511
1531
|
let usec = micros % 1_000_000; // Already in microseconds
|
1512
1532
|
let time = time_class
|
1513
|
-
.funcall::<_, _, Value>("at", (secs, usec))
|
1533
|
+
.funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
|
1514
1534
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1515
1535
|
apply_timezone(time, &tz)
|
1516
1536
|
}
|
1517
|
-
ParquetValue::TimeNanos(nanos) => {
|
1518
|
-
let time_class = ruby.class_time();
|
1519
|
-
let secs = nanos / 1_000_000_000;
|
1520
|
-
let nsec = nanos % 1_000_000_000;
|
1521
|
-
time_class
|
1522
|
-
.funcall("at", (secs, nsec, Symbol::new("nanosecond")))
|
1523
|
-
.map_err(|e| ParquetError::Conversion(e.to_string()))
|
1524
|
-
}
|
1525
1537
|
ParquetValue::TimestampNanos(nanos, tz) => {
|
1526
1538
|
let time_class = ruby.class_time();
|
1527
1539
|
let secs = nanos / 1_000_000_000;
|
1528
1540
|
let nsec = nanos % 1_000_000_000;
|
1529
1541
|
// Use the nanosecond form of Time.at
|
1530
1542
|
let time = time_class
|
1531
|
-
.funcall::<_, _, Value>(
|
1543
|
+
.funcall::<_, _, Value>(
|
1544
|
+
"at",
|
1545
|
+
(
|
1546
|
+
secs,
|
1547
|
+
nsec,
|
1548
|
+
Symbol::new("nanosecond"),
|
1549
|
+
kwargs!("in" => "UTC"),
|
1550
|
+
),
|
1551
|
+
)
|
1532
1552
|
.map_err(|e| ParquetError::Conversion(e.to_string()))?;
|
1533
1553
|
apply_timezone(time, &tz)
|
1534
1554
|
}
|
@@ -115,9 +115,111 @@ impl TryIntoValue for RubyParquetMetaData {
|
|
115
115
|
.map_err(|e| {
|
116
116
|
RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
|
117
117
|
})?;
|
118
|
+
|
118
119
|
if let Some(logical_type) = basic_info.logical_type() {
|
120
|
+
let logical_type_value = match logical_type {
|
121
|
+
parquet::basic::LogicalType::Decimal { scale, precision } => {
|
122
|
+
let logical_hash = handle.hash_new();
|
123
|
+
logical_hash.aset("type", "Decimal").map_err(|e| {
|
124
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
125
|
+
})?;
|
126
|
+
logical_hash.aset("scale", scale).map_err(|e| {
|
127
|
+
RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
|
128
|
+
})?;
|
129
|
+
logical_hash.aset("precision", precision).map_err(|e| {
|
130
|
+
RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
|
131
|
+
})?;
|
132
|
+
logical_hash.as_value()
|
133
|
+
}
|
134
|
+
parquet::basic::LogicalType::Time {
|
135
|
+
is_adjusted_to_u_t_c,
|
136
|
+
unit,
|
137
|
+
} => {
|
138
|
+
let logical_hash = handle.hash_new();
|
139
|
+
logical_hash.aset("type", "Time").map_err(|e| {
|
140
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
141
|
+
})?;
|
142
|
+
logical_hash
|
143
|
+
.aset(
|
144
|
+
"is_adjusted_to_utc",
|
145
|
+
is_adjusted_to_u_t_c.to_string().as_str(),
|
146
|
+
)
|
147
|
+
.map_err(|e| {
|
148
|
+
RubyAdapterError::metadata(format!(
|
149
|
+
"Failed to set is_adjusted_to_u_t_c: {}",
|
150
|
+
e
|
151
|
+
))
|
152
|
+
})?;
|
153
|
+
|
154
|
+
let unit_str = match unit {
|
155
|
+
parquet::basic::TimeUnit::MILLIS(_) => "millis",
|
156
|
+
parquet::basic::TimeUnit::MICROS(_) => "micros",
|
157
|
+
parquet::basic::TimeUnit::NANOS(_) => "nanos",
|
158
|
+
};
|
159
|
+
logical_hash.aset("unit", unit_str).map_err(|e| {
|
160
|
+
RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
|
161
|
+
})?;
|
162
|
+
logical_hash.as_value()
|
163
|
+
}
|
164
|
+
parquet::basic::LogicalType::Timestamp {
|
165
|
+
is_adjusted_to_u_t_c,
|
166
|
+
unit,
|
167
|
+
} => {
|
168
|
+
let logical_hash = handle.hash_new();
|
169
|
+
logical_hash.aset("type", "Timestamp").map_err(|e| {
|
170
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
171
|
+
})?;
|
172
|
+
logical_hash
|
173
|
+
.aset("is_adjusted_to_utc", is_adjusted_to_u_t_c)
|
174
|
+
.map_err(|e| {
|
175
|
+
RubyAdapterError::metadata(format!(
|
176
|
+
"Failed to set is_adjusted_to_u_t_c: {}",
|
177
|
+
e
|
178
|
+
))
|
179
|
+
})?;
|
180
|
+
let unit_str = match unit {
|
181
|
+
parquet::basic::TimeUnit::MILLIS(_) => "millis",
|
182
|
+
parquet::basic::TimeUnit::MICROS(_) => "micros",
|
183
|
+
parquet::basic::TimeUnit::NANOS(_) => "nanos",
|
184
|
+
};
|
185
|
+
logical_hash.aset("unit", unit_str).map_err(|e| {
|
186
|
+
RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
|
187
|
+
})?;
|
188
|
+
logical_hash.as_value()
|
189
|
+
}
|
190
|
+
parquet::basic::LogicalType::Integer {
|
191
|
+
bit_width,
|
192
|
+
is_signed,
|
193
|
+
} => {
|
194
|
+
let logical_hash = handle.hash_new();
|
195
|
+
logical_hash.aset("type", "Integer").map_err(|e| {
|
196
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
197
|
+
})?;
|
198
|
+
logical_hash.aset("bit_width", bit_width).map_err(|e| {
|
199
|
+
RubyAdapterError::metadata(format!("Failed to set bit_width: {}", e))
|
200
|
+
})?;
|
201
|
+
logical_hash
|
202
|
+
.aset("is_signed", is_signed.to_string().as_str())
|
203
|
+
.map_err(|e| {
|
204
|
+
RubyAdapterError::metadata(format!(
|
205
|
+
"Failed to set is_signed: {}",
|
206
|
+
e
|
207
|
+
))
|
208
|
+
})?;
|
209
|
+
logical_hash.as_value()
|
210
|
+
}
|
211
|
+
_ => {
|
212
|
+
let logical_hash = handle.hash_new();
|
213
|
+
logical_hash
|
214
|
+
.aset("type", format!("{:?}", logical_type))
|
215
|
+
.map_err(|e| {
|
216
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
217
|
+
})?;
|
218
|
+
logical_hash.as_value()
|
219
|
+
}
|
220
|
+
};
|
119
221
|
field_hash
|
120
|
-
.aset("logical_type",
|
222
|
+
.aset("logical_type", logical_type_value)
|
121
223
|
.map_err(|e| {
|
122
224
|
RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
|
123
225
|
})?;
|
data/lib/parquet/version.rb
CHANGED