parquet 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dfd19103b2414e7feeaa6d1ec3c9a9c25ce42cf5c8362baa37e3b9d8d5245f82
4
- data.tar.gz: c5c1170dbdc3635577738a568688c36adc9670710f4b0d570fae29294e337754
3
+ metadata.gz: de6b7f5c61eb1e796e7066790e2c1e0ada9ba4519140cee4e2378cd402db2586
4
+ data.tar.gz: 5b1dc2e442b1be17af82dd3a431b6f3a66254410229055cdbd8713aa1c009be2
5
5
  SHA512:
6
- metadata.gz: c9bf72b4e708c750ab7ae30afd97aef7f456a4249904fe3eb74f916557e28ca1a53bc262a6492db38c38162ab3e3f684e30f0c70dabbaf8f8f4145ef4d9af259
7
- data.tar.gz: 164c5b0569d3d13242bcff7c09d66edf67b279d8289f97def043000a508b4333dd1387a4f47517be09023cd270cf3b6dfd57fdf658ac1b52e25f3f5b2b5ca30c
6
+ metadata.gz: 7635247bc9627cdafe79ee9be1072c13b0f8ec11549506f9a8b6170d9b095883ede0f8a8165d0340572d89e1501c1d5f144c20f963ab960171dcb5813f15022c
7
+ data.tar.gz: abb59172a54c8d63ca39f24bdda4c64b98a60015622fc7f4a7a2a4c42ff03c3327f134de8ec66e006eb38a8cb38da90824a987b44f6e2fcc2af1c01bd4d85ee1
@@ -208,12 +208,14 @@ pub fn arrow_to_parquet_value(
208
208
 
209
209
  let key_field = map_value
210
210
  .fields()
211
- .iter().find(|f| f.name() == "key")
211
+ .iter()
212
+ .find(|f| f.name() == "key")
212
213
  .ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
213
214
 
214
215
  let value_field = map_value
215
216
  .fields()
216
- .iter().find(|f| f.name() == "value")
217
+ .iter()
218
+ .find(|f| f.name() == "value")
217
219
  .ok_or_else(|| ParquetError::Conversion("No value field found".to_string()))?;
218
220
 
219
221
  let mut map_vec = Vec::with_capacity(keys.len());
@@ -4,13 +4,14 @@ use indexmap::IndexMap;
4
4
  use magnus::r_hash::ForEach;
5
5
  use magnus::value::ReprValue;
6
6
  use magnus::{
7
- Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol, TryConvert,
8
- Value,
7
+ kwargs, Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol,
8
+ TryConvert, Value,
9
9
  };
10
10
  use ordered_float::OrderedFloat;
11
11
  use parquet_core::{ParquetError, ParquetValue, Result};
12
12
  use std::cell::RefCell;
13
13
  use std::sync::Arc;
14
+ use uuid::Uuid;
14
15
 
15
16
  /// Ruby value converter
16
17
  ///
@@ -1398,7 +1399,10 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1398
1399
  ParquetValue::Float32(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
1399
1400
  ParquetValue::Float64(OrderedFloat(f)) => Ok(f.into_value_with(&ruby)),
1400
1401
  ParquetValue::String(s) => Ok(s.into_value_with(&ruby)),
1401
- ParquetValue::Uuid(u) => Ok(u.to_string().into_value_with(&ruby)),
1402
+ ParquetValue::Uuid(u) => Ok(u
1403
+ .hyphenated()
1404
+ .encode_lower(&mut Uuid::encode_buffer())
1405
+ .into_value_with(&ruby)),
1402
1406
  ParquetValue::Bytes(b) => Ok(ruby.enc_str_new(&b, ruby.ascii8bit_encoding()).as_value()),
1403
1407
  ParquetValue::Date32(days) => {
1404
1408
  // Convert days since epoch to Date object
@@ -1489,10 +1493,26 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1489
1493
  .funcall("utc", (year, month, day, hours, minutes, seconds, us))
1490
1494
  .map_err(|e| ParquetError::Conversion(e.to_string()))
1491
1495
  }
1496
+ ParquetValue::TimeNanos(nanos) => {
1497
+ let time_class = ruby.class_time();
1498
+ let secs = nanos / 1_000_000_000;
1499
+ let nsec = nanos % 1_000_000_000;
1500
+ time_class
1501
+ .funcall(
1502
+ "at",
1503
+ (
1504
+ secs,
1505
+ nsec,
1506
+ Symbol::new("nanosecond"),
1507
+ kwargs!("in" => "UTC"),
1508
+ ),
1509
+ )
1510
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1511
+ }
1492
1512
  ParquetValue::TimestampSecond(secs, tz) => {
1493
1513
  let time_class = ruby.class_time();
1494
1514
  let time = time_class
1495
- .funcall::<_, _, Value>("at", (secs,))
1515
+ .funcall::<_, _, Value>("at", (secs, kwargs!("in" => "UTC")))
1496
1516
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1497
1517
  apply_timezone(time, &tz)
1498
1518
  }
@@ -1501,7 +1521,7 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1501
1521
  let secs = millis / 1000;
1502
1522
  let usec = (millis % 1000) * 1000; // Convert millisecond remainder to microseconds
1503
1523
  let time = time_class
1504
- .funcall::<_, _, Value>("at", (secs, usec))
1524
+ .funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
1505
1525
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1506
1526
  apply_timezone(time, &tz)
1507
1527
  }
@@ -1510,25 +1530,25 @@ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1510
1530
  let secs = micros / 1_000_000;
1511
1531
  let usec = micros % 1_000_000; // Already in microseconds
1512
1532
  let time = time_class
1513
- .funcall::<_, _, Value>("at", (secs, usec))
1533
+ .funcall::<_, _, Value>("at", (secs, usec, kwargs!("in" => "UTC")))
1514
1534
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1515
1535
  apply_timezone(time, &tz)
1516
1536
  }
1517
- ParquetValue::TimeNanos(nanos) => {
1518
- let time_class = ruby.class_time();
1519
- let secs = nanos / 1_000_000_000;
1520
- let nsec = nanos % 1_000_000_000;
1521
- time_class
1522
- .funcall("at", (secs, nsec, Symbol::new("nanosecond")))
1523
- .map_err(|e| ParquetError::Conversion(e.to_string()))
1524
- }
1525
1537
  ParquetValue::TimestampNanos(nanos, tz) => {
1526
1538
  let time_class = ruby.class_time();
1527
1539
  let secs = nanos / 1_000_000_000;
1528
1540
  let nsec = nanos % 1_000_000_000;
1529
1541
  // Use the nanosecond form of Time.at
1530
1542
  let time = time_class
1531
- .funcall::<_, _, Value>("at", (secs, nsec, Symbol::new("nanosecond")))
1543
+ .funcall::<_, _, Value>(
1544
+ "at",
1545
+ (
1546
+ secs,
1547
+ nsec,
1548
+ Symbol::new("nanosecond"),
1549
+ kwargs!("in" => "UTC"),
1550
+ ),
1551
+ )
1532
1552
  .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1533
1553
  apply_timezone(time, &tz)
1534
1554
  }
@@ -115,9 +115,111 @@ impl TryIntoValue for RubyParquetMetaData {
115
115
  .map_err(|e| {
116
116
  RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
117
117
  })?;
118
+
118
119
  if let Some(logical_type) = basic_info.logical_type() {
120
+ let logical_type_value = match logical_type {
121
+ parquet::basic::LogicalType::Decimal { scale, precision } => {
122
+ let logical_hash = handle.hash_new();
123
+ logical_hash.aset("type", "Decimal").map_err(|e| {
124
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
125
+ })?;
126
+ logical_hash.aset("scale", scale).map_err(|e| {
127
+ RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
128
+ })?;
129
+ logical_hash.aset("precision", precision).map_err(|e| {
130
+ RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
131
+ })?;
132
+ logical_hash.as_value()
133
+ }
134
+ parquet::basic::LogicalType::Time {
135
+ is_adjusted_to_u_t_c,
136
+ unit,
137
+ } => {
138
+ let logical_hash = handle.hash_new();
139
+ logical_hash.aset("type", "Time").map_err(|e| {
140
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
141
+ })?;
142
+ logical_hash
143
+ .aset(
144
+ "is_adjusted_to_utc",
145
+ is_adjusted_to_u_t_c.to_string().as_str(),
146
+ )
147
+ .map_err(|e| {
148
+ RubyAdapterError::metadata(format!(
149
+ "Failed to set is_adjusted_to_u_t_c: {}",
150
+ e
151
+ ))
152
+ })?;
153
+
154
+ let unit_str = match unit {
155
+ parquet::basic::TimeUnit::MILLIS(_) => "millis",
156
+ parquet::basic::TimeUnit::MICROS(_) => "micros",
157
+ parquet::basic::TimeUnit::NANOS(_) => "nanos",
158
+ };
159
+ logical_hash.aset("unit", unit_str).map_err(|e| {
160
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
161
+ })?;
162
+ logical_hash.as_value()
163
+ }
164
+ parquet::basic::LogicalType::Timestamp {
165
+ is_adjusted_to_u_t_c,
166
+ unit,
167
+ } => {
168
+ let logical_hash = handle.hash_new();
169
+ logical_hash.aset("type", "Timestamp").map_err(|e| {
170
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
171
+ })?;
172
+ logical_hash
173
+ .aset("is_adjusted_to_utc", is_adjusted_to_u_t_c)
174
+ .map_err(|e| {
175
+ RubyAdapterError::metadata(format!(
176
+ "Failed to set is_adjusted_to_u_t_c: {}",
177
+ e
178
+ ))
179
+ })?;
180
+ let unit_str = match unit {
181
+ parquet::basic::TimeUnit::MILLIS(_) => "millis",
182
+ parquet::basic::TimeUnit::MICROS(_) => "micros",
183
+ parquet::basic::TimeUnit::NANOS(_) => "nanos",
184
+ };
185
+ logical_hash.aset("unit", unit_str).map_err(|e| {
186
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
187
+ })?;
188
+ logical_hash.as_value()
189
+ }
190
+ parquet::basic::LogicalType::Integer {
191
+ bit_width,
192
+ is_signed,
193
+ } => {
194
+ let logical_hash = handle.hash_new();
195
+ logical_hash.aset("type", "Integer").map_err(|e| {
196
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
197
+ })?;
198
+ logical_hash.aset("bit_width", bit_width).map_err(|e| {
199
+ RubyAdapterError::metadata(format!("Failed to set bit_width: {}", e))
200
+ })?;
201
+ logical_hash
202
+ .aset("is_signed", is_signed.to_string().as_str())
203
+ .map_err(|e| {
204
+ RubyAdapterError::metadata(format!(
205
+ "Failed to set is_signed: {}",
206
+ e
207
+ ))
208
+ })?;
209
+ logical_hash.as_value()
210
+ }
211
+ _ => {
212
+ let logical_hash = handle.hash_new();
213
+ logical_hash
214
+ .aset("type", format!("{:?}", logical_type))
215
+ .map_err(|e| {
216
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
217
+ })?;
218
+ logical_hash.as_value()
219
+ }
220
+ };
119
221
  field_hash
120
- .aset("logical_type", format!("{:?}", logical_type))
222
+ .aset("logical_type", logical_type_value)
121
223
  .map_err(|e| {
122
224
  RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
123
225
  })?;
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.6.2"
2
+ VERSION = "0.7.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko