parquet 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: dc1d1eda7d71aa6336fbf6cc94789517439df3fab1852ec7d2e9d265e0c016c4
-  data.tar.gz: 6fff5321a31d3fe19a59a4f47add56222dbeb274bef7a068163b48757d65252d
+  metadata.gz: e8a79e74af0419282904a0041c09509520f64ce1e504e133237f4b87697dce14
+  data.tar.gz: 63391ffff73907caccc142f37550e85c12826f302f00ac726f826af391f8d8cd
 SHA512:
-  metadata.gz: ddd50f82df2b42cf844e379a7f07c0214e9aef925e7c43ec566b6b9f27be311676b6f887c163aa5d41d4523cd1d506266b15623205453bc8e08467c88e7c2b63
-  data.tar.gz: afb235ad09338d8c4cd59588dded3d312890c5d5d879b77040fcbf960be69653981fe5176cc591969a80ba54214d4c6a63cff96c36ceda7b9e00c75ba8e9e913
+  metadata.gz: cddb7c6711e7e49ea785f6c0ab5ae3c40181756ad0e3fc23f298c291b725b178fdfbe5a8430fd9be10591b09e1b963255cb50637743054fe2173c9798e1e8bcc
+  data.tar.gz: 927a112ff1994800b3ed989f5000ed2a43438cebff886a545d0dd22018731b042b9052ead5b14983faf50fffc593cdd7512dd764bfed4de8ffa7781e6f2fda1a
data/Cargo.lock CHANGED
@@ -64,7 +64,7 @@ dependencies = [
 [[package]]
 name = "arrow-array"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -79,7 +79,7 @@ dependencies = [
 [[package]]
 name = "arrow-buffer"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "bytes",
  "half",
@@ -89,7 +89,7 @@ dependencies = [
 [[package]]
 name = "arrow-cast"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -108,7 +108,7 @@ dependencies = [
 [[package]]
 name = "arrow-data"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -119,7 +119,7 @@ dependencies = [
 [[package]]
 name = "arrow-ipc"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -131,12 +131,12 @@ dependencies = [
 [[package]]
 name = "arrow-schema"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 
 [[package]]
 name = "arrow-select"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -844,12 +844,13 @@ dependencies = [
  "simdutf8",
  "tempfile",
  "thiserror",
+ "uuid",
 ]
 
 [[package]]
 name = "parquet"
 version = "55.1.0"
-source = "git+https://github.com/njaremko/arrow-rs?branch=nathan%2Ffix-reading-int32-timestamp-records#f791b78a67cb5d9a0b4ec0fcab80780dcb61c346"
+source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -1230,6 +1231,12 @@ version = "1.0.17"
1230
1231
  source = "registry+https://github.com/rust-lang/crates.io-index"
1231
1232
  checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
1232
1233
 
1234
+ [[package]]
1235
+ name = "uuid"
1236
+ version = "1.16.0"
1237
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1238
+ checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
1239
+
1233
1240
  [[package]]
1234
1241
  name = "version_check"
1235
1242
  version = "0.9.5"
@@ -11,20 +11,21 @@ rb-sys-env = "^0.2"
 
 [dependencies]
 ahash = "0.8"
-arrow-array = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-reading-int32-timestamp-records" }
-arrow-schema = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-reading-int32-timestamp-records" }
+arrow-array = { git = "https://github.com/apache/arrow-rs", branch = "main" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs", branch = "main" }
 bytes = "^1.9"
 either = "1.9"
 itertools = "^0.14"
 jiff = "0.2"
 magnus = { version = "0.7", features = ["rb-sys"] }
-parquet = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan/fix-reading-int32-timestamp-records", features = ["json"] }
+parquet = { git = "https://github.com/apache/arrow-rs", branch = "main", features = ["json"] }
 rand = "0.9"
 rb-sys = "^0.9"
 simdutf8 = "0.1.5"
 tempfile = "^3.15"
 thiserror = "2.0"
 num = "0.4.3"
+uuid = "1.16.0"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
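
The manifest hunk above (a Cargo.toml; its file header is not shown in this diff) moves the three arrow-rs git dependencies from the njaremko fork back to upstream `apache/arrow-rs` on `main`, and adds `uuid = "1.16.0"`, which backs the UUID rendering introduced later in this diff. A minimal standalone sketch of the uuid 1.x calls involved (illustrative byte values, not gem code):

```rust
// Minimal sketch of the uuid 1.x API this release relies on (not gem code).
fn main() -> Result<(), uuid::Error> {
    // 16 bytes, as stored in a Parquet column carrying UUID values.
    let bytes: [u8; 16] = [
        0x67, 0xe5, 0x50, 0x44, 0x10, 0xb1, 0x42, 0x6f,
        0x92, 0x47, 0xbb, 0x68, 0x0e, 0x5f, 0xe0, 0xc8,
    ];
    // from_slice only validates the length; any 16-byte slice parses.
    let id = uuid::Uuid::from_slice(&bytes)?;
    // Display renders the canonical hyphenated form.
    assert_eq!(id.to_string(), "67e55044-10b1-426f-9247-bb680e5fe0c8");
    Ok(())
}
```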
@@ -2,8 +2,8 @@ use crate::header_cache::StringCache;
 use crate::logger::RubyLogger;
 use crate::types::TryIntoValue;
 use crate::{
-    create_column_enumerator, create_row_enumerator, ParquetField, ParquetGemError,
-    ParserResultType, ColumnEnumeratorArgs, RowEnumeratorArgs, RowRecord, ColumnRecord, ParquetValueVec,
+    create_column_enumerator, create_row_enumerator, ColumnEnumeratorArgs, ColumnRecord,
+    ParquetField, ParquetGemError, ParquetValueVec, ParserResultType, RowEnumeratorArgs, RowRecord,
 };
 use ahash::RandomState;
 use either::Either;
@@ -13,10 +13,10 @@ use std::collections::HashMap;
 use std::rc::Rc;
 use std::sync::OnceLock;
 
-use crate::types::ArrayWrapper;
 use super::common::{
     create_batch_reader, handle_block_or_enum, handle_empty_file, open_parquet_source,
 };
+use crate::types::ArrayWrapper;
 
 /// A unified parser configuration that can be used for both row and column parsing
 pub enum ParserType {
@@ -53,11 +53,11 @@ pub fn parse_parquet_unified(
     } = args;
 
     // Initialize the logger if provided
-    let ruby_logger = RubyLogger::new(&ruby, logger.clone())?;
-
+    let ruby_logger = RubyLogger::new(&ruby, logger)?;
+
     // Clone values for the closure to avoid move issues
     let columns_clone = columns.clone();
-
+
     // Determine if we're handling rows or columns for enumerator creation
     match &parser_type {
         ParserType::Row { strict } => {
@@ -75,13 +75,13 @@
             })? {
                 return Ok(enum_value);
             }
-        },
+        }
         ParserType::Column { batch_size, strict } => {
             // For column-based parsing, log the batch size if present
             if let Some(ref bs) = batch_size {
                 ruby_logger.debug(|| format!("Using batch size: {}", bs))?;
             }
-
+
             // Handle block or create column enumerator
             if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
                 create_column_enumerator(ColumnEnumeratorArgs {
@@ -102,19 +102,34 @@
 
     // Open the Parquet source
     let source = open_parquet_source(ruby.clone(), to_read)?;
-
+
     // Based on the parser type, handle the data differently
     match parser_type {
         ParserType::Row { strict } => {
             // Handle row-based parsing
-            process_row_data(ruby.clone(), source, &columns, result_type, strict, &ruby_logger)?;
-        },
+            process_row_data(
+                ruby.clone(),
+                source,
+                &columns,
+                result_type,
+                strict,
+                &ruby_logger,
+            )?;
+        }
         ParserType::Column { batch_size, strict } => {
             // Handle column-based parsing
-            process_column_data(ruby.clone(), source, &columns, result_type, batch_size, strict, &ruby_logger)?;
+            process_column_data(
+                ruby.clone(),
+                source,
+                &columns,
+                result_type,
+                batch_size,
+                strict,
+                &ruby_logger,
+            )?;
         }
     }
-
+
     Ok(ruby.qnil().into_value_with(&ruby))
 }
 
@@ -129,7 +144,7 @@ fn process_row_data(
 ) -> Result<(), ParquetGemError> {
     use parquet::file::reader::{FileReader, SerializedFileReader};
     use parquet::record::reader::RowIter as ParquetRowIter;
-
+
     // Create the row-based reader
     let reader: Box<dyn FileReader> = match source {
         Either::Left(file) => {
@@ -174,8 +189,19 @@
 
                     let mut map =
                         HashMap::with_capacity_and_hasher(headers.len(), RandomState::default());
-                    for (i, (_, v)) in row.get_column_iter().enumerate() {
-                        map.insert(headers[i], ParquetField(v.clone(), strict));
+                    for (i, ((_, v), t)) in
+                        row.get_column_iter().zip(schema.get_fields()).enumerate()
+                    {
+                        let type_info = t.get_basic_info();
+                        map.insert(
+                            headers[i],
+                            ParquetField {
+                                field: v.clone(),
+                                converted_type: type_info.converted_type(),
+                                logical_type: type_info.logical_type().clone(),
+                                strict,
+                            },
+                        );
                     }
                     map
                 })
@@ -193,8 +219,14 @@
                 row.map(|row| {
                     let column_count = row.get_column_iter().count();
                     let mut vec = Vec::with_capacity(column_count);
-                    for (_, v) in row.get_column_iter() {
-                        vec.push(ParquetField(v.clone(), strict));
+                    for ((_, v), t) in row.get_column_iter().zip(schema.get_fields()) {
+                        let type_info = t.get_basic_info();
+                        vec.push(ParquetField {
+                            field: v.clone(),
+                            converted_type: type_info.converted_type(),
+                            logical_type: type_info.logical_type().clone(),
+                            strict,
+                        });
                     }
                     vec
                 })
@@ -309,7 +341,10 @@
 }
 
 /// Helper function to create a projection schema
-fn create_projection_schema(schema: &parquet::schema::types::Type, columns: &[String]) -> parquet::schema::types::Type {
+fn create_projection_schema(
+    schema: &parquet::schema::types::Type,
+    columns: &[String],
+) -> parquet::schema::types::Type {
     if let parquet::schema::types::Type::GroupType { fields, .. } = schema {
         let projected_fields: Vec<std::sync::Arc<parquet::schema::types::Type>> = fields
             .iter()
@@ -325,4 +360,4 @@ fn create_projection_schema(schema: &parquet::schema::types::Type, columns: &[St
 
     // Return original schema if not a group type
     schema.clone()
-}
+}
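
The reworked row paths above pair each column value with its schema node by zipping `row.get_column_iter()` with `schema.get_fields()`, so converted and logical types travel alongside every `Field`. A reduced sketch of that pairing, assuming `schema` is the file's root group type (the function name is illustrative, not gem code):

```rust
use parquet::basic::{ConvertedType, LogicalType};
use parquet::record::Row;
use parquet::schema::types::Type;

// Sketch: walk a row's columns alongside the root schema's fields and collect
// each column's type metadata. Assumes `schema` is a group type; calling
// get_fields() on a primitive type panics.
fn column_type_info(row: &Row, schema: &Type) -> Vec<(ConvertedType, Option<LogicalType>)> {
    row.get_column_iter()
        .zip(schema.get_fields())
        .map(|((_name, _value), field_type)| {
            let info = field_type.get_basic_info();
            (info.converted_type(), info.logical_type())
        })
        .collect()
}
```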
@@ -61,6 +61,8 @@ pub enum ParquetGemError {
     Jiff(#[from] jiff::Error),
     #[error("Failed to cast slice to array: {0}")]
     InvalidDecimal(#[from] TryFromSliceError),
+    #[error("Failed to parse UUID: {0}")]
+    UuidError(#[from] uuid::Error),
 }
 
 #[derive(Debug)]
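
The new `UuidError` variant uses thiserror's `#[from]` so a failed `Uuid::from_slice` converts into `ParquetGemError` via `?`. A reduced sketch of that wiring (the enum is abbreviated to the variants shown above; `parse_uuid` is illustrative):

```rust
use std::array::TryFromSliceError;
use thiserror::Error;

// Abbreviated version of the gem's error enum, keeping only the variants
// from the hunk above.
#[derive(Debug, Error)]
pub enum ParquetGemError {
    #[error("Failed to cast slice to array: {0}")]
    InvalidDecimal(#[from] TryFromSliceError),
    #[error("Failed to parse UUID: {0}")]
    UuidError(#[from] uuid::Error),
}

// `?` applies the From<uuid::Error> impl generated by #[from].
fn parse_uuid(bytes: &[u8]) -> Result<String, ParquetGemError> {
    Ok(uuid::Uuid::from_slice(bytes)?.to_string())
}
```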
@@ -1,7 +1,10 @@
 use std::sync::OnceLock;
 
 use itertools::Itertools;
-use parquet::data_type::AsBytes;
+use parquet::{
+    basic::{ConvertedType, LogicalType},
+    data_type::AsBytes,
+};
 
 use super::*;
 
@@ -44,7 +47,13 @@ pub enum ColumnRecord<S: BuildHasher + Default> {
 }
 
 #[derive(Debug)]
-pub struct ParquetField(pub Field, pub bool);
+pub struct ParquetField {
+    pub field: Field,
+    #[allow(dead_code)]
+    pub converted_type: ConvertedType,
+    pub logical_type: Option<LogicalType>,
+    pub strict: bool,
+}
 
 impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
@@ -158,7 +167,7 @@ pub trait TryIntoValue {
 
 impl TryIntoValue for ParquetField {
     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
-        match self.0 {
+        match self.field {
             Field::Null => Ok(handle.qnil().as_value()),
             Field::Bool(b) => Ok(b.into_value_with(handle)),
             Field::Short(s) => Ok(s.into_value_with(handle)),
@@ -172,7 +181,7 @@ impl TryIntoValue for ParquetField {
             Field::Float(f) => Ok(f.into_value_with(handle)),
             Field::Double(d) => Ok(d.into_value_with(handle)),
             Field::Str(s) => {
-                if self.1 {
+                if self.strict {
                     Ok(simdutf8::basic::from_utf8(s.as_bytes())
                         .map_err(ParquetGemError::Utf8Error)
                         .map(|s| s.into_value_with(handle))?)
@@ -182,7 +191,15 @@
                 }
             }
             Field::Byte(b) => Ok(b.into_value_with(handle)),
-            Field::Bytes(b) => Ok(handle.str_from_slice(b.data()).as_value()),
+            Field::Bytes(b) => {
+                if matches!(self.logical_type, Some(parquet::basic::LogicalType::Uuid)) {
+                    let bytes = b.as_bytes();
+                    let uuid = uuid::Uuid::from_slice(bytes)?;
+                    Ok(uuid.to_string().into_value_with(handle))
+                } else {
+                    Ok(handle.str_from_slice(b.data()).as_value())
+                }
+            }
             Field::Date(d) => {
                 let ts = jiff::Timestamp::from_second((d as i64) * 86400)?;
                 let formatted = ts.strftime("%Y-%m-%d").to_string();
@@ -206,7 +223,15 @@
                 let elements = list.elements();
                 let ary = handle.ary_new_capa(elements.len());
                 elements.iter().try_for_each(|e| {
-                    ary.push(ParquetField(e.clone(), self.1).try_into_value_with(handle)?)?;
+                    ary.push(
+                        ParquetField {
+                            field: e.clone(),
+                            logical_type: e.to_logical_type(),
+                            converted_type: e.to_converted_type(),
+                            strict: self.strict,
+                        }
+                        .try_into_value_with(handle)?,
+                    )?;
                     Ok::<_, ParquetGemError>(())
                 })?;
                 Ok(ary.into_value_with(handle))
@@ -220,8 +245,20 @@
 
                 map.entries().iter().try_for_each(|(k, v)| {
                     hash.aset(
-                        ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
-                        ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
+                        ParquetField {
+                            field: k.clone(),
+                            converted_type: k.to_converted_type(),
+                            logical_type: k.to_logical_type(),
+                            strict: self.strict,
+                        }
+                        .try_into_value_with(handle)?,
+                        ParquetField {
+                            field: v.clone(),
+                            converted_type: v.to_converted_type(),
+                            logical_type: v.to_logical_type(),
+                            strict: self.strict,
+                        }
+                        .try_into_value_with(handle)?,
                     )?;
                     Ok::<_, ParquetGemError>(())
                 })?;
@@ -278,7 +315,13 @@
                 row.get_column_iter().try_for_each(|(k, v)| {
                     hash.aset(
                         k.clone().into_value_with(handle),
-                        ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
+                        ParquetField {
+                            field: v.clone(),
+                            converted_type: v.to_converted_type(),
+                            logical_type: v.to_logical_type(),
+                            strict: self.strict,
+                        }
+                        .try_into_value_with(handle)?,
                     )?;
                     Ok::<_, ParquetGemError>(())
                 })?;
@@ -287,3 +330,111 @@ impl TryIntoValue for ParquetField {
         }
     }
 }
+
+trait ToTypeInfo {
+    fn to_converted_type(&self) -> ConvertedType;
+    fn to_logical_type(&self) -> Option<LogicalType>;
+}
+
+impl ToTypeInfo for &parquet::record::Field {
+    fn to_converted_type(&self) -> ConvertedType {
+        match self {
+            Field::Null => ConvertedType::NONE,
+            Field::Bool(_) => ConvertedType::INT_8,
+            Field::Byte(_) => ConvertedType::INT_8,
+            Field::Short(_) => ConvertedType::INT_16,
+            Field::Int(_) => ConvertedType::INT_32,
+            Field::Long(_) => ConvertedType::INT_64,
+            Field::UByte(_) => ConvertedType::UINT_8,
+            Field::UShort(_) => ConvertedType::UINT_16,
+            Field::UInt(_) => ConvertedType::UINT_32,
+            Field::ULong(_) => ConvertedType::UINT_64,
+            Field::Float16(_) => ConvertedType::NONE,
+            Field::Float(_) => ConvertedType::NONE,
+            Field::Double(_) => ConvertedType::NONE,
+            Field::Decimal(_) => ConvertedType::DECIMAL,
+            Field::Str(_) => ConvertedType::UTF8,
+            Field::Bytes(_) => ConvertedType::LIST,
+            Field::Date(_) => ConvertedType::DATE,
+            Field::TimestampMillis(_) => ConvertedType::TIMESTAMP_MILLIS,
+            Field::TimestampMicros(_) => ConvertedType::TIMESTAMP_MICROS,
+            Field::Group(_) => ConvertedType::NONE,
+            Field::ListInternal(_) => ConvertedType::LIST,
+            Field::MapInternal(_) => ConvertedType::MAP,
+        }
+    }
+    fn to_logical_type(&self) -> Option<LogicalType> {
+        Some(match self {
+            Field::Null => LogicalType::Unknown,
+            Field::Bool(_) => LogicalType::Integer {
+                bit_width: 1,
+                is_signed: false,
+            },
+            Field::Byte(_) => LogicalType::Integer {
+                bit_width: 8,
+                is_signed: false,
+            },
+            Field::Short(_) => LogicalType::Integer {
+                bit_width: 16,
+                is_signed: true,
+            },
+            Field::Int(_) => LogicalType::Integer {
+                bit_width: 32,
+                is_signed: true,
+            },
+            Field::Long(_) => LogicalType::Integer {
+                bit_width: 64,
+                is_signed: true,
+            },
+            Field::UByte(_) => LogicalType::Integer {
+                bit_width: 8,
+                is_signed: false,
+            },
+            Field::UShort(_) => LogicalType::Integer {
+                bit_width: 16,
+                is_signed: false,
+            },
+            Field::UInt(_) => LogicalType::Integer {
+                bit_width: 32,
+                is_signed: false,
+            },
+            Field::ULong(_) => LogicalType::Integer {
+                bit_width: 64,
+                is_signed: false,
+            },
+            Field::Float16(_) => LogicalType::Float16,
+            Field::Float(_) => LogicalType::Decimal {
+                scale: 7,
+                precision: 7,
+            },
+            Field::Double(_) => LogicalType::Decimal {
+                scale: 15,
+                precision: 15,
+            },
+            Field::Decimal(decimal) => LogicalType::Decimal {
+                scale: decimal.scale(),
+                precision: decimal.precision(),
+            },
+            Field::Str(_) => LogicalType::String,
+            Field::Bytes(b) => {
+                if b.data().len() == 16 && uuid::Uuid::from_slice(b.as_bytes()).is_ok() {
+                    LogicalType::Uuid
+                } else {
+                    LogicalType::Unknown
+                }
+            }
+            Field::Date(_) => LogicalType::Date,
+            Field::TimestampMillis(_) => LogicalType::Timestamp {
+                is_adjusted_to_u_t_c: true,
+                unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
+            },
+            Field::TimestampMicros(_) => LogicalType::Timestamp {
+                is_adjusted_to_u_t_c: true,
+                unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
+            },
+            Field::Group(_) => LogicalType::Unknown,
+            Field::ListInternal(_) => LogicalType::List,
+            Field::MapInternal(_) => LogicalType::Map,
+        })
+    }
+}
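
Taken together, these changes replace the old `ParquetField(Field, bool)` tuple struct with a struct carrying the column's Parquet type metadata, which is what lets the `Field::Bytes` arm above distinguish UUID columns from ordinary binary. A hedged usage sketch (the `render_uuid` helper is illustrative, not gem code):

```rust
use parquet::basic::{ConvertedType, LogicalType};
use parquet::record::Field;

// Mirror of the struct introduced in this release (fields as in the diff).
pub struct ParquetField {
    pub field: Field,
    pub converted_type: ConvertedType,
    pub logical_type: Option<LogicalType>,
    pub strict: bool,
}

// Sketch: a Bytes field tagged LogicalType::Uuid becomes a canonical UUID
// string; everything else keeps its existing conversion path.
fn render_uuid(f: &ParquetField) -> Option<String> {
    match (&f.field, &f.logical_type) {
        (Field::Bytes(b), Some(LogicalType::Uuid)) => {
            uuid::Uuid::from_slice(b.data()).ok().map(|u| u.to_string())
        }
        _ => None,
    }
}
```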
@@ -1,3 +1,3 @@
 module Parquet
-  VERSION = "0.5.6"
+  VERSION = "0.5.7"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: parquet
 version: !ruby/object:Gem::Version
-  version: 0.5.6
+  version: 0.5.7
 platform: ruby
 authors:
 - Nathan Jaremko
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-05-15 00:00:00.000000000 Z
+date: 2025-05-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rb_sys