parquet 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +15 -8
- data/ext/parquet/Cargo.toml +4 -3
- data/ext/parquet/src/reader/unified/mod.rs +55 -20
- data/ext/parquet/src/types/mod.rs +2 -0
- data/ext/parquet/src/types/record_types.rs +160 -9
- data/lib/parquet/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8a79e74af0419282904a0041c09509520f64ce1e504e133237f4b87697dce14
|
4
|
+
data.tar.gz: 63391ffff73907caccc142f37550e85c12826f302f00ac726f826af391f8d8cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cddb7c6711e7e49ea785f6c0ab5ae3c40181756ad0e3fc23f298c291b725b178fdfbe5a8430fd9be10591b09e1b963255cb50637743054fe2173c9798e1e8bcc
|
7
|
+
data.tar.gz: 927a112ff1994800b3ed989f5000ed2a43438cebff886a545d0dd22018731b042b9052ead5b14983faf50fffc593cdd7512dd764bfed4de8ffa7781e6f2fda1a
|
data/Cargo.lock
CHANGED
@@ -64,7 +64,7 @@ dependencies = [
|
|
64
64
|
[[package]]
|
65
65
|
name = "arrow-array"
|
66
66
|
version = "55.1.0"
|
67
|
-
source = "git+https://github.com/
|
67
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
68
68
|
dependencies = [
|
69
69
|
"ahash",
|
70
70
|
"arrow-buffer",
|
@@ -79,7 +79,7 @@ dependencies = [
|
|
79
79
|
[[package]]
|
80
80
|
name = "arrow-buffer"
|
81
81
|
version = "55.1.0"
|
82
|
-
source = "git+https://github.com/
|
82
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
83
83
|
dependencies = [
|
84
84
|
"bytes",
|
85
85
|
"half",
|
@@ -89,7 +89,7 @@ dependencies = [
|
|
89
89
|
[[package]]
|
90
90
|
name = "arrow-cast"
|
91
91
|
version = "55.1.0"
|
92
|
-
source = "git+https://github.com/
|
92
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
93
93
|
dependencies = [
|
94
94
|
"arrow-array",
|
95
95
|
"arrow-buffer",
|
@@ -108,7 +108,7 @@ dependencies = [
|
|
108
108
|
[[package]]
|
109
109
|
name = "arrow-data"
|
110
110
|
version = "55.1.0"
|
111
|
-
source = "git+https://github.com/
|
111
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
112
112
|
dependencies = [
|
113
113
|
"arrow-buffer",
|
114
114
|
"arrow-schema",
|
@@ -119,7 +119,7 @@ dependencies = [
|
|
119
119
|
[[package]]
|
120
120
|
name = "arrow-ipc"
|
121
121
|
version = "55.1.0"
|
122
|
-
source = "git+https://github.com/
|
122
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
123
123
|
dependencies = [
|
124
124
|
"arrow-array",
|
125
125
|
"arrow-buffer",
|
@@ -131,12 +131,12 @@ dependencies = [
|
|
131
131
|
[[package]]
|
132
132
|
name = "arrow-schema"
|
133
133
|
version = "55.1.0"
|
134
|
-
source = "git+https://github.com/
|
134
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
135
135
|
|
136
136
|
[[package]]
|
137
137
|
name = "arrow-select"
|
138
138
|
version = "55.1.0"
|
139
|
-
source = "git+https://github.com/
|
139
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
140
140
|
dependencies = [
|
141
141
|
"ahash",
|
142
142
|
"arrow-array",
|
@@ -844,12 +844,13 @@ dependencies = [
|
|
844
844
|
"simdutf8",
|
845
845
|
"tempfile",
|
846
846
|
"thiserror",
|
847
|
+
"uuid",
|
847
848
|
]
|
848
849
|
|
849
850
|
[[package]]
|
850
851
|
name = "parquet"
|
851
852
|
version = "55.1.0"
|
852
|
-
source = "git+https://github.com/
|
853
|
+
source = "git+https://github.com/apache/arrow-rs?branch=main#e9df239980baa6d0f7eb4384eb01078bdd9b1701"
|
853
854
|
dependencies = [
|
854
855
|
"ahash",
|
855
856
|
"arrow-array",
|
@@ -1230,6 +1231,12 @@ version = "1.0.17"
|
|
1230
1231
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1231
1232
|
checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
|
1232
1233
|
|
1234
|
+
[[package]]
|
1235
|
+
name = "uuid"
|
1236
|
+
version = "1.16.0"
|
1237
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1238
|
+
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
|
1239
|
+
|
1233
1240
|
[[package]]
|
1234
1241
|
name = "version_check"
|
1235
1242
|
version = "0.9.5"
|
data/ext/parquet/Cargo.toml
CHANGED
@@ -11,20 +11,21 @@ rb-sys-env = "^0.2"
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
-
arrow-array = { git = "https://github.com/
|
15
|
-
arrow-schema = { git = "https://github.com/
|
14
|
+
arrow-array = { git = "https://github.com/apache/arrow-rs", branch = "main" }
|
15
|
+
arrow-schema = { git = "https://github.com/apache/arrow-rs", branch = "main" }
|
16
16
|
bytes = "^1.9"
|
17
17
|
either = "1.9"
|
18
18
|
itertools = "^0.14"
|
19
19
|
jiff = "0.2"
|
20
20
|
magnus = { version = "0.7", features = ["rb-sys"] }
|
21
|
-
parquet = { git = "https://github.com/
|
21
|
+
parquet = { git = "https://github.com/apache/arrow-rs", branch = "main", features = ["json"] }
|
22
22
|
rand = "0.9"
|
23
23
|
rb-sys = "^0.9"
|
24
24
|
simdutf8 = "0.1.5"
|
25
25
|
tempfile = "^3.15"
|
26
26
|
thiserror = "2.0"
|
27
27
|
num = "0.4.3"
|
28
|
+
uuid = "1.16.0"
|
28
29
|
|
29
30
|
[target.'cfg(target_os = "linux")'.dependencies]
|
30
31
|
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
data/ext/parquet/src/reader/unified/mod.rs
CHANGED
@@ -2,8 +2,8 @@ use crate::header_cache::StringCache;
|
|
2
2
|
use crate::logger::RubyLogger;
|
3
3
|
use crate::types::TryIntoValue;
|
4
4
|
use crate::{
|
5
|
-
create_column_enumerator, create_row_enumerator,
|
6
|
-
|
5
|
+
create_column_enumerator, create_row_enumerator, ColumnEnumeratorArgs, ColumnRecord,
|
6
|
+
ParquetField, ParquetGemError, ParquetValueVec, ParserResultType, RowEnumeratorArgs, RowRecord,
|
7
7
|
};
|
8
8
|
use ahash::RandomState;
|
9
9
|
use either::Either;
|
@@ -13,10 +13,10 @@ use std::collections::HashMap;
|
|
13
13
|
use std::rc::Rc;
|
14
14
|
use std::sync::OnceLock;
|
15
15
|
|
16
|
-
use crate::types::ArrayWrapper;
|
17
16
|
use super::common::{
|
18
17
|
create_batch_reader, handle_block_or_enum, handle_empty_file, open_parquet_source,
|
19
18
|
};
|
19
|
+
use crate::types::ArrayWrapper;
|
20
20
|
|
21
21
|
/// A unified parser configuration that can be used for both row and column parsing
|
22
22
|
pub enum ParserType {
|
@@ -53,11 +53,11 @@ pub fn parse_parquet_unified(
|
|
53
53
|
} = args;
|
54
54
|
|
55
55
|
// Initialize the logger if provided
|
56
|
-
let ruby_logger = RubyLogger::new(&ruby, logger
|
57
|
-
|
56
|
+
let ruby_logger = RubyLogger::new(&ruby, logger)?;
|
57
|
+
|
58
58
|
// Clone values for the closure to avoid move issues
|
59
59
|
let columns_clone = columns.clone();
|
60
|
-
|
60
|
+
|
61
61
|
// Determine if we're handling rows or columns for enumerator creation
|
62
62
|
match &parser_type {
|
63
63
|
ParserType::Row { strict } => {
|
@@ -75,13 +75,13 @@ pub fn parse_parquet_unified(
|
|
75
75
|
})? {
|
76
76
|
return Ok(enum_value);
|
77
77
|
}
|
78
|
-
}
|
78
|
+
}
|
79
79
|
ParserType::Column { batch_size, strict } => {
|
80
80
|
// For column-based parsing, log the batch size if present
|
81
81
|
if let Some(ref bs) = batch_size {
|
82
82
|
ruby_logger.debug(|| format!("Using batch size: {}", bs))?;
|
83
83
|
}
|
84
|
-
|
84
|
+
|
85
85
|
// Handle block or create column enumerator
|
86
86
|
if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
|
87
87
|
create_column_enumerator(ColumnEnumeratorArgs {
|
@@ -102,19 +102,34 @@ pub fn parse_parquet_unified(
|
|
102
102
|
|
103
103
|
// Open the Parquet source
|
104
104
|
let source = open_parquet_source(ruby.clone(), to_read)?;
|
105
|
-
|
105
|
+
|
106
106
|
// Based on the parser type, handle the data differently
|
107
107
|
match parser_type {
|
108
108
|
ParserType::Row { strict } => {
|
109
109
|
// Handle row-based parsing
|
110
|
-
process_row_data(
|
111
|
-
|
110
|
+
process_row_data(
|
111
|
+
ruby.clone(),
|
112
|
+
source,
|
113
|
+
&columns,
|
114
|
+
result_type,
|
115
|
+
strict,
|
116
|
+
&ruby_logger,
|
117
|
+
)?;
|
118
|
+
}
|
112
119
|
ParserType::Column { batch_size, strict } => {
|
113
120
|
// Handle column-based parsing
|
114
|
-
process_column_data(
|
121
|
+
process_column_data(
|
122
|
+
ruby.clone(),
|
123
|
+
source,
|
124
|
+
&columns,
|
125
|
+
result_type,
|
126
|
+
batch_size,
|
127
|
+
strict,
|
128
|
+
&ruby_logger,
|
129
|
+
)?;
|
115
130
|
}
|
116
131
|
}
|
117
|
-
|
132
|
+
|
118
133
|
Ok(ruby.qnil().into_value_with(&ruby))
|
119
134
|
}
|
120
135
|
|
@@ -129,7 +144,7 @@ fn process_row_data(
|
|
129
144
|
) -> Result<(), ParquetGemError> {
|
130
145
|
use parquet::file::reader::{FileReader, SerializedFileReader};
|
131
146
|
use parquet::record::reader::RowIter as ParquetRowIter;
|
132
|
-
|
147
|
+
|
133
148
|
// Create the row-based reader
|
134
149
|
let reader: Box<dyn FileReader> = match source {
|
135
150
|
Either::Left(file) => {
|
@@ -174,8 +189,19 @@ fn process_row_data(
|
|
174
189
|
|
175
190
|
let mut map =
|
176
191
|
HashMap::with_capacity_and_hasher(headers.len(), RandomState::default());
|
177
|
-
for (i, (_, v)
|
178
|
-
|
192
|
+
for (i, ((_, v), t)) in
|
193
|
+
row.get_column_iter().zip(schema.get_fields()).enumerate()
|
194
|
+
{
|
195
|
+
let type_info = t.get_basic_info();
|
196
|
+
map.insert(
|
197
|
+
headers[i],
|
198
|
+
ParquetField {
|
199
|
+
field: v.clone(),
|
200
|
+
converted_type: type_info.converted_type(),
|
201
|
+
logical_type: type_info.logical_type().clone(),
|
202
|
+
strict,
|
203
|
+
},
|
204
|
+
);
|
179
205
|
}
|
180
206
|
map
|
181
207
|
})
|
@@ -193,8 +219,14 @@ fn process_row_data(
|
|
193
219
|
row.map(|row| {
|
194
220
|
let column_count = row.get_column_iter().count();
|
195
221
|
let mut vec = Vec::with_capacity(column_count);
|
196
|
-
for (_, v) in row.get_column_iter() {
|
197
|
-
|
222
|
+
for ((_, v), t) in row.get_column_iter().zip(schema.get_fields()) {
|
223
|
+
let type_info = t.get_basic_info();
|
224
|
+
vec.push(ParquetField {
|
225
|
+
field: v.clone(),
|
226
|
+
converted_type: type_info.converted_type(),
|
227
|
+
logical_type: type_info.logical_type().clone(),
|
228
|
+
strict,
|
229
|
+
});
|
198
230
|
}
|
199
231
|
vec
|
200
232
|
})
|
@@ -309,7 +341,10 @@ fn process_column_data(
|
|
309
341
|
}
|
310
342
|
|
311
343
|
/// Helper function to create a projection schema
|
312
|
-
fn create_projection_schema(
|
344
|
+
fn create_projection_schema(
|
345
|
+
schema: &parquet::schema::types::Type,
|
346
|
+
columns: &[String],
|
347
|
+
) -> parquet::schema::types::Type {
|
313
348
|
if let parquet::schema::types::Type::GroupType { fields, .. } = schema {
|
314
349
|
let projected_fields: Vec<std::sync::Arc<parquet::schema::types::Type>> = fields
|
315
350
|
.iter()
|
@@ -325,4 +360,4 @@ fn create_projection_schema(schema: &parquet::schema::types::Type, columns: &[St
|
|
325
360
|
// Return original schema if not a group type
|
326
361
|
schema.clone()
|
327
362
|
}
|
328
|
-
}
|
363
|
+
}
|
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
use std::sync::OnceLock;
|
2
2
|
|
3
3
|
use itertools::Itertools;
|
4
|
-
use parquet::
|
4
|
+
use parquet::{
|
5
|
+
basic::{ConvertedType, LogicalType},
|
6
|
+
data_type::AsBytes,
|
7
|
+
};
|
5
8
|
|
6
9
|
use super::*;
|
7
10
|
|
@@ -44,7 +47,13 @@ pub enum ColumnRecord<S: BuildHasher + Default> {
|
|
44
47
|
}
|
45
48
|
|
46
49
|
#[derive(Debug)]
|
47
|
-
pub struct ParquetField
|
50
|
+
pub struct ParquetField {
|
51
|
+
pub field: Field,
|
52
|
+
#[allow(dead_code)]
|
53
|
+
pub converted_type: ConvertedType,
|
54
|
+
pub logical_type: Option<LogicalType>,
|
55
|
+
pub strict: bool,
|
56
|
+
}
|
48
57
|
|
49
58
|
impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
50
59
|
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
@@ -158,7 +167,7 @@ pub trait TryIntoValue {
|
|
158
167
|
|
159
168
|
impl TryIntoValue for ParquetField {
|
160
169
|
fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
|
161
|
-
match self.
|
170
|
+
match self.field {
|
162
171
|
Field::Null => Ok(handle.qnil().as_value()),
|
163
172
|
Field::Bool(b) => Ok(b.into_value_with(handle)),
|
164
173
|
Field::Short(s) => Ok(s.into_value_with(handle)),
|
@@ -172,7 +181,7 @@ impl TryIntoValue for ParquetField {
|
|
172
181
|
Field::Float(f) => Ok(f.into_value_with(handle)),
|
173
182
|
Field::Double(d) => Ok(d.into_value_with(handle)),
|
174
183
|
Field::Str(s) => {
|
175
|
-
if self.
|
184
|
+
if self.strict {
|
176
185
|
Ok(simdutf8::basic::from_utf8(s.as_bytes())
|
177
186
|
.map_err(ParquetGemError::Utf8Error)
|
178
187
|
.map(|s| s.into_value_with(handle))?)
|
@@ -182,7 +191,15 @@ impl TryIntoValue for ParquetField {
|
|
182
191
|
}
|
183
192
|
}
|
184
193
|
Field::Byte(b) => Ok(b.into_value_with(handle)),
|
185
|
-
Field::Bytes(b) =>
|
194
|
+
Field::Bytes(b) => {
|
195
|
+
if matches!(self.logical_type, Some(parquet::basic::LogicalType::Uuid)) {
|
196
|
+
let bytes = b.as_bytes();
|
197
|
+
let uuid = uuid::Uuid::from_slice(bytes)?;
|
198
|
+
Ok(uuid.to_string().into_value_with(handle))
|
199
|
+
} else {
|
200
|
+
Ok(handle.str_from_slice(b.data()).as_value())
|
201
|
+
}
|
202
|
+
}
|
186
203
|
Field::Date(d) => {
|
187
204
|
let ts = jiff::Timestamp::from_second((d as i64) * 86400)?;
|
188
205
|
let formatted = ts.strftime("%Y-%m-%d").to_string();
|
@@ -206,7 +223,15 @@ impl TryIntoValue for ParquetField {
|
|
206
223
|
let elements = list.elements();
|
207
224
|
let ary = handle.ary_new_capa(elements.len());
|
208
225
|
elements.iter().try_for_each(|e| {
|
209
|
-
ary.push(
|
226
|
+
ary.push(
|
227
|
+
ParquetField {
|
228
|
+
field: e.clone(),
|
229
|
+
logical_type: e.to_logical_type(),
|
230
|
+
converted_type: e.to_converted_type(),
|
231
|
+
strict: self.strict,
|
232
|
+
}
|
233
|
+
.try_into_value_with(handle)?,
|
234
|
+
)?;
|
210
235
|
Ok::<_, ParquetGemError>(())
|
211
236
|
})?;
|
212
237
|
Ok(ary.into_value_with(handle))
|
@@ -220,8 +245,20 @@ impl TryIntoValue for ParquetField {
|
|
220
245
|
|
221
246
|
map.entries().iter().try_for_each(|(k, v)| {
|
222
247
|
hash.aset(
|
223
|
-
ParquetField
|
224
|
-
|
248
|
+
ParquetField {
|
249
|
+
field: k.clone(),
|
250
|
+
converted_type: k.to_converted_type(),
|
251
|
+
logical_type: k.to_logical_type(),
|
252
|
+
strict: self.strict,
|
253
|
+
}
|
254
|
+
.try_into_value_with(handle)?,
|
255
|
+
ParquetField {
|
256
|
+
field: v.clone(),
|
257
|
+
converted_type: v.to_converted_type(),
|
258
|
+
logical_type: v.to_logical_type(),
|
259
|
+
strict: self.strict,
|
260
|
+
}
|
261
|
+
.try_into_value_with(handle)?,
|
225
262
|
)?;
|
226
263
|
Ok::<_, ParquetGemError>(())
|
227
264
|
})?;
|
@@ -278,7 +315,13 @@ impl TryIntoValue for ParquetField {
|
|
278
315
|
row.get_column_iter().try_for_each(|(k, v)| {
|
279
316
|
hash.aset(
|
280
317
|
k.clone().into_value_with(handle),
|
281
|
-
ParquetField
|
318
|
+
ParquetField {
|
319
|
+
field: v.clone(),
|
320
|
+
converted_type: v.to_converted_type(),
|
321
|
+
logical_type: v.to_logical_type(),
|
322
|
+
strict: self.strict,
|
323
|
+
}
|
324
|
+
.try_into_value_with(handle)?,
|
282
325
|
)?;
|
283
326
|
Ok::<_, ParquetGemError>(())
|
284
327
|
})?;
|
@@ -287,3 +330,111 @@ impl TryIntoValue for ParquetField {
|
|
287
330
|
}
|
288
331
|
}
|
289
332
|
}
|
333
|
+
|
334
|
+
trait ToTypeInfo {
|
335
|
+
fn to_converted_type(&self) -> ConvertedType;
|
336
|
+
fn to_logical_type(&self) -> Option<LogicalType>;
|
337
|
+
}
|
338
|
+
|
339
|
+
impl ToTypeInfo for &parquet::record::Field {
|
340
|
+
fn to_converted_type(&self) -> ConvertedType {
|
341
|
+
match self {
|
342
|
+
Field::Null => ConvertedType::NONE,
|
343
|
+
Field::Bool(_) => ConvertedType::INT_8,
|
344
|
+
Field::Byte(_) => ConvertedType::INT_8,
|
345
|
+
Field::Short(_) => ConvertedType::INT_16,
|
346
|
+
Field::Int(_) => ConvertedType::INT_32,
|
347
|
+
Field::Long(_) => ConvertedType::INT_64,
|
348
|
+
Field::UByte(_) => ConvertedType::UINT_8,
|
349
|
+
Field::UShort(_) => ConvertedType::UINT_16,
|
350
|
+
Field::UInt(_) => ConvertedType::UINT_32,
|
351
|
+
Field::ULong(_) => ConvertedType::UINT_64,
|
352
|
+
Field::Float16(_) => ConvertedType::NONE,
|
353
|
+
Field::Float(_) => ConvertedType::NONE,
|
354
|
+
Field::Double(_) => ConvertedType::NONE,
|
355
|
+
Field::Decimal(_) => ConvertedType::DECIMAL,
|
356
|
+
Field::Str(_) => ConvertedType::UTF8,
|
357
|
+
Field::Bytes(_) => ConvertedType::LIST,
|
358
|
+
Field::Date(_) => ConvertedType::DATE,
|
359
|
+
Field::TimestampMillis(_) => ConvertedType::TIMESTAMP_MILLIS,
|
360
|
+
Field::TimestampMicros(_) => ConvertedType::TIMESTAMP_MICROS,
|
361
|
+
Field::Group(_) => ConvertedType::NONE,
|
362
|
+
Field::ListInternal(_) => ConvertedType::LIST,
|
363
|
+
Field::MapInternal(_) => ConvertedType::MAP,
|
364
|
+
}
|
365
|
+
}
|
366
|
+
fn to_logical_type(&self) -> Option<LogicalType> {
|
367
|
+
Some(match self {
|
368
|
+
Field::Null => LogicalType::Unknown,
|
369
|
+
Field::Bool(_) => LogicalType::Integer {
|
370
|
+
bit_width: 1,
|
371
|
+
is_signed: false,
|
372
|
+
},
|
373
|
+
Field::Byte(_) => LogicalType::Integer {
|
374
|
+
bit_width: 8,
|
375
|
+
is_signed: false,
|
376
|
+
},
|
377
|
+
Field::Short(_) => LogicalType::Integer {
|
378
|
+
bit_width: 16,
|
379
|
+
is_signed: true,
|
380
|
+
},
|
381
|
+
Field::Int(_) => LogicalType::Integer {
|
382
|
+
bit_width: 32,
|
383
|
+
is_signed: true,
|
384
|
+
},
|
385
|
+
Field::Long(_) => LogicalType::Integer {
|
386
|
+
bit_width: 64,
|
387
|
+
is_signed: true,
|
388
|
+
},
|
389
|
+
Field::UByte(_) => LogicalType::Integer {
|
390
|
+
bit_width: 8,
|
391
|
+
is_signed: false,
|
392
|
+
},
|
393
|
+
Field::UShort(_) => LogicalType::Integer {
|
394
|
+
bit_width: 16,
|
395
|
+
is_signed: false,
|
396
|
+
},
|
397
|
+
Field::UInt(_) => LogicalType::Integer {
|
398
|
+
bit_width: 32,
|
399
|
+
is_signed: false,
|
400
|
+
},
|
401
|
+
Field::ULong(_) => LogicalType::Integer {
|
402
|
+
bit_width: 64,
|
403
|
+
is_signed: false,
|
404
|
+
},
|
405
|
+
Field::Float16(_) => LogicalType::Float16,
|
406
|
+
Field::Float(_) => LogicalType::Decimal {
|
407
|
+
scale: 7,
|
408
|
+
precision: 7,
|
409
|
+
},
|
410
|
+
Field::Double(_) => LogicalType::Decimal {
|
411
|
+
scale: 15,
|
412
|
+
precision: 15,
|
413
|
+
},
|
414
|
+
Field::Decimal(decimal) => LogicalType::Decimal {
|
415
|
+
scale: decimal.scale(),
|
416
|
+
precision: decimal.precision(),
|
417
|
+
},
|
418
|
+
Field::Str(_) => LogicalType::String,
|
419
|
+
Field::Bytes(b) => {
|
420
|
+
if b.data().len() == 16 && uuid::Uuid::from_slice(b.as_bytes()).is_ok() {
|
421
|
+
LogicalType::Uuid
|
422
|
+
} else {
|
423
|
+
LogicalType::Unknown
|
424
|
+
}
|
425
|
+
}
|
426
|
+
Field::Date(_) => LogicalType::Date,
|
427
|
+
Field::TimestampMillis(_) => LogicalType::Timestamp {
|
428
|
+
is_adjusted_to_u_t_c: true,
|
429
|
+
unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
|
430
|
+
},
|
431
|
+
Field::TimestampMicros(_) => LogicalType::Timestamp {
|
432
|
+
is_adjusted_to_u_t_c: true,
|
433
|
+
unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
|
434
|
+
},
|
435
|
+
Field::Group(_) => LogicalType::Unknown,
|
436
|
+
Field::ListInternal(_) => LogicalType::List,
|
437
|
+
Field::MapInternal(_) => LogicalType::Map,
|
438
|
+
})
|
439
|
+
}
|
440
|
+
}
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-05-
|
11
|
+
date: 2025-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|