parquet 0.5.10 → 0.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/parquet/src/reader/common.rs +11 -65
- data/ext/parquet/src/reader/mod.rs +2 -7
- data/ext/parquet/src/reader/unified/mod.rs +14 -82
- data/ext/parquet/src/types/core_types.rs +2 -0
- data/ext/parquet/src/types/mod.rs +3 -7
- data/ext/parquet/src/types/parquet_value.rs +108 -4
- data/ext/parquet/src/types/schema_node.rs +8 -0
- data/ext/parquet/src/types/timestamp.rs +222 -25
- data/ext/parquet/src/types/type_conversion.rs +204 -0
- data/ext/parquet/src/types/writer_types.rs +6 -1
- data/ext/parquet/src/writer/mod.rs +10 -0
- data/ext/parquet/src/writer/write_columns.rs +6 -0
- data/ext/parquet/src/writer/write_rows.rs +5 -3
- data/lib/parquet/version.rb +1 -1
- metadata +2 -4
- data/ext/parquet/src/reader/arrow_reader.rs +0 -579
- data/ext/parquet/src/reader/format_detector.rs +0 -69
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f2474bf56190257826281d5135739d010ad3a8e51a30eea807d03fc147f7300
|
4
|
+
data.tar.gz: 880084ad0ceb3836195588ce834583359ecf65304e826fd3b025590b960fed37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71348a0d7a46fdb32467a15466201898f8752ec232fe279d30e30631a0d876a639474cfc492c92316c131d7ed057ded4eb9d8a2cbb4b13fb81ebd46de806aa51
|
7
|
+
data.tar.gz: 476b7f307813c3163088557b0b5af62117a9ccb3bb964d5d5e072d2634a9ad11eab3712b4d6585f7007fbbd5f872ebf4d2d5b296de885048a04f6313209dd179
|
data/README.md
CHANGED
data/ext/parquet/src/reader/common.rs
CHANGED
@@ -12,81 +12,27 @@ use magnus::value::ReprValue;
|
|
12
12
|
use magnus::{Error as MagnusError, Ruby, Value};
|
13
13
|
|
14
14
|
use crate::header_cache::StringCache;
|
15
|
-
use crate::logger::RubyLogger;
|
16
15
|
use crate::ruby_reader::{RubyReader, ThreadSafeRubyReader};
|
17
16
|
use crate::types::{ParquetGemError, TryIntoValue};
|
18
17
|
use crate::ColumnRecord;
|
19
18
|
|
20
|
-
|
21
|
-
|
22
|
-
///
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
}
|
27
|
-
|
28
|
-
/// Opens a data file (Parquet or Arrow) for reading, automatically detecting the format
|
29
|
-
pub fn open_data_source(
|
19
|
+
/// Opens a parquet file or IO-like object for reading
|
20
|
+
///
|
21
|
+
/// This function handles both file paths (as strings) and IO-like objects,
|
22
|
+
/// returning either a File or a ThreadSafeRubyReader that can be used with
|
23
|
+
/// parquet readers.
|
24
|
+
pub fn open_parquet_source(
|
30
25
|
ruby: Rc<Ruby>,
|
31
26
|
to_read: Value,
|
32
|
-
|
33
|
-
) -> Result<DataSource, ParquetGemError> {
|
27
|
+
) -> Result<Either<File, ThreadSafeRubyReader>, ParquetGemError> {
|
34
28
|
if to_read.is_kind_of(ruby.class_string()) {
|
35
29
|
let path_string = to_read.to_r_string()?;
|
36
30
|
let file_path = unsafe { path_string.as_str()? };
|
37
|
-
|
38
|
-
|
39
|
-
let format_hint = detect_format_from_extension(file_path);
|
40
|
-
|
41
|
-
let mut file = File::open(file_path).map_err(ParquetGemError::from)?;
|
42
|
-
|
43
|
-
// Detect actual format from file content
|
44
|
-
let format = detect_file_format(&mut file)?;
|
45
|
-
|
46
|
-
// Warn if extension doesn't match content
|
47
|
-
if let Some(hint) = format_hint {
|
48
|
-
if hint != format {
|
49
|
-
ruby_logger.warn(|| {
|
50
|
-
format!(
|
51
|
-
"Extension implied format {:?} but actual format is {:?}",
|
52
|
-
hint, format
|
53
|
-
)
|
54
|
-
})?;
|
55
|
-
}
|
56
|
-
}
|
57
|
-
|
58
|
-
match format {
|
59
|
-
FileFormat::Parquet => Ok(DataSource::Parquet(Either::Left(file))),
|
60
|
-
FileFormat::Arrow => Ok(DataSource::Arrow(Either::Left(file))),
|
61
|
-
}
|
31
|
+
let file = File::open(file_path).map_err(ParquetGemError::from)?;
|
32
|
+
Ok(Either::Left(file))
|
62
33
|
} else {
|
63
|
-
|
64
|
-
|
65
|
-
use tempfile::NamedTempFile;
|
66
|
-
|
67
|
-
let mut readable = RubyReader::new(ruby.clone(), to_read)?;
|
68
|
-
let mut temp_file = NamedTempFile::new().map_err(ParquetGemError::from)?;
|
69
|
-
|
70
|
-
// Copy the entire content to the temporary file
|
71
|
-
let mut buffer = vec![0u8; 8192];
|
72
|
-
loop {
|
73
|
-
let bytes_read = readable.read(&mut buffer)?;
|
74
|
-
if bytes_read == 0 {
|
75
|
-
break;
|
76
|
-
}
|
77
|
-
temp_file.write_all(&buffer[..bytes_read])?;
|
78
|
-
}
|
79
|
-
temp_file.flush()?;
|
80
|
-
|
81
|
-
// Detect format from the temporary file
|
82
|
-
let mut file = temp_file.reopen()?;
|
83
|
-
let format = detect_file_format(&mut file)?;
|
84
|
-
|
85
|
-
// Use the temporary file as the source
|
86
|
-
match format {
|
87
|
-
FileFormat::Parquet => Ok(DataSource::Parquet(Either::Left(file))),
|
88
|
-
FileFormat::Arrow => Ok(DataSource::Arrow(Either::Left(file))),
|
89
|
-
}
|
34
|
+
let readable = ThreadSafeRubyReader::new(RubyReader::new(ruby, to_read)?);
|
35
|
+
Ok(Either::Right(readable))
|
90
36
|
}
|
91
37
|
}
|
92
38
|
|
data/ext/parquet/src/reader/mod.rs
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
mod arrow_reader;
|
2
1
|
mod common;
|
3
|
-
mod format_detector;
|
4
2
|
mod parquet_column_reader;
|
5
3
|
mod parquet_row_reader;
|
6
4
|
mod unified;
|
@@ -190,10 +188,7 @@ pub fn parse_metadata(_rb_self: Value, args: &[Value]) -> Result<Value, MagnusEr
|
|
190
188
|
if args.len() != 1 {
|
191
189
|
return Err(MagnusError::new(
|
192
190
|
magnus::exception::arg_error(),
|
193
|
-
format!(
|
194
|
-
"metadata expects exactly 1 argument (file path or IO-like object), got {}",
|
195
|
-
args.len()
|
196
|
-
),
|
191
|
+
format!("metadata expects exactly 1 argument (file path or IO-like object), got {}", args.len()),
|
197
192
|
));
|
198
193
|
}
|
199
194
|
|
@@ -213,4 +208,4 @@ pub fn parse_metadata(_rb_self: Value, args: &[Value]) -> Result<Value, MagnusEr
|
|
213
208
|
let metadata = reader.finish().map_err(ParquetGemError::Parquet)?;
|
214
209
|
|
215
210
|
Ok(RubyParquetMetaData(metadata).try_into_value_with(&ruby)?)
|
216
|
-
}
|
211
|
+
}
|
data/ext/parquet/src/reader/unified/mod.rs
CHANGED
@@ -13,11 +13,8 @@ use std::collections::HashMap;
|
|
13
13
|
use std::rc::Rc;
|
14
14
|
use std::sync::OnceLock;
|
15
15
|
|
16
|
-
use super::arrow_reader::{
|
17
|
-
process_arrow_column_data, process_arrow_file_column_data, process_arrow_row_data,
|
18
|
-
};
|
19
16
|
use super::common::{
|
20
|
-
create_batch_reader, handle_block_or_enum, handle_empty_file,
|
17
|
+
create_batch_reader, handle_block_or_enum, handle_empty_file, open_parquet_source,
|
21
18
|
};
|
22
19
|
use crate::types::ArrayWrapper;
|
23
20
|
|
@@ -103,99 +100,34 @@ pub fn parse_parquet_unified(
|
|
103
100
|
}
|
104
101
|
}
|
105
102
|
|
106
|
-
// Open the
|
107
|
-
let source =
|
103
|
+
// Open the Parquet source
|
104
|
+
let source = open_parquet_source(ruby.clone(), to_read)?;
|
108
105
|
|
109
|
-
// Based on the
|
110
|
-
match
|
111
|
-
|
112
|
-
// Handle
|
106
|
+
// Based on the parser type, handle the data differently
|
107
|
+
match parser_type {
|
108
|
+
ParserType::Row { strict } => {
|
109
|
+
// Handle row-based parsing
|
113
110
|
process_row_data(
|
114
111
|
ruby.clone(),
|
115
|
-
|
112
|
+
source,
|
116
113
|
&columns,
|
117
114
|
result_type,
|
118
|
-
|
115
|
+
strict,
|
119
116
|
&ruby_logger,
|
120
117
|
)?;
|
121
118
|
}
|
122
|
-
|
123
|
-
// Handle
|
119
|
+
ParserType::Column { batch_size, strict } => {
|
120
|
+
// Handle column-based parsing
|
124
121
|
process_column_data(
|
125
122
|
ruby.clone(),
|
126
|
-
|
123
|
+
source,
|
127
124
|
&columns,
|
128
125
|
result_type,
|
129
|
-
|
130
|
-
|
126
|
+
batch_size,
|
127
|
+
strict,
|
131
128
|
&ruby_logger,
|
132
129
|
)?;
|
133
130
|
}
|
134
|
-
(DataSource::Arrow(reader), ParserType::Row { strict }) => {
|
135
|
-
// Handle Arrow row-based parsing
|
136
|
-
match reader {
|
137
|
-
Either::Left(file) => {
|
138
|
-
// For seekable files, use FileReader which handles IPC file format
|
139
|
-
use arrow_ipc::reader::FileReader;
|
140
|
-
let file_reader = FileReader::try_new(file, None)
|
141
|
-
.map_err(|e| ParquetGemError::ArrowIpc(e.to_string()))?;
|
142
|
-
|
143
|
-
use super::arrow_reader::process_arrow_file_row_data;
|
144
|
-
process_arrow_file_row_data(
|
145
|
-
ruby.clone(),
|
146
|
-
file_reader,
|
147
|
-
&columns,
|
148
|
-
result_type,
|
149
|
-
*strict,
|
150
|
-
&ruby_logger,
|
151
|
-
)?;
|
152
|
-
}
|
153
|
-
Either::Right(readable) => {
|
154
|
-
use arrow_ipc::reader::StreamReader;
|
155
|
-
let stream_reader = StreamReader::try_new(readable, None)
|
156
|
-
.map_err(|e| ParquetGemError::ArrowIpc(e.to_string()))?;
|
157
|
-
process_arrow_row_data(
|
158
|
-
ruby.clone(),
|
159
|
-
stream_reader,
|
160
|
-
&columns,
|
161
|
-
result_type,
|
162
|
-
*strict,
|
163
|
-
&ruby_logger,
|
164
|
-
)?;
|
165
|
-
}
|
166
|
-
}
|
167
|
-
}
|
168
|
-
(DataSource::Arrow(reader), ParserType::Column { batch_size, strict }) => {
|
169
|
-
// Handle Arrow column-based parsing
|
170
|
-
match reader {
|
171
|
-
Either::Left(file) => {
|
172
|
-
// For seekable files, we can use the optimized FileReader
|
173
|
-
process_arrow_file_column_data(
|
174
|
-
ruby.clone(),
|
175
|
-
file,
|
176
|
-
&columns,
|
177
|
-
result_type,
|
178
|
-
*batch_size,
|
179
|
-
*strict,
|
180
|
-
&ruby_logger,
|
181
|
-
)?;
|
182
|
-
}
|
183
|
-
Either::Right(readable) => {
|
184
|
-
use arrow_ipc::reader::StreamReader;
|
185
|
-
let stream_reader = StreamReader::try_new(readable, None)
|
186
|
-
.map_err(|e| ParquetGemError::ArrowIpc(e.to_string()))?;
|
187
|
-
process_arrow_column_data(
|
188
|
-
ruby.clone(),
|
189
|
-
stream_reader,
|
190
|
-
&columns,
|
191
|
-
result_type,
|
192
|
-
*batch_size,
|
193
|
-
*strict,
|
194
|
-
&ruby_logger,
|
195
|
-
)?;
|
196
|
-
}
|
197
|
-
}
|
198
|
-
}
|
199
131
|
}
|
200
132
|
|
201
133
|
Ok(ruby.qnil().into_value_with(&ruby))
|
data/ext/parquet/src/types/mod.rs
CHANGED
@@ -25,9 +25,9 @@ use arrow_array::cast::downcast_array;
|
|
25
25
|
use arrow_array::{
|
26
26
|
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array,
|
27
27
|
Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
|
28
|
-
ListArray, NullArray, StringArray, StructArray,
|
29
|
-
TimestampMillisecondArray, TimestampNanosecondArray,
|
30
|
-
UInt32Array, UInt64Array, UInt8Array,
|
28
|
+
ListArray, NullArray, StringArray, StructArray, Time32MillisecondArray, Time64MicrosecondArray,
|
29
|
+
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
30
|
+
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
|
31
31
|
};
|
32
32
|
use arrow_schema::{DataType, TimeUnit};
|
33
33
|
use magnus::{value::ReprValue, Error as MagnusError, IntoValue, Ruby, Value};
|
@@ -55,10 +55,6 @@ pub enum ParquetGemError {
|
|
55
55
|
Parquet(#[from] parquet::errors::ParquetError),
|
56
56
|
#[error("Arrow error: {0}")]
|
57
57
|
Arrow(#[from] arrow_schema::ArrowError),
|
58
|
-
#[error("Arrow IPC error: {0}")]
|
59
|
-
ArrowIpc(String),
|
60
|
-
#[error("Unknown file format")]
|
61
|
-
UnknownFormat,
|
62
58
|
#[error("UTF-8 error: {0}")]
|
63
59
|
Utf8Error(#[from] simdutf8::basic::Utf8Error),
|
64
60
|
#[error("Jiff error: {0}")]
|
data/ext/parquet/src/types/parquet_value.rs
CHANGED
@@ -29,6 +29,8 @@ pub enum ParquetValue {
|
|
29
29
|
TimestampMillis(i64, Option<Arc<str>>),
|
30
30
|
TimestampMicros(i64, Option<Arc<str>>),
|
31
31
|
TimestampNanos(i64, Option<Arc<str>>),
|
32
|
+
TimeMillis(i32), // Time of day in milliseconds since midnight
|
33
|
+
TimeMicros(i64), // Time of day in microseconds since midnight
|
32
34
|
List(Vec<ParquetValue>), // A list of values (can be empty or have null items)
|
33
35
|
// We're not using a separate NilList type anymore - we'll handle nil lists elsewhere
|
34
36
|
Map(HashMap<ParquetValue, ParquetValue>),
|
@@ -108,6 +110,8 @@ impl PartialEq for ParquetValue {
|
|
108
110
|
(ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
|
109
111
|
(ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
|
110
112
|
(ParquetValue::TimestampNanos(a, _), ParquetValue::TimestampNanos(b, _)) => a == b,
|
113
|
+
(ParquetValue::TimeMillis(a), ParquetValue::TimeMillis(b)) => a == b,
|
114
|
+
(ParquetValue::TimeMicros(a), ParquetValue::TimeMicros(b)) => a == b,
|
111
115
|
(ParquetValue::List(a), ParquetValue::List(b)) => a == b,
|
112
116
|
(ParquetValue::Null, ParquetValue::Null) => true,
|
113
117
|
_ => false,
|
@@ -160,6 +164,8 @@ impl std::hash::Hash for ParquetValue {
|
|
160
164
|
ts.hash(state);
|
161
165
|
tz.hash(state);
|
162
166
|
}
|
167
|
+
ParquetValue::TimeMillis(t) => t.hash(state),
|
168
|
+
ParquetValue::TimeMicros(t) => t.hash(state),
|
163
169
|
ParquetValue::List(l) => l.hash(state),
|
164
170
|
ParquetValue::Map(m) => {
|
165
171
|
for (k, v) in m {
|
@@ -224,6 +230,38 @@ impl TryIntoValue for ParquetValue {
|
|
224
230
|
timestamp @ ParquetValue::TimestampNanos(_, _) => {
|
225
231
|
impl_timestamp_conversion!(timestamp, TimestampNanos, handle)
|
226
232
|
}
|
233
|
+
ParquetValue::TimeMillis(millis) => {
|
234
|
+
// Convert time of day in milliseconds to a Ruby Time object
|
235
|
+
// Use epoch date (1970-01-01) with the given time
|
236
|
+
let total_seconds = millis / 1000;
|
237
|
+
let ms = millis % 1000;
|
238
|
+
let hours = total_seconds / 3600;
|
239
|
+
let minutes = (total_seconds % 3600) / 60;
|
240
|
+
let seconds = total_seconds % 60;
|
241
|
+
|
242
|
+
// Create a Time object for 1970-01-01 with the given time
|
243
|
+
let time_class = handle.class_time();
|
244
|
+
let time = time_class.funcall::<_, _, Value>(
|
245
|
+
"new",
|
246
|
+
(1970, 1, 1, hours, minutes, seconds, ms * 1000), // Ruby expects microseconds
|
247
|
+
)?;
|
248
|
+
Ok(time.into_value_with(handle))
|
249
|
+
}
|
250
|
+
ParquetValue::TimeMicros(micros) => {
|
251
|
+
// Convert time of day in microseconds to a Ruby Time object
|
252
|
+
// Use epoch date (1970-01-01) with the given time
|
253
|
+
let total_seconds = micros / 1_000_000;
|
254
|
+
let us = micros % 1_000_000;
|
255
|
+
let hours = total_seconds / 3600;
|
256
|
+
let minutes = (total_seconds % 3600) / 60;
|
257
|
+
let seconds = total_seconds % 60;
|
258
|
+
|
259
|
+
// Create a Time object for 1970-01-01 with the given time
|
260
|
+
let time_class = handle.class_time();
|
261
|
+
let time = time_class
|
262
|
+
.funcall::<_, _, Value>("new", (1970, 1, 1, hours, minutes, seconds, us))?;
|
263
|
+
Ok(time.into_value_with(handle))
|
264
|
+
}
|
227
265
|
ParquetValue::List(l) => {
|
228
266
|
// For lists, convert to Ruby array and check for specific cases
|
229
267
|
// when we might need to return nil instead of an empty array
|
@@ -356,12 +394,32 @@ impl ParquetValue {
|
|
356
394
|
Ok(ParquetValue::Date32(v))
|
357
395
|
}
|
358
396
|
PrimitiveType::TimestampMillis => {
|
359
|
-
|
360
|
-
|
397
|
+
if value.is_kind_of(ruby.class_time()) {
|
398
|
+
use crate::types::timestamp::ruby_time_to_timestamp_with_tz;
|
399
|
+
let (v, tz) = ruby_time_to_timestamp_with_tz(value, "millis")?;
|
400
|
+
Ok(ParquetValue::TimestampMillis(v, tz))
|
401
|
+
} else {
|
402
|
+
let v = convert_to_timestamp_millis(ruby, value, format)?;
|
403
|
+
Ok(ParquetValue::TimestampMillis(v, None))
|
404
|
+
}
|
361
405
|
}
|
362
406
|
PrimitiveType::TimestampMicros => {
|
363
|
-
|
364
|
-
|
407
|
+
if value.is_kind_of(ruby.class_time()) {
|
408
|
+
use crate::types::timestamp::ruby_time_to_timestamp_with_tz;
|
409
|
+
let (v, tz) = ruby_time_to_timestamp_with_tz(value, "micros")?;
|
410
|
+
Ok(ParquetValue::TimestampMicros(v, tz))
|
411
|
+
} else {
|
412
|
+
let v = convert_to_timestamp_micros(ruby, value, format)?;
|
413
|
+
Ok(ParquetValue::TimestampMicros(v, None))
|
414
|
+
}
|
415
|
+
}
|
416
|
+
PrimitiveType::TimeMillis => {
|
417
|
+
let v = convert_to_time_millis(ruby, value, format)?;
|
418
|
+
Ok(ParquetValue::TimeMillis(v))
|
419
|
+
}
|
420
|
+
PrimitiveType::TimeMicros => {
|
421
|
+
let v = convert_to_time_micros(ruby, value, format)?;
|
422
|
+
Ok(ParquetValue::TimeMicros(v))
|
365
423
|
}
|
366
424
|
},
|
367
425
|
ParquetSchemaType::List(list_field) => {
|
@@ -980,6 +1038,52 @@ impl<'a> TryFrom<ArrayWrapper<'a>> for ParquetValueVec {
|
|
980
1038
|
tz
|
981
1039
|
)
|
982
1040
|
}
|
1041
|
+
DataType::Time32(TimeUnit::Millisecond) => {
|
1042
|
+
let array = downcast_array::<Time32MillisecondArray>(column.array);
|
1043
|
+
Ok(ParquetValueVec(if array.is_nullable() {
|
1044
|
+
array
|
1045
|
+
.values()
|
1046
|
+
.iter()
|
1047
|
+
.enumerate()
|
1048
|
+
.map(|(i, x)| {
|
1049
|
+
if array.is_null(i) {
|
1050
|
+
ParquetValue::Null
|
1051
|
+
} else {
|
1052
|
+
ParquetValue::TimeMillis(*x)
|
1053
|
+
}
|
1054
|
+
})
|
1055
|
+
.collect()
|
1056
|
+
} else {
|
1057
|
+
array
|
1058
|
+
.values()
|
1059
|
+
.iter()
|
1060
|
+
.map(|x| ParquetValue::TimeMillis(*x))
|
1061
|
+
.collect()
|
1062
|
+
}))
|
1063
|
+
}
|
1064
|
+
DataType::Time64(TimeUnit::Microsecond) => {
|
1065
|
+
let array = downcast_array::<Time64MicrosecondArray>(column.array);
|
1066
|
+
Ok(ParquetValueVec(if array.is_nullable() {
|
1067
|
+
array
|
1068
|
+
.values()
|
1069
|
+
.iter()
|
1070
|
+
.enumerate()
|
1071
|
+
.map(|(i, x)| {
|
1072
|
+
if array.is_null(i) {
|
1073
|
+
ParquetValue::Null
|
1074
|
+
} else {
|
1075
|
+
ParquetValue::TimeMicros(*x)
|
1076
|
+
}
|
1077
|
+
})
|
1078
|
+
.collect()
|
1079
|
+
} else {
|
1080
|
+
array
|
1081
|
+
.values()
|
1082
|
+
.iter()
|
1083
|
+
.map(|x| ParquetValue::TimeMicros(*x))
|
1084
|
+
.collect()
|
1085
|
+
}))
|
1086
|
+
}
|
983
1087
|
DataType::Float16 => {
|
984
1088
|
let array = downcast_array::<Float16Array>(column.array);
|
985
1089
|
if array.is_nullable() {
|
data/ext/parquet/src/types/schema_node.rs
CHANGED
@@ -295,6 +295,8 @@ fn parse_primitive_type(s: &str) -> Option<PrimitiveType> {
|
|
295
295
|
"date" | "date32" => Some(PrimitiveType::Date32),
|
296
296
|
"timestamp_millis" | "timestamp_ms" => Some(PrimitiveType::TimestampMillis),
|
297
297
|
"timestamp_micros" | "timestamp_us" => Some(PrimitiveType::TimestampMicros),
|
298
|
+
"time_millis" | "time_ms" => Some(PrimitiveType::TimeMillis),
|
299
|
+
"time_micros" | "time_us" => Some(PrimitiveType::TimeMicros),
|
298
300
|
"decimal" => Some(PrimitiveType::Decimal128(38, 0)), // Maximum precision, scale 0
|
299
301
|
"decimal256" => Some(PrimitiveType::Decimal256(38, 0)), // Maximum precision, scale 0
|
300
302
|
_ => None,
|
@@ -337,6 +339,12 @@ pub fn schema_node_to_arrow_field(node: &SchemaNode) -> ArrowField {
|
|
337
339
|
PrimitiveType::TimestampMicros => {
|
338
340
|
ArrowDataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
|
339
341
|
}
|
342
|
+
PrimitiveType::TimeMillis => {
|
343
|
+
ArrowDataType::Time32(arrow_schema::TimeUnit::Millisecond)
|
344
|
+
}
|
345
|
+
PrimitiveType::TimeMicros => {
|
346
|
+
ArrowDataType::Time64(arrow_schema::TimeUnit::Microsecond)
|
347
|
+
}
|
340
348
|
};
|
341
349
|
ArrowField::new(name, dt, *nullable)
|
342
350
|
}
|