parquet 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/reader/mod.rs +2 -1
- data/ext/parquet/src/reader/parquet_column_reader.rs +15 -127
- data/ext/parquet/src/reader/parquet_row_reader.rs +14 -134
- data/ext/parquet/src/reader/unified/mod.rs +328 -0
- data/ext/parquet/src/types/parquet_value.rs +90 -16
- data/ext/parquet/src/types/record_types.rs +28 -4
- data/ext/parquet/src/types/type_conversion.rs +13 -11
- data/lib/parquet/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e2295ee94fe35758ae8e5137070e2206ec1e104aad6b9a0806aa508ad4799247
+  data.tar.gz: 340f86257082bdba22d6ced530ecd1d201c7b4e6d9116eebac41541ba2aaa257
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f333ae2914cdd00468c390e8b3d876aec4e522a546d43ab29db5d777792105a38d2a40c49db0f0afe1e800bf32e54bb4c479441f8f9876937ba59917b444d15a
+  data.tar.gz: da2832c3514729cc0e99e16f70a10bbfc4e9093dc734de55715305121649ebc371dff93a7bb462b97fde27c79ad65cec12c5fa90a47f70bc64153a7fd2ce1a5c
data/ext/parquet/src/reader/mod.rs
CHANGED
@@ -1,6 +1,7 @@
 mod common;
 mod parquet_column_reader;
 mod parquet_row_reader;
+mod unified;
 use std::{fs::File, rc::Rc};
 
 use magnus::{value::ReprValue, Error as MagnusError, Ruby, Value};
@@ -207,4 +208,4 @@ pub fn parse_metadata(_rb_self: Value, args: &[Value]) -> Result<Value, MagnusError> {
     let metadata = reader.finish().map_err(ParquetGemError::Parquet)?;
 
     Ok(RubyParquetMetaData(metadata).try_into_value_with(&ruby)?)
-}
+}
data/ext/parquet/src/reader/parquet_column_reader.rs
CHANGED
@@ -1,21 +1,9 @@
-use crate::
-use crate::
-use crate::
-
-    create_column_enumerator, utils::*, ColumnEnumeratorArgs, ColumnRecord, ParquetValueVec,
-    ParserResultType,
-};
-use ahash::RandomState;
-use either::Either;
-use magnus::IntoValue;
+use crate::reader::unified::{parse_parquet_unified, ParserType, UnifiedParserArgs};
+use crate::utils::*;
+use crate::ParquetGemError;
+
 use magnus::{Error as MagnusError, Ruby, Value};
-use std::collections::HashMap;
 use std::rc::Rc;
-use std::sync::OnceLock;
-
-use super::common::{
-    create_batch_reader, handle_block_or_enum, handle_empty_file, open_parquet_source,
-};
 
 #[inline]
 pub fn parse_parquet_columns(rb_self: Value, args: &[Value]) -> Result<Value, MagnusError> {
@@ -41,116 +29,16 @@ fn parse_parquet_columns_impl(
         logger,
     } = parse_parquet_columns_args(&ruby, args)?;
 
-    //
-
-
-
-
-
-    // Clone values for the closure to avoid move issues
-    let columns_clone = columns.clone();
-
-    // Handle block or create enumerator
-    if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
-        create_column_enumerator(ColumnEnumeratorArgs {
-            rb_self,
+    // Use the unified parsing implementation
+    parse_parquet_unified(
+        ruby,
+        rb_self,
+        UnifiedParserArgs {
             to_read,
             result_type,
-            columns
-            batch_size,
-
-
-
-
-        })? {
-        return Ok(enum_value);
-    }
-
-    let source = open_parquet_source(ruby.clone(), to_read)?;
-
-    // Use the common function to create the batch reader
-
-    let (batch_reader, schema, num_rows) = match source {
-        Either::Left(file) => create_batch_reader(file, &columns, batch_size)?,
-        Either::Right(readable) => create_batch_reader(readable, &columns, batch_size)?,
-    };
-
-    match result_type {
-        ParserResultType::Hash => {
-            // For hash return type, we need to return a hash with column names pointing at empty arrays
-            if handle_empty_file(&ruby, &schema, num_rows)? {
-                return Ok(ruby.qnil().into_value_with(&ruby));
-            }
-
-            let headers = OnceLock::new();
-            let headers_clone = headers.clone();
-            let iter = batch_reader.map(move |batch| {
-                batch.map_err(ParquetGemError::Arrow).and_then(|batch| {
-                    let local_headers = headers_clone
-                        .get_or_init(|| {
-                            let schema = batch.schema();
-                            let fields = schema.fields();
-                            let mut header_string = Vec::with_capacity(fields.len());
-                            for field in fields {
-                                header_string.push(field.name().to_owned());
-                            }
-                            StringCache::intern_many(&header_string)
-                        })
-                        .as_ref()
-                        .map_err(|e| ParquetGemError::HeaderIntern(e.clone()))?;
-
-                    let mut map = HashMap::with_capacity_and_hasher(
-                        local_headers.len(),
-                        RandomState::default(),
-                    );
-
-                    batch
-                        .columns()
-                        .iter()
-                        .enumerate()
-                        .try_for_each(|(i, column)| {
-                            let header = local_headers[i];
-                            let values = ParquetValueVec::try_from(ArrayWrapper {
-                                array: column,
-                                strict,
-                            })?;
-                            map.insert(header, values.into_inner());
-                            Ok::<_, ParquetGemError>(())
-                        })?;
-
-                    Ok(ColumnRecord::Map::<RandomState>(map))
-                })
-            });
-
-            for result in iter {
-                let record = result?;
-                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
-            }
-        }
-        ParserResultType::Array => {
-            let iter = batch_reader.map(|batch| {
-                batch.map_err(ParquetGemError::Arrow).and_then(|batch| {
-                    let vec = batch
-                        .columns()
-                        .iter()
-                        .map(|column| {
-                            let values = ParquetValueVec::try_from(ArrayWrapper {
-                                array: column,
-                                strict,
-                            })?;
-                            Ok::<_, ParquetGemError>(values.into_inner())
-                        })
-                        .collect::<Result<Vec<_>, _>>()?;
-                    Ok(ColumnRecord::Vec::<RandomState>(vec))
-                })
-            });
-
-            for result in iter {
-                let record = result?;
-                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
-            }
-        }
-    }
-
-    Ok(ruby.qnil().into_value_with(&ruby))
-}
+            columns,
+            parser_type: ParserType::Column { batch_size, strict },
+            logger,
+        },
+    )
+}
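Note on the change above: the column entry point no longer contains any iteration logic of its own; it only packs its arguments into `UnifiedParserArgs` and delegates to `parse_parquet_unified` (the row reader below does the same). A minimal, standalone sketch of that refactor shape, with simplified stand-in types rather than the gem's real ones:

// Sketch only: two thin entry points share one core parameterized by an
// enum, mirroring the ParserType / parse_parquet_unified split above.
enum ParserType {
    Row { strict: bool },
    Column { batch_size: Option<usize>, strict: bool },
}

fn parse_unified(parser_type: ParserType) -> Vec<String> {
    match parser_type {
        // Row path: one record per row
        ParserType::Row { strict } => vec![format!("row(strict={})", strict)],
        // Column path: one record per batch of columns
        ParserType::Column { batch_size, strict } => {
            vec![format!("column(batch={:?}, strict={})", batch_size, strict)]
        }
    }
}

// The formerly duplicated implementations reduce to one-line wrappers:
fn parse_rows(strict: bool) -> Vec<String> {
    parse_unified(ParserType::Row { strict })
}

fn parse_columns(batch_size: Option<usize>, strict: bool) -> Vec<String> {
    parse_unified(ParserType::Column { batch_size, strict })
}

fn main() {
    assert_eq!(parse_rows(true), ["row(strict=true)"]);
    assert_eq!(
        parse_columns(Some(100), false),
        ["column(batch=Some(100), strict=false)"]
    );
}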
data/ext/parquet/src/reader/parquet_row_reader.rs
CHANGED
@@ -1,22 +1,9 @@
-use crate::
-use crate::
-use crate::
-
-    create_row_enumerator, utils::*, ParquetField, ParquetGemError, ParserResultType,
-    RowEnumeratorArgs, RowRecord,
-};
-use ahash::RandomState;
-use either::Either;
-use magnus::IntoValue;
+use crate::reader::unified::{parse_parquet_unified, ParserType, UnifiedParserArgs};
+use crate::utils::*;
+use crate::ParquetGemError;
+
 use magnus::{Error as MagnusError, Ruby, Value};
-use parquet::file::reader::{FileReader, SerializedFileReader};
-use parquet::record::reader::RowIter as ParquetRowIter;
-use parquet::schema::types::{Type as SchemaType, TypePtr};
-use std::collections::HashMap;
 use std::rc::Rc;
-use std::sync::OnceLock;
-
-use super::common::{handle_block_or_enum, open_parquet_source};
 
 #[inline]
 pub fn parse_parquet_rows(rb_self: Value, args: &[Value]) -> Result<Value, MagnusError> {
@@ -41,123 +28,16 @@ fn parse_parquet_rows_impl(
         logger,
     } = parse_parquet_rows_args(&ruby, args)?;
 
-    //
-
-
-
-
-
-    // Handle block or create enumerator
-    if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
-        create_row_enumerator(RowEnumeratorArgs {
-            rb_self,
+    // Use the unified parsing implementation
+    parse_parquet_unified(
+        ruby,
+        rb_self,
+        UnifiedParserArgs {
            to_read,
            result_type,
-            columns
-            strict,
+            columns,
+            parser_type: ParserType::Row { strict },
            logger,
-        }
-
-
-        return Ok(enum_value);
-    }
-
-    let source = open_parquet_source(ruby.clone(), to_read)?;
-    let reader: Box<dyn FileReader> = match source {
-        Either::Left(file) => {
-            Box::new(SerializedFileReader::new(file).map_err(ParquetGemError::from)?)
-        }
-        Either::Right(readable) => {
-            Box::new(SerializedFileReader::new(readable).map_err(ParquetGemError::from)?)
-        }
-    };
-
-    let schema = reader.metadata().file_metadata().schema().clone();
-    ruby_logger.debug(|| format!("Schema loaded: {:?}", schema))?;
-
-    let mut iter = ParquetRowIter::from_file_into(reader);
-    if let Some(cols) = columns {
-        ruby_logger.debug(|| format!("Projecting columns: {:?}", cols))?;
-        let projection = create_projection_schema(&schema, &cols);
-        iter = iter.project(Some(projection.to_owned())).map_err(|e| {
-            MagnusError::new(
-                ruby.exception_runtime_error(),
-                format!("Failed to create projection: {}", e),
-            )
-        })?;
-    }
-
-    match result_type {
-        ParserResultType::Hash => {
-            let headers = OnceLock::new();
-            let headers_clone = headers.clone();
-            let iter = iter.map(move |row| {
-                row.map(|row| {
-                    let headers = headers_clone.get_or_init(|| {
-                        let column_count = row.get_column_iter().count();
-
-                        let mut header_string = Vec::with_capacity(column_count);
-                        for (k, _) in row.get_column_iter() {
-                            header_string.push(k.to_owned());
-                        }
-
-                        StringCache::intern_many(&header_string).expect("Failed to intern headers")
-                    });
-
-                    let mut map =
-                        HashMap::with_capacity_and_hasher(headers.len(), RandomState::default());
-                    for (i, (_, v)) in row.get_column_iter().enumerate() {
-                        map.insert(headers[i], ParquetField(v.clone(), strict));
-                    }
-                    map
-                })
-                .map(RowRecord::Map::<RandomState>)
-                .map_err(ParquetGemError::from)
-            });
-
-            for result in iter {
-                let record = result?;
-                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
-            }
-        }
-        ParserResultType::Array => {
-            let iter = iter.map(|row| {
-                row.map(|row| {
-                    let column_count = row.get_column_iter().count();
-                    let mut vec = Vec::with_capacity(column_count);
-                    for (_, v) in row.get_column_iter() {
-                        vec.push(ParquetField(v.clone(), strict));
-                    }
-                    vec
-                })
-                .map(RowRecord::Vec::<RandomState>)
-                .map_err(ParquetGemError::from)
-            });
-
-            for result in iter {
-                let record = result?;
-                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
-            }
-        }
-    }
-
-    Ok(ruby.qnil().into_value_with(&ruby))
-}
-
-fn create_projection_schema(schema: &SchemaType, columns: &[String]) -> SchemaType {
-    if let SchemaType::GroupType { fields, .. } = schema {
-        let projected_fields: Vec<TypePtr> = fields
-            .iter()
-            .filter(|field| columns.contains(&field.name().to_string()))
-            .cloned()
-            .collect();
-
-        SchemaType::GroupType {
-            basic_info: schema.get_basic_info().clone(),
-            fields: projected_fields,
-        }
-    } else {
-        // Return original schema if not a group type
-        schema.clone()
-    }
-}
+        },
+    )
+}
data/ext/parquet/src/reader/unified/mod.rs
ADDED
@@ -0,0 +1,328 @@
+use crate::header_cache::StringCache;
+use crate::logger::RubyLogger;
+use crate::types::TryIntoValue;
+use crate::{
+    create_column_enumerator, create_row_enumerator, ParquetField, ParquetGemError,
+    ParserResultType, ColumnEnumeratorArgs, RowEnumeratorArgs, RowRecord, ColumnRecord, ParquetValueVec,
+};
+use ahash::RandomState;
+use either::Either;
+use magnus::IntoValue;
+use magnus::{Error as MagnusError, Ruby, Value};
+use std::collections::HashMap;
+use std::rc::Rc;
+use std::sync::OnceLock;
+
+use crate::types::ArrayWrapper;
+use super::common::{
+    create_batch_reader, handle_block_or_enum, handle_empty_file, open_parquet_source,
+};
+
+/// A unified parser configuration that can be used for both row and column parsing
+pub enum ParserType {
+    Row {
+        strict: bool,
+    },
+    Column {
+        batch_size: Option<usize>,
+        strict: bool,
+    },
+}
+
+/// Unified parser arguments structure
+pub struct UnifiedParserArgs {
+    pub to_read: Value,
+    pub result_type: ParserResultType,
+    pub columns: Option<Vec<String>>,
+    pub parser_type: ParserType,
+    pub logger: Option<Value>,
+}
+
+/// Unified implementation for parsing Parquet data (both rows and columns)
+pub fn parse_parquet_unified(
+    ruby: Rc<Ruby>,
+    rb_self: Value,
+    args: UnifiedParserArgs,
+) -> Result<Value, ParquetGemError> {
+    let UnifiedParserArgs {
+        to_read,
+        result_type,
+        columns,
+        parser_type,
+        logger,
+    } = args;
+
+    // Initialize the logger if provided
+    let ruby_logger = RubyLogger::new(&ruby, logger.clone())?;
+
+    // Clone values for the closure to avoid move issues
+    let columns_clone = columns.clone();
+
+    // Determine if we're handling rows or columns for enumerator creation
+    match &parser_type {
+        ParserType::Row { strict } => {
+            // Handle block or create row enumerator
+            if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
+                create_row_enumerator(RowEnumeratorArgs {
+                    rb_self,
+                    to_read,
+                    result_type,
+                    columns: columns_clone,
+                    strict: *strict,
+                    logger,
+                })
+                .map(|yield_enum| yield_enum.into_value_with(&ruby))
+            })? {
+                return Ok(enum_value);
+            }
+        },
+        ParserType::Column { batch_size, strict } => {
+            // For column-based parsing, log the batch size if present
+            if let Some(ref bs) = batch_size {
+                ruby_logger.debug(|| format!("Using batch size: {}", bs))?;
+            }
+
+            // Handle block or create column enumerator
+            if let Some(enum_value) = handle_block_or_enum(&ruby, ruby.block_given(), || {
+                create_column_enumerator(ColumnEnumeratorArgs {
+                    rb_self,
+                    to_read,
+                    result_type,
+                    columns: columns_clone,
+                    batch_size: *batch_size,
+                    strict: *strict,
+                    logger: logger.as_ref().map(|_| to_read),
+                })
+                .map(|yield_enum| yield_enum.into_value_with(&ruby))
+            })? {
+                return Ok(enum_value);
+            }
+        }
+    }
+
+    // Open the Parquet source
+    let source = open_parquet_source(ruby.clone(), to_read)?;
+
+    // Based on the parser type, handle the data differently
+    match parser_type {
+        ParserType::Row { strict } => {
+            // Handle row-based parsing
+            process_row_data(ruby.clone(), source, &columns, result_type, strict, &ruby_logger)?;
+        },
+        ParserType::Column { batch_size, strict } => {
+            // Handle column-based parsing
+            process_column_data(ruby.clone(), source, &columns, result_type, batch_size, strict, &ruby_logger)?;
+        }
+    }
+
+    Ok(ruby.qnil().into_value_with(&ruby))
+}
+
+/// Process row-based Parquet data
+fn process_row_data(
+    ruby: Rc<Ruby>,
+    source: Either<std::fs::File, crate::ruby_reader::ThreadSafeRubyReader>,
+    columns: &Option<Vec<String>>,
+    result_type: ParserResultType,
+    strict: bool,
+    ruby_logger: &RubyLogger,
+) -> Result<(), ParquetGemError> {
+    use parquet::file::reader::{FileReader, SerializedFileReader};
+    use parquet::record::reader::RowIter as ParquetRowIter;
+
+    // Create the row-based reader
+    let reader: Box<dyn FileReader> = match source {
+        Either::Left(file) => {
+            Box::new(SerializedFileReader::new(file).map_err(ParquetGemError::from)?)
+        }
+        Either::Right(readable) => {
+            Box::new(SerializedFileReader::new(readable).map_err(ParquetGemError::from)?)
+        }
+    };
+
+    let schema = reader.metadata().file_metadata().schema().clone();
+    ruby_logger.debug(|| format!("Schema loaded: {:?}", schema))?;
+
+    let mut iter = ParquetRowIter::from_file_into(reader);
+    if let Some(cols) = columns {
+        ruby_logger.debug(|| format!("Projecting columns: {:?}", cols))?;
+        let projection = create_projection_schema(&schema, cols);
+        iter = iter.project(Some(projection.to_owned())).map_err(|e| {
+            MagnusError::new(
+                ruby.exception_runtime_error(),
+                format!("Failed to create projection: {}", e),
+            )
+        })?;
+    }
+
+    match result_type {
+        ParserResultType::Hash => {
+            let headers = OnceLock::new();
+            let headers_clone = headers.clone();
+            let iter = iter.map(move |row| {
+                row.map(|row| {
+                    let headers = headers_clone.get_or_init(|| {
+                        let column_count = row.get_column_iter().count();
+
+                        let mut header_string = Vec::with_capacity(column_count);
+                        for (k, _) in row.get_column_iter() {
+                            header_string.push(k.to_owned());
+                        }
+
+                        StringCache::intern_many(&header_string).expect("Failed to intern headers")
+                    });
+
+                    let mut map =
+                        HashMap::with_capacity_and_hasher(headers.len(), RandomState::default());
+                    for (i, (_, v)) in row.get_column_iter().enumerate() {
+                        map.insert(headers[i], ParquetField(v.clone(), strict));
+                    }
+                    map
+                })
+                .map(RowRecord::Map::<RandomState>)
+                .map_err(ParquetGemError::from)
+            });
+
+            for result in iter {
+                let record = result?;
+                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
+            }
+        }
+        ParserResultType::Array => {
+            let iter = iter.map(|row| {
+                row.map(|row| {
+                    let column_count = row.get_column_iter().count();
+                    let mut vec = Vec::with_capacity(column_count);
+                    for (_, v) in row.get_column_iter() {
+                        vec.push(ParquetField(v.clone(), strict));
+                    }
+                    vec
+                })
+                .map(RowRecord::Vec::<RandomState>)
+                .map_err(ParquetGemError::from)
+            });
+
+            for result in iter {
+                let record = result?;
+                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Process column-based Parquet data
+fn process_column_data(
+    ruby: Rc<Ruby>,
+    source: Either<std::fs::File, crate::ruby_reader::ThreadSafeRubyReader>,
+    columns: &Option<Vec<String>>,
+    result_type: ParserResultType,
+    batch_size: Option<usize>,
+    strict: bool,
+    _ruby_logger: &RubyLogger,
+) -> Result<(), ParquetGemError> {
+    // Create the batch reader
+    let (batch_reader, schema, num_rows) = match source {
+        Either::Left(file) => create_batch_reader(file, columns, batch_size)?,
+        Either::Right(readable) => create_batch_reader(readable, columns, batch_size)?,
+    };
+
+    match result_type {
+        ParserResultType::Hash => {
+            // For hash return type, we need to return a hash with column names pointing at empty arrays
+            if handle_empty_file(&ruby, &schema, num_rows)? {
+                return Ok(());
+            }
+
+            let headers = OnceLock::new();
+            let headers_clone = headers.clone();
+            let iter = batch_reader.map(move |batch| {
+                batch.map_err(ParquetGemError::Arrow).and_then(|batch| {
+                    let local_headers = headers_clone
+                        .get_or_init(|| {
+                            let schema = batch.schema();
+                            let fields = schema.fields();
+                            let mut header_string = Vec::with_capacity(fields.len());
+                            for field in fields {
+                                header_string.push(field.name().to_owned());
+                            }
+                            StringCache::intern_many(&header_string)
+                        })
+                        .as_ref()
+                        .map_err(|e| ParquetGemError::HeaderIntern(e.clone()))?;
+
+                    let mut map = HashMap::with_capacity_and_hasher(
+                        local_headers.len(),
+                        RandomState::default(),
+                    );
+
+                    batch
+                        .columns()
+                        .iter()
+                        .enumerate()
+                        .try_for_each(|(i, column)| {
+                            let header = local_headers[i];
+                            let values = ParquetValueVec::try_from(ArrayWrapper {
+                                array: column,
+                                strict,
+                            })?;
+                            map.insert(header, values.into_inner());
+                            Ok::<_, ParquetGemError>(())
+                        })?;
+
+                    Ok(ColumnRecord::Map::<RandomState>(map))
+                })
+            });
+
+            for result in iter {
+                let record = result?;
+                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
+            }
+        }
+        ParserResultType::Array => {
+            let iter = batch_reader.map(|batch| {
+                batch.map_err(ParquetGemError::Arrow).and_then(|batch| {
+                    let vec = batch
+                        .columns()
+                        .iter()
+                        .map(|column| {
+                            let values = ParquetValueVec::try_from(ArrayWrapper {
+                                array: column,
+                                strict,
+                            })?;
+                            Ok::<_, ParquetGemError>(values.into_inner())
+                        })
+                        .collect::<Result<Vec<_>, _>>()?;
+                    Ok(ColumnRecord::Vec::<RandomState>(vec))
+                })
+            });
+
+            for result in iter {
+                let record = result?;
+                let _: Value = ruby.yield_value(record.try_into_value_with(&ruby)?)?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Helper function to create a projection schema
+fn create_projection_schema(schema: &parquet::schema::types::Type, columns: &[String]) -> parquet::schema::types::Type {
+    if let parquet::schema::types::Type::GroupType { fields, .. } = schema {
+        let projected_fields: Vec<std::sync::Arc<parquet::schema::types::Type>> = fields
+            .iter()
+            .filter(|field| columns.contains(&field.name().to_string()))
+            .cloned()
+            .collect();
+
+        parquet::schema::types::Type::GroupType {
+            basic_info: schema.get_basic_info().clone(),
+            fields: projected_fields,
+        }
+    } else {
+        // Return original schema if not a group type
+        schema.clone()
+    }
+}
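Both `process_row_data` and `process_column_data` keep the pre-existing header-interning trick: a `OnceLock` is filled from the first row or batch and reused for every subsequent one, so column names are interned exactly once per read. A std-only sketch of that idiom, independent of the gem's types:

use std::sync::OnceLock;

// Intern headers once, on first use, then reuse the cached Vec for every
// later batch. Mirrors the `headers_clone.get_or_init(...)` calls above.
fn main() {
    let headers: OnceLock<Vec<String>> = OnceLock::new();

    for batch_id in 0..3 {
        let cached = headers.get_or_init(|| {
            // This closure runs only for batch_id == 0; later iterations
            // get the already-initialized value back.
            println!("interning headers during batch {}", batch_id);
            vec!["id".to_string(), "name".to_string()]
        });
        assert_eq!(cached.len(), 2);
    }
}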
data/ext/parquet/src/types/parquet_value.rs
CHANGED
@@ -1,6 +1,7 @@
 use crate::{impl_date_conversion, impl_timestamp_array_conversion, impl_timestamp_conversion};
 
 use super::*;
+use super::record_types::format_decimal_with_i8_scale;
 use arrow_array::MapArray;
 use magnus::{RArray, RString};
 
@@ -22,7 +23,7 @@ pub enum ParquetValue {
     Bytes(Vec<u8>),
     Date32(i32),
     Date64(i64),
-    Decimal128(i128),
+    Decimal128(i128, i8),
     TimestampSecond(i64, Option<Arc<str>>),
     TimestampMillis(i64, Option<Arc<str>>),
     TimestampMicros(i64, Option<Arc<str>>),
@@ -52,7 +53,47 @@ impl PartialEq for ParquetValue {
             (ParquetValue::Bytes(a), ParquetValue::Bytes(b)) => a == b,
             (ParquetValue::Date32(a), ParquetValue::Date32(b)) => a == b,
             (ParquetValue::Date64(a), ParquetValue::Date64(b)) => a == b,
-            (ParquetValue::Decimal128(a), ParquetValue::Decimal128(b)) =>
+            (ParquetValue::Decimal128(a, scale_a), ParquetValue::Decimal128(b, scale_b)) => {
+                if scale_a == scale_b {
+                    // Same scale, compare directly
+                    a == b
+                } else {
+                    // Different scales, need to adjust for proper comparison
+                    let mut a_val = *a;
+                    let mut b_val = *b;
+
+                    // Adjust to the same scale for proper comparison
+                    if scale_a < scale_b {
+                        // Scale up a to match b's scale
+                        let scale_diff = (*scale_b - *scale_a) as u32;
+                        if scale_diff <= 38 {
+                            // Limit to avoid overflow
+                            a_val *= 10_i128.pow(scale_diff);
+                        } else {
+                            // For large scale differences, use BigInt for the comparison
+                            let a_big = num::BigInt::from(*a)
+                                * num::BigInt::from(10_i128.pow(scale_diff.min(38)));
+                            let b_big = num::BigInt::from(*b);
+                            return a_big == b_big;
+                        }
+                    } else {
+                        // Scale up b to match a's scale
+                        let scale_diff = (*scale_a - *scale_b) as u32;
+                        if scale_diff <= 38 {
+                            // Limit to avoid overflow
+                            b_val *= 10_i128.pow(scale_diff);
+                        } else {
+                            // For large scale differences, use BigInt for the comparison
+                            let a_big = num::BigInt::from(*a);
+                            let b_big = num::BigInt::from(*b)
+                                * num::BigInt::from(10_i128.pow(scale_diff.min(38)));
+                            return a_big == b_big;
+                        }
+                    }
+
+                    a_val == b_val
+                }
+            }
             (ParquetValue::TimestampSecond(a, _), ParquetValue::TimestampSecond(b, _)) => a == b,
             (ParquetValue::TimestampMillis(a, _), ParquetValue::TimestampMillis(b, _)) => a == b,
             (ParquetValue::TimestampMicros(a, _), ParquetValue::TimestampMicros(b, _)) => a == b,
@@ -85,7 +126,10 @@ impl std::hash::Hash for ParquetValue {
             ParquetValue::Bytes(b) => b.hash(state),
             ParquetValue::Date32(d) => d.hash(state),
             ParquetValue::Date64(d) => d.hash(state),
-            ParquetValue::Decimal128(d) =>
+            ParquetValue::Decimal128(d, scale) => {
+                d.hash(state);
+                scale.hash(state);
+            }
             ParquetValue::TimestampSecond(ts, tz) => {
                 ts.hash(state);
                 tz.hash(state);
@@ -131,7 +175,16 @@ impl TryIntoValue for ParquetValue {
             ParquetValue::Boolean(b) => Ok(b.into_value_with(handle)),
             ParquetValue::String(s) => Ok(s.into_value_with(handle)),
             ParquetValue::Bytes(b) => Ok(handle.str_from_slice(&b).as_value()),
-            ParquetValue::Decimal128(d) =>
+            ParquetValue::Decimal128(d, scale) => {
+                // Load the bigdecimal gem if it's not already loaded
+                LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
+
+                // Format with proper scaling based on the sign of scale
+                let value = format_decimal_with_i8_scale(d, scale);
+
+                let kernel = handle.module_kernel();
+                Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
+            }
             ParquetValue::Date32(d) => impl_date_conversion!(d, handle),
             ParquetValue::Date64(d) => impl_date_conversion!(d, handle),
             timestamp @ ParquetValue::TimestampSecond(_, _) => {
@@ -375,7 +428,7 @@ impl ParquetValue {
 /// Unified helper to parse a decimal string and apply scaling
 fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
     let s = input_str.trim();
-
+
     // 1. Handle scientific notation case (e.g., "0.12345e3")
     if let Some(e_pos) = s.to_lowercase().find('e') {
         let base = &s[0..e_pos];
@@ -385,7 +438,10 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
         let exp_val = exp.parse::<i32>().map_err(|e| {
             MagnusError::new(
                 magnus::exception::type_error(),
-                format!(
+                format!(
+                    "Failed to parse exponent '{}' in decimal string '{}': {}",
+                    exp, s, e
+                ),
             )
         })?;
 
@@ -407,7 +463,10 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
         let base_val = base_without_point.parse::<i128>().map_err(|e| {
             MagnusError::new(
                 magnus::exception::type_error(),
-                format!(
+                format!(
+                    "Failed to parse base '{}' in scientific notation '{}': {}",
+                    base, s, e
+                ),
             )
         })?;
 
@@ -417,7 +476,10 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
         let base_val = base.parse::<i128>().map_err(|e| {
             MagnusError::new(
                 magnus::exception::type_error(),
-                format!(
+                format!(
+                    "Failed to parse base '{}' in scientific notation '{}': {}",
+                    base, s, e
+                ),
             )
         })?;
 
@@ -466,7 +528,10 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
         let v = s_without_point.parse::<i128>().map_err(|e| {
             MagnusError::new(
                 magnus::exception::type_error(),
-                format!(
+                format!(
+                    "Failed to parse decimal string '{}' (without decimal point: '{}'): {}",
+                    s, s_without_point, e
+                ),
             )
         })?;
 
@@ -497,7 +562,7 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
             }
             std::cmp::Ordering::Equal => Ok(v),
         }
-    }
+    }
     // 3. Plain integer value (e.g., "12345")
     else {
         // No decimal point, parse as i128 and scale appropriately
@@ -512,12 +577,18 @@ fn parse_decimal_string(input_str: &str, input_scale: i8) -> Result<i128, MagnusError> {
     if input_scale > 38 {
         return Err(MagnusError::new(
             magnus::exception::range_error(),
-            format!(
+            format!(
+                "Scale {} is too large for decimal value '{}'. Must be ≤ 38.",
+                input_scale, s
+            ),
         ));
     } else if input_scale < -38 {
        return Err(MagnusError::new(
            magnus::exception::range_error(),
-            format!(
+            format!(
+                "Scale {} is too small for decimal value '{}'. Must be ≥ -38.",
+                input_scale, s
+            ),
        ));
    }
 
@@ -540,14 +611,17 @@ fn convert_to_decimal128(value: Value, scale: i8) -> Result<ParquetValue, MagnusError> {
     } else {
         value.to_r_string()?.to_string()?
     };
-
+
     // Use our unified parser to convert the string to a decimal value with scaling
     match parse_decimal_string(&s, scale) {
-        Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value)),
+        Ok(decimal_value) => Ok(ParquetValue::Decimal128(decimal_value, scale)),
         Err(e) => Err(MagnusError::new(
             magnus::exception::type_error(),
-            format!(
-
+            format!(
+                "Failed to convert '{}' to decimal with scale {}: {}",
+                s, scale, e
+            ),
+        )),
    }
 }
 
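A concrete case for the scale-aware equality above: `Decimal128(1500, 2)` and `Decimal128(15, 0)` both denote 15, since a value's meaning is `unscaled * 10^(-scale)`. A standalone sketch of the same normalization, simplified to the non-overflowing branch (the gem falls back to `num::BigInt` when the scale gap is too wide):

// Standalone version of the scale-normalizing comparison: bring both
// unscaled values to the larger scale, then compare integers.
fn decimal128_eq(a: i128, scale_a: i8, b: i128, scale_b: i8) -> bool {
    if scale_a == scale_b {
        return a == b;
    }
    let diff = (scale_a.max(scale_b) - scale_a.min(scale_b)) as u32;
    // Simplification: assumes 10^diff fits in i128 (diff <= 38), which is
    // the common case; checked_mul keeps the sketch overflow-safe.
    if scale_a < scale_b {
        a.checked_mul(10_i128.pow(diff)) == Some(b)
    } else {
        b.checked_mul(10_i128.pow(diff)) == Some(a)
    }
}

fn main() {
    // 15.00 (unscaled 1500, scale 2) == 15 (unscaled 15, scale 0)
    assert!(decimal128_eq(1500, 2, 15, 0));
    // 1.5 (unscaled 15, scale 1) != 15 (unscaled 15, scale 0)
    assert!(!decimal128_eq(15, 1, 15, 0));
}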
data/ext/parquet/src/types/record_types.rs
CHANGED
@@ -5,7 +5,31 @@ use parquet::data_type::AsBytes;
 
 use super::*;
 
-static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
+pub static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
+
+/// Format decimal value with appropriate scale for BigDecimal conversion
+/// Handles positive and negative scales correctly for i8 scale
+pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -> String {
+    if scale >= 0 {
+        // Positive scale means divide (move decimal point left)
+        format!("{}e-{}", value, scale)
+    } else {
+        // Negative scale means multiply (move decimal point right)
+        format!("{}e{}", value, -scale)
+    }
+}
+
+/// Format decimal value with appropriate scale for BigDecimal conversion
+/// Handles positive and negative scales correctly for i32 scale
+pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
+    if scale >= 0 {
+        // Positive scale means divide (move decimal point left)
+        format!("{}e-{}", value, scale)
+    } else {
+        // Negative scale means multiply (move decimal point right)
+        format!("{}e{}", value, -scale)
+    }
+}
 
 #[derive(Debug)]
 pub enum RowRecord<S: BuildHasher + Default> {
@@ -207,17 +231,17 @@ impl TryIntoValue for ParquetField {
                 let value = match d {
                     Decimal::Int32 { value, scale, .. } => {
                         let unscaled = i32::from_be_bytes(value);
-
+                        format_decimal_with_i32_scale(unscaled, scale)
                     }
                     Decimal::Int64 { value, scale, .. } => {
                         let unscaled = i64::from_be_bytes(value);
-
+                        format_decimal_with_i32_scale(unscaled, scale)
                     }
                     Decimal::Bytes { value, scale, .. } => {
                         // value is a byte array containing the bytes for an i128 value in big endian order
                         let casted = value.as_bytes()[..16].try_into()?;
                         let unscaled = i128::from_be_bytes(casted);
-
+                        format_decimal_with_i32_scale(unscaled, scale)
                     }
                 };
 
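These helpers lean on BigDecimal's exponent notation instead of inserting a decimal point by hand: scale 2 turns unscaled `12345` into the string `"12345e-2"` (i.e. 123.45), and scale -3 turns `12` into `"12e3"` (i.e. 12000). A quick check of the string form, reusing the function body from the diff:

use std::fmt::Display;

// Same body as format_decimal_with_i8_scale above: a positive scale moves
// the decimal point left, a negative scale moves it right.
fn format_decimal_with_i8_scale<T: Display>(value: T, scale: i8) -> String {
    if scale >= 0 {
        format!("{}e-{}", value, scale)
    } else {
        format!("{}e{}", value, -scale)
    }
}

fn main() {
    assert_eq!(format_decimal_with_i8_scale(12345, 2), "12345e-2"); // 123.45
    assert_eq!(format_decimal_with_i8_scale(12, -3), "12e3");       // 12000
    assert_eq!(format_decimal_with_i8_scale(-7, 0), "-7e-0");       // -7
}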
data/ext/parquet/src/types/type_conversion.rs
CHANGED
@@ -2,8 +2,8 @@ use std::str::FromStr;
 use std::sync::Arc;
 
 use super::*;
-use arrow_array::builder::*;
 use arrow_array::builder::MapFieldNames;
+use arrow_array::builder::*;
 use arrow_schema::{DataType, Field, Fields, TimeUnit};
 use jiff::tz::{Offset, TimeZone};
 use magnus::{RArray, RString, TryConvert};
@@ -368,15 +368,17 @@ fn create_arrow_builder_for_type(
         ParquetSchemaType::Primitive(PrimitiveType::Decimal128(precision, scale)) => {
             // Create a Decimal128Builder with specific precision and scale
             let builder = Decimal128Builder::with_capacity(cap);
-
+
             // Set precision and scale for the decimal and return the new builder
-            let builder_with_precision = builder
-
-
-
-
-
-
+            let builder_with_precision = builder
+                .with_precision_and_scale(*precision, *scale)
+                .map_err(|e| {
+                    MagnusError::new(
+                        magnus::exception::runtime_error(),
+                        format!("Failed to set precision and scale: {}", e),
+                    )
+                })?;
+
             Ok(Box::new(builder_with_precision))
         }
         ParquetSchemaType::Primitive(PrimitiveType::String) => {
@@ -857,7 +859,7 @@ fn fill_builder(
 
     for val in values {
         match val {
-            ParquetValue::Decimal128(d) => typed_builder.append_value(*d),
+            ParquetValue::Decimal128(d, _scale) => typed_builder.append_value(*d),
            ParquetValue::Float64(f) => {
                // Scale the float to the desired precision and scale
                let scaled_value = (*f * 10_f64.powi(*scale as i32)) as i128;
@@ -1161,7 +1163,7 @@ fn fill_builder(
                     )
                 })?
                 .append_value(bytes),
-            ParquetValue::Decimal128(x) => typed_builder
+            ParquetValue::Decimal128(x, _scale) => typed_builder
                 .field_builder::<Decimal128Builder>(i)
                .ok_or_else(|| {
                    MagnusError::new(
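For context on the restored builder code: in arrow-rs, `Decimal128Builder` attaches precision and scale via `with_precision_and_scale`, which returns a `Result` because out-of-range combinations (e.g. precision above 38) are rejected, hence the `map_err` into a Ruby runtime error above. A hedged sketch of that API in isolation, assuming the `arrow-array` crate (method names as in recent arrow-rs versions):

use arrow_array::builder::Decimal128Builder;
use arrow_array::Array;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Precision 10, scale 2: appended values are unscaled i128s,
    // so 12345 represents 123.45.
    let mut builder = Decimal128Builder::with_capacity(3)
        .with_precision_and_scale(10, 2)?;

    builder.append_value(12345); // 123.45
    builder.append_null();
    builder.append_value(-50);   // -0.50

    let array = builder.finish();
    assert_eq!(array.len(), 3);
    assert!(array.is_null(1));
    Ok(())
}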
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: parquet
 version: !ruby/object:Gem::Version
-  version: 0.5.4
+  version: 0.5.5
 platform: ruby
 authors:
 - Nathan Jaremko
@@ -66,6 +66,7 @@ files:
 - ext/parquet/src/reader/mod.rs
 - ext/parquet/src/reader/parquet_column_reader.rs
 - ext/parquet/src/reader/parquet_row_reader.rs
+- ext/parquet/src/reader/unified/mod.rs
 - ext/parquet/src/ruby_reader.rs
 - ext/parquet/src/types/core_types.rs
 - ext/parquet/src/types/mod.rs
|