parquet 0.5.13 → 0.6.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +3 -0
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -605
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -1,329 +0,0 @@
-use super::{core_types::SchemaNode, ParquetGemError, PrimitiveType};
-use crate::{
-    types::{ListField, MapField, ParquetSchemaType},
-    utils::parse_string_or_symbol,
-};
-use arrow_array::{Array, RecordBatch};
-use magnus::{value::ReprValue, Error as MagnusError, RString, Ruby, TryConvert, Value};
-use parquet::{arrow::ArrowWriter, errors::ParquetError};
-use std::{
-    io::{self, Write},
-    str::FromStr,
-    sync::Arc,
-};
-use tempfile::NamedTempFile;
-
-#[derive(Debug, Clone)]
-pub struct SchemaField<'a> {
-    pub name: String,
-    pub type_: ParquetSchemaType<'a>,
-    pub format: Option<String>,
-    pub nullable: bool,
-}
-
-#[derive(Debug)]
-pub struct ParquetWriteArgs {
-    pub read_from: Value,
-    pub write_to: Value,
-    pub schema: SchemaNode,
-    pub batch_size: Option<usize>,
-    pub flush_threshold: Option<usize>,
-    pub compression: Option<String>,
-    pub sample_size: Option<usize>,
-    pub logger: Option<Value>,
-}
-
-pub trait SendableWrite: Send + Write {}
-impl<T: Send + Write> SendableWrite for T {}
-
-pub struct IoLikeValue(pub(crate) Value);
-
-impl Write for IoLikeValue {
-    fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
-        let ruby_bytes = RString::from_slice(buf);
-
-        let bytes_written = self
-            .0
-            .funcall::<_, _, usize>("write", (ruby_bytes,))
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-
-        Ok(bytes_written)
-    }
-
-    fn flush(&mut self) -> Result<(), io::Error> {
-        self.0
-            .funcall::<_, _, Value>("flush", ())
-            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
-
-        Ok(())
-    }
-}
-
-impl FromStr for ParquetSchemaType<'_> {
-    type Err = MagnusError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        // Check if it's a list type
-        if let Some(inner_type_str) = s.strip_prefix("list<").and_then(|s| s.strip_suffix(">")) {
-            let inner_type = inner_type_str.parse::<ParquetSchemaType>()?;
-            return Ok(ParquetSchemaType::List(Box::new(ListField {
-                item_type: inner_type,
-                format: None,
-                nullable: true,
-            })));
-        }
-
-        // Check if it's a map type
-        if let Some(kv_types_str) = s.strip_prefix("map<").and_then(|s| s.strip_suffix(">")) {
-            let parts: Vec<&str> = kv_types_str.splitn(2, ',').collect();
-            if parts.len() != 2 {
-                return Err(MagnusError::new(
-                    magnus::exception::runtime_error(),
-                    format!(
-                        "Invalid map format. Expected 'map<keyType,valueType>', got '{}'",
-                        s
-                    ),
-                ));
-            }
-
-            let key_type = parts[0].trim().parse::<ParquetSchemaType>()?;
-            let value_type = parts[1].trim().parse::<ParquetSchemaType>()?;
-
-            return Ok(ParquetSchemaType::Map(Box::new(MapField {
-                key_type,
-                value_type,
-                key_format: None,
-                value_format: None,
-                value_nullable: true,
-            })));
-        }
-
-        // Check if it's a decimal type with precision and scale
-        if let Some(decimal_params) = s.strip_prefix("decimal(").and_then(|s| s.strip_suffix(")")) {
-            let parts: Vec<&str> = decimal_params.split(',').collect();
-
-            // Handle both single parameter (precision only) and two parameters (precision and scale)
-            if parts.len() == 1 {
-                // Only precision provided, scale defaults to 0
-                let precision = parts[0].trim().parse::<u8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid precision value in decimal type: {}", parts[0]),
-                    )
-                })?;
-
-                return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
-                    precision, 0,
-                )));
-            } else if parts.len() == 2 {
-                // Both precision and scale provided
-                let precision = parts[0].trim().parse::<u8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid precision value in decimal type: {}", parts[0]),
-                    )
-                })?;
-
-                let scale = parts[1].trim().parse::<i8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid scale value in decimal type: {}", parts[1]),
-                    )
-                })?;
-
-                return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
-                    precision, scale,
-                )));
-            } else {
-                return Err(MagnusError::new(
-                    magnus::exception::runtime_error(),
-                    format!(
-                        "Invalid decimal format. Expected 'decimal(precision)' or 'decimal(precision,scale)', got '{}'",
-                        s
-                    ),
-                ));
-            }
-        }
-
-        // Check if it's a decimal256 type with precision and scale
-        if let Some(decimal_params) = s
-            .strip_prefix("decimal256(")
-            .and_then(|s| s.strip_suffix(")"))
-        {
-            let parts: Vec<&str> = decimal_params.split(',').collect();
-
-            // Handle both single parameter (precision only) and two parameters (precision and scale)
-            if parts.len() == 1 {
-                // Only precision provided, scale defaults to 0
-                let precision = parts[0].trim().parse::<u8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid precision value in decimal256 type: {}", parts[0]),
-                    )
-                })?;
-
-                return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
-                    precision, 0,
-                )));
-            } else if parts.len() == 2 {
-                // Both precision and scale provided
-                let precision = parts[0].trim().parse::<u8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid precision value in decimal256 type: {}", parts[0]),
-                    )
-                })?;
-
-                let scale = parts[1].trim().parse::<i8>().map_err(|_| {
-                    MagnusError::new(
-                        magnus::exception::runtime_error(),
-                        format!("Invalid scale value in decimal256 type: {}", parts[1]),
-                    )
-                })?;
-
-                return Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
-                    precision, scale,
-                )));
-            } else {
-                return Err(MagnusError::new(
-                    magnus::exception::runtime_error(),
-                    format!(
-                        "Invalid decimal256 format. Expected 'decimal256(precision)' or 'decimal256(precision,scale)', got '{}'",
-                        s
-                    ),
-                ));
-            }
-        }
-
-        // Handle primitive types
-        match s {
-            "int8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int8)),
-            "int16" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int16)),
-            "int32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int32)),
-            "int64" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Int64)),
-            "uint8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::UInt8)),
-            "uint16" => Ok(ParquetSchemaType::Primitive(PrimitiveType::UInt16)),
-            "uint32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::UInt32)),
-            "uint64" => Ok(ParquetSchemaType::Primitive(PrimitiveType::UInt64)),
-            "float" | "float32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Float32)),
-            "double" | "float64" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Float64)),
-            "string" | "utf8" => Ok(ParquetSchemaType::Primitive(PrimitiveType::String)),
-            "binary" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Binary)),
-            "boolean" | "bool" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Boolean)),
-            "date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
-            "timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
-            "timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
-            "time_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimeMillis)),
-            "time_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimeMicros)),
-            "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
-                38, 0,
-            ))),
-            "decimal256" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal256(
-                38, 0,
-            ))),
-            "list" => Ok(ParquetSchemaType::List(Box::new(ListField {
-                item_type: ParquetSchemaType::Primitive(PrimitiveType::String),
-                format: None,
-                nullable: true,
-            }))),
-            _ => Err(MagnusError::new(
-                magnus::exception::runtime_error(),
-                format!("Invalid schema type: {}", s),
-            )),
-        }
-    }
-}
-
-impl TryConvert for ParquetSchemaType<'_> {
-    fn try_convert(value: Value) -> Result<Self, MagnusError> {
-        let ruby = unsafe { Ruby::get_unchecked() };
-        let schema_type = parse_string_or_symbol(&ruby, value)?;
-
-        schema_type
-            .ok_or_else(|| {
-                MagnusError::new(magnus::exception::type_error(), "Invalid schema type")
-            })?
-            .parse()
-    }
-}
-
-// We know this type is safe to move between threads because it's just an enum
-// with simple primitive types and strings
-unsafe impl Send for ParquetSchemaType<'_> {}
-
-pub enum WriterOutput {
-    File(ArrowWriter<Box<dyn SendableWrite>>),
-    TempFile(ArrowWriter<Box<dyn SendableWrite>>, NamedTempFile),
-}
-
-impl WriterOutput {
-    pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ParquetError> {
-        match self {
-            WriterOutput::File(writer) | WriterOutput::TempFile(writer, _) => writer.write(batch),
-        }
-    }
-
-    pub fn close(self) -> Result<Option<NamedTempFile>, ParquetError> {
-        match self {
-            WriterOutput::File(writer) => {
-                writer.close()?;
-                Ok(None)
-            }
-            WriterOutput::TempFile(writer, temp_file) => {
-                writer.close()?;
-                Ok(Some(temp_file))
-            }
-        }
-    }
-}
-
-pub struct ColumnCollector<'a> {
-    pub ruby: &'a Ruby,
-    pub name: String,
-    pub type_: ParquetSchemaType<'a>,
-    pub format: Option<String>,
-    pub nullable: bool,
-    pub values: Vec<crate::types::ParquetValue>,
-}
-
-impl<'a> ColumnCollector<'a> {
-    pub fn new(
-        ruby: &'a Ruby,
-        name: String,
-        type_: ParquetSchemaType<'a>,
-        format: Option<String>,
-        nullable: bool,
-    ) -> Self {
-        Self {
-            ruby,
-            name,
-            type_,
-            format,
-            nullable,
-            values: Vec::new(),
-        }
-    }
-
-    pub fn push_value(&mut self, value: Value) -> Result<(), MagnusError> {
-        use crate::types::ParquetValue;
-
-        if value.is_nil() && !self.nullable {
-            // For non-nullable fields, raise an error
-            return Err(MagnusError::new(
-                magnus::exception::runtime_error(),
-                "Cannot write nil value for non-nullable field",
-            ));
-        }
-
-        // For all other types, proceed as normal
-        let parquet_value =
-            ParquetValue::from_value(self.ruby, value, &self.type_, self.format.as_deref())?;
-        self.values.push(parquet_value);
-        Ok(())
-    }
-
-    pub fn take_array(&mut self) -> Result<Arc<dyn Array>, ParquetGemError> {
-        let values = std::mem::take(&mut self.values);
-        crate::convert_parquet_values_to_arrow(values, &self.type_)
-    }
-}
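For orientation, the `FromStr` impl deleted above is what let 0.5.x schemas name types as plain strings such as `"int64"`, `"list<int32>"`, `"map<string,int64>"`, or `"decimal(10,2)"`. The sketch below is a minimal, standalone illustration of just the decimal grammar (precision with an optional scale that defaults to 0); the `parse_decimal` helper is invented for this example and is not part of the gem.

```rust
// Standalone sketch of the "decimal(precision[,scale])" grammar handled by the
// deleted FromStr impl. `parse_decimal` is a hypothetical helper for illustration;
// the real code produced ParquetSchemaType::Primitive(PrimitiveType::Decimal128(..)).
fn parse_decimal(s: &str) -> Option<(u8, i8)> {
    let params = s.strip_prefix("decimal(")?.strip_suffix(')')?;
    let mut parts = params.split(',');
    // Precision is required.
    let precision: u8 = parts.next()?.trim().parse().ok()?;
    // Scale is optional and defaults to 0, mirroring the deleted parser.
    let scale: i8 = match parts.next() {
        Some(raw) => raw.trim().parse().ok()?,
        None => 0,
    };
    // Anything beyond two parameters is rejected.
    if parts.next().is_some() {
        return None;
    }
    Some((precision, scale))
}

fn main() {
    assert_eq!(parse_decimal("decimal(10,2)"), Some((10, 2)));
    assert_eq!(parse_decimal("decimal(38)"), Some((38, 0)));
    assert_eq!(parse_decimal("decimal(1,2,3)"), None);
}
```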
data/ext/parquet/src/utils.rs
DELETED
@@ -1,184 +0,0 @@
-use crate::ParserResultType;
-use magnus::{
-    scan_args::{get_kwargs, scan_args},
-    value::ReprValue,
-    Error, RString, Ruby, Symbol, Value,
-};
-
-/// Convert a Ruby Value to a String, handling both String and Symbol types
-pub fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, Error> {
-    if value.is_nil() {
-        Ok(None)
-    } else if value.is_kind_of(ruby.class_string()) {
-        RString::from_value(value)
-            .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid string value"))?
-            .to_string()
-            .map(Some)
-    } else if value.is_kind_of(ruby.class_symbol()) {
-        Symbol::from_value(value)
-            .ok_or_else(|| Error::new(magnus::exception::type_error(), "Invalid symbol value"))?
-            .funcall("to_s", ())
-            .map(Some)
-    } else {
-        Err(Error::new(
-            magnus::exception::type_error(),
-            "Value must be a String or Symbol",
-        ))
-    }
-}
-
-#[derive(Debug)]
-pub struct ParquetRowsArgs {
-    pub to_read: Value,
-    pub result_type: ParserResultType,
-    pub columns: Option<Vec<String>>,
-    pub strict: bool,
-    pub logger: Option<Value>,
-}
-
-/// Parse common arguments for parquet row iteration
-pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRowsArgs, Error> {
-    let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
-    let (to_read,) = parsed_args.required;
-
-    let kwargs = get_kwargs::<
-        _,
-        (),
-        (
-            Option<Option<Value>>,
-            Option<Option<Vec<String>>>,
-            Option<Option<bool>>,
-            Option<Option<Value>>,
-        ),
-        (),
-    >(
-        parsed_args.keywords,
-        &[],
-        &["result_type", "columns", "strict", "logger"],
-    )?;
-
-    let result_type: ParserResultType = match kwargs
-        .optional
-        .0
-        .flatten()
-        .map(|value| parse_string_or_symbol(ruby, value))
-    {
-        Some(Ok(Some(parsed))) => parsed.try_into().map_err(|e| {
-            Error::new(
-                magnus::exception::runtime_error(),
-                format!(
-                    "Invalid result type: {e}. Must be one of {}",
-                    ParserResultType::iter()
-                        .map(|v| v.to_string())
-                        .collect::<Vec<_>>()
-                        .join(", ")
-                ),
-            )
-        })?,
-        Some(Ok(None)) => ParserResultType::Hash,
-        Some(Err(_)) => {
-            return Err(Error::new(
-                magnus::exception::type_error(),
-                "result_type must be a String or Symbol",
-            ))
-        }
-        None => ParserResultType::Hash,
-    };
-
-    let strict = kwargs.optional.2.flatten().unwrap_or(true);
-    let logger = kwargs.optional.3.flatten();
-
-    Ok(ParquetRowsArgs {
-        to_read,
-        result_type,
-        columns: kwargs.optional.1.flatten(),
-        strict,
-        logger,
-    })
-}
-
-#[derive(Debug)]
-pub struct ParquetColumnsArgs {
-    pub to_read: Value,
-    pub result_type: ParserResultType,
-    pub columns: Option<Vec<String>>,
-    pub batch_size: Option<usize>,
-    pub strict: bool,
-    pub logger: Option<Value>,
-}
-
-/// Parse common arguments for parquet column iteration
-pub fn parse_parquet_columns_args(
-    ruby: &Ruby,
-    args: &[Value],
-) -> Result<ParquetColumnsArgs, Error> {
-    let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
-    let (to_read,) = parsed_args.required;
-
-    let kwargs = get_kwargs::<
-        _,
-        (),
-        (
-            Option<Option<Value>>,
-            Option<Option<Vec<String>>>,
-            Option<Option<usize>>,
-            Option<Option<bool>>,
-            Option<Option<Value>>,
-        ),
-        (),
-    >(
-        parsed_args.keywords,
-        &[],
-        &["result_type", "columns", "batch_size", "strict", "logger"],
-    )?;
-
-    let result_type: ParserResultType = match kwargs
-        .optional
-        .0
-        .flatten()
-        .map(|value| parse_string_or_symbol(ruby, value))
-    {
-        Some(Ok(Some(parsed))) => parsed.try_into().map_err(|e| {
-            Error::new(
-                magnus::exception::runtime_error(),
-                format!(
-                    "Invalid result type: {e}. Must be one of {}",
-                    ParserResultType::iter()
-                        .map(|v| v.to_string())
-                        .collect::<Vec<_>>()
-                        .join(", ")
-                ),
-            )
-        })?,
-        Some(Ok(None)) => ParserResultType::Hash,
-        Some(Err(_)) => {
-            return Err(Error::new(
-                magnus::exception::type_error(),
-                "result_type must be a String or Symbol",
-            ))
-        }
-        None => ParserResultType::Hash,
-    };
-
-    let batch_size = kwargs.optional.2.flatten();
-    if let Some(batch_size) = batch_size {
-        if batch_size == 0 {
-            return Err(Error::new(
-                magnus::exception::arg_error(),
-                "Batch size must be greater than 0",
-            ));
-        }
-    }
-
-    let strict = kwargs.optional.3.flatten().unwrap_or(true);
-    let logger = kwargs.optional.4.flatten();
-
-    Ok(ParquetColumnsArgs {
-        to_read,
-        result_type,
-        columns: kwargs.optional.1.flatten(),
-        batch_size,
-        strict,
-        logger,
-    })
-}
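The deleted utils.rs centralized keyword-argument parsing for row and column iteration: `result_type` accepts a String or Symbol and falls back to hash output, `strict` defaults to true, and a `batch_size` of 0 is rejected. Below is a standalone sketch of only the `result_type` resolution under those defaults; `ResultType` and `resolve_result_type` are invented for this example (the gem itself goes through magnus `Value`s and its `ParserResultType` enum, whose full set of variants is not shown in this diff).

```rust
// Hypothetical, magnus-free sketch of how the deleted parse_parquet_*_args
// resolved `result_type`: a missing or nil kwarg falls back to hash, unknown
// names produce an error listing the accepted options.
#[derive(Debug, PartialEq)]
enum ResultType {
    Hash,
    Array,
}

fn resolve_result_type(raw: Option<&str>) -> Result<ResultType, String> {
    match raw {
        // Kwarg omitted (or nil on the Ruby side): default to hash rows.
        None => Ok(ResultType::Hash),
        Some("hash") => Ok(ResultType::Hash),
        Some("array") => Ok(ResultType::Array),
        Some(other) => Err(format!(
            "Invalid result type: {other}. Must be one of hash, array"
        )),
    }
}

fn main() {
    assert_eq!(resolve_result_type(None), Ok(ResultType::Hash));
    assert_eq!(resolve_result_type(Some("array")), Ok(ResultType::Array));
    assert!(resolve_result_type(Some("tuples")).is_err());
}
```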