parquet-tyfoom 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +1854 -0
- data/Cargo.toml +3 -0
- data/Gemfile +21 -0
- data/LICENSE +21 -0
- data/README.md +428 -0
- data/Rakefile +43 -0
- data/ext/parquet/Cargo.toml +39 -0
- data/ext/parquet/build.rs +5 -0
- data/ext/parquet/extconf.rb +4 -0
- data/ext/parquet/src/adapter_ffi.rs +297 -0
- data/ext/parquet/src/allocator.rs +13 -0
- data/ext/parquet/src/lib.rs +24 -0
- data/ext/parquet-core/Cargo.toml +24 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
- data/ext/parquet-core/src/error.rs +189 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +368 -0
- data/ext/parquet-core/src/schema.rs +452 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +190 -0
- data/ext/parquet-core/src/value.rs +220 -0
- data/ext/parquet-core/src/writer.rs +1241 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +431 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/review_regressions.rs +787 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
- data/ext/parquet-ruby-adapter/src/error.rs +141 -0
- data/ext/parquet-ruby-adapter/src/io.rs +432 -0
- data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
- data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
- data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +98 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
- data/lib/parquet/schema.rb +262 -0
- data/lib/parquet/version.rb +3 -0
- data/lib/parquet.rb +11 -0
- data/lib/parquet.rbi +181 -0
- metadata +165 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
use crate::{error::Result, RubyAdapterError};
|
|
2
|
+
use magnus::{value::ReprValue, IntoValue, Ruby, Value};
|
|
3
|
+
|
|
4
|
+
/// Trait for converting Rust values to Ruby values with error handling
|
|
5
|
+
///
|
|
6
|
+
/// This is similar to Magnus's `IntoValue` trait but allows for returning errors
|
|
7
|
+
/// instead of panicking or returning invalid values.
|
|
8
|
+
pub trait TryIntoValue: Sized {
|
|
9
|
+
/// Convert `self` to a Ruby value with error handling
|
|
10
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value>;
|
|
11
|
+
|
|
12
|
+
/// Convert `self` to a Ruby value with error handling, using the Ruby runtime from the current thread
|
|
13
|
+
fn try_into_value_with_current_thread(self) -> Result<Value> {
|
|
14
|
+
let ruby =
|
|
15
|
+
Ruby::get().map_err(|_| RubyAdapterError::runtime("Failed to get Ruby runtime"))?;
|
|
16
|
+
self.try_into_value(&ruby)
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Note: We don't provide a blanket implementation for all IntoValue types
|
|
21
|
+
// because some types may want to provide custom error handling.
|
|
22
|
+
// Types that need TryIntoValue should implement it explicitly.
|
|
23
|
+
|
|
24
|
+
// Convenience implementations for common types
|
|
25
|
+
impl TryIntoValue for String {
|
|
26
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
27
|
+
Ok(self.into_value_with(handle))
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
impl TryIntoValue for &str {
|
|
32
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
33
|
+
Ok(self.into_value_with(handle))
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
impl TryIntoValue for i32 {
|
|
38
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
39
|
+
Ok(self.into_value_with(handle))
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
impl TryIntoValue for i64 {
|
|
44
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
45
|
+
Ok(self.into_value_with(handle))
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
impl TryIntoValue for f32 {
|
|
50
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
51
|
+
Ok(self.into_value_with(handle))
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
impl TryIntoValue for f64 {
|
|
56
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
57
|
+
Ok(self.into_value_with(handle))
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
impl TryIntoValue for bool {
|
|
62
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
63
|
+
Ok(self.into_value_with(handle))
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
impl<T> TryIntoValue for Vec<T>
|
|
68
|
+
where
|
|
69
|
+
T: TryIntoValue,
|
|
70
|
+
{
|
|
71
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
72
|
+
let array = handle.ary_new();
|
|
73
|
+
for item in self {
|
|
74
|
+
let ruby_value = item.try_into_value(handle)?;
|
|
75
|
+
array.push(ruby_value)?;
|
|
76
|
+
}
|
|
77
|
+
Ok(handle.into_value(array))
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
impl<T> TryIntoValue for Option<T>
|
|
82
|
+
where
|
|
83
|
+
T: TryIntoValue,
|
|
84
|
+
{
|
|
85
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
|
86
|
+
match self {
|
|
87
|
+
Some(value) => value.try_into_value(handle),
|
|
88
|
+
None => Ok(handle.qnil().as_value()),
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
use crate::string_storage::StringStorageConfig;
|
|
2
|
+
use magnus::Value;
|
|
3
|
+
use std::fs::File;
|
|
4
|
+
use std::str::FromStr;
|
|
5
|
+
use tempfile::NamedTempFile;
|
|
6
|
+
|
|
7
|
+
/// Arguments for writing Parquet files
|
|
8
|
+
#[derive(Debug)]
|
|
9
|
+
pub struct ParquetWriteArgs {
|
|
10
|
+
pub read_from: Value,
|
|
11
|
+
pub write_to: Value,
|
|
12
|
+
pub schema_value: Value,
|
|
13
|
+
pub batch_size: Option<usize>,
|
|
14
|
+
pub flush_threshold: Option<usize>,
|
|
15
|
+
pub compression: Option<String>,
|
|
16
|
+
pub sample_size: Option<usize>,
|
|
17
|
+
pub logger: Option<Value>,
|
|
18
|
+
/// Requested string-cache capacity; `None` means the cache is disabled.
|
|
19
|
+
pub string_cache: Option<usize>,
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/// Arguments for creating row enumerators
|
|
23
|
+
pub struct RowEnumeratorArgs {
|
|
24
|
+
pub rb_self: Value,
|
|
25
|
+
pub to_read: Value,
|
|
26
|
+
pub result_type: ParserResultType,
|
|
27
|
+
pub columns: Option<Vec<String>>,
|
|
28
|
+
pub strict: bool,
|
|
29
|
+
pub string_storage: StringStorageConfig,
|
|
30
|
+
pub logger: Option<Value>,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/// Arguments for creating column enumerators
|
|
34
|
+
pub struct ColumnEnumeratorArgs {
|
|
35
|
+
pub rb_self: Value,
|
|
36
|
+
pub to_read: Value,
|
|
37
|
+
pub result_type: ParserResultType,
|
|
38
|
+
pub columns: Option<Vec<String>>,
|
|
39
|
+
pub batch_size: Option<usize>,
|
|
40
|
+
pub strict: bool,
|
|
41
|
+
pub string_storage: StringStorageConfig,
|
|
42
|
+
pub logger: Option<Value>,
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/// Enum to handle different writer outputs
|
|
46
|
+
pub enum WriterOutput {
|
|
47
|
+
File(parquet_core::Writer<File>),
|
|
48
|
+
TempFile(parquet_core::Writer<File>, NamedTempFile, Value), // Writer, temp file, IO object
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/// Shape in which parsed results are produced.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ParserResultType {
    /// Spelled `"hash"` in its textual form.
    Hash,
    /// Spelled `"array"` in its textual form.
    Array,
}
|
|
57
|
+
|
|
58
|
+
impl ParserResultType {
|
|
59
|
+
pub fn iter() -> impl Iterator<Item = Self> {
|
|
60
|
+
[Self::Hash, Self::Array].into_iter()
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
impl FromStr for ParserResultType {
|
|
65
|
+
type Err = String;
|
|
66
|
+
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
67
|
+
Self::try_from(s)
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
impl TryFrom<&str> for ParserResultType {
|
|
72
|
+
type Error = String;
|
|
73
|
+
|
|
74
|
+
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
|
75
|
+
match value {
|
|
76
|
+
"hash" => Ok(ParserResultType::Hash),
|
|
77
|
+
"array" => Ok(ParserResultType::Array),
|
|
78
|
+
_ => Err(format!("Invalid parser result type: {}", value)),
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
impl TryFrom<String> for ParserResultType {
|
|
84
|
+
type Error = String;
|
|
85
|
+
|
|
86
|
+
fn try_from(value: String) -> Result<Self, Self::Error> {
|
|
87
|
+
Self::try_from(value.as_str())
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
impl std::fmt::Display for ParserResultType {
|
|
92
|
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
93
|
+
match self {
|
|
94
|
+
ParserResultType::Hash => write!(f, "hash"),
|
|
95
|
+
ParserResultType::Array => write!(f, "array"),
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
use magnus::value::ReprValue;
|
|
2
|
+
use magnus::{
|
|
3
|
+
scan_args::{get_kwargs, scan_args},
|
|
4
|
+
Error as MagnusError, KwArgs, Ruby, TryConvert, Value,
|
|
5
|
+
};
|
|
6
|
+
use parquet::basic::Compression;
|
|
7
|
+
use parquet_core::{MAX_BATCH_SIZE, MAX_SAMPLE_SIZE};
|
|
8
|
+
|
|
9
|
+
use crate::string_cache::{DEFAULT_STRING_CACHE_CAPACITY, STRING_CACHE_CAPACITY_MAX};
|
|
10
|
+
use crate::string_storage::{
|
|
11
|
+
StringStorageConfig, StringStorageMode, DEFAULT_SHARED_MAX_ENTRIES,
|
|
12
|
+
DEFAULT_SHARED_MAX_VALUE_BYTES,
|
|
13
|
+
};
|
|
14
|
+
use crate::types::{ColumnEnumeratorArgs, ParquetWriteArgs, RowEnumeratorArgs};
|
|
15
|
+
|
|
16
|
+
/// Reconstruct the `string_storage:` kwarg value for an enumerator so a
|
|
17
|
+
/// block-less call round-trips losslessly: a plain symbol for the mode, or a
|
|
18
|
+
/// hash when a `:shared` budget differs from the default. Returns `None` for the
|
|
19
|
+
/// default (`:copy`) config so the kwarg is simply omitted.
|
|
20
|
+
fn string_storage_kwarg(
|
|
21
|
+
ruby: &Ruby,
|
|
22
|
+
config: StringStorageConfig,
|
|
23
|
+
) -> Result<Option<Value>, MagnusError> {
|
|
24
|
+
if config == StringStorageConfig::default() {
|
|
25
|
+
return Ok(None);
|
|
26
|
+
}
|
|
27
|
+
let default_budget = config.shared_max_entries == DEFAULT_SHARED_MAX_ENTRIES
|
|
28
|
+
&& config.shared_max_value_bytes == DEFAULT_SHARED_MAX_VALUE_BYTES;
|
|
29
|
+
if config.mode == StringStorageMode::Shared && !default_budget {
|
|
30
|
+
let hash = ruby.hash_new();
|
|
31
|
+
hash.aset(
|
|
32
|
+
ruby.to_symbol("mode"),
|
|
33
|
+
ruby.to_symbol(config.mode.to_string()),
|
|
34
|
+
)?;
|
|
35
|
+
hash.aset(ruby.to_symbol("max_entries"), config.shared_max_entries)?;
|
|
36
|
+
hash.aset(
|
|
37
|
+
ruby.to_symbol("max_value_bytes"),
|
|
38
|
+
config.shared_max_value_bytes,
|
|
39
|
+
)?;
|
|
40
|
+
Ok(Some(hash.as_value()))
|
|
41
|
+
} else {
|
|
42
|
+
Ok(Some(ruby.to_symbol(config.mode.to_string()).as_value()))
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/// Parse compression type from string
|
|
47
|
+
pub fn parse_compression(
|
|
48
|
+
ruby: &Ruby,
|
|
49
|
+
compression: Option<String>,
|
|
50
|
+
) -> Result<Compression, MagnusError> {
|
|
51
|
+
match compression.map(|s| s.to_lowercase()).as_deref() {
|
|
52
|
+
Some("none") | Some("uncompressed") => Ok(Compression::UNCOMPRESSED),
|
|
53
|
+
Some("snappy") => Ok(Compression::SNAPPY),
|
|
54
|
+
Some("gzip") => Ok(Compression::GZIP(parquet::basic::GzipLevel::default())),
|
|
55
|
+
Some("lz4") => Ok(Compression::LZ4),
|
|
56
|
+
Some("zstd") => Ok(Compression::ZSTD(parquet::basic::ZstdLevel::default())),
|
|
57
|
+
Some("brotli") => Ok(Compression::BROTLI(parquet::basic::BrotliLevel::default())),
|
|
58
|
+
None => Ok(Compression::SNAPPY), // Default to SNAPPY
|
|
59
|
+
Some(other) => Err(MagnusError::new(
|
|
60
|
+
ruby.exception_arg_error(),
|
|
61
|
+
format!("Invalid compression option: '{}'. Valid options are: none, snappy, gzip, lz4, zstd, brotli", other),
|
|
62
|
+
)),
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/// Parse arguments for Parquet writing
|
|
67
|
+
pub fn parse_parquet_write_args(
|
|
68
|
+
ruby: &Ruby,
|
|
69
|
+
args: &[Value],
|
|
70
|
+
) -> Result<ParquetWriteArgs, MagnusError> {
|
|
71
|
+
let parsed_args = scan_args::<(Value,), (), (), (), _, ()>(args)?;
|
|
72
|
+
let (read_from,) = parsed_args.required;
|
|
73
|
+
|
|
74
|
+
let kwargs = get_kwargs::<
|
|
75
|
+
_,
|
|
76
|
+
(Value, Value),
|
|
77
|
+
(
|
|
78
|
+
Option<Option<usize>>,
|
|
79
|
+
Option<Option<usize>>,
|
|
80
|
+
Option<Option<String>>,
|
|
81
|
+
Option<Option<usize>>,
|
|
82
|
+
Option<Option<Value>>,
|
|
83
|
+
Option<Option<Value>>,
|
|
84
|
+
),
|
|
85
|
+
(),
|
|
86
|
+
>(
|
|
87
|
+
parsed_args.keywords,
|
|
88
|
+
&["schema", "write_to"],
|
|
89
|
+
&[
|
|
90
|
+
"batch_size",
|
|
91
|
+
"flush_threshold",
|
|
92
|
+
"compression",
|
|
93
|
+
"sample_size",
|
|
94
|
+
"logger",
|
|
95
|
+
"string_cache",
|
|
96
|
+
],
|
|
97
|
+
)?;
|
|
98
|
+
|
|
99
|
+
Ok(ParquetWriteArgs {
|
|
100
|
+
read_from,
|
|
101
|
+
write_to: kwargs.required.1,
|
|
102
|
+
schema_value: kwargs.required.0,
|
|
103
|
+
batch_size: parse_positive_bounded_usize(
|
|
104
|
+
ruby,
|
|
105
|
+
"batch_size",
|
|
106
|
+
kwargs.optional.0.flatten(),
|
|
107
|
+
MAX_BATCH_SIZE,
|
|
108
|
+
)?,
|
|
109
|
+
// A zero threshold would flush a row group per row; reject it like the
|
|
110
|
+
// other sizing options rather than producing a pathological file.
|
|
111
|
+
flush_threshold: parse_positive_bounded_usize(
|
|
112
|
+
ruby,
|
|
113
|
+
"flush_threshold",
|
|
114
|
+
kwargs.optional.1.flatten(),
|
|
115
|
+
usize::MAX,
|
|
116
|
+
)?,
|
|
117
|
+
compression: kwargs.optional.2.flatten(),
|
|
118
|
+
sample_size: parse_positive_bounded_usize(
|
|
119
|
+
ruby,
|
|
120
|
+
"sample_size",
|
|
121
|
+
kwargs.optional.3.flatten(),
|
|
122
|
+
MAX_SAMPLE_SIZE,
|
|
123
|
+
)?,
|
|
124
|
+
logger: kwargs.optional.4.flatten(),
|
|
125
|
+
string_cache: parse_string_cache(ruby, kwargs.optional.5.flatten())?,
|
|
126
|
+
})
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
fn parse_positive_bounded_usize(
|
|
130
|
+
ruby: &Ruby,
|
|
131
|
+
name: &str,
|
|
132
|
+
value: Option<usize>,
|
|
133
|
+
max: usize,
|
|
134
|
+
) -> Result<Option<usize>, MagnusError> {
|
|
135
|
+
let Some(value) = value else {
|
|
136
|
+
return Ok(None);
|
|
137
|
+
};
|
|
138
|
+
if value == 0 {
|
|
139
|
+
return Err(MagnusError::new(
|
|
140
|
+
ruby.exception_arg_error(),
|
|
141
|
+
format!("{name} must be positive"),
|
|
142
|
+
));
|
|
143
|
+
}
|
|
144
|
+
if value > max {
|
|
145
|
+
return Err(MagnusError::new(
|
|
146
|
+
ruby.exception_arg_error(),
|
|
147
|
+
format!("{name} must be at most {max}"),
|
|
148
|
+
));
|
|
149
|
+
}
|
|
150
|
+
Ok(Some(value))
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/// Parse the `string_cache:` write option. `false`/`nil`/absent disables it,
|
|
154
|
+
/// `true` enables it with the default capacity, and a positive Integer enables
|
|
155
|
+
/// it with that capacity. Returns the requested capacity, or `None` when
|
|
156
|
+
/// disabled.
|
|
157
|
+
pub fn parse_string_cache(ruby: &Ruby, value: Option<Value>) -> Result<Option<usize>, MagnusError> {
|
|
158
|
+
let Some(value) = value else {
|
|
159
|
+
return Ok(None);
|
|
160
|
+
};
|
|
161
|
+
// Strict: only true/false/nil and Integer are accepted (no Ruby truthiness
|
|
162
|
+
// coercion, so a stray String is a clear error rather than "enabled").
|
|
163
|
+
if value.is_nil() || value.eql(ruby.qfalse())? {
|
|
164
|
+
return Ok(None);
|
|
165
|
+
}
|
|
166
|
+
if value.eql(ruby.qtrue())? {
|
|
167
|
+
return Ok(Some(DEFAULT_STRING_CACHE_CAPACITY));
|
|
168
|
+
}
|
|
169
|
+
if value.is_kind_of(ruby.class_integer()) {
|
|
170
|
+
let capacity: usize = TryConvert::try_convert(value)?;
|
|
171
|
+
if capacity == 0 {
|
|
172
|
+
return Err(MagnusError::new(
|
|
173
|
+
ruby.exception_arg_error(),
|
|
174
|
+
"string_cache capacity must be positive",
|
|
175
|
+
));
|
|
176
|
+
}
|
|
177
|
+
if capacity > STRING_CACHE_CAPACITY_MAX {
|
|
178
|
+
return Err(MagnusError::new(
|
|
179
|
+
ruby.exception_arg_error(),
|
|
180
|
+
format!(
|
|
181
|
+
"string_cache capacity must be at most {}",
|
|
182
|
+
STRING_CACHE_CAPACITY_MAX
|
|
183
|
+
),
|
|
184
|
+
));
|
|
185
|
+
}
|
|
186
|
+
return Ok(Some(capacity));
|
|
187
|
+
}
|
|
188
|
+
Err(MagnusError::new(
|
|
189
|
+
ruby.exception_type_error(),
|
|
190
|
+
"string_cache must be true, false, or a positive Integer",
|
|
191
|
+
))
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/// Convert a Ruby Value to a String, handling both String and Symbol types
|
|
195
|
+
pub fn parse_string_or_symbol(ruby: &Ruby, value: Value) -> Result<Option<String>, MagnusError> {
|
|
196
|
+
if value.is_nil() {
|
|
197
|
+
Ok(None)
|
|
198
|
+
} else if value.is_kind_of(ruby.class_string()) || value.is_kind_of(ruby.class_symbol()) {
|
|
199
|
+
let stringed = value.to_r_string()?.to_string()?;
|
|
200
|
+
Ok(Some(stringed))
|
|
201
|
+
} else {
|
|
202
|
+
Err(MagnusError::new(
|
|
203
|
+
ruby.exception_type_error(),
|
|
204
|
+
"Value must be a String or Symbol",
|
|
205
|
+
))
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/// Handle block or enumerator creation
|
|
210
|
+
pub fn handle_block_or_enum<F, T>(
|
|
211
|
+
block_given: bool,
|
|
212
|
+
create_enum: F,
|
|
213
|
+
) -> Result<Option<T>, MagnusError>
|
|
214
|
+
where
|
|
215
|
+
F: FnOnce() -> Result<T, MagnusError>,
|
|
216
|
+
{
|
|
217
|
+
if !block_given {
|
|
218
|
+
let enum_value = create_enum()?;
|
|
219
|
+
return Ok(Some(enum_value));
|
|
220
|
+
}
|
|
221
|
+
Ok(None)
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/// Create a row enumerator
|
|
225
|
+
pub fn create_row_enumerator(
|
|
226
|
+
ruby: &Ruby,
|
|
227
|
+
args: RowEnumeratorArgs,
|
|
228
|
+
) -> Result<magnus::Enumerator, MagnusError> {
|
|
229
|
+
let kwargs = ruby.hash_new();
|
|
230
|
+
kwargs.aset(
|
|
231
|
+
ruby.to_symbol("result_type"),
|
|
232
|
+
ruby.to_symbol(args.result_type.to_string()),
|
|
233
|
+
)?;
|
|
234
|
+
if let Some(columns) = args.columns {
|
|
235
|
+
kwargs.aset(ruby.to_symbol("columns"), ruby.ary_from_vec(columns))?;
|
|
236
|
+
}
|
|
237
|
+
if args.strict {
|
|
238
|
+
kwargs.aset(ruby.to_symbol("strict"), true)?;
|
|
239
|
+
}
|
|
240
|
+
if let Some(value) = string_storage_kwarg(ruby, args.string_storage)? {
|
|
241
|
+
kwargs.aset(ruby.to_symbol("string_storage"), value)?;
|
|
242
|
+
}
|
|
243
|
+
if let Some(logger) = args.logger {
|
|
244
|
+
kwargs.aset(ruby.to_symbol("logger"), logger)?;
|
|
245
|
+
}
|
|
246
|
+
Ok(args
|
|
247
|
+
.rb_self
|
|
248
|
+
.enumeratorize("each_row", (args.to_read, KwArgs(kwargs))))
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/// Create a column enumerator
|
|
252
|
+
#[inline]
|
|
253
|
+
pub fn create_column_enumerator(
|
|
254
|
+
ruby: &Ruby,
|
|
255
|
+
args: ColumnEnumeratorArgs,
|
|
256
|
+
) -> Result<magnus::Enumerator, MagnusError> {
|
|
257
|
+
let kwargs = ruby.hash_new();
|
|
258
|
+
kwargs.aset(
|
|
259
|
+
ruby.to_symbol("result_type"),
|
|
260
|
+
ruby.to_symbol(args.result_type.to_string()),
|
|
261
|
+
)?;
|
|
262
|
+
if let Some(columns) = args.columns {
|
|
263
|
+
kwargs.aset(ruby.to_symbol("columns"), ruby.ary_from_vec(columns))?;
|
|
264
|
+
}
|
|
265
|
+
if let Some(batch_size) = args.batch_size {
|
|
266
|
+
kwargs.aset(ruby.to_symbol("batch_size"), batch_size)?;
|
|
267
|
+
}
|
|
268
|
+
if args.strict {
|
|
269
|
+
kwargs.aset(ruby.to_symbol("strict"), true)?;
|
|
270
|
+
}
|
|
271
|
+
if let Some(value) = string_storage_kwarg(ruby, args.string_storage)? {
|
|
272
|
+
kwargs.aset(ruby.to_symbol("string_storage"), value)?;
|
|
273
|
+
}
|
|
274
|
+
if let Some(logger) = args.logger {
|
|
275
|
+
kwargs.aset(ruby.to_symbol("logger"), logger)?;
|
|
276
|
+
}
|
|
277
|
+
Ok(args
|
|
278
|
+
.rb_self
|
|
279
|
+
.enumeratorize("each_column", (args.to_read, KwArgs(kwargs))))
|
|
280
|
+
}
|