polars-df 0.21.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +120 -90
- data/Cargo.toml +3 -0
- data/README.md +20 -7
- data/ext/polars/Cargo.toml +18 -12
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +39 -37
- data/ext/polars/src/conversion/chunked_array.rs +36 -29
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +244 -51
- data/ext/polars/src/dataframe/construction.rs +5 -17
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -17
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -5
- data/ext/polars/src/expr/datatype.rs +4 -4
- data/ext/polars/src/expr/datetime.rs +22 -28
- data/ext/polars/src/expr/general.rs +3 -10
- data/ext/polars/src/expr/list.rs +8 -24
- data/ext/polars/src/expr/meta.rs +4 -6
- data/ext/polars/src/expr/mod.rs +2 -0
- data/ext/polars/src/expr/name.rs +11 -14
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +5 -10
- data/ext/polars/src/file.rs +20 -14
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +7 -4
- data/ext/polars/src/functions/lazy.rs +7 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +23 -3
- data/ext/polars/src/lazyframe/general.rs +35 -50
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +57 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +144 -19
- data/ext/polars/src/map/dataframe.rs +18 -15
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +7 -6
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +49 -29
- data/ext/polars/src/series/construction.rs +2 -0
- data/ext/polars/src/series/export.rs +38 -33
- data/ext/polars/src/series/general.rs +69 -31
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +1 -1
- data/lib/polars/data_frame.rb +119 -15
- data/lib/polars/data_types.rb +23 -6
- data/lib/polars/date_time_expr.rb +36 -15
- data/lib/polars/expr.rb +41 -32
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/lazy.rb +1 -1
- data/lib/polars/iceberg_dataset.rb +113 -0
- data/lib/polars/io/iceberg.rb +34 -0
- data/lib/polars/io/ipc.rb +28 -49
- data/lib/polars/io/parquet.rb +7 -4
- data/lib/polars/io/scan_options.rb +12 -3
- data/lib/polars/io/utils.rb +17 -0
- data/lib/polars/lazy_frame.rb +97 -10
- data/lib/polars/list_expr.rb +21 -13
- data/lib/polars/list_name_space.rb +33 -21
- data/lib/polars/meta_expr.rb +25 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +23 -1
- data/lib/polars/schema.rb +1 -1
- data/lib/polars/selectors.rb +8 -8
- data/lib/polars/series.rb +26 -2
- data/lib/polars/string_expr.rb +27 -28
- data/lib/polars/string_name_space.rb +18 -5
- data/lib/polars/utils/convert.rb +2 -2
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +4 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +10 -1
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
use magnus::{IntoValue,
|
|
1
|
+
use magnus::{IntoValue, RString, Ruby, TryConvert, Value, prelude::*};
|
|
2
2
|
use polars::prelude::*;
|
|
3
|
+
use polars_compute::decimal::DecimalFmtBuffer;
|
|
3
4
|
|
|
4
5
|
use super::{Wrap, get_rbseq, struct_dict};
|
|
5
6
|
|
|
6
7
|
use crate::RbResult;
|
|
7
|
-
use crate::rb_modules::
|
|
8
|
+
use crate::rb_modules::pl_utils;
|
|
8
9
|
|
|
9
10
|
impl TryConvert for Wrap<StringChunked> {
|
|
10
11
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
|
@@ -39,19 +40,19 @@ impl TryConvert for Wrap<BinaryChunked> {
|
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
impl IntoValue for Wrap<&StringChunked> {
|
|
42
|
-
fn into_value_with(self,
|
|
43
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
43
44
|
let iter = self.0.into_iter();
|
|
44
|
-
|
|
45
|
+
ruby.ary_from_iter(iter).as_value()
|
|
45
46
|
}
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
impl IntoValue for Wrap<&BinaryChunked> {
|
|
49
|
-
fn into_value_with(self,
|
|
50
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
50
51
|
let iter = self
|
|
51
52
|
.0
|
|
52
53
|
.into_iter()
|
|
53
|
-
.map(|opt_bytes| opt_bytes.map(
|
|
54
|
-
|
|
54
|
+
.map(|opt_bytes| opt_bytes.map(|v| ruby.str_from_slice(v)));
|
|
55
|
+
ruby.ary_from_iter(iter).as_value()
|
|
55
56
|
}
|
|
56
57
|
}
|
|
57
58
|
|
|
@@ -62,19 +63,19 @@ impl IntoValue for Wrap<&StructChunked> {
|
|
|
62
63
|
// make series::iter() accept a chunk index.
|
|
63
64
|
let s = s.rechunk();
|
|
64
65
|
let iter = s.iter().map(|av| match av {
|
|
65
|
-
AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
|
66
|
+
AnyValue::Struct(_, _, flds) => struct_dict(ruby, av._iter_struct_av(), flds),
|
|
66
67
|
AnyValue::Null => ruby.qnil().as_value(),
|
|
67
68
|
_ => unreachable!(),
|
|
68
69
|
});
|
|
69
70
|
|
|
70
|
-
|
|
71
|
+
ruby.ary_from_iter(iter).as_value()
|
|
71
72
|
}
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
impl IntoValue for Wrap<&DurationChunked> {
|
|
75
|
-
fn into_value_with(self,
|
|
76
|
-
let utils =
|
|
77
|
-
let time_unit = Wrap(self.0.time_unit()).
|
|
76
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
77
|
+
let utils = pl_utils();
|
|
78
|
+
let time_unit = Wrap(self.0.time_unit()).into_value_with(ruby);
|
|
78
79
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
79
80
|
opt_v.map(|v| {
|
|
80
81
|
utils
|
|
@@ -82,15 +83,19 @@ impl IntoValue for Wrap<&DurationChunked> {
|
|
|
82
83
|
.unwrap()
|
|
83
84
|
})
|
|
84
85
|
});
|
|
85
|
-
|
|
86
|
+
ruby.ary_from_iter(iter).as_value()
|
|
86
87
|
}
|
|
87
88
|
}
|
|
88
89
|
|
|
89
90
|
impl IntoValue for Wrap<&DatetimeChunked> {
|
|
90
|
-
fn into_value_with(self,
|
|
91
|
-
let utils =
|
|
92
|
-
let time_unit = Wrap(self.0.time_unit()).
|
|
93
|
-
let time_zone = self
|
|
91
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
92
|
+
let utils = pl_utils();
|
|
93
|
+
let time_unit = Wrap(self.0.time_unit()).into_value_with(ruby);
|
|
94
|
+
let time_zone = self
|
|
95
|
+
.0
|
|
96
|
+
.time_zone()
|
|
97
|
+
.as_deref()
|
|
98
|
+
.map(|v| v.into_value_with(ruby));
|
|
94
99
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
95
100
|
opt_v.map(|v| {
|
|
96
101
|
utils
|
|
@@ -98,41 +103,43 @@ impl IntoValue for Wrap<&DatetimeChunked> {
|
|
|
98
103
|
.unwrap()
|
|
99
104
|
})
|
|
100
105
|
});
|
|
101
|
-
|
|
106
|
+
ruby.ary_from_iter(iter).as_value()
|
|
102
107
|
}
|
|
103
108
|
}
|
|
104
109
|
|
|
105
110
|
impl IntoValue for Wrap<&TimeChunked> {
|
|
106
|
-
fn into_value_with(self,
|
|
107
|
-
let utils =
|
|
111
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
112
|
+
let utils = pl_utils();
|
|
108
113
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
109
114
|
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
|
110
115
|
});
|
|
111
|
-
|
|
116
|
+
ruby.ary_from_iter(iter).as_value()
|
|
112
117
|
}
|
|
113
118
|
}
|
|
114
119
|
|
|
115
120
|
impl IntoValue for Wrap<&DateChunked> {
|
|
116
|
-
fn into_value_with(self,
|
|
117
|
-
let utils =
|
|
121
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
122
|
+
let utils = pl_utils();
|
|
118
123
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
119
124
|
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
|
120
125
|
});
|
|
121
|
-
|
|
126
|
+
ruby.ary_from_iter(iter).as_value()
|
|
122
127
|
}
|
|
123
128
|
}
|
|
124
129
|
|
|
125
130
|
impl IntoValue for Wrap<&DecimalChunked> {
|
|
126
|
-
fn into_value_with(self,
|
|
127
|
-
let utils =
|
|
128
|
-
let
|
|
131
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
132
|
+
let utils = pl_utils();
|
|
133
|
+
let rb_precision = self.0.precision().into_value_with(ruby);
|
|
134
|
+
let mut buf = DecimalFmtBuffer::new();
|
|
129
135
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
130
136
|
opt_v.map(|v| {
|
|
137
|
+
let s = buf.format_dec128(v, self.0.scale(), false, false);
|
|
131
138
|
utils
|
|
132
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (
|
|
139
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (rb_precision, s))
|
|
133
140
|
.unwrap()
|
|
134
141
|
})
|
|
135
142
|
});
|
|
136
|
-
|
|
143
|
+
ruby.ary_from_iter(iter).as_value()
|
|
137
144
|
}
|
|
138
145
|
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
use magnus::{Value, prelude::*};
|
|
2
|
+
use polars::prelude::*;
|
|
3
|
+
|
|
4
|
+
use crate::rb_modules::pl_utils;
|
|
5
|
+
|
|
6
|
+
pub fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value {
|
|
7
|
+
let tu = tu.to_ascii();
|
|
8
|
+
pl_utils()
|
|
9
|
+
.funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string())))
|
|
10
|
+
.unwrap()
|
|
11
|
+
}
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
pub(crate) mod any_value;
|
|
2
2
|
mod categorical;
|
|
3
3
|
mod chunked_array;
|
|
4
|
+
mod datetime;
|
|
4
5
|
|
|
6
|
+
use std::collections::BTreeMap;
|
|
5
7
|
use std::fmt::{Debug, Display, Formatter};
|
|
6
8
|
use std::fs::File;
|
|
7
9
|
use std::hash::{Hash, Hasher};
|
|
8
|
-
use std::num::NonZeroUsize;
|
|
9
10
|
|
|
10
11
|
pub use categorical::RbCategories;
|
|
11
12
|
use magnus::{
|
|
12
|
-
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
|
13
|
-
|
|
13
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, prelude::*, r_hash::ForEach,
|
|
14
|
+
try_convert::TryConvertOwned, value::Opaque,
|
|
14
15
|
};
|
|
15
16
|
use polars::chunked_array::object::PolarsObjectSafe;
|
|
16
17
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
|
@@ -18,9 +19,13 @@ use polars::datatypes::AnyValue;
|
|
|
18
19
|
use polars::frame::row::Row;
|
|
19
20
|
use polars::io::avro::AvroCompression;
|
|
20
21
|
use polars::io::cloud::CloudOptions;
|
|
22
|
+
use polars::prelude::default_values::{
|
|
23
|
+
DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
|
|
24
|
+
};
|
|
21
25
|
use polars::prelude::deletion::DeletionFilesList;
|
|
22
26
|
use polars::prelude::*;
|
|
23
27
|
use polars::series::ops::NullBehavior;
|
|
28
|
+
use polars_compute::decimal::dec128_verify_prec_scale;
|
|
24
29
|
use polars_core::schema::iceberg::IcebergSchema;
|
|
25
30
|
use polars_core::utils::arrow::array::Array;
|
|
26
31
|
use polars_core::utils::materialize_dyn_int;
|
|
@@ -30,7 +35,7 @@ use polars_utils::total_ord::{TotalEq, TotalHash};
|
|
|
30
35
|
|
|
31
36
|
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
|
32
37
|
use crate::object::OBJECT_NAME;
|
|
33
|
-
use crate::rb_modules::
|
|
38
|
+
use crate::rb_modules::pl_series;
|
|
34
39
|
use crate::utils::to_rb_err;
|
|
35
40
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
|
36
41
|
|
|
@@ -86,7 +91,7 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
|
86
91
|
}
|
|
87
92
|
|
|
88
93
|
pub(crate) fn to_series(s: RbSeries) -> Value {
|
|
89
|
-
let series =
|
|
94
|
+
let series = pl_series();
|
|
90
95
|
series
|
|
91
96
|
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
|
92
97
|
.unwrap()
|
|
@@ -121,16 +126,16 @@ impl TryConvert for Wrap<NullValues> {
|
|
|
121
126
|
}
|
|
122
127
|
}
|
|
123
128
|
|
|
124
|
-
fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
|
|
125
|
-
let dict =
|
|
129
|
+
fn struct_dict<'a>(ruby: &Ruby, vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
|
|
130
|
+
let dict = ruby.hash_new();
|
|
126
131
|
for (fld, val) in flds.iter().zip(vals) {
|
|
127
132
|
dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
|
|
128
133
|
}
|
|
129
|
-
dict.
|
|
134
|
+
dict.as_value()
|
|
130
135
|
}
|
|
131
136
|
|
|
132
137
|
impl IntoValue for Wrap<DataType> {
|
|
133
|
-
fn into_value_with(self,
|
|
138
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
134
139
|
let pl = crate::rb_modules::polars();
|
|
135
140
|
|
|
136
141
|
match self.0 {
|
|
@@ -170,6 +175,10 @@ impl IntoValue for Wrap<DataType> {
|
|
|
170
175
|
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
|
171
176
|
class.funcall("new", ()).unwrap()
|
|
172
177
|
}
|
|
178
|
+
DataType::UInt128 => {
|
|
179
|
+
let class = pl.const_get::<_, Value>("UInt128").unwrap();
|
|
180
|
+
class.funcall("new", ()).unwrap()
|
|
181
|
+
}
|
|
173
182
|
DataType::Float32 => {
|
|
174
183
|
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
|
175
184
|
class.funcall("new", ()).unwrap()
|
|
@@ -236,8 +245,10 @@ impl IntoValue for Wrap<DataType> {
|
|
|
236
245
|
let categories: Value = categories_class
|
|
237
246
|
.funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
|
|
238
247
|
.unwrap();
|
|
239
|
-
let kwargs =
|
|
240
|
-
kwargs
|
|
248
|
+
let kwargs = ruby.hash_new();
|
|
249
|
+
kwargs
|
|
250
|
+
.aset(ruby.to_symbol("categories"), categories)
|
|
251
|
+
.unwrap();
|
|
241
252
|
categorical_class.funcall("new", (kwargs,)).unwrap()
|
|
242
253
|
}
|
|
243
254
|
DataType::Enum(_, mapping) => {
|
|
@@ -264,7 +275,7 @@ impl IntoValue for Wrap<DataType> {
|
|
|
264
275
|
.funcall::<_, _, Value>("new", (name, dtype))
|
|
265
276
|
.unwrap()
|
|
266
277
|
});
|
|
267
|
-
let fields =
|
|
278
|
+
let fields = ruby.ary_from_iter(iter);
|
|
268
279
|
let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
|
|
269
280
|
struct_class
|
|
270
281
|
.funcall::<_, _, Value>("new", (fields,))
|
|
@@ -275,7 +286,7 @@ impl IntoValue for Wrap<DataType> {
|
|
|
275
286
|
class.funcall("new", ()).unwrap()
|
|
276
287
|
}
|
|
277
288
|
DataType::Unknown(UnknownKind::Int(v)) => {
|
|
278
|
-
Wrap(materialize_dyn_int(v).dtype()).
|
|
289
|
+
Wrap(materialize_dyn_int(v).dtype()).into_value_with(ruby)
|
|
279
290
|
}
|
|
280
291
|
DataType::Unknown(_) => {
|
|
281
292
|
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
|
@@ -293,19 +304,19 @@ enum CategoricalOrdering {
|
|
|
293
304
|
}
|
|
294
305
|
|
|
295
306
|
impl IntoValue for Wrap<CategoricalOrdering> {
|
|
296
|
-
fn into_value_with(self,
|
|
297
|
-
"lexical".
|
|
307
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
308
|
+
"lexical".into_value_with(ruby)
|
|
298
309
|
}
|
|
299
310
|
}
|
|
300
311
|
|
|
301
312
|
impl IntoValue for Wrap<TimeUnit> {
|
|
302
|
-
fn into_value_with(self,
|
|
313
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
303
314
|
let tu = match self.0 {
|
|
304
315
|
TimeUnit::Nanoseconds => "ns",
|
|
305
316
|
TimeUnit::Microseconds => "us",
|
|
306
317
|
TimeUnit::Milliseconds => "ms",
|
|
307
318
|
};
|
|
308
|
-
tu.
|
|
319
|
+
tu.into_value_with(ruby)
|
|
309
320
|
}
|
|
310
321
|
}
|
|
311
322
|
|
|
@@ -319,7 +330,8 @@ impl TryConvert for Wrap<Field> {
|
|
|
319
330
|
|
|
320
331
|
impl TryConvert for Wrap<DataType> {
|
|
321
332
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
322
|
-
let
|
|
333
|
+
let ruby = Ruby::get_with(ob);
|
|
334
|
+
let dtype = if ob.is_kind_of(ruby.class_class()) {
|
|
323
335
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
|
324
336
|
match name.as_str() {
|
|
325
337
|
"Polars::Int8" => DataType::Int8,
|
|
@@ -343,7 +355,11 @@ impl TryConvert for Wrap<DataType> {
|
|
|
343
355
|
"Polars::Time" => DataType::Time,
|
|
344
356
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
|
345
357
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
|
346
|
-
"Polars::Decimal" =>
|
|
358
|
+
"Polars::Decimal" => {
|
|
359
|
+
return Err(RbTypeError::new_err(
|
|
360
|
+
"Decimal without precision/scale set is not a valid Polars datatype",
|
|
361
|
+
));
|
|
362
|
+
}
|
|
347
363
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
|
348
364
|
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
|
349
365
|
"Polars::Struct" => DataType::Struct(vec![]),
|
|
@@ -408,7 +424,8 @@ impl TryConvert for Wrap<DataType> {
|
|
|
408
424
|
"Polars::Decimal" => {
|
|
409
425
|
let precision = ob.funcall("precision", ())?;
|
|
410
426
|
let scale = ob.funcall("scale", ())?;
|
|
411
|
-
|
|
427
|
+
dec128_verify_prec_scale(precision, scale).map_err(to_rb_err)?;
|
|
428
|
+
DataType::Decimal(precision, scale)
|
|
412
429
|
}
|
|
413
430
|
"Polars::List" => {
|
|
414
431
|
let inner: Value = ob.funcall("inner", ()).unwrap();
|
|
@@ -527,8 +544,56 @@ impl TryConvert for Wrap<Schema> {
|
|
|
527
544
|
}
|
|
528
545
|
|
|
529
546
|
impl TryConvert for Wrap<ArrowSchema> {
|
|
530
|
-
fn try_convert(
|
|
531
|
-
|
|
547
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
548
|
+
let ruby = Ruby::get_with(ob);
|
|
549
|
+
// TODO improve
|
|
550
|
+
let ob = RHash::try_convert(ob)?;
|
|
551
|
+
let fields: RArray = ob.aref(ruby.to_symbol("fields"))?;
|
|
552
|
+
let mut arrow_schema = ArrowSchema::with_capacity(fields.len());
|
|
553
|
+
for f in fields {
|
|
554
|
+
let f = RHash::try_convert(f)?;
|
|
555
|
+
let name: String = f.aref(ruby.to_symbol("name"))?;
|
|
556
|
+
let rb_dtype: String = f.aref(ruby.to_symbol("type"))?;
|
|
557
|
+
let dtype = match rb_dtype.as_str() {
|
|
558
|
+
"null" => ArrowDataType::Null,
|
|
559
|
+
"boolean" => ArrowDataType::Boolean,
|
|
560
|
+
"int8" => ArrowDataType::Int8,
|
|
561
|
+
"int16" => ArrowDataType::Int16,
|
|
562
|
+
"int32" => ArrowDataType::Int32,
|
|
563
|
+
"int64" => ArrowDataType::Int64,
|
|
564
|
+
"uint8" => ArrowDataType::UInt8,
|
|
565
|
+
"uint16" => ArrowDataType::UInt16,
|
|
566
|
+
"uint32" => ArrowDataType::UInt32,
|
|
567
|
+
"uint64" => ArrowDataType::UInt64,
|
|
568
|
+
"float16" => ArrowDataType::Float16,
|
|
569
|
+
"float32" => ArrowDataType::Float32,
|
|
570
|
+
"float64" => ArrowDataType::Float64,
|
|
571
|
+
"date32" => ArrowDataType::Date32,
|
|
572
|
+
"date64" => ArrowDataType::Date64,
|
|
573
|
+
"binary" => ArrowDataType::Binary,
|
|
574
|
+
"large_binary" => ArrowDataType::LargeBinary,
|
|
575
|
+
"string" => ArrowDataType::Utf8,
|
|
576
|
+
"large_string" => ArrowDataType::LargeUtf8,
|
|
577
|
+
"binary_view" => ArrowDataType::BinaryView,
|
|
578
|
+
"string_view" => ArrowDataType::Utf8View,
|
|
579
|
+
"unknown" => ArrowDataType::Unknown,
|
|
580
|
+
_ => todo!(),
|
|
581
|
+
};
|
|
582
|
+
let is_nullable = f.aref(ruby.to_symbol("nullable"))?;
|
|
583
|
+
let rb_metadata: RHash = f.aref(ruby.to_symbol("metadata"))?;
|
|
584
|
+
let mut metadata = BTreeMap::new();
|
|
585
|
+
rb_metadata.foreach(|k: String, v: String| {
|
|
586
|
+
metadata.insert(k.into(), v.into());
|
|
587
|
+
Ok(ForEach::Continue)
|
|
588
|
+
})?;
|
|
589
|
+
arrow_schema
|
|
590
|
+
.try_insert(
|
|
591
|
+
name.clone().into(),
|
|
592
|
+
ArrowField::new(name.into(), dtype, is_nullable).with_metadata(metadata),
|
|
593
|
+
)
|
|
594
|
+
.map_err(to_rb_err)?;
|
|
595
|
+
}
|
|
596
|
+
Ok(Wrap(arrow_schema))
|
|
532
597
|
}
|
|
533
598
|
}
|
|
534
599
|
|
|
@@ -673,7 +738,7 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
|
673
738
|
|
|
674
739
|
impl ObjectValue {
|
|
675
740
|
pub fn to_value(&self) -> Value {
|
|
676
|
-
self.clone().
|
|
741
|
+
self.clone().into_value_with(&Ruby::get().unwrap())
|
|
677
742
|
}
|
|
678
743
|
}
|
|
679
744
|
|
|
@@ -827,7 +892,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
|
|
|
827
892
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
828
893
|
"uncompressed" => None,
|
|
829
894
|
"lz4" => Some(IpcCompression::LZ4),
|
|
830
|
-
"zstd" => Some(IpcCompression::ZSTD),
|
|
895
|
+
"zstd" => Some(IpcCompression::ZSTD(Default::default())),
|
|
831
896
|
v => {
|
|
832
897
|
return Err(RbValueError::new_err(format!(
|
|
833
898
|
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
|
|
@@ -1036,21 +1101,6 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
|
1036
1101
|
}
|
|
1037
1102
|
}
|
|
1038
1103
|
|
|
1039
|
-
impl TryConvert for Wrap<IpcCompression> {
|
|
1040
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1041
|
-
let parsed = match String::try_convert(ob)?.as_str() {
|
|
1042
|
-
"lz4" => IpcCompression::LZ4,
|
|
1043
|
-
"zstd" => IpcCompression::ZSTD,
|
|
1044
|
-
v => {
|
|
1045
|
-
return Err(RbValueError::new_err(format!(
|
|
1046
|
-
"compression must be one of {{'lz4', 'zstd'}}, got {v}"
|
|
1047
|
-
)));
|
|
1048
|
-
}
|
|
1049
|
-
};
|
|
1050
|
-
Ok(Wrap(parsed))
|
|
1051
|
-
}
|
|
1052
|
-
}
|
|
1053
|
-
|
|
1054
1104
|
impl TryConvert for Wrap<SearchSortedSide> {
|
|
1055
1105
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1056
1106
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
@@ -1153,7 +1203,8 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
|
1153
1203
|
}
|
|
1154
1204
|
|
|
1155
1205
|
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
|
1156
|
-
let out = CloudOptions::from_untyped_config(uri, kv)
|
|
1206
|
+
let out = CloudOptions::from_untyped_config(CloudScheme::from_uri(uri).as_ref(), kv)
|
|
1207
|
+
.map_err(RbPolarsErr::from)?;
|
|
1157
1208
|
Ok(out)
|
|
1158
1209
|
}
|
|
1159
1210
|
|
|
@@ -1180,7 +1231,121 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
|
|
|
1180
1231
|
let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
|
|
1181
1232
|
return Ok(out);
|
|
1182
1233
|
}
|
|
1183
|
-
|
|
1234
|
+
|
|
1235
|
+
let integer_upcast = match &*ob.funcall::<_, _, String>("integer_cast", ())? {
|
|
1236
|
+
"upcast" => true,
|
|
1237
|
+
"forbid" => false,
|
|
1238
|
+
v => {
|
|
1239
|
+
return Err(RbValueError::new_err(format!(
|
|
1240
|
+
"unknown option for integer_cast: {v}"
|
|
1241
|
+
)));
|
|
1242
|
+
}
|
|
1243
|
+
};
|
|
1244
|
+
|
|
1245
|
+
let mut float_upcast = false;
|
|
1246
|
+
let mut float_downcast = false;
|
|
1247
|
+
|
|
1248
|
+
let float_cast_object: Value = ob.funcall("float_cast", ())?;
|
|
1249
|
+
|
|
1250
|
+
parse_multiple_options("float_cast", float_cast_object, |v| {
|
|
1251
|
+
match v {
|
|
1252
|
+
"forbid" => {}
|
|
1253
|
+
"upcast" => float_upcast = true,
|
|
1254
|
+
"downcast" => float_downcast = true,
|
|
1255
|
+
v => {
|
|
1256
|
+
return Err(RbValueError::new_err(format!(
|
|
1257
|
+
"unknown option for float_cast: {v}"
|
|
1258
|
+
)));
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
Ok(())
|
|
1263
|
+
})?;
|
|
1264
|
+
|
|
1265
|
+
let mut datetime_nanoseconds_downcast = false;
|
|
1266
|
+
let mut datetime_convert_timezone = false;
|
|
1267
|
+
|
|
1268
|
+
let datetime_cast_object: Value = ob.funcall("datetime_cast", ())?;
|
|
1269
|
+
|
|
1270
|
+
parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
|
|
1271
|
+
match v {
|
|
1272
|
+
"forbid" => {}
|
|
1273
|
+
"nanosecond-downcast" => datetime_nanoseconds_downcast = true,
|
|
1274
|
+
"convert-timezone" => datetime_convert_timezone = true,
|
|
1275
|
+
v => {
|
|
1276
|
+
return Err(RbValueError::new_err(format!(
|
|
1277
|
+
"unknown option for datetime_cast: {v}"
|
|
1278
|
+
)));
|
|
1279
|
+
}
|
|
1280
|
+
};
|
|
1281
|
+
|
|
1282
|
+
Ok(())
|
|
1283
|
+
})?;
|
|
1284
|
+
|
|
1285
|
+
let missing_struct_fields =
|
|
1286
|
+
match &*ob.funcall::<_, _, String>("missing_struct_fields", ())? {
|
|
1287
|
+
"insert" => MissingColumnsPolicy::Insert,
|
|
1288
|
+
"raise" => MissingColumnsPolicy::Raise,
|
|
1289
|
+
v => {
|
|
1290
|
+
return Err(RbValueError::new_err(format!(
|
|
1291
|
+
"unknown option for missing_struct_fields: {v}"
|
|
1292
|
+
)));
|
|
1293
|
+
}
|
|
1294
|
+
};
|
|
1295
|
+
|
|
1296
|
+
let extra_struct_fields = match &*ob.funcall::<_, _, String>("extra_struct_fields", ())? {
|
|
1297
|
+
"ignore" => ExtraColumnsPolicy::Ignore,
|
|
1298
|
+
"raise" => ExtraColumnsPolicy::Raise,
|
|
1299
|
+
v => {
|
|
1300
|
+
return Err(RbValueError::new_err(format!(
|
|
1301
|
+
"unknown option for extra_struct_fields: {v}"
|
|
1302
|
+
)));
|
|
1303
|
+
}
|
|
1304
|
+
};
|
|
1305
|
+
|
|
1306
|
+
let categorical_to_string =
|
|
1307
|
+
match &*ob.funcall::<_, _, String>("categorical_to_string", ())? {
|
|
1308
|
+
"allow" => true,
|
|
1309
|
+
"forbid" => false,
|
|
1310
|
+
v => {
|
|
1311
|
+
return Err(RbValueError::new_err(format!(
|
|
1312
|
+
"unknown option for categorical_to_string: {v}"
|
|
1313
|
+
)));
|
|
1314
|
+
}
|
|
1315
|
+
};
|
|
1316
|
+
|
|
1317
|
+
return Ok(Wrap(CastColumnsPolicy {
|
|
1318
|
+
integer_upcast,
|
|
1319
|
+
float_upcast,
|
|
1320
|
+
float_downcast,
|
|
1321
|
+
datetime_nanoseconds_downcast,
|
|
1322
|
+
datetime_microseconds_downcast: false,
|
|
1323
|
+
datetime_convert_timezone,
|
|
1324
|
+
null_upcast: true,
|
|
1325
|
+
categorical_to_string,
|
|
1326
|
+
missing_struct_fields,
|
|
1327
|
+
extra_struct_fields,
|
|
1328
|
+
}));
|
|
1329
|
+
|
|
1330
|
+
fn parse_multiple_options(
|
|
1331
|
+
parameter_name: &'static str,
|
|
1332
|
+
rb_object: Value,
|
|
1333
|
+
mut parser_func: impl FnMut(&str) -> RbResult<()>,
|
|
1334
|
+
) -> RbResult<()> {
|
|
1335
|
+
if let Ok(v) = String::try_convert(rb_object) {
|
|
1336
|
+
parser_func(&v)?;
|
|
1337
|
+
} else if let Ok(v) = RArray::try_convert(rb_object) {
|
|
1338
|
+
for v in v {
|
|
1339
|
+
parser_func(&String::try_convert(v)?)?;
|
|
1340
|
+
}
|
|
1341
|
+
} else {
|
|
1342
|
+
return Err(RbValueError::new_err(format!(
|
|
1343
|
+
"unknown type for {parameter_name}: {rb_object}"
|
|
1344
|
+
)));
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1347
|
+
Ok(())
|
|
1348
|
+
}
|
|
1184
1349
|
}
|
|
1185
1350
|
}
|
|
1186
1351
|
|
|
@@ -1198,7 +1363,7 @@ pub fn parse_fill_null_strategy(
|
|
|
1198
1363
|
"one" => FillNullStrategy::One,
|
|
1199
1364
|
e => {
|
|
1200
1365
|
return Err(magnus::Error::new(
|
|
1201
|
-
|
|
1366
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
1202
1367
|
format!(
|
|
1203
1368
|
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
|
|
1204
1369
|
),
|
|
@@ -1249,15 +1414,6 @@ pub fn parse_parquet_compression(
|
|
|
1249
1414
|
Ok(parsed)
|
|
1250
1415
|
}
|
|
1251
1416
|
|
|
1252
|
-
impl TryConvert for Wrap<NonZeroUsize> {
|
|
1253
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1254
|
-
let v = usize::try_convert(ob)?;
|
|
1255
|
-
NonZeroUsize::new(v)
|
|
1256
|
-
.map(Wrap)
|
|
1257
|
-
.ok_or(RbValueError::new_err("must be non-zero"))
|
|
1258
|
-
}
|
|
1259
|
-
}
|
|
1260
|
-
|
|
1261
1417
|
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
|
|
1262
1418
|
where
|
|
1263
1419
|
I: IntoIterator<Item = S>,
|
|
@@ -1411,3 +1567,40 @@ impl TryConvert for Wrap<DeletionFilesList> {
|
|
|
1411
1567
|
}))
|
|
1412
1568
|
}
|
|
1413
1569
|
}
|
|
1570
|
+
|
|
1571
|
+
impl TryConvert for Wrap<DefaultFieldValues> {
|
|
1572
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1573
|
+
let (default_values_type, ob) = <(String, Value)>::try_convert(ob)?;
|
|
1574
|
+
|
|
1575
|
+
Ok(Wrap(match &*default_values_type {
|
|
1576
|
+
"iceberg" => {
|
|
1577
|
+
let dict = RHash::try_convert(ob)?;
|
|
1578
|
+
|
|
1579
|
+
let mut out = PlIndexMap::new();
|
|
1580
|
+
|
|
1581
|
+
dict.foreach(|k: u32, v: Value| {
|
|
1582
|
+
let v: Result<Column, String> = if let Ok(s) = get_series(v) {
|
|
1583
|
+
Ok(s.into_column())
|
|
1584
|
+
} else {
|
|
1585
|
+
let err_msg = String::try_convert(v)?;
|
|
1586
|
+
Err(err_msg)
|
|
1587
|
+
};
|
|
1588
|
+
|
|
1589
|
+
out.insert(k, v);
|
|
1590
|
+
|
|
1591
|
+
Ok(ForEach::Continue)
|
|
1592
|
+
})?;
|
|
1593
|
+
|
|
1594
|
+
DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
|
|
1595
|
+
out,
|
|
1596
|
+
)))
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
v => {
|
|
1600
|
+
return Err(RbValueError::new_err(format!(
|
|
1601
|
+
"unknown deletion file type: {v}"
|
|
1602
|
+
)));
|
|
1603
|
+
}
|
|
1604
|
+
}))
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{RArray, RHash, Symbol, Value, prelude::*, r_hash::ForEach};
|
|
1
|
+
use magnus::{RArray, RHash, Ruby, Symbol, Value, prelude::*, r_hash::ForEach};
|
|
2
2
|
use polars::frame::row::{Row, rows_to_schema_supertypes, rows_to_supertypes};
|
|
3
3
|
use polars::prelude::*;
|
|
4
4
|
|
|
@@ -54,7 +54,7 @@ fn finish_from_rows(
|
|
|
54
54
|
schema_overrides: Option<Schema>,
|
|
55
55
|
infer_schema_length: Option<usize>,
|
|
56
56
|
) -> RbResult<RbDataFrame> {
|
|
57
|
-
let
|
|
57
|
+
let schema = if let Some(mut schema) = schema {
|
|
58
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
|
59
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
|
60
60
|
schema
|
|
@@ -62,11 +62,6 @@ fn finish_from_rows(
|
|
|
62
62
|
rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
|
|
63
63
|
};
|
|
64
64
|
|
|
65
|
-
// TODO: Remove this step when Decimals are supported properly.
|
|
66
|
-
// Erasing the decimal precision/scale here will just require us to infer it again later.
|
|
67
|
-
// https://github.com/pola-rs/polars/issues/14427
|
|
68
|
-
erase_decimal_precision_scale(&mut schema);
|
|
69
|
-
|
|
70
65
|
let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
|
|
71
66
|
Ok(df.into())
|
|
72
67
|
}
|
|
@@ -106,14 +101,6 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
|
106
101
|
}
|
|
107
102
|
}
|
|
108
103
|
|
|
109
|
-
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
|
110
|
-
for dtype in schema.iter_values_mut() {
|
|
111
|
-
if let DataType::Decimal(_, _) = dtype {
|
|
112
|
-
*dtype = DataType::Decimal(None, None)
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
104
|
fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
|
|
118
105
|
where
|
|
119
106
|
I: IntoIterator<Item = &'a str>,
|
|
@@ -125,6 +112,7 @@ where
|
|
|
125
112
|
}
|
|
126
113
|
|
|
127
114
|
fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
|
|
115
|
+
let ruby = Ruby::get_with(*data);
|
|
128
116
|
let (data, len) = get_rbseq(*data)?;
|
|
129
117
|
let mut rows = Vec::with_capacity(len);
|
|
130
118
|
for d in data.into_iter() {
|
|
@@ -132,8 +120,8 @@ fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResu
|
|
|
132
120
|
|
|
133
121
|
let mut row = Vec::with_capacity(names.len());
|
|
134
122
|
for k in names.iter() {
|
|
135
|
-
// TODO improve performance
|
|
136
|
-
let val = match d.get(k.clone()).or_else(|| d.get(
|
|
123
|
+
// TODO improve performance (must work with GC)
|
|
124
|
+
let val = match d.get(k.clone()).or_else(|| d.get(ruby.to_symbol(k))) {
|
|
137
125
|
None => AnyValue::Null,
|
|
138
126
|
Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
|
|
139
127
|
};
|