polars-df 0.20.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +19 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +275 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +2 -1
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +37 -0
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +106 -22
- data/ext/polars/src/expr/list.rs +45 -2
- data/ext/polars/src/expr/meta.rs +5 -28
- data/ext/polars/src/expr/mod.rs +4 -1
- data/ext/polars/src/expr/name.rs +10 -2
- data/ext/polars/src/expr/rolling.rs +21 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +73 -6
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +21 -11
- data/ext/polars/src/functions/lazy.rs +26 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +124 -111
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +216 -29
- data/ext/polars/src/map/dataframe.rs +9 -9
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +9 -9
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +75 -210
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +20 -2
data/ext/polars/src/map/mod.rs
CHANGED
@@ -2,27 +2,37 @@ pub mod dataframe;
|
|
2
2
|
pub mod lazy;
|
3
3
|
pub mod series;
|
4
4
|
|
5
|
-
use magnus::{
|
5
|
+
use magnus::{RHash, Value, prelude::*};
|
6
6
|
use polars::chunked_array::builder::get_list_builder;
|
7
7
|
use polars::prelude::*;
|
8
|
-
use polars_core::utils::CustomIterTools;
|
9
8
|
use polars_core::POOL;
|
9
|
+
use polars_core::utils::CustomIterTools;
|
10
10
|
use rayon::prelude::*;
|
11
11
|
|
12
|
-
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
|
12
|
+
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, RbValueError, Wrap};
|
13
|
+
|
14
|
+
pub trait RbPolarsNumericType: PolarsNumericType {}
|
13
15
|
|
14
|
-
|
16
|
+
impl RbPolarsNumericType for UInt8Type {}
|
17
|
+
impl RbPolarsNumericType for UInt16Type {}
|
18
|
+
impl RbPolarsNumericType for UInt32Type {}
|
19
|
+
impl RbPolarsNumericType for UInt64Type {}
|
20
|
+
impl RbPolarsNumericType for Int8Type {}
|
21
|
+
impl RbPolarsNumericType for Int16Type {}
|
22
|
+
impl RbPolarsNumericType for Int32Type {}
|
23
|
+
impl RbPolarsNumericType for Int64Type {}
|
24
|
+
impl RbPolarsNumericType for Float32Type {}
|
25
|
+
impl RbPolarsNumericType for Float64Type {}
|
15
26
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
impl RbArrowPrimitiveType for Float64Type {}
|
27
|
+
pub(super) fn check_nested_object(dt: &DataType) -> RbResult<()> {
|
28
|
+
if dt.contains_objects() {
|
29
|
+
Err(RbValueError::new_err(
|
30
|
+
"nested objects are not allowed\n\nSet `return_dtype: Polars::Object` to use Ruby's native nesting.",
|
31
|
+
))
|
32
|
+
} else {
|
33
|
+
Ok(())
|
34
|
+
}
|
35
|
+
}
|
26
36
|
|
27
37
|
fn iterator_to_struct(
|
28
38
|
it: impl Iterator<Item = Option<Value>>,
|
@@ -37,7 +47,7 @@ fn iterator_to_struct(
|
|
37
47
|
_ => {
|
38
48
|
return Err(crate::exceptions::ComputeError::new_err(format!(
|
39
49
|
"expected struct got {first_value:?}",
|
40
|
-
)))
|
50
|
+
)));
|
41
51
|
}
|
42
52
|
};
|
43
53
|
|
@@ -70,9 +80,11 @@ fn iterator_to_struct(
|
|
70
80
|
Some(dict) => {
|
71
81
|
let dict = RHash::try_convert(dict)?;
|
72
82
|
if dict.len() != struct_width {
|
73
|
-
return Err(crate::exceptions::ComputeError::new_err(
|
74
|
-
|
75
|
-
|
83
|
+
return Err(crate::exceptions::ComputeError::new_err(format!(
|
84
|
+
"Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.",
|
85
|
+
struct_width,
|
86
|
+
dict.len()
|
87
|
+
)));
|
76
88
|
}
|
77
89
|
// we ignore the keys of the rest of the dicts
|
78
90
|
// the first item determines the output name
|
@@ -109,7 +121,7 @@ fn iterator_to_primitive<T>(
|
|
109
121
|
capacity: usize,
|
110
122
|
) -> ChunkedArray<T>
|
111
123
|
where
|
112
|
-
T:
|
124
|
+
T: RbPolarsNumericType,
|
113
125
|
{
|
114
126
|
// safety: we know the iterators len
|
115
127
|
let mut ca: ChunkedArray<T> = unsafe {
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, prelude::*, typed_data::Obj
|
1
|
+
use magnus::{IntoValue, TryConvert, Value, class, prelude::*, typed_data::Obj};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
|
|
85
85
|
first_value: Option<D::Native>,
|
86
86
|
) -> RbResult<ChunkedArray<D>>
|
87
87
|
where
|
88
|
-
D:
|
88
|
+
D: RbPolarsNumericType,
|
89
89
|
D::Native: IntoValue + TryConvert;
|
90
90
|
|
91
91
|
/// Apply a lambda with a boolean output type
|
@@ -143,7 +143,7 @@ where
|
|
143
143
|
{
|
144
144
|
match call_lambda(lambda, in_val) {
|
145
145
|
Ok(out) => S::try_convert(out),
|
146
|
-
Err(e) => panic!("ruby function failed {}"
|
146
|
+
Err(e) => panic!("ruby function failed {e}"),
|
147
147
|
}
|
148
148
|
}
|
149
149
|
|
@@ -219,7 +219,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
219
219
|
first_value: Option<D::Native>,
|
220
220
|
) -> RbResult<ChunkedArray<D>>
|
221
221
|
where
|
222
|
-
D:
|
222
|
+
D: RbPolarsNumericType,
|
223
223
|
D::Native: IntoValue + TryConvert,
|
224
224
|
{
|
225
225
|
let skip = usize::from(first_value.is_some());
|
@@ -438,7 +438,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
438
438
|
|
439
439
|
impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
|
440
440
|
where
|
441
|
-
T:
|
441
|
+
T: RbPolarsNumericType + PolarsNumericType,
|
442
442
|
T::Native: IntoValue + TryConvert,
|
443
443
|
ChunkedArray<T>: IntoSeries,
|
444
444
|
{
|
@@ -503,7 +503,7 @@ where
|
|
503
503
|
first_value: Option<D::Native>,
|
504
504
|
) -> RbResult<ChunkedArray<D>>
|
505
505
|
where
|
506
|
-
D:
|
506
|
+
D: RbPolarsNumericType,
|
507
507
|
D::Native: IntoValue + TryConvert,
|
508
508
|
{
|
509
509
|
let skip = usize::from(first_value.is_some());
|
@@ -782,7 +782,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
782
782
|
first_value: Option<D::Native>,
|
783
783
|
) -> RbResult<ChunkedArray<D>>
|
784
784
|
where
|
785
|
-
D:
|
785
|
+
D: RbPolarsNumericType,
|
786
786
|
D::Native: IntoValue + TryConvert,
|
787
787
|
{
|
788
788
|
let skip = usize::from(first_value.is_some());
|
@@ -999,7 +999,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
999
999
|
}
|
1000
1000
|
}
|
1001
1001
|
|
1002
|
-
fn iter_struct(ca: &StructChunked) -> impl Iterator<Item = AnyValue
|
1002
|
+
fn iter_struct(ca: &StructChunked) -> impl Iterator<Item = AnyValue<'_>> {
|
1003
1003
|
(0..ca.len()).map(|i| unsafe { ca.get_any_value_unchecked(i) })
|
1004
1004
|
}
|
1005
1005
|
|
@@ -1047,7 +1047,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1047
1047
|
first_value: Option<D::Native>,
|
1048
1048
|
) -> RbResult<ChunkedArray<D>>
|
1049
1049
|
where
|
1050
|
-
D:
|
1050
|
+
D: RbPolarsNumericType,
|
1051
1051
|
D::Native: IntoValue + TryConvert,
|
1052
1052
|
{
|
1053
1053
|
let skip = usize::from(first_value.is_some());
|
@@ -9,8 +9,8 @@ use polars_core::chunked_array::object::registry;
|
|
9
9
|
use polars_core::chunked_array::object::registry::AnonymousObjectBuilder;
|
10
10
|
use polars_core::prelude::AnyValue;
|
11
11
|
|
12
|
-
use crate::prelude::ObjectValue;
|
13
12
|
use crate::Wrap;
|
13
|
+
use crate::prelude::ObjectValue;
|
14
14
|
|
15
15
|
static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
|
16
16
|
|
@@ -39,6 +39,9 @@ pub(crate) fn register_startup_deps() {
|
|
39
39
|
object_converter,
|
40
40
|
rbobject_converter,
|
41
41
|
physical_dtype,
|
42
|
-
)
|
42
|
+
);
|
43
|
+
// TODO
|
44
|
+
// Register warning function for `polars_warn!`.
|
45
|
+
// polars_error::set_warning_function(warning_function);
|
43
46
|
});
|
44
47
|
}
|
@@ -3,6 +3,10 @@ use crate::prelude::*;
|
|
3
3
|
use crate::{RbResult, RbSeries};
|
4
4
|
use magnus::{IntoValue, Value};
|
5
5
|
|
6
|
+
fn scalar_to_rb(scalar: RbResult<Scalar>) -> RbResult<Value> {
|
7
|
+
Ok(Wrap(scalar?.as_any_value()).into_value())
|
8
|
+
}
|
9
|
+
|
6
10
|
impl RbSeries {
|
7
11
|
pub fn any(&self, ignore_nulls: bool) -> RbResult<Option<bool>> {
|
8
12
|
let binding = self.series.borrow();
|
@@ -118,4 +122,44 @@ impl RbSeries {
|
|
118
122
|
)
|
119
123
|
.into_value())
|
120
124
|
}
|
125
|
+
|
126
|
+
pub fn first(&self) -> RbResult<Value> {
|
127
|
+
scalar_to_rb(Ok(self.series.borrow().first()))
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn last(&self) -> RbResult<Value> {
|
131
|
+
scalar_to_rb(Ok(self.series.borrow().last()))
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn approx_n_unique(&self) -> RbResult<IdxSize> {
|
135
|
+
Ok(self
|
136
|
+
.series
|
137
|
+
.borrow()
|
138
|
+
.approx_n_unique()
|
139
|
+
.map_err(RbPolarsErr::from)?)
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn bitwise_and(&self) -> RbResult<Value> {
|
143
|
+
scalar_to_rb(Ok(self
|
144
|
+
.series
|
145
|
+
.borrow()
|
146
|
+
.and_reduce()
|
147
|
+
.map_err(RbPolarsErr::from)?))
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn bitwise_or(&self) -> RbResult<Value> {
|
151
|
+
scalar_to_rb(Ok(self
|
152
|
+
.series
|
153
|
+
.borrow()
|
154
|
+
.or_reduce()
|
155
|
+
.map_err(RbPolarsErr::from)?))
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn bitwise_xor(&self) -> RbResult<Value> {
|
159
|
+
scalar_to_rb(Ok(self
|
160
|
+
.series
|
161
|
+
.borrow()
|
162
|
+
.xor_reduce()
|
163
|
+
.map_err(RbPolarsErr::from)?))
|
164
|
+
}
|
121
165
|
}
|
@@ -1,8 +1,8 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{RArray, RString, prelude::*};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
4
|
use crate::any_value::rb_object_to_any_value;
|
5
|
-
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped
|
5
|
+
use crate::conversion::{Wrap, slice_extract_wrapped, vec_extract_wrapped};
|
6
6
|
use crate::prelude::ObjectValue;
|
7
7
|
use crate::series::to_series;
|
8
8
|
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -122,13 +122,17 @@ impl RbSeries {
|
|
122
122
|
.into_iter()
|
123
123
|
.map(|v| rb_object_to_any_value(v, strict))
|
124
124
|
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
-
let s =
|
126
|
-
|
127
|
-
|
128
|
-
|
125
|
+
let s = Series::from_any_values_and_dtype(
|
126
|
+
name.into(),
|
127
|
+
any_values.as_slice(),
|
128
|
+
&dtype.0,
|
129
|
+
strict,
|
130
|
+
)
|
131
|
+
.map_err(|e| {
|
132
|
+
RbTypeError::new_err(format!(
|
129
133
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
130
134
|
))
|
131
|
-
|
135
|
+
})?;
|
132
136
|
Ok(s.into())
|
133
137
|
}
|
134
138
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{IntoValue, RArray, Value, value::qnil};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::prelude::*;
|
5
4
|
use crate::RbSeries;
|
5
|
+
use crate::prelude::*;
|
6
6
|
|
7
7
|
impl RbSeries {
|
8
8
|
/// Convert this Series to a Ruby array.
|
@@ -25,7 +25,9 @@ impl RbSeries {
|
|
25
25
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
26
26
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
27
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
28
|
-
|
28
|
+
with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
|
29
|
+
RArray::from_iter(series.cat::<$C>().unwrap().iter_str()).into_value()
|
30
|
+
})
|
29
31
|
}
|
30
32
|
DataType::Object(_) => {
|
31
33
|
let v = RArray::with_capacity(series.len());
|
@@ -132,7 +134,7 @@ impl RbSeries {
|
|
132
134
|
unreachable!()
|
133
135
|
}
|
134
136
|
};
|
135
|
-
rblist
|
137
|
+
rblist.into_value()
|
136
138
|
}
|
137
139
|
|
138
140
|
to_a_recursive(series)
|
@@ -1,10 +1,11 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{Error, IntoValue, RArray, Value, exception, value::ReprValue};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars::series::IsSorted;
|
4
|
+
use polars_core::utils::flatten::flatten_series;
|
4
5
|
|
5
|
-
use crate::apply_method_all_arrow_series2;
|
6
6
|
use crate::conversion::*;
|
7
|
-
use crate::
|
7
|
+
use crate::exceptions::RbIndexError;
|
8
|
+
use crate::rb_modules;
|
8
9
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
|
9
10
|
|
10
11
|
impl RbSeries {
|
@@ -42,21 +43,15 @@ impl RbSeries {
|
|
42
43
|
}
|
43
44
|
|
44
45
|
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
45
|
-
|
46
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
47
|
-
Ok(ca.uses_lexical_ordering())
|
46
|
+
Ok(true)
|
48
47
|
}
|
49
48
|
|
50
49
|
pub fn cat_is_local(&self) -> RbResult<bool> {
|
51
|
-
|
52
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
53
|
-
Ok(ca.get_rev_map().is_local())
|
50
|
+
Ok(false)
|
54
51
|
}
|
55
52
|
|
56
53
|
pub fn cat_to_local(&self) -> RbResult<Self> {
|
57
|
-
|
58
|
-
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
59
|
-
Ok(ca.to_local().into_series().into())
|
54
|
+
Ok(self.clone())
|
60
55
|
}
|
61
56
|
|
62
57
|
pub fn estimated_size(&self) -> usize {
|
@@ -75,7 +70,7 @@ impl RbSeries {
|
|
75
70
|
if val == v_trunc {
|
76
71
|
val
|
77
72
|
} else {
|
78
|
-
format!("{}…"
|
73
|
+
format!("{v_trunc}…")
|
79
74
|
}
|
80
75
|
} else {
|
81
76
|
val
|
@@ -92,8 +87,39 @@ impl RbSeries {
|
|
92
87
|
}
|
93
88
|
}
|
94
89
|
|
95
|
-
pub fn
|
96
|
-
|
90
|
+
pub fn get_index(&self, index: usize) -> RbResult<Value> {
|
91
|
+
let binding = self.series.borrow();
|
92
|
+
let av = match binding.get(index) {
|
93
|
+
Ok(v) => v,
|
94
|
+
Err(PolarsError::OutOfBounds(err)) => {
|
95
|
+
return Err(RbIndexError::new_err(err.to_string()));
|
96
|
+
}
|
97
|
+
Err(e) => return Err(RbPolarsErr::from(e).into()),
|
98
|
+
};
|
99
|
+
|
100
|
+
match av {
|
101
|
+
AnyValue::List(s) | AnyValue::Array(s, _) => {
|
102
|
+
let rbseries = RbSeries::new(s);
|
103
|
+
rb_modules::utils().funcall("wrap_s", (rbseries,))
|
104
|
+
}
|
105
|
+
_ => Ok(Wrap(av).into_value()),
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
pub fn get_index_signed(&self, index: isize) -> RbResult<Value> {
|
110
|
+
let index = if index < 0 {
|
111
|
+
match self.len().checked_sub(index.unsigned_abs()) {
|
112
|
+
Some(v) => v,
|
113
|
+
None => {
|
114
|
+
return Err(RbIndexError::new_err(
|
115
|
+
polars_err!(oob = index, self.len()).to_string(),
|
116
|
+
));
|
117
|
+
}
|
118
|
+
}
|
119
|
+
} else {
|
120
|
+
usize::try_from(index).unwrap()
|
121
|
+
};
|
122
|
+
self.get_index(index)
|
97
123
|
}
|
98
124
|
|
99
125
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
@@ -287,7 +313,7 @@ impl RbSeries {
|
|
287
313
|
}
|
288
314
|
}
|
289
315
|
|
290
|
-
pub fn
|
316
|
+
pub fn not_(&self) -> RbResult<Self> {
|
291
317
|
let binding = self.series.borrow();
|
292
318
|
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
293
319
|
Ok((!bool).into_series().into())
|
@@ -305,198 +331,6 @@ impl RbSeries {
|
|
305
331
|
RbSeries::new(self.series.borrow().clone())
|
306
332
|
}
|
307
333
|
|
308
|
-
pub fn apply_lambda(
|
309
|
-
&self,
|
310
|
-
lambda: Value,
|
311
|
-
output_type: Option<Wrap<DataType>>,
|
312
|
-
skip_nulls: bool,
|
313
|
-
) -> RbResult<Self> {
|
314
|
-
let series = &self.series.borrow();
|
315
|
-
|
316
|
-
let output_type = output_type.map(|dt| dt.0);
|
317
|
-
|
318
|
-
macro_rules! dispatch_apply {
|
319
|
-
($self:expr, $method:ident, $($args:expr),*) => {
|
320
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
321
|
-
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
322
|
-
// ca.$method($($args),*)
|
323
|
-
todo!()
|
324
|
-
} else {
|
325
|
-
apply_method_all_arrow_series2!(
|
326
|
-
$self,
|
327
|
-
$method,
|
328
|
-
$($args),*
|
329
|
-
)
|
330
|
-
}
|
331
|
-
|
332
|
-
}
|
333
|
-
|
334
|
-
}
|
335
|
-
|
336
|
-
if matches!(
|
337
|
-
series.dtype(),
|
338
|
-
DataType::Datetime(_, _)
|
339
|
-
| DataType::Date
|
340
|
-
| DataType::Duration(_)
|
341
|
-
| DataType::Categorical(_, _)
|
342
|
-
| DataType::Time
|
343
|
-
) || !skip_nulls
|
344
|
-
{
|
345
|
-
let mut avs = Vec::with_capacity(series.len());
|
346
|
-
let iter = series.iter().map(|av| {
|
347
|
-
let input = Wrap(av);
|
348
|
-
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
349
|
-
.unwrap()
|
350
|
-
.0
|
351
|
-
});
|
352
|
-
avs.extend(iter);
|
353
|
-
return Ok(Series::new(self.name().into(), &avs).into());
|
354
|
-
}
|
355
|
-
|
356
|
-
let out = match output_type {
|
357
|
-
Some(DataType::Int8) => {
|
358
|
-
let ca: Int8Chunked = dispatch_apply!(
|
359
|
-
series,
|
360
|
-
apply_lambda_with_primitive_out_type,
|
361
|
-
lambda,
|
362
|
-
0,
|
363
|
-
None
|
364
|
-
)?;
|
365
|
-
ca.into_series()
|
366
|
-
}
|
367
|
-
Some(DataType::Int16) => {
|
368
|
-
let ca: Int16Chunked = dispatch_apply!(
|
369
|
-
series,
|
370
|
-
apply_lambda_with_primitive_out_type,
|
371
|
-
lambda,
|
372
|
-
0,
|
373
|
-
None
|
374
|
-
)?;
|
375
|
-
ca.into_series()
|
376
|
-
}
|
377
|
-
Some(DataType::Int32) => {
|
378
|
-
let ca: Int32Chunked = dispatch_apply!(
|
379
|
-
series,
|
380
|
-
apply_lambda_with_primitive_out_type,
|
381
|
-
lambda,
|
382
|
-
0,
|
383
|
-
None
|
384
|
-
)?;
|
385
|
-
ca.into_series()
|
386
|
-
}
|
387
|
-
Some(DataType::Int64) => {
|
388
|
-
let ca: Int64Chunked = dispatch_apply!(
|
389
|
-
series,
|
390
|
-
apply_lambda_with_primitive_out_type,
|
391
|
-
lambda,
|
392
|
-
0,
|
393
|
-
None
|
394
|
-
)?;
|
395
|
-
ca.into_series()
|
396
|
-
}
|
397
|
-
Some(DataType::UInt8) => {
|
398
|
-
let ca: UInt8Chunked = dispatch_apply!(
|
399
|
-
series,
|
400
|
-
apply_lambda_with_primitive_out_type,
|
401
|
-
lambda,
|
402
|
-
0,
|
403
|
-
None
|
404
|
-
)?;
|
405
|
-
ca.into_series()
|
406
|
-
}
|
407
|
-
Some(DataType::UInt16) => {
|
408
|
-
let ca: UInt16Chunked = dispatch_apply!(
|
409
|
-
series,
|
410
|
-
apply_lambda_with_primitive_out_type,
|
411
|
-
lambda,
|
412
|
-
0,
|
413
|
-
None
|
414
|
-
)?;
|
415
|
-
ca.into_series()
|
416
|
-
}
|
417
|
-
Some(DataType::UInt32) => {
|
418
|
-
let ca: UInt32Chunked = dispatch_apply!(
|
419
|
-
series,
|
420
|
-
apply_lambda_with_primitive_out_type,
|
421
|
-
lambda,
|
422
|
-
0,
|
423
|
-
None
|
424
|
-
)?;
|
425
|
-
ca.into_series()
|
426
|
-
}
|
427
|
-
Some(DataType::UInt64) => {
|
428
|
-
let ca: UInt64Chunked = dispatch_apply!(
|
429
|
-
series,
|
430
|
-
apply_lambda_with_primitive_out_type,
|
431
|
-
lambda,
|
432
|
-
0,
|
433
|
-
None
|
434
|
-
)?;
|
435
|
-
ca.into_series()
|
436
|
-
}
|
437
|
-
Some(DataType::Float32) => {
|
438
|
-
let ca: Float32Chunked = dispatch_apply!(
|
439
|
-
series,
|
440
|
-
apply_lambda_with_primitive_out_type,
|
441
|
-
lambda,
|
442
|
-
0,
|
443
|
-
None
|
444
|
-
)?;
|
445
|
-
ca.into_series()
|
446
|
-
}
|
447
|
-
Some(DataType::Float64) => {
|
448
|
-
let ca: Float64Chunked = dispatch_apply!(
|
449
|
-
series,
|
450
|
-
apply_lambda_with_primitive_out_type,
|
451
|
-
lambda,
|
452
|
-
0,
|
453
|
-
None
|
454
|
-
)?;
|
455
|
-
ca.into_series()
|
456
|
-
}
|
457
|
-
Some(DataType::Boolean) => {
|
458
|
-
let ca: BooleanChunked =
|
459
|
-
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
460
|
-
ca.into_series()
|
461
|
-
}
|
462
|
-
Some(DataType::Date) => {
|
463
|
-
let ca: Int32Chunked = dispatch_apply!(
|
464
|
-
series,
|
465
|
-
apply_lambda_with_primitive_out_type,
|
466
|
-
lambda,
|
467
|
-
0,
|
468
|
-
None
|
469
|
-
)?;
|
470
|
-
ca.into_date().into_series()
|
471
|
-
}
|
472
|
-
Some(DataType::Datetime(tu, tz)) => {
|
473
|
-
let ca: Int64Chunked = dispatch_apply!(
|
474
|
-
series,
|
475
|
-
apply_lambda_with_primitive_out_type,
|
476
|
-
lambda,
|
477
|
-
0,
|
478
|
-
None
|
479
|
-
)?;
|
480
|
-
ca.into_datetime(tu, tz).into_series()
|
481
|
-
}
|
482
|
-
Some(DataType::String) => {
|
483
|
-
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
484
|
-
|
485
|
-
ca.into_series()
|
486
|
-
}
|
487
|
-
Some(DataType::Object(_)) => {
|
488
|
-
let ca =
|
489
|
-
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
490
|
-
ca.into_series()
|
491
|
-
}
|
492
|
-
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
493
|
-
|
494
|
-
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
495
|
-
};
|
496
|
-
|
497
|
-
Ok(RbSeries::new(out))
|
498
|
-
}
|
499
|
-
|
500
334
|
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
501
335
|
let binding = mask.series.borrow();
|
502
336
|
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
@@ -508,11 +342,16 @@ impl RbSeries {
|
|
508
342
|
Ok(RbSeries::new(s))
|
509
343
|
}
|
510
344
|
|
511
|
-
pub fn to_dummies(
|
345
|
+
pub fn to_dummies(
|
346
|
+
&self,
|
347
|
+
sep: Option<String>,
|
348
|
+
drop_first: bool,
|
349
|
+
drop_nulls: bool,
|
350
|
+
) -> RbResult<RbDataFrame> {
|
512
351
|
let df = self
|
513
352
|
.series
|
514
353
|
.borrow()
|
515
|
-
.to_dummies(sep.as_deref(), drop_first)
|
354
|
+
.to_dummies(sep.as_deref(), drop_first, drop_nulls)
|
516
355
|
.map_err(RbPolarsErr::from)?;
|
517
356
|
Ok(df.into())
|
518
357
|
}
|
@@ -565,6 +404,32 @@ impl RbSeries {
|
|
565
404
|
Ok(out.into())
|
566
405
|
}
|
567
406
|
|
407
|
+
pub fn get_chunks(&self) -> RbResult<RArray> {
|
408
|
+
flatten_series(&self.series.borrow())
|
409
|
+
.into_iter()
|
410
|
+
.map(|s| rb_modules::utils().funcall::<_, _, Value>("wrap_s", (Self::new(s),)))
|
411
|
+
.collect()
|
412
|
+
}
|
413
|
+
|
414
|
+
pub fn is_sorted(&self, descending: bool, nulls_last: bool) -> RbResult<bool> {
|
415
|
+
let options = SortOptions {
|
416
|
+
descending,
|
417
|
+
nulls_last,
|
418
|
+
multithreaded: true,
|
419
|
+
maintain_order: false,
|
420
|
+
limit: None,
|
421
|
+
};
|
422
|
+
Ok(self
|
423
|
+
.series
|
424
|
+
.borrow()
|
425
|
+
.is_sorted(options)
|
426
|
+
.map_err(RbPolarsErr::from)?)
|
427
|
+
}
|
428
|
+
|
429
|
+
pub fn clear(&self) -> Self {
|
430
|
+
self.series.borrow().clear().into()
|
431
|
+
}
|
432
|
+
|
568
433
|
pub fn time_unit(&self) -> Option<String> {
|
569
434
|
if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
|
570
435
|
Some(
|
@@ -1,13 +1,13 @@
|
|
1
1
|
use arrow::array::Array;
|
2
2
|
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
3
|
-
use magnus::prelude::*;
|
4
3
|
use magnus::Value;
|
4
|
+
use magnus::prelude::*;
|
5
5
|
use polars::prelude::*;
|
6
6
|
|
7
7
|
use super::RbSeries;
|
8
8
|
|
9
|
-
use crate::exceptions::RbValueError;
|
10
9
|
use crate::RbResult;
|
10
|
+
use crate::exceptions::RbValueError;
|
11
11
|
|
12
12
|
/// Import `arrow_c_stream` across Ruby boundary.
|
13
13
|
fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
|