polars-df 0.25.1 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/Cargo.lock +268 -95
- data/LICENSE.txt +1 -1
- data/README.md +1 -3
- data/ext/polars/Cargo.toml +18 -18
- data/ext/polars/src/catalog/unity.rs +15 -20
- data/ext/polars/src/conversion/any_value.rs +25 -24
- data/ext/polars/src/conversion/chunked_array.rs +58 -56
- data/ext/polars/src/conversion/datetime.rs +58 -7
- data/ext/polars/src/conversion/mod.rs +155 -141
- data/ext/polars/src/dataframe/export.rs +15 -12
- data/ext/polars/src/dataframe/general.rs +5 -4
- data/ext/polars/src/dataframe/map.rs +6 -4
- data/ext/polars/src/error.rs +1 -1
- data/ext/polars/src/expr/array.rs +0 -24
- data/ext/polars/src/expr/datatype.rs +3 -2
- data/ext/polars/src/expr/datetime.rs +4 -4
- data/ext/polars/src/expr/general.rs +27 -15
- data/ext/polars/src/expr/list.rs +0 -26
- data/ext/polars/src/functions/business.rs +2 -2
- data/ext/polars/src/functions/io.rs +4 -3
- data/ext/polars/src/functions/lazy.rs +58 -46
- data/ext/polars/src/functions/meta.rs +6 -5
- data/ext/polars/src/functions/mod.rs +0 -1
- data/ext/polars/src/functions/utils.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +4 -2
- data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
- data/ext/polars/src/io/scan_options.rs +6 -3
- data/ext/polars/src/io/sink_options.rs +2 -0
- data/ext/polars/src/lazyframe/general.rs +28 -13
- data/ext/polars/src/lazyframe/optflags.rs +2 -1
- data/ext/polars/src/lib.rs +14 -33
- data/ext/polars/src/map/lazy.rs +5 -2
- data/ext/polars/src/map/series.rs +19 -18
- data/ext/polars/src/on_startup.rs +16 -7
- data/ext/polars/src/ruby/numo.rs +3 -4
- data/ext/polars/src/ruby/rb_modules.rs +2 -4
- data/ext/polars/src/ruby/ruby_udf.rs +7 -9
- data/ext/polars/src/ruby/utils.rs +12 -1
- data/ext/polars/src/series/aggregation.rs +13 -1
- data/ext/polars/src/series/export.rs +33 -38
- data/ext/polars/src/series/general.rs +4 -3
- data/ext/polars/src/series/map.rs +3 -2
- data/ext/polars/src/series/scatter.rs +4 -4
- data/ext/polars/src/utils.rs +31 -7
- data/lib/polars/array_expr.rb +23 -7
- data/lib/polars/array_name_space.rb +16 -2
- data/lib/polars/binary_name_space.rb +32 -0
- data/lib/polars/data_frame.rb +73 -10
- data/lib/polars/date_time_expr.rb +91 -3
- data/lib/polars/date_time_name_space.rb +7 -1
- data/lib/polars/expr.rb +122 -44
- data/lib/polars/functions/business.rb +2 -2
- data/lib/polars/functions/eager.rb +80 -7
- data/lib/polars/functions/lazy.rb +5 -2
- data/lib/polars/io/csv.rb +27 -5
- data/lib/polars/io/ipc.rb +1 -1
- data/lib/polars/io/lines.rb +4 -4
- data/lib/polars/io/sink_options.rb +4 -2
- data/lib/polars/lazy_frame.rb +97 -14
- data/lib/polars/list_expr.rb +21 -7
- data/lib/polars/list_name_space.rb +16 -2
- data/lib/polars/query_opt_flags.rb +22 -5
- data/lib/polars/selectors.rb +1 -1
- data/lib/polars/series.rb +88 -19
- data/lib/polars/sql_context.rb +2 -2
- data/lib/polars/string_cache.rb +19 -72
- data/lib/polars/string_expr.rb +1 -7
- data/lib/polars/string_name_space.rb +1 -7
- data/lib/polars/utils/construction/series.rb +8 -3
- data/lib/polars/utils/convert.rb +16 -6
- data/lib/polars/utils/parse.rb +7 -0
- data/lib/polars/utils/reduce_balanced.rb +43 -0
- data/lib/polars/utils/various.rb +5 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -1
- metadata +3 -17
- data/ext/polars/src/functions/string_cache.rs +0 -24
|
@@ -2,49 +2,49 @@ use magnus::{IntoValue, Ruby, Value, value::ReprValue};
|
|
|
2
2
|
use polars_core::prelude::*;
|
|
3
3
|
|
|
4
4
|
use crate::prelude::*;
|
|
5
|
+
use crate::ruby::utils::TryIntoValue;
|
|
5
6
|
use crate::{RbPolarsErr, RbResult, RbSeries};
|
|
6
7
|
|
|
7
8
|
impl RbSeries {
|
|
8
9
|
/// Convert this Series to a Ruby array.
|
|
9
10
|
/// This operation copies data.
|
|
10
|
-
pub fn to_a(&
|
|
11
|
-
let series = &
|
|
11
|
+
pub fn to_a(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
|
|
12
|
+
let series = &self_.series.read();
|
|
12
13
|
|
|
13
|
-
fn to_a_recursive(series: &Series) -> RbResult<Value> {
|
|
14
|
-
let ruby = Ruby::get().unwrap();
|
|
14
|
+
fn to_a_recursive(ruby: &Ruby, series: &Series) -> RbResult<Value> {
|
|
15
15
|
let rblist = match series.dtype() {
|
|
16
16
|
DataType::Boolean => ruby
|
|
17
|
-
.ary_from_iter(series.bool().map_err(RbPolarsErr::from)
|
|
17
|
+
.ary_from_iter(series.bool().map_err(RbPolarsErr::from)?.iter())
|
|
18
18
|
.as_value(),
|
|
19
19
|
DataType::UInt8 => ruby
|
|
20
|
-
.ary_from_iter(series.u8().map_err(RbPolarsErr::from)
|
|
20
|
+
.ary_from_iter(series.u8().map_err(RbPolarsErr::from)?.iter())
|
|
21
21
|
.as_value(),
|
|
22
22
|
DataType::UInt16 => ruby
|
|
23
|
-
.ary_from_iter(series.u16().map_err(RbPolarsErr::from)
|
|
23
|
+
.ary_from_iter(series.u16().map_err(RbPolarsErr::from)?.iter())
|
|
24
24
|
.as_value(),
|
|
25
25
|
DataType::UInt32 => ruby
|
|
26
|
-
.ary_from_iter(series.u32().map_err(RbPolarsErr::from)
|
|
26
|
+
.ary_from_iter(series.u32().map_err(RbPolarsErr::from)?.iter())
|
|
27
27
|
.as_value(),
|
|
28
28
|
DataType::UInt64 => ruby
|
|
29
|
-
.ary_from_iter(series.u64().map_err(RbPolarsErr::from)
|
|
29
|
+
.ary_from_iter(series.u64().map_err(RbPolarsErr::from)?.iter())
|
|
30
30
|
.as_value(),
|
|
31
31
|
DataType::UInt128 => ruby
|
|
32
|
-
.ary_from_iter(series.u128().map_err(RbPolarsErr::from)
|
|
32
|
+
.ary_from_iter(series.u128().map_err(RbPolarsErr::from)?.iter())
|
|
33
33
|
.as_value(),
|
|
34
34
|
DataType::Int8 => ruby
|
|
35
|
-
.ary_from_iter(series.i8().map_err(RbPolarsErr::from)
|
|
35
|
+
.ary_from_iter(series.i8().map_err(RbPolarsErr::from)?.iter())
|
|
36
36
|
.as_value(),
|
|
37
37
|
DataType::Int16 => ruby
|
|
38
|
-
.ary_from_iter(series.i16().map_err(RbPolarsErr::from)
|
|
38
|
+
.ary_from_iter(series.i16().map_err(RbPolarsErr::from)?.iter())
|
|
39
39
|
.as_value(),
|
|
40
40
|
DataType::Int32 => ruby
|
|
41
|
-
.ary_from_iter(series.i32().map_err(RbPolarsErr::from)
|
|
41
|
+
.ary_from_iter(series.i32().map_err(RbPolarsErr::from)?.iter())
|
|
42
42
|
.as_value(),
|
|
43
43
|
DataType::Int64 => ruby
|
|
44
|
-
.ary_from_iter(series.i64().map_err(RbPolarsErr::from)
|
|
44
|
+
.ary_from_iter(series.i64().map_err(RbPolarsErr::from)?.iter())
|
|
45
45
|
.as_value(),
|
|
46
46
|
DataType::Int128 => ruby
|
|
47
|
-
.ary_from_iter(series.i128().map_err(RbPolarsErr::from)
|
|
47
|
+
.ary_from_iter(series.i128().map_err(RbPolarsErr::from)?.iter())
|
|
48
48
|
.as_value(),
|
|
49
49
|
DataType::Float16 => ruby
|
|
50
50
|
.ary_from_iter(
|
|
@@ -53,14 +53,15 @@ impl RbSeries {
|
|
|
53
53
|
.cast(&DataType::Float32)
|
|
54
54
|
.map_err(RbPolarsErr::from)?
|
|
55
55
|
.f32()
|
|
56
|
-
.map_err(RbPolarsErr::from)
|
|
56
|
+
.map_err(RbPolarsErr::from)?
|
|
57
|
+
.iter(),
|
|
57
58
|
)
|
|
58
59
|
.as_value(),
|
|
59
60
|
DataType::Float32 => ruby
|
|
60
|
-
.ary_from_iter(series.f32().map_err(RbPolarsErr::from)
|
|
61
|
+
.ary_from_iter(series.f32().map_err(RbPolarsErr::from)?.iter())
|
|
61
62
|
.as_value(),
|
|
62
63
|
DataType::Float64 => ruby
|
|
63
|
-
.ary_from_iter(series.f64().map_err(RbPolarsErr::from)
|
|
64
|
+
.ary_from_iter(series.f64().map_err(RbPolarsErr::from)?.iter())
|
|
64
65
|
.as_value(),
|
|
65
66
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
|
66
67
|
with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
|
|
@@ -71,10 +72,7 @@ impl RbSeries {
|
|
|
71
72
|
let v = ruby.ary_new_capa(series.len());
|
|
72
73
|
for i in 0..series.len() {
|
|
73
74
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
|
74
|
-
|
|
75
|
-
Some(val) => v.push(val.to_value()).unwrap(),
|
|
76
|
-
None => v.push(ruby.qnil()).unwrap(),
|
|
77
|
-
};
|
|
75
|
+
v.push(obj.cloned().into_value_with(ruby))?;
|
|
78
76
|
}
|
|
79
77
|
v.as_value()
|
|
80
78
|
}
|
|
@@ -87,7 +85,7 @@ impl RbSeries {
|
|
|
87
85
|
v.push(ruby.qnil()).unwrap();
|
|
88
86
|
}
|
|
89
87
|
Some(s) => {
|
|
90
|
-
let rblst = to_a_recursive(s.as_ref())?;
|
|
88
|
+
let rblst = to_a_recursive(ruby, s.as_ref())?;
|
|
91
89
|
v.push(rblst)?;
|
|
92
90
|
}
|
|
93
91
|
}
|
|
@@ -103,7 +101,7 @@ impl RbSeries {
|
|
|
103
101
|
v.push(ruby.qnil()).unwrap();
|
|
104
102
|
}
|
|
105
103
|
Some(s) => {
|
|
106
|
-
let rblst = to_a_recursive(s.as_ref())?;
|
|
104
|
+
let rblst = to_a_recursive(ruby, s.as_ref())?;
|
|
107
105
|
v.push(rblst)?;
|
|
108
106
|
}
|
|
109
107
|
}
|
|
@@ -112,35 +110,35 @@ impl RbSeries {
|
|
|
112
110
|
}
|
|
113
111
|
DataType::Date => {
|
|
114
112
|
let ca = series.date().map_err(RbPolarsErr::from)?;
|
|
115
|
-
return
|
|
113
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
116
114
|
}
|
|
117
115
|
DataType::Time => {
|
|
118
116
|
let ca = series.time().map_err(RbPolarsErr::from)?;
|
|
119
|
-
return
|
|
117
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
120
118
|
}
|
|
121
119
|
DataType::Datetime(_, _) => {
|
|
122
120
|
let ca = series.datetime().map_err(RbPolarsErr::from)?;
|
|
123
|
-
return
|
|
121
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
124
122
|
}
|
|
125
123
|
DataType::Decimal(_, _) => {
|
|
126
124
|
let ca = series.decimal().map_err(RbPolarsErr::from)?;
|
|
127
|
-
return
|
|
125
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
128
126
|
}
|
|
129
127
|
DataType::String => {
|
|
130
128
|
let ca = series.str().map_err(RbPolarsErr::from)?;
|
|
131
|
-
return
|
|
129
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
132
130
|
}
|
|
133
131
|
DataType::Struct(_) => {
|
|
134
132
|
let ca = series.struct_().map_err(RbPolarsErr::from)?;
|
|
135
|
-
return
|
|
133
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
136
134
|
}
|
|
137
135
|
DataType::Duration(_) => {
|
|
138
136
|
let ca = series.duration().map_err(RbPolarsErr::from)?;
|
|
139
|
-
return
|
|
137
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
140
138
|
}
|
|
141
139
|
DataType::Binary => {
|
|
142
140
|
let ca = series.binary().map_err(RbPolarsErr::from)?;
|
|
143
|
-
return
|
|
141
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
144
142
|
}
|
|
145
143
|
DataType::Null => {
|
|
146
144
|
let null: Option<u8> = None;
|
|
@@ -163,10 +161,7 @@ impl RbSeries {
|
|
|
163
161
|
}
|
|
164
162
|
impl ExactSizeIterator for NullIter {}
|
|
165
163
|
|
|
166
|
-
|
|
167
|
-
.unwrap()
|
|
168
|
-
.ary_from_iter(NullIter { iter, n })
|
|
169
|
-
.as_value()
|
|
164
|
+
ruby.ary_from_iter(NullIter { iter, n }).as_value()
|
|
170
165
|
}
|
|
171
166
|
DataType::Unknown(_) => {
|
|
172
167
|
panic!("to_a not implemented for unknown")
|
|
@@ -175,12 +170,12 @@ impl RbSeries {
|
|
|
175
170
|
unreachable!()
|
|
176
171
|
}
|
|
177
172
|
DataType::Extension(_, _) => {
|
|
178
|
-
return to_a_recursive(series.ext().unwrap().storage());
|
|
173
|
+
return to_a_recursive(ruby, series.ext().unwrap().storage());
|
|
179
174
|
}
|
|
180
175
|
};
|
|
181
176
|
Ok(rblist.as_value())
|
|
182
177
|
}
|
|
183
178
|
|
|
184
|
-
to_a_recursive(series)
|
|
179
|
+
to_a_recursive(ruby, series)
|
|
185
180
|
}
|
|
186
181
|
}
|
|
@@ -10,6 +10,7 @@ use crate::ruby::exceptions::{RbIndexError, RbRuntimeError, RbValueError};
|
|
|
10
10
|
use crate::ruby::gvl::GvlExt;
|
|
11
11
|
use crate::ruby::plan_callback::PlanCallbackExt;
|
|
12
12
|
use crate::ruby::ruby_function::RubyObject;
|
|
13
|
+
use crate::ruby::utils::TryIntoValue;
|
|
13
14
|
use crate::utils::EnterPolarsExt;
|
|
14
15
|
use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
|
|
15
16
|
|
|
@@ -112,7 +113,7 @@ impl RbSeries {
|
|
|
112
113
|
let rbseries = RbSeries::new(s);
|
|
113
114
|
rb_modules::pl_utils(ruby).funcall("wrap_s", (rbseries,))
|
|
114
115
|
}
|
|
115
|
-
_ =>
|
|
116
|
+
_ => Wrap(av).try_into_value_with(ruby),
|
|
116
117
|
}
|
|
117
118
|
}
|
|
118
119
|
|
|
@@ -156,8 +157,8 @@ impl RbSeries {
|
|
|
156
157
|
self.series.write().rename(name.into());
|
|
157
158
|
}
|
|
158
159
|
|
|
159
|
-
pub fn dtype(rb: &Ruby, self_: &Self) -> Value {
|
|
160
|
-
Wrap(self_.series.read().dtype().clone()).
|
|
160
|
+
pub fn dtype(rb: &Ruby, self_: &Self) -> RbResult<Value> {
|
|
161
|
+
Wrap(self_.series.read().dtype().clone()).try_into_value_with(rb)
|
|
161
162
|
}
|
|
162
163
|
|
|
163
164
|
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
|
@@ -4,6 +4,7 @@ use super::RbSeries;
|
|
|
4
4
|
use crate::map::series::ApplyLambdaGeneric;
|
|
5
5
|
use crate::prelude::*;
|
|
6
6
|
use crate::ruby::gvl::GvlExt;
|
|
7
|
+
use crate::ruby::utils::TryIntoValue;
|
|
7
8
|
use crate::series::construction::series_from_objects;
|
|
8
9
|
use crate::{RbPolarsErr, RbResult};
|
|
9
10
|
use crate::{apply_all_polars_dtypes, raise_err};
|
|
@@ -78,7 +79,7 @@ fn call_and_collect_objects<T, I>(
|
|
|
78
79
|
skip_nulls: bool,
|
|
79
80
|
) -> RbResult<Series>
|
|
80
81
|
where
|
|
81
|
-
T:
|
|
82
|
+
T: TryIntoValue,
|
|
82
83
|
I: Iterator<Item = Option<T>>,
|
|
83
84
|
{
|
|
84
85
|
let mut objects = Vec::with_capacity(len);
|
|
@@ -91,7 +92,7 @@ where
|
|
|
91
92
|
continue;
|
|
92
93
|
}
|
|
93
94
|
None => rb.qnil().into_value_with(rb),
|
|
94
|
-
Some(val) => val.
|
|
95
|
+
Some(val) => val.try_into_value_with(rb)?,
|
|
95
96
|
};
|
|
96
97
|
let out: Value = lambda.funcall("call", (arg,))?;
|
|
97
98
|
objects.push(ObjectValue {
|
|
@@ -139,23 +139,23 @@ fn scatter_impl(
|
|
|
139
139
|
with_match_physical_numeric_polars_type!(dt, |$T| {
|
|
140
140
|
let ca: &mut ChunkedArray<$T> = mutable_s.as_mut();
|
|
141
141
|
let values: &ChunkedArray<$T> = values.as_ref().as_ref();
|
|
142
|
-
ca.scatter(idx, values)
|
|
142
|
+
ca.scatter(idx, values.iter())
|
|
143
143
|
})
|
|
144
144
|
}
|
|
145
145
|
DataType::Boolean => {
|
|
146
146
|
let ca: &mut ChunkedArray<BooleanType> = mutable_s.as_mut();
|
|
147
147
|
let values = values.bool()?;
|
|
148
|
-
ca.scatter(idx, values)
|
|
148
|
+
ca.scatter(idx, values.iter())
|
|
149
149
|
}
|
|
150
150
|
DataType::Binary => {
|
|
151
151
|
let ca: &mut ChunkedArray<BinaryType> = mutable_s.as_mut();
|
|
152
152
|
let values = values.binary()?;
|
|
153
|
-
ca.scatter(idx, values)
|
|
153
|
+
ca.scatter(idx, values.iter())
|
|
154
154
|
}
|
|
155
155
|
DataType::String => {
|
|
156
156
|
let ca: &mut ChunkedArray<StringType> = mutable_s.as_mut();
|
|
157
157
|
let values = values.str()?;
|
|
158
|
-
ca.scatter(idx, values)
|
|
158
|
+
ca.scatter(idx, values.iter())
|
|
159
159
|
}
|
|
160
160
|
_ => Err(PolarsError::ComputeError(
|
|
161
161
|
format!("not yet implemented for dtype: {logical_dtype}").into(),
|
data/ext/polars/src/utils.rs
CHANGED
|
@@ -16,7 +16,6 @@ macro_rules! apply_all_polars_dtypes {
|
|
|
16
16
|
($self:expr, $method:ident, $($args:expr),*) => {
|
|
17
17
|
match $self.dtype() {
|
|
18
18
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
|
19
|
-
DataType::String => $self.str().unwrap().$method($($args),*),
|
|
20
19
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
|
21
20
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
|
22
21
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|
|
@@ -27,15 +26,40 @@ macro_rules! apply_all_polars_dtypes {
|
|
|
27
26
|
DataType::Int32 => $self.i32().unwrap().$method($($args),*),
|
|
28
27
|
DataType::Int64 => $self.i64().unwrap().$method($($args),*),
|
|
29
28
|
DataType::Int128 => $self.i128().unwrap().$method($($args),*),
|
|
30
|
-
DataType::Float16 =>
|
|
29
|
+
DataType::Float16 => $self.cast(&DataType::Float32).unwrap().f32().unwrap().$method($($args),*),
|
|
31
30
|
DataType::Float32 => $self.f32().unwrap().$method($($args),*),
|
|
32
31
|
DataType::Float64 => $self.f64().unwrap().$method($($args),*),
|
|
33
|
-
DataType::
|
|
34
|
-
DataType::
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
|
33
|
+
DataType::Binary => $self.binary().unwrap().$method($($args),*),
|
|
34
|
+
DataType::Decimal(_, _) => $self.decimal().unwrap().$method($($args),*),
|
|
35
|
+
|
|
36
|
+
DataType::Date => $self.date().unwrap().$method($($args),*),
|
|
37
|
+
DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
|
|
38
|
+
DataType::Duration(_) => $self.duration().unwrap().$method($($args),*),
|
|
39
|
+
DataType::Time => $self.time().unwrap().$method($($args),*),
|
|
40
|
+
|
|
41
|
+
DataType::List(_) => $self.list().unwrap().$method($($args),*),
|
|
37
42
|
DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
|
|
38
|
-
|
|
43
|
+
DataType::Array(_, _) => $self.array().unwrap().$method($($args),*),
|
|
44
|
+
|
|
45
|
+
dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => match dt.cat_physical().unwrap() {
|
|
46
|
+
CategoricalPhysical::U8 => $self.cat8().unwrap().$method($($args),*),
|
|
47
|
+
CategoricalPhysical::U16 => $self.cat16().unwrap().$method($($args),*),
|
|
48
|
+
CategoricalPhysical::U32 => $self.cat32().unwrap().$method($($args),*),
|
|
49
|
+
},
|
|
50
|
+
|
|
51
|
+
DataType::Object(_) => {
|
|
52
|
+
$self
|
|
53
|
+
.as_any()
|
|
54
|
+
.downcast_ref::<ObjectChunked<ObjectValue>>()
|
|
55
|
+
.unwrap()
|
|
56
|
+
.$method($($args),*)
|
|
57
|
+
},
|
|
58
|
+
DataType::Extension(_, _) => $self.ext().unwrap().$method($($args),*),
|
|
59
|
+
|
|
60
|
+
DataType::Null => $self.null().unwrap().$method($($args),*),
|
|
61
|
+
|
|
62
|
+
dt @ (DataType::BinaryOffset | DataType::Unknown(_)) => panic!("dtype {:?} not supported", dt)
|
|
39
63
|
}
|
|
40
64
|
}
|
|
41
65
|
}
|
data/lib/polars/array_expr.rb
CHANGED
|
@@ -377,7 +377,9 @@ module Polars
|
|
|
377
377
|
# # │ [1, 2] │
|
|
378
378
|
# # └───────────┘
|
|
379
379
|
def unique(maintain_order: false)
|
|
380
|
-
|
|
380
|
+
eval(
|
|
381
|
+
F.element.unique(maintain_order: maintain_order), as_list: true
|
|
382
|
+
)
|
|
381
383
|
end
|
|
382
384
|
|
|
383
385
|
# Count the number of unique values in every sub-array.
|
|
@@ -403,7 +405,7 @@ module Polars
|
|
|
403
405
|
# # │ [2, 3, 4] ┆ 3 │
|
|
404
406
|
# # └───────────────┴──────────┘
|
|
405
407
|
def n_unique
|
|
406
|
-
|
|
408
|
+
agg(F.element.n_unique)
|
|
407
409
|
end
|
|
408
410
|
|
|
409
411
|
# Convert an Array column into a List column with the same inner data type.
|
|
@@ -432,6 +434,13 @@ module Polars
|
|
|
432
434
|
|
|
433
435
|
# Evaluate whether any boolean value is true for every subarray.
|
|
434
436
|
#
|
|
437
|
+
# @param ignore_nulls [Boolean]
|
|
438
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
439
|
+
# are no non-null values, the output is `false`.
|
|
440
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
441
|
+
# if the column contains any null values and no `true` values,
|
|
442
|
+
# the output is null.
|
|
443
|
+
#
|
|
435
444
|
# @return [Expr]
|
|
436
445
|
#
|
|
437
446
|
# @example
|
|
@@ -461,12 +470,19 @@ module Polars
|
|
|
461
470
|
# # │ [null, null] ┆ false │
|
|
462
471
|
# # │ null ┆ null │
|
|
463
472
|
# # └────────────────┴───────┘
|
|
464
|
-
def any
|
|
465
|
-
|
|
473
|
+
def any(ignore_nulls: true)
|
|
474
|
+
agg(F.element.any(ignore_nulls: ignore_nulls))
|
|
466
475
|
end
|
|
467
476
|
|
|
468
477
|
# Evaluate whether all boolean values are true for every subarray.
|
|
469
478
|
#
|
|
479
|
+
# @param ignore_nulls [Boolean]
|
|
480
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
481
|
+
# are no non-null values, the output is `true`.
|
|
482
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
483
|
+
# if the column contains any null values and no `false` values,
|
|
484
|
+
# the output is null.
|
|
485
|
+
#
|
|
470
486
|
# @return [Expr]
|
|
471
487
|
#
|
|
472
488
|
# @example
|
|
@@ -496,8 +512,8 @@ module Polars
|
|
|
496
512
|
# # │ [null, null] ┆ true │
|
|
497
513
|
# # │ null ┆ null │
|
|
498
514
|
# # └────────────────┴───────┘
|
|
499
|
-
def all
|
|
500
|
-
|
|
515
|
+
def all(ignore_nulls: true)
|
|
516
|
+
agg(F.element.all(ignore_nulls: ignore_nulls))
|
|
501
517
|
end
|
|
502
518
|
|
|
503
519
|
# Sort the arrays in this column.
|
|
@@ -567,7 +583,7 @@ module Polars
|
|
|
567
583
|
# # │ [9, 1, 2] ┆ [2, 1, 9] │
|
|
568
584
|
# # └───────────────┴───────────────┘
|
|
569
585
|
def reverse
|
|
570
|
-
|
|
586
|
+
eval(F.element.reverse)
|
|
571
587
|
end
|
|
572
588
|
|
|
573
589
|
# Retrieve the index of the minimal value in every sub-array.
|
|
@@ -212,6 +212,13 @@ module Polars
|
|
|
212
212
|
|
|
213
213
|
# Evaluate whether any boolean value is true for every subarray.
|
|
214
214
|
#
|
|
215
|
+
# @param ignore_nulls [Boolean]
|
|
216
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
217
|
+
# are no non-null values, the output is `false`.
|
|
218
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
219
|
+
# if the column contains any null values and no `true` values,
|
|
220
|
+
# the output is null.
|
|
221
|
+
#
|
|
215
222
|
# @return [Series]
|
|
216
223
|
#
|
|
217
224
|
# @example
|
|
@@ -230,7 +237,7 @@ module Polars
|
|
|
230
237
|
# # false
|
|
231
238
|
# # null
|
|
232
239
|
# # ]
|
|
233
|
-
def any
|
|
240
|
+
def any(ignore_nulls: true)
|
|
234
241
|
super
|
|
235
242
|
end
|
|
236
243
|
|
|
@@ -378,6 +385,13 @@ module Polars
|
|
|
378
385
|
|
|
379
386
|
# Evaluate whether all boolean values are true for every subarray.
|
|
380
387
|
#
|
|
388
|
+
# @param ignore_nulls [Boolean]
|
|
389
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
390
|
+
# are no non-null values, the output is `true`.
|
|
391
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
392
|
+
# if the column contains any null values and no `false` values,
|
|
393
|
+
# the output is null.
|
|
394
|
+
#
|
|
381
395
|
# @return [Series]
|
|
382
396
|
#
|
|
383
397
|
# @example
|
|
@@ -396,7 +410,7 @@ module Polars
|
|
|
396
410
|
# # true
|
|
397
411
|
# # null
|
|
398
412
|
# # ]
|
|
399
|
-
def all
|
|
413
|
+
def all(ignore_nulls: true)
|
|
400
414
|
super
|
|
401
415
|
end
|
|
402
416
|
|
|
@@ -226,6 +226,38 @@ module Polars
|
|
|
226
226
|
super
|
|
227
227
|
end
|
|
228
228
|
|
|
229
|
+
# Get the byte value at the given index.
|
|
230
|
+
#
|
|
231
|
+
# For example, index `0` would return the first byte of every binary value
|
|
232
|
+
# and index `-1` would return the last byte of every binary value.
|
|
233
|
+
# The behavior if an index is out of bounds is determined by the argument
|
|
234
|
+
# `null_on_oob`.
|
|
235
|
+
#
|
|
236
|
+
# @param index [Object]
|
|
237
|
+
# Index to return per binary value
|
|
238
|
+
# @param null_on_oob [Boolean]
|
|
239
|
+
# Behavior if an index is out of bounds:
|
|
240
|
+
#
|
|
241
|
+
# * true -> set as null
|
|
242
|
+
# * false -> raise an error
|
|
243
|
+
#
|
|
244
|
+
# @return [Series]
|
|
245
|
+
#
|
|
246
|
+
# @example
|
|
247
|
+
# s = Polars::Series.new("a", ["\x01\x02\x03".b, "".b, "\x04\x05".b])
|
|
248
|
+
# s.bin.get(0, null_on_oob: true)
|
|
249
|
+
# # =>
|
|
250
|
+
# # shape: (3,)
|
|
251
|
+
# # Series: 'a' [u8]
|
|
252
|
+
# # [
|
|
253
|
+
# # 1
|
|
254
|
+
# # null
|
|
255
|
+
# # 4
|
|
256
|
+
# # ]
|
|
257
|
+
def get(index, null_on_oob: false)
|
|
258
|
+
super
|
|
259
|
+
end
|
|
260
|
+
|
|
229
261
|
# Take the first `n` bytes of the binary values.
|
|
230
262
|
#
|
|
231
263
|
# @param n [Object]
|
data/lib/polars/data_frame.rb
CHANGED
|
@@ -3808,6 +3808,57 @@ module Polars
|
|
|
3808
3808
|
.collect(optimizations: QueryOptFlags._eager)
|
|
3809
3809
|
end
|
|
3810
3810
|
|
|
3811
|
+
# Selects rows from this DataFrame at the given indices.
|
|
3812
|
+
#
|
|
3813
|
+
# @note
|
|
3814
|
+
# This functionality is experimental. It may be
|
|
3815
|
+
# changed at any point without it being considered a breaking change.
|
|
3816
|
+
#
|
|
3817
|
+
# @param indices [Object]
|
|
3818
|
+
# The indices of the rows to select.
|
|
3819
|
+
# @param null_on_oob [Boolean]
|
|
3820
|
+
# If true, an out-of-bounds index generates a null row
|
|
3821
|
+
# instead of raising an error.
|
|
3822
|
+
#
|
|
3823
|
+
# @return [DataFrame]
|
|
3824
|
+
#
|
|
3825
|
+
# @example
|
|
3826
|
+
# df = Polars::DataFrame.new({"x" => [2, 1, 0], "s" => ["foo", "bar", "baz"]})
|
|
3827
|
+
# df.gather([2, 0, 0])
|
|
3828
|
+
# # =>
|
|
3829
|
+
# # shape: (3, 2)
|
|
3830
|
+
# # ┌─────┬─────┐
|
|
3831
|
+
# # │ x ┆ s │
|
|
3832
|
+
# # │ --- ┆ --- │
|
|
3833
|
+
# # │ i64 ┆ str │
|
|
3834
|
+
# # ╞═════╪═════╡
|
|
3835
|
+
# # │ 0 ┆ baz │
|
|
3836
|
+
# # │ 2 ┆ foo │
|
|
3837
|
+
# # │ 2 ┆ foo │
|
|
3838
|
+
# # └─────┴─────┘
|
|
3839
|
+
#
|
|
3840
|
+
# @example
|
|
3841
|
+
# df.gather([0, 10, 1], null_on_oob: true)
|
|
3842
|
+
# # =>
|
|
3843
|
+
# # shape: (3, 2)
|
|
3844
|
+
# # ┌──────┬──────┐
|
|
3845
|
+
# # │ x ┆ s │
|
|
3846
|
+
# # │ --- ┆ --- │
|
|
3847
|
+
# # │ i64 ┆ str │
|
|
3848
|
+
# # ╞══════╪══════╡
|
|
3849
|
+
# # │ 2 ┆ foo │
|
|
3850
|
+
# # │ null ┆ null │
|
|
3851
|
+
# # │ 1 ┆ bar │
|
|
3852
|
+
# # └──────┴──────┘
|
|
3853
|
+
def gather(
|
|
3854
|
+
indices,
|
|
3855
|
+
null_on_oob: false
|
|
3856
|
+
)
|
|
3857
|
+
lazy
|
|
3858
|
+
.gather(indices, null_on_oob: null_on_oob)
|
|
3859
|
+
.collect(optimizations: QueryOptFlags._eager)
|
|
3860
|
+
end
|
|
3861
|
+
|
|
3811
3862
|
# Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
|
|
3812
3863
|
#
|
|
3813
3864
|
# The UDF will receive each row as a tuple of values: `udf(row)`.
|
|
@@ -4527,6 +4578,8 @@ module Polars
|
|
|
4527
4578
|
# @param separator [String]
|
|
4528
4579
|
# Used as separator/delimiter in generated column names in case of multiple
|
|
4529
4580
|
# `values` columns.
|
|
4581
|
+
# @param column_naming ['auto', 'combine']
|
|
4582
|
+
# How resulting column names will be constructed.
|
|
4530
4583
|
#
|
|
4531
4584
|
# @return [DataFrame]
|
|
4532
4585
|
#
|
|
@@ -4557,7 +4610,8 @@ module Polars
|
|
|
4557
4610
|
aggregate_function: nil,
|
|
4558
4611
|
maintain_order: true,
|
|
4559
4612
|
sort_columns: false,
|
|
4560
|
-
separator: "_"
|
|
4613
|
+
separator: "_",
|
|
4614
|
+
column_naming: "auto"
|
|
4561
4615
|
)
|
|
4562
4616
|
if on_columns.nil?
|
|
4563
4617
|
cols = select(on).unique(maintain_order: true)
|
|
@@ -4577,7 +4631,8 @@ module Polars
|
|
|
4577
4631
|
values: values,
|
|
4578
4632
|
aggregate_function: aggregate_function,
|
|
4579
4633
|
maintain_order: maintain_order,
|
|
4580
|
-
separator: separator
|
|
4634
|
+
separator: separator,
|
|
4635
|
+
column_naming: column_naming
|
|
4581
4636
|
)
|
|
4582
4637
|
.collect(optimizations: QueryOptFlags._eager)
|
|
4583
4638
|
end
|
|
@@ -4593,7 +4648,8 @@ module Polars
|
|
|
4593
4648
|
#
|
|
4594
4649
|
# @param on [Object]
|
|
4595
4650
|
# Column(s) or selector(s) to use as values variables; if `on`
|
|
4596
|
-
# is empty
|
|
4651
|
+
# is empty, no columns will be used. If set to `nil` (default),
|
|
4652
|
+
# all columns that are not in `index` will be used.
|
|
4597
4653
|
# @param index [Object]
|
|
4598
4654
|
# Column(s) or selector(s) to use as identifier variables.
|
|
4599
4655
|
# @param variable_name [Object]
|
|
@@ -4627,7 +4683,7 @@ module Polars
|
|
|
4627
4683
|
# # │ z ┆ c ┆ 6 │
|
|
4628
4684
|
# # └─────┴──────────┴───────┘
|
|
4629
4685
|
def unpivot(on = nil, index: nil, variable_name: nil, value_name: nil)
|
|
4630
|
-
on = on.nil? ?
|
|
4686
|
+
on = on.nil? ? nil : Utils._expand_selectors(self, on)
|
|
4631
4687
|
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
|
4632
4688
|
|
|
4633
4689
|
_from_rbdf(_df.unpivot(on, index, value_name, variable_name))
|
|
@@ -6488,7 +6544,7 @@ module Polars
|
|
|
6488
6544
|
# # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
|
|
6489
6545
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
|
6490
6546
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
|
6491
|
-
def unnest(columns, *more_columns, separator: nil)
|
|
6547
|
+
def unnest(columns = nil, *more_columns, separator: nil)
|
|
6492
6548
|
lazy.unnest(columns, *more_columns, separator: separator).collect(optimizations: QueryOptFlags._eager)
|
|
6493
6549
|
end
|
|
6494
6550
|
|
|
@@ -6504,6 +6560,10 @@ module Polars
|
|
|
6504
6560
|
# Other DataFrame that must be merged
|
|
6505
6561
|
# @param key [String]
|
|
6506
6562
|
# Key that is sorted.
|
|
6563
|
+
# @param maintain_order [Boolean]
|
|
6564
|
+
# If `true`, the output is guaranteed to have left-biased ordering
|
|
6565
|
+
# for equal keys: rows from the left frame appear before rows from
|
|
6566
|
+
# the right frame when their keys are equal.
|
|
6507
6567
|
#
|
|
6508
6568
|
# @return [DataFrame]
|
|
6509
6569
|
#
|
|
@@ -6530,8 +6590,8 @@ module Polars
|
|
|
6530
6590
|
# # │ steve ┆ 42 │
|
|
6531
6591
|
# # │ elise ┆ 44 │
|
|
6532
6592
|
# # └────────┴─────┘
|
|
6533
|
-
def merge_sorted(other, key)
|
|
6534
|
-
lazy.merge_sorted(other.lazy, key).collect(optimizations: QueryOptFlags._eager)
|
|
6593
|
+
def merge_sorted(other, key, maintain_order: false)
|
|
6594
|
+
lazy.merge_sorted(other.lazy, key, maintain_order: maintain_order).collect(optimizations: QueryOptFlags._eager)
|
|
6535
6595
|
end
|
|
6536
6596
|
|
|
6537
6597
|
# Flag a column as sorted.
|
|
@@ -6545,14 +6605,17 @@ module Polars
|
|
|
6545
6605
|
# Column that is sorted.
|
|
6546
6606
|
# @param descending [Boolean]
|
|
6547
6607
|
# Whether the column is sorted in descending order.
|
|
6608
|
+
# @param nulls_last [Boolean]
|
|
6609
|
+
# Whether the nulls are at the end.
|
|
6548
6610
|
#
|
|
6549
6611
|
# @return [DataFrame]
|
|
6550
6612
|
def set_sorted(
|
|
6551
6613
|
column,
|
|
6552
|
-
descending: false
|
|
6614
|
+
descending: false,
|
|
6615
|
+
nulls_last: false
|
|
6553
6616
|
)
|
|
6554
6617
|
lazy
|
|
6555
|
-
.set_sorted(column, descending: descending)
|
|
6618
|
+
.set_sorted(column, descending: descending, nulls_last: nulls_last)
|
|
6556
6619
|
.collect(optimizations: QueryOptFlags._eager)
|
|
6557
6620
|
end
|
|
6558
6621
|
|
|
@@ -7111,7 +7174,7 @@ module Polars
|
|
|
7111
7174
|
end
|
|
7112
7175
|
|
|
7113
7176
|
def _select_rows_by_slice(df, key)
|
|
7114
|
-
|
|
7177
|
+
Slice.new(df).apply(key)
|
|
7115
7178
|
end
|
|
7116
7179
|
|
|
7117
7180
|
def _select_rows_by_index(df, key)
|