polars-df 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/Cargo.lock +270 -97
- data/LICENSE.txt +1 -1
- data/README.md +1 -3
- data/ext/polars/Cargo.toml +19 -18
- data/ext/polars/src/catalog/unity.rs +15 -20
- data/ext/polars/src/conversion/any_value.rs +53 -29
- data/ext/polars/src/conversion/chunked_array.rs +58 -56
- data/ext/polars/src/conversion/datetime.rs +58 -7
- data/ext/polars/src/conversion/mod.rs +200 -150
- data/ext/polars/src/dataframe/export.rs +15 -12
- data/ext/polars/src/dataframe/general.rs +25 -7
- data/ext/polars/src/dataframe/map.rs +6 -4
- data/ext/polars/src/error.rs +1 -1
- data/ext/polars/src/expr/array.rs +0 -24
- data/ext/polars/src/expr/datatype.rs +13 -3
- data/ext/polars/src/expr/datetime.rs +4 -4
- data/ext/polars/src/expr/general.rs +35 -15
- data/ext/polars/src/expr/list.rs +0 -26
- data/ext/polars/src/expr/rolling.rs +24 -0
- data/ext/polars/src/functions/business.rs +2 -2
- data/ext/polars/src/functions/io.rs +4 -3
- data/ext/polars/src/functions/lazy.rs +65 -46
- data/ext/polars/src/functions/meta.rs +6 -5
- data/ext/polars/src/functions/mod.rs +0 -1
- data/ext/polars/src/functions/range.rs +13 -0
- data/ext/polars/src/functions/utils.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +4 -2
- data/ext/polars/src/interop/arrow/to_rb.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
- data/ext/polars/src/io/scan_options.rs +6 -3
- data/ext/polars/src/io/sink_options.rs +2 -0
- data/ext/polars/src/lazyframe/general.rs +243 -17
- data/ext/polars/src/lazyframe/optflags.rs +2 -1
- data/ext/polars/src/lib.rs +39 -35
- data/ext/polars/src/map/lazy.rs +5 -2
- data/ext/polars/src/map/series.rs +19 -18
- data/ext/polars/src/on_startup.rs +25 -6
- data/ext/polars/src/ruby/numo.rs +3 -4
- data/ext/polars/src/ruby/plan_callback.rs +1 -4
- data/ext/polars/src/ruby/rb_modules.rs +2 -4
- data/ext/polars/src/ruby/ruby_udf.rs +7 -9
- data/ext/polars/src/ruby/utils.rs +12 -1
- data/ext/polars/src/series/aggregation.rs +13 -1
- data/ext/polars/src/series/construction.rs +31 -50
- data/ext/polars/src/series/export.rs +33 -38
- data/ext/polars/src/series/general.rs +6 -6
- data/ext/polars/src/series/map.rs +3 -2
- data/ext/polars/src/series/scatter.rs +4 -4
- data/ext/polars/src/utils.rs +31 -7
- data/lib/polars/array_expr.rb +23 -7
- data/lib/polars/array_name_space.rb +16 -2
- data/lib/polars/binary_name_space.rb +32 -0
- data/lib/polars/collect_batches.rb +4 -0
- data/lib/polars/data_frame.rb +144 -11
- data/lib/polars/data_type_group.rb +5 -0
- data/lib/polars/date_time_expr.rb +91 -3
- data/lib/polars/date_time_name_space.rb +7 -1
- data/lib/polars/expr.rb +247 -44
- data/lib/polars/functions/business.rb +2 -2
- data/lib/polars/functions/datatype.rb +30 -0
- data/lib/polars/functions/eager.rb +80 -7
- data/lib/polars/functions/lazy.rb +97 -2
- data/lib/polars/functions/range/linear_space.rb +118 -0
- data/lib/polars/io/csv.rb +27 -5
- data/lib/polars/io/database.rb +2 -3
- data/lib/polars/io/ipc.rb +2 -2
- data/lib/polars/io/lines.rb +172 -0
- data/lib/polars/io/parquet.rb +1 -1
- data/lib/polars/io/sink_options.rb +5 -2
- data/lib/polars/lazy_frame.rb +517 -14
- data/lib/polars/list_expr.rb +21 -7
- data/lib/polars/list_name_space.rb +16 -2
- data/lib/polars/query_opt_flags.rb +23 -5
- data/lib/polars/selectors.rb +2 -2
- data/lib/polars/series.rb +176 -19
- data/lib/polars/sql_context.rb +2 -2
- data/lib/polars/string_cache.rb +19 -72
- data/lib/polars/string_expr.rb +1 -7
- data/lib/polars/string_name_space.rb +1 -7
- data/lib/polars/utils/construction/series.rb +24 -39
- data/lib/polars/utils/convert.rb +16 -6
- data/lib/polars/utils/parse.rb +7 -0
- data/lib/polars/utils/reduce_balanced.rb +43 -0
- data/lib/polars/utils/various.rb +5 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -17
- data/ext/polars/src/functions/string_cache.rs +0 -24
|
@@ -4,9 +4,10 @@ use num_traits::AsPrimitive;
|
|
|
4
4
|
use polars::prelude::*;
|
|
5
5
|
|
|
6
6
|
use crate::any_value::rb_object_to_any_value;
|
|
7
|
-
use crate::conversion::
|
|
7
|
+
use crate::conversion::Wrap;
|
|
8
8
|
use crate::prelude::ObjectValue;
|
|
9
|
-
use crate::
|
|
9
|
+
use crate::ruby::gvl::GvlExt;
|
|
10
|
+
use crate::{RbResult, RbSeries, RbTypeError, RbValueError};
|
|
10
11
|
|
|
11
12
|
pub fn series_from_objects(rb: &Ruby, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {
|
|
12
13
|
let mut validity = BitmapBuilder::with_capacity(objects.len());
|
|
@@ -107,21 +108,26 @@ impl RbSeries {
|
|
|
107
108
|
}
|
|
108
109
|
}
|
|
109
110
|
|
|
110
|
-
fn
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
111
|
+
fn convert_to_avs(
|
|
112
|
+
values: RArray,
|
|
113
|
+
strict: bool,
|
|
114
|
+
allow_object: bool,
|
|
115
|
+
) -> RbResult<Vec<AnyValue<'static>>> {
|
|
116
|
+
values
|
|
117
|
+
.into_iter()
|
|
118
|
+
.map(|v| rb_object_to_any_value(v, strict, allow_object))
|
|
119
|
+
.collect()
|
|
116
120
|
}
|
|
117
121
|
|
|
118
122
|
impl RbSeries {
|
|
119
123
|
pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
|
120
|
-
let any_values_result =
|
|
121
|
-
|
|
124
|
+
let any_values_result = values
|
|
125
|
+
.into_iter()
|
|
126
|
+
.map(|v| rb_object_to_any_value(v, strict, true))
|
|
127
|
+
.collect::<RbResult<Vec<AnyValue>>>();
|
|
128
|
+
|
|
122
129
|
let result = any_values_result.and_then(|avs| {
|
|
123
|
-
let
|
|
124
|
-
let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
|
|
130
|
+
let s = Series::from_any_values(name.clone().into(), avs.as_slice(), strict).map_err(|e| {
|
|
125
131
|
RbTypeError::new_err(format!(
|
|
126
132
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
|
127
133
|
))
|
|
@@ -131,7 +137,7 @@ impl RbSeries {
|
|
|
131
137
|
|
|
132
138
|
// Fall back to Object type for non-strict construction.
|
|
133
139
|
if !strict && result.is_err() {
|
|
134
|
-
return Self::new_object(name, values, strict);
|
|
140
|
+
return Ruby::attach(|rb| Self::new_object(rb, name, values, strict));
|
|
135
141
|
}
|
|
136
142
|
|
|
137
143
|
result
|
|
@@ -143,21 +149,13 @@ impl RbSeries {
|
|
|
143
149
|
dtype: Wrap<DataType>,
|
|
144
150
|
strict: bool,
|
|
145
151
|
) -> RbResult<Self> {
|
|
146
|
-
let
|
|
147
|
-
|
|
148
|
-
.
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
&dtype.0,
|
|
154
|
-
strict,
|
|
155
|
-
)
|
|
156
|
-
.map_err(|e| {
|
|
157
|
-
RbTypeError::new_err(format!(
|
|
158
|
-
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
|
159
|
-
))
|
|
160
|
-
})?;
|
|
152
|
+
let avs = convert_to_avs(values, strict, false)?;
|
|
153
|
+
let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
|
|
154
|
+
.map_err(|e| {
|
|
155
|
+
RbTypeError::new_err(format!(
|
|
156
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
|
157
|
+
))
|
|
158
|
+
})?;
|
|
161
159
|
Ok(s.into())
|
|
162
160
|
}
|
|
163
161
|
|
|
@@ -204,7 +202,7 @@ impl RbSeries {
|
|
|
204
202
|
Ok(Series::new_null(name.into(), len).into())
|
|
205
203
|
}
|
|
206
204
|
|
|
207
|
-
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
|
205
|
+
pub fn new_object(_rb: &Ruby, name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
|
208
206
|
let val = val
|
|
209
207
|
.into_iter()
|
|
210
208
|
.map(ObjectValue::from)
|
|
@@ -233,29 +231,12 @@ impl RbSeries {
|
|
|
233
231
|
}
|
|
234
232
|
|
|
235
233
|
pub fn new_array(
|
|
236
|
-
width: usize,
|
|
237
|
-
inner: Option<Wrap<DataType>>,
|
|
238
234
|
name: String,
|
|
239
|
-
|
|
240
|
-
|
|
235
|
+
values: RArray,
|
|
236
|
+
strict: bool,
|
|
237
|
+
dtype: Wrap<DataType>,
|
|
241
238
|
) -> RbResult<Self> {
|
|
242
|
-
|
|
243
|
-
let val = vec_extract_wrapped(val);
|
|
244
|
-
let out = Series::new(name.into(), &val);
|
|
245
|
-
match out.dtype() {
|
|
246
|
-
DataType::List(list_inner) => {
|
|
247
|
-
let out = out
|
|
248
|
-
.cast(&DataType::Array(
|
|
249
|
-
Box::new(inner.map(|dt| dt.0).unwrap_or(*list_inner.clone())),
|
|
250
|
-
width,
|
|
251
|
-
))
|
|
252
|
-
.map_err(RbPolarsErr::from)?;
|
|
253
|
-
Ok(out.into())
|
|
254
|
-
}
|
|
255
|
-
_ => Err(RbValueError::new_err(
|
|
256
|
-
"could not create Array from input".to_string(),
|
|
257
|
-
)),
|
|
258
|
-
}
|
|
239
|
+
Self::new_from_any_values_and_dtype(name, values, dtype, strict)
|
|
259
240
|
}
|
|
260
241
|
|
|
261
242
|
pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
|
@@ -2,49 +2,49 @@ use magnus::{IntoValue, Ruby, Value, value::ReprValue};
|
|
|
2
2
|
use polars_core::prelude::*;
|
|
3
3
|
|
|
4
4
|
use crate::prelude::*;
|
|
5
|
+
use crate::ruby::utils::TryIntoValue;
|
|
5
6
|
use crate::{RbPolarsErr, RbResult, RbSeries};
|
|
6
7
|
|
|
7
8
|
impl RbSeries {
|
|
8
9
|
/// Convert this Series to a Ruby array.
|
|
9
10
|
/// This operation copies data.
|
|
10
|
-
pub fn to_a(&
|
|
11
|
-
let series = &
|
|
11
|
+
pub fn to_a(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
|
|
12
|
+
let series = &self_.series.read();
|
|
12
13
|
|
|
13
|
-
fn to_a_recursive(series: &Series) -> RbResult<Value> {
|
|
14
|
-
let ruby = Ruby::get().unwrap();
|
|
14
|
+
fn to_a_recursive(ruby: &Ruby, series: &Series) -> RbResult<Value> {
|
|
15
15
|
let rblist = match series.dtype() {
|
|
16
16
|
DataType::Boolean => ruby
|
|
17
|
-
.ary_from_iter(series.bool().map_err(RbPolarsErr::from)
|
|
17
|
+
.ary_from_iter(series.bool().map_err(RbPolarsErr::from)?.iter())
|
|
18
18
|
.as_value(),
|
|
19
19
|
DataType::UInt8 => ruby
|
|
20
|
-
.ary_from_iter(series.u8().map_err(RbPolarsErr::from)
|
|
20
|
+
.ary_from_iter(series.u8().map_err(RbPolarsErr::from)?.iter())
|
|
21
21
|
.as_value(),
|
|
22
22
|
DataType::UInt16 => ruby
|
|
23
|
-
.ary_from_iter(series.u16().map_err(RbPolarsErr::from)
|
|
23
|
+
.ary_from_iter(series.u16().map_err(RbPolarsErr::from)?.iter())
|
|
24
24
|
.as_value(),
|
|
25
25
|
DataType::UInt32 => ruby
|
|
26
|
-
.ary_from_iter(series.u32().map_err(RbPolarsErr::from)
|
|
26
|
+
.ary_from_iter(series.u32().map_err(RbPolarsErr::from)?.iter())
|
|
27
27
|
.as_value(),
|
|
28
28
|
DataType::UInt64 => ruby
|
|
29
|
-
.ary_from_iter(series.u64().map_err(RbPolarsErr::from)
|
|
29
|
+
.ary_from_iter(series.u64().map_err(RbPolarsErr::from)?.iter())
|
|
30
30
|
.as_value(),
|
|
31
31
|
DataType::UInt128 => ruby
|
|
32
|
-
.ary_from_iter(series.u128().map_err(RbPolarsErr::from)
|
|
32
|
+
.ary_from_iter(series.u128().map_err(RbPolarsErr::from)?.iter())
|
|
33
33
|
.as_value(),
|
|
34
34
|
DataType::Int8 => ruby
|
|
35
|
-
.ary_from_iter(series.i8().map_err(RbPolarsErr::from)
|
|
35
|
+
.ary_from_iter(series.i8().map_err(RbPolarsErr::from)?.iter())
|
|
36
36
|
.as_value(),
|
|
37
37
|
DataType::Int16 => ruby
|
|
38
|
-
.ary_from_iter(series.i16().map_err(RbPolarsErr::from)
|
|
38
|
+
.ary_from_iter(series.i16().map_err(RbPolarsErr::from)?.iter())
|
|
39
39
|
.as_value(),
|
|
40
40
|
DataType::Int32 => ruby
|
|
41
|
-
.ary_from_iter(series.i32().map_err(RbPolarsErr::from)
|
|
41
|
+
.ary_from_iter(series.i32().map_err(RbPolarsErr::from)?.iter())
|
|
42
42
|
.as_value(),
|
|
43
43
|
DataType::Int64 => ruby
|
|
44
|
-
.ary_from_iter(series.i64().map_err(RbPolarsErr::from)
|
|
44
|
+
.ary_from_iter(series.i64().map_err(RbPolarsErr::from)?.iter())
|
|
45
45
|
.as_value(),
|
|
46
46
|
DataType::Int128 => ruby
|
|
47
|
-
.ary_from_iter(series.i128().map_err(RbPolarsErr::from)
|
|
47
|
+
.ary_from_iter(series.i128().map_err(RbPolarsErr::from)?.iter())
|
|
48
48
|
.as_value(),
|
|
49
49
|
DataType::Float16 => ruby
|
|
50
50
|
.ary_from_iter(
|
|
@@ -53,14 +53,15 @@ impl RbSeries {
|
|
|
53
53
|
.cast(&DataType::Float32)
|
|
54
54
|
.map_err(RbPolarsErr::from)?
|
|
55
55
|
.f32()
|
|
56
|
-
.map_err(RbPolarsErr::from)
|
|
56
|
+
.map_err(RbPolarsErr::from)?
|
|
57
|
+
.iter(),
|
|
57
58
|
)
|
|
58
59
|
.as_value(),
|
|
59
60
|
DataType::Float32 => ruby
|
|
60
|
-
.ary_from_iter(series.f32().map_err(RbPolarsErr::from)
|
|
61
|
+
.ary_from_iter(series.f32().map_err(RbPolarsErr::from)?.iter())
|
|
61
62
|
.as_value(),
|
|
62
63
|
DataType::Float64 => ruby
|
|
63
|
-
.ary_from_iter(series.f64().map_err(RbPolarsErr::from)
|
|
64
|
+
.ary_from_iter(series.f64().map_err(RbPolarsErr::from)?.iter())
|
|
64
65
|
.as_value(),
|
|
65
66
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
|
66
67
|
with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
|
|
@@ -71,10 +72,7 @@ impl RbSeries {
|
|
|
71
72
|
let v = ruby.ary_new_capa(series.len());
|
|
72
73
|
for i in 0..series.len() {
|
|
73
74
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
|
74
|
-
|
|
75
|
-
Some(val) => v.push(val.to_value()).unwrap(),
|
|
76
|
-
None => v.push(ruby.qnil()).unwrap(),
|
|
77
|
-
};
|
|
75
|
+
v.push(obj.cloned().into_value_with(ruby))?;
|
|
78
76
|
}
|
|
79
77
|
v.as_value()
|
|
80
78
|
}
|
|
@@ -87,7 +85,7 @@ impl RbSeries {
|
|
|
87
85
|
v.push(ruby.qnil()).unwrap();
|
|
88
86
|
}
|
|
89
87
|
Some(s) => {
|
|
90
|
-
let rblst = to_a_recursive(s.as_ref())?;
|
|
88
|
+
let rblst = to_a_recursive(ruby, s.as_ref())?;
|
|
91
89
|
v.push(rblst)?;
|
|
92
90
|
}
|
|
93
91
|
}
|
|
@@ -103,7 +101,7 @@ impl RbSeries {
|
|
|
103
101
|
v.push(ruby.qnil()).unwrap();
|
|
104
102
|
}
|
|
105
103
|
Some(s) => {
|
|
106
|
-
let rblst = to_a_recursive(s.as_ref())?;
|
|
104
|
+
let rblst = to_a_recursive(ruby, s.as_ref())?;
|
|
107
105
|
v.push(rblst)?;
|
|
108
106
|
}
|
|
109
107
|
}
|
|
@@ -112,35 +110,35 @@ impl RbSeries {
|
|
|
112
110
|
}
|
|
113
111
|
DataType::Date => {
|
|
114
112
|
let ca = series.date().map_err(RbPolarsErr::from)?;
|
|
115
|
-
return
|
|
113
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
116
114
|
}
|
|
117
115
|
DataType::Time => {
|
|
118
116
|
let ca = series.time().map_err(RbPolarsErr::from)?;
|
|
119
|
-
return
|
|
117
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
120
118
|
}
|
|
121
119
|
DataType::Datetime(_, _) => {
|
|
122
120
|
let ca = series.datetime().map_err(RbPolarsErr::from)?;
|
|
123
|
-
return
|
|
121
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
124
122
|
}
|
|
125
123
|
DataType::Decimal(_, _) => {
|
|
126
124
|
let ca = series.decimal().map_err(RbPolarsErr::from)?;
|
|
127
|
-
return
|
|
125
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
128
126
|
}
|
|
129
127
|
DataType::String => {
|
|
130
128
|
let ca = series.str().map_err(RbPolarsErr::from)?;
|
|
131
|
-
return
|
|
129
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
132
130
|
}
|
|
133
131
|
DataType::Struct(_) => {
|
|
134
132
|
let ca = series.struct_().map_err(RbPolarsErr::from)?;
|
|
135
|
-
return
|
|
133
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
136
134
|
}
|
|
137
135
|
DataType::Duration(_) => {
|
|
138
136
|
let ca = series.duration().map_err(RbPolarsErr::from)?;
|
|
139
|
-
return
|
|
137
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
140
138
|
}
|
|
141
139
|
DataType::Binary => {
|
|
142
140
|
let ca = series.binary().map_err(RbPolarsErr::from)?;
|
|
143
|
-
return
|
|
141
|
+
return Wrap(ca).try_into_value_with(ruby);
|
|
144
142
|
}
|
|
145
143
|
DataType::Null => {
|
|
146
144
|
let null: Option<u8> = None;
|
|
@@ -163,10 +161,7 @@ impl RbSeries {
|
|
|
163
161
|
}
|
|
164
162
|
impl ExactSizeIterator for NullIter {}
|
|
165
163
|
|
|
166
|
-
|
|
167
|
-
.unwrap()
|
|
168
|
-
.ary_from_iter(NullIter { iter, n })
|
|
169
|
-
.as_value()
|
|
164
|
+
ruby.ary_from_iter(NullIter { iter, n }).as_value()
|
|
170
165
|
}
|
|
171
166
|
DataType::Unknown(_) => {
|
|
172
167
|
panic!("to_a not implemented for unknown")
|
|
@@ -175,12 +170,12 @@ impl RbSeries {
|
|
|
175
170
|
unreachable!()
|
|
176
171
|
}
|
|
177
172
|
DataType::Extension(_, _) => {
|
|
178
|
-
return to_a_recursive(series.ext().unwrap().storage());
|
|
173
|
+
return to_a_recursive(ruby, series.ext().unwrap().storage());
|
|
179
174
|
}
|
|
180
175
|
};
|
|
181
176
|
Ok(rblist.as_value())
|
|
182
177
|
}
|
|
183
178
|
|
|
184
|
-
to_a_recursive(series)
|
|
179
|
+
to_a_recursive(ruby, series)
|
|
185
180
|
}
|
|
186
181
|
}
|
|
@@ -10,7 +10,7 @@ use crate::ruby::exceptions::{RbIndexError, RbRuntimeError, RbValueError};
|
|
|
10
10
|
use crate::ruby::gvl::GvlExt;
|
|
11
11
|
use crate::ruby::plan_callback::PlanCallbackExt;
|
|
12
12
|
use crate::ruby::ruby_function::RubyObject;
|
|
13
|
-
use crate::ruby::
|
|
13
|
+
use crate::ruby::utils::TryIntoValue;
|
|
14
14
|
use crate::utils::EnterPolarsExt;
|
|
15
15
|
use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
|
|
16
16
|
|
|
@@ -113,7 +113,7 @@ impl RbSeries {
|
|
|
113
113
|
let rbseries = RbSeries::new(s);
|
|
114
114
|
rb_modules::pl_utils(ruby).funcall("wrap_s", (rbseries,))
|
|
115
115
|
}
|
|
116
|
-
_ =>
|
|
116
|
+
_ => Wrap(av).try_into_value_with(ruby),
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
119
|
|
|
@@ -157,8 +157,8 @@ impl RbSeries {
|
|
|
157
157
|
self.series.write().rename(name.into());
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
-
pub fn dtype(rb: &Ruby, self_: &Self) -> Value {
|
|
161
|
-
Wrap(self_.series.read().dtype().clone()).
|
|
160
|
+
pub fn dtype(rb: &Ruby, self_: &Self) -> RbResult<Value> {
|
|
161
|
+
Wrap(self_.series.read().dtype().clone()).try_into_value_with(rb)
|
|
162
162
|
}
|
|
163
163
|
|
|
164
164
|
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
|
@@ -445,10 +445,10 @@ impl RbSeries {
|
|
|
445
445
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
|
446
446
|
name_gen: Option<Value>,
|
|
447
447
|
) -> RbResult<Self> {
|
|
448
|
-
start_background_ruby_thread(rb);
|
|
449
448
|
let name_gen = name_gen.map(RubyObject::from);
|
|
449
|
+
// ensure new_ruby is called with GVL
|
|
450
|
+
let get_index_name = name_gen.map(PlanCallback::<usize, String>::new_ruby);
|
|
450
451
|
rb.enter_polars(|| {
|
|
451
|
-
let get_index_name = name_gen.map(PlanCallback::<usize, String>::new_ruby);
|
|
452
452
|
let get_index_name = get_index_name.map(|f| {
|
|
453
453
|
NameGenerator(Arc::new(move |i| f.call(i).map(PlSmallStr::from)) as Arc<_>)
|
|
454
454
|
});
|
|
@@ -4,6 +4,7 @@ use super::RbSeries;
|
|
|
4
4
|
use crate::map::series::ApplyLambdaGeneric;
|
|
5
5
|
use crate::prelude::*;
|
|
6
6
|
use crate::ruby::gvl::GvlExt;
|
|
7
|
+
use crate::ruby::utils::TryIntoValue;
|
|
7
8
|
use crate::series::construction::series_from_objects;
|
|
8
9
|
use crate::{RbPolarsErr, RbResult};
|
|
9
10
|
use crate::{apply_all_polars_dtypes, raise_err};
|
|
@@ -78,7 +79,7 @@ fn call_and_collect_objects<T, I>(
|
|
|
78
79
|
skip_nulls: bool,
|
|
79
80
|
) -> RbResult<Series>
|
|
80
81
|
where
|
|
81
|
-
T:
|
|
82
|
+
T: TryIntoValue,
|
|
82
83
|
I: Iterator<Item = Option<T>>,
|
|
83
84
|
{
|
|
84
85
|
let mut objects = Vec::with_capacity(len);
|
|
@@ -91,7 +92,7 @@ where
|
|
|
91
92
|
continue;
|
|
92
93
|
}
|
|
93
94
|
None => rb.qnil().into_value_with(rb),
|
|
94
|
-
Some(val) => val.
|
|
95
|
+
Some(val) => val.try_into_value_with(rb)?,
|
|
95
96
|
};
|
|
96
97
|
let out: Value = lambda.funcall("call", (arg,))?;
|
|
97
98
|
objects.push(ObjectValue {
|
|
@@ -139,23 +139,23 @@ fn scatter_impl(
|
|
|
139
139
|
with_match_physical_numeric_polars_type!(dt, |$T| {
|
|
140
140
|
let ca: &mut ChunkedArray<$T> = mutable_s.as_mut();
|
|
141
141
|
let values: &ChunkedArray<$T> = values.as_ref().as_ref();
|
|
142
|
-
ca.scatter(idx, values)
|
|
142
|
+
ca.scatter(idx, values.iter())
|
|
143
143
|
})
|
|
144
144
|
}
|
|
145
145
|
DataType::Boolean => {
|
|
146
146
|
let ca: &mut ChunkedArray<BooleanType> = mutable_s.as_mut();
|
|
147
147
|
let values = values.bool()?;
|
|
148
|
-
ca.scatter(idx, values)
|
|
148
|
+
ca.scatter(idx, values.iter())
|
|
149
149
|
}
|
|
150
150
|
DataType::Binary => {
|
|
151
151
|
let ca: &mut ChunkedArray<BinaryType> = mutable_s.as_mut();
|
|
152
152
|
let values = values.binary()?;
|
|
153
|
-
ca.scatter(idx, values)
|
|
153
|
+
ca.scatter(idx, values.iter())
|
|
154
154
|
}
|
|
155
155
|
DataType::String => {
|
|
156
156
|
let ca: &mut ChunkedArray<StringType> = mutable_s.as_mut();
|
|
157
157
|
let values = values.str()?;
|
|
158
|
-
ca.scatter(idx, values)
|
|
158
|
+
ca.scatter(idx, values.iter())
|
|
159
159
|
}
|
|
160
160
|
_ => Err(PolarsError::ComputeError(
|
|
161
161
|
format!("not yet implemented for dtype: {logical_dtype}").into(),
|
data/ext/polars/src/utils.rs
CHANGED
|
@@ -16,7 +16,6 @@ macro_rules! apply_all_polars_dtypes {
|
|
|
16
16
|
($self:expr, $method:ident, $($args:expr),*) => {
|
|
17
17
|
match $self.dtype() {
|
|
18
18
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
|
19
|
-
DataType::String => $self.str().unwrap().$method($($args),*),
|
|
20
19
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
|
21
20
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
|
22
21
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|
|
@@ -27,15 +26,40 @@ macro_rules! apply_all_polars_dtypes {
|
|
|
27
26
|
DataType::Int32 => $self.i32().unwrap().$method($($args),*),
|
|
28
27
|
DataType::Int64 => $self.i64().unwrap().$method($($args),*),
|
|
29
28
|
DataType::Int128 => $self.i128().unwrap().$method($($args),*),
|
|
30
|
-
DataType::Float16 =>
|
|
29
|
+
DataType::Float16 => $self.cast(&DataType::Float32).unwrap().f32().unwrap().$method($($args),*),
|
|
31
30
|
DataType::Float32 => $self.f32().unwrap().$method($($args),*),
|
|
32
31
|
DataType::Float64 => $self.f64().unwrap().$method($($args),*),
|
|
33
|
-
DataType::
|
|
34
|
-
DataType::
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
|
33
|
+
DataType::Binary => $self.binary().unwrap().$method($($args),*),
|
|
34
|
+
DataType::Decimal(_, _) => $self.decimal().unwrap().$method($($args),*),
|
|
35
|
+
|
|
36
|
+
DataType::Date => $self.date().unwrap().$method($($args),*),
|
|
37
|
+
DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
|
|
38
|
+
DataType::Duration(_) => $self.duration().unwrap().$method($($args),*),
|
|
39
|
+
DataType::Time => $self.time().unwrap().$method($($args),*),
|
|
40
|
+
|
|
41
|
+
DataType::List(_) => $self.list().unwrap().$method($($args),*),
|
|
37
42
|
DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
|
|
38
|
-
|
|
43
|
+
DataType::Array(_, _) => $self.array().unwrap().$method($($args),*),
|
|
44
|
+
|
|
45
|
+
dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => match dt.cat_physical().unwrap() {
|
|
46
|
+
CategoricalPhysical::U8 => $self.cat8().unwrap().$method($($args),*),
|
|
47
|
+
CategoricalPhysical::U16 => $self.cat16().unwrap().$method($($args),*),
|
|
48
|
+
CategoricalPhysical::U32 => $self.cat32().unwrap().$method($($args),*),
|
|
49
|
+
},
|
|
50
|
+
|
|
51
|
+
DataType::Object(_) => {
|
|
52
|
+
$self
|
|
53
|
+
.as_any()
|
|
54
|
+
.downcast_ref::<ObjectChunked<ObjectValue>>()
|
|
55
|
+
.unwrap()
|
|
56
|
+
.$method($($args),*)
|
|
57
|
+
},
|
|
58
|
+
DataType::Extension(_, _) => $self.ext().unwrap().$method($($args),*),
|
|
59
|
+
|
|
60
|
+
DataType::Null => $self.null().unwrap().$method($($args),*),
|
|
61
|
+
|
|
62
|
+
dt @ (DataType::BinaryOffset | DataType::Unknown(_)) => panic!("dtype {:?} not supported", dt)
|
|
39
63
|
}
|
|
40
64
|
}
|
|
41
65
|
}
|
data/lib/polars/array_expr.rb
CHANGED
|
@@ -377,7 +377,9 @@ module Polars
|
|
|
377
377
|
# # │ [1, 2] │
|
|
378
378
|
# # └───────────┘
|
|
379
379
|
def unique(maintain_order: false)
|
|
380
|
-
|
|
380
|
+
eval(
|
|
381
|
+
F.element.unique(maintain_order: maintain_order), as_list: true
|
|
382
|
+
)
|
|
381
383
|
end
|
|
382
384
|
|
|
383
385
|
# Count the number of unique values in every sub-arrays.
|
|
@@ -403,7 +405,7 @@ module Polars
|
|
|
403
405
|
# # │ [2, 3, 4] ┆ 3 │
|
|
404
406
|
# # └───────────────┴──────────┘
|
|
405
407
|
def n_unique
|
|
406
|
-
|
|
408
|
+
agg(F.element.n_unique)
|
|
407
409
|
end
|
|
408
410
|
|
|
409
411
|
# Convert an Array column into a List column with the same inner data type.
|
|
@@ -432,6 +434,13 @@ module Polars
|
|
|
432
434
|
|
|
433
435
|
# Evaluate whether any boolean value is true for every subarray.
|
|
434
436
|
#
|
|
437
|
+
# @param ignore_nulls [Boolean]
|
|
438
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
439
|
+
# are no non-null values, the output is `false`.
|
|
440
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
441
|
+
# if the column contains any null values and no `true` values,
|
|
442
|
+
# the output is null.
|
|
443
|
+
#
|
|
435
444
|
# @return [Expr]
|
|
436
445
|
#
|
|
437
446
|
# @example
|
|
@@ -461,12 +470,19 @@ module Polars
|
|
|
461
470
|
# # │ [null, null] ┆ false │
|
|
462
471
|
# # │ null ┆ null │
|
|
463
472
|
# # └────────────────┴───────┘
|
|
464
|
-
def any
|
|
465
|
-
|
|
473
|
+
def any(ignore_nulls: true)
|
|
474
|
+
agg(F.element.any(ignore_nulls: ignore_nulls))
|
|
466
475
|
end
|
|
467
476
|
|
|
468
477
|
# Evaluate whether all boolean values are true for every subarray.
|
|
469
478
|
#
|
|
479
|
+
# @param ignore_nulls [Boolean]
|
|
480
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
481
|
+
# are no non-null values, the output is `true`.
|
|
482
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
483
|
+
# if the column contains any null values and no `false` values,
|
|
484
|
+
# the output is null.
|
|
485
|
+
#
|
|
470
486
|
# @return [Expr]
|
|
471
487
|
#
|
|
472
488
|
# @example
|
|
@@ -496,8 +512,8 @@ module Polars
|
|
|
496
512
|
# # │ [null, null] ┆ true │
|
|
497
513
|
# # │ null ┆ null │
|
|
498
514
|
# # └────────────────┴───────┘
|
|
499
|
-
def all
|
|
500
|
-
|
|
515
|
+
def all(ignore_nulls: true)
|
|
516
|
+
agg(F.element.all(ignore_nulls: ignore_nulls))
|
|
501
517
|
end
|
|
502
518
|
|
|
503
519
|
# Sort the arrays in this column.
|
|
@@ -567,7 +583,7 @@ module Polars
|
|
|
567
583
|
# # │ [9, 1, 2] ┆ [2, 1, 9] │
|
|
568
584
|
# # └───────────────┴───────────────┘
|
|
569
585
|
def reverse
|
|
570
|
-
|
|
586
|
+
eval(F.element.reverse)
|
|
571
587
|
end
|
|
572
588
|
|
|
573
589
|
# Retrieve the index of the minimal value in every sub-array.
|
|
@@ -212,6 +212,13 @@ module Polars
|
|
|
212
212
|
|
|
213
213
|
# Evaluate whether any boolean value is true for every subarray.
|
|
214
214
|
#
|
|
215
|
+
# @param ignore_nulls [Boolean]
|
|
216
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
217
|
+
# are no non-null values, the output is `false`.
|
|
218
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
219
|
+
# if the column contains any null values and no `true` values,
|
|
220
|
+
# the output is null.
|
|
221
|
+
#
|
|
215
222
|
# @return [Series]
|
|
216
223
|
#
|
|
217
224
|
# @example
|
|
@@ -230,7 +237,7 @@ module Polars
|
|
|
230
237
|
# # false
|
|
231
238
|
# # null
|
|
232
239
|
# # ]
|
|
233
|
-
def any
|
|
240
|
+
def any(ignore_nulls: true)
|
|
234
241
|
super
|
|
235
242
|
end
|
|
236
243
|
|
|
@@ -378,6 +385,13 @@ module Polars
|
|
|
378
385
|
|
|
379
386
|
# Evaluate whether all boolean values are true for every subarray.
|
|
380
387
|
#
|
|
388
|
+
# @param ignore_nulls [Boolean]
|
|
389
|
+
# * If set to `true` (default), null values are ignored. If there
|
|
390
|
+
# are no non-null values, the output is `true`.
|
|
391
|
+
# * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
|
|
392
|
+
# if the column contains any null values and no `false` values,
|
|
393
|
+
# the output is null.
|
|
394
|
+
#
|
|
381
395
|
# @return [Series]
|
|
382
396
|
#
|
|
383
397
|
# @example
|
|
@@ -396,7 +410,7 @@ module Polars
|
|
|
396
410
|
# # true
|
|
397
411
|
# # null
|
|
398
412
|
# # ]
|
|
399
|
-
def all
|
|
413
|
+
def all(ignore_nulls: true)
|
|
400
414
|
super
|
|
401
415
|
end
|
|
402
416
|
|
|
@@ -226,6 +226,38 @@ module Polars
|
|
|
226
226
|
super
|
|
227
227
|
end
|
|
228
228
|
|
|
229
|
+
# Get the byte value at the given index.
|
|
230
|
+
#
|
|
231
|
+
# For example, index `0` would return the first byte of every binary value
|
|
232
|
+
# and index `-1` would return the last byte of every binary value.
|
|
233
|
+
# The behavior if an index is out of bounds is determined by the argument
|
|
234
|
+
# `null_on_oob`.
|
|
235
|
+
#
|
|
236
|
+
# @param index [Object]
|
|
237
|
+
# Index to return per binary value
|
|
238
|
+
# @param null_on_oob [Boolean]
|
|
239
|
+
# Behavior if an index is out of bounds:
|
|
240
|
+
#
|
|
241
|
+
# * true -> set as null
|
|
242
|
+
# * false -> raise an error
|
|
243
|
+
#
|
|
244
|
+
# @return [Series]
|
|
245
|
+
#
|
|
246
|
+
# @example
|
|
247
|
+
# s = Polars::Series.new("a", ["\x01\x02\x03".b, "".b, "\x04\x05".b])
|
|
248
|
+
# s.bin.get(0, null_on_oob: true)
|
|
249
|
+
# # =>
|
|
250
|
+
# # shape: (3,)
|
|
251
|
+
# # Series: 'a' [u8]
|
|
252
|
+
# # [
|
|
253
|
+
# # 1
|
|
254
|
+
# # null
|
|
255
|
+
# # 4
|
|
256
|
+
# # ]
|
|
257
|
+
def get(index, null_on_oob: false)
|
|
258
|
+
super
|
|
259
|
+
end
|
|
260
|
+
|
|
229
261
|
# Take the first `n` bytes of the binary values.
|
|
230
262
|
#
|
|
231
263
|
# @param n [Object]
|