polars-df 0.21.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +120 -90
- data/Cargo.toml +3 -0
- data/README.md +20 -7
- data/ext/polars/Cargo.toml +18 -12
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +39 -37
- data/ext/polars/src/conversion/chunked_array.rs +36 -29
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +244 -51
- data/ext/polars/src/dataframe/construction.rs +5 -17
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -17
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -5
- data/ext/polars/src/expr/datatype.rs +4 -4
- data/ext/polars/src/expr/datetime.rs +22 -28
- data/ext/polars/src/expr/general.rs +3 -10
- data/ext/polars/src/expr/list.rs +8 -24
- data/ext/polars/src/expr/meta.rs +4 -6
- data/ext/polars/src/expr/mod.rs +2 -0
- data/ext/polars/src/expr/name.rs +11 -14
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +5 -10
- data/ext/polars/src/file.rs +20 -14
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +7 -4
- data/ext/polars/src/functions/lazy.rs +7 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +23 -3
- data/ext/polars/src/lazyframe/general.rs +35 -50
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +57 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +144 -19
- data/ext/polars/src/map/dataframe.rs +18 -15
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +7 -6
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +49 -29
- data/ext/polars/src/series/construction.rs +2 -0
- data/ext/polars/src/series/export.rs +38 -33
- data/ext/polars/src/series/general.rs +69 -31
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +1 -1
- data/lib/polars/data_frame.rb +119 -15
- data/lib/polars/data_types.rb +23 -6
- data/lib/polars/date_time_expr.rb +36 -15
- data/lib/polars/expr.rb +41 -32
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/lazy.rb +1 -1
- data/lib/polars/iceberg_dataset.rb +113 -0
- data/lib/polars/io/iceberg.rb +34 -0
- data/lib/polars/io/ipc.rb +28 -49
- data/lib/polars/io/parquet.rb +7 -4
- data/lib/polars/io/scan_options.rb +12 -3
- data/lib/polars/io/utils.rb +17 -0
- data/lib/polars/lazy_frame.rb +97 -10
- data/lib/polars/list_expr.rb +21 -13
- data/lib/polars/list_name_space.rb +33 -21
- data/lib/polars/meta_expr.rb +25 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +23 -1
- data/lib/polars/schema.rb +1 -1
- data/lib/polars/selectors.rb +8 -8
- data/lib/polars/series.rb +26 -2
- data/lib/polars/string_expr.rb +27 -28
- data/lib/polars/string_name_space.rb +18 -5
- data/lib/polars/utils/convert.rb +2 -2
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +4 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +10 -1
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
use crate::error::RbPolarsErr;
|
|
2
2
|
use crate::prelude::*;
|
|
3
|
+
use crate::utils::to_rb_err;
|
|
3
4
|
use crate::{RbResult, RbSeries};
|
|
4
|
-
use magnus::{IntoValue, Value};
|
|
5
|
+
use magnus::{IntoValue, Ruby, Value};
|
|
5
6
|
|
|
6
7
|
fn scalar_to_rb(scalar: RbResult<Scalar>) -> RbResult<Value> {
|
|
7
|
-
|
|
8
|
+
let ruby = Ruby::get().unwrap();
|
|
9
|
+
Ok(Wrap(scalar?.as_any_value()).into_value_with(&ruby))
|
|
8
10
|
}
|
|
9
11
|
|
|
10
12
|
impl RbSeries {
|
|
@@ -36,40 +38,50 @@ impl RbSeries {
|
|
|
36
38
|
self.series.borrow().arg_min()
|
|
37
39
|
}
|
|
38
40
|
|
|
39
|
-
pub fn max(&
|
|
41
|
+
pub fn max(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
40
42
|
Ok(Wrap(
|
|
41
|
-
|
|
43
|
+
rb_self
|
|
44
|
+
.series
|
|
42
45
|
.borrow()
|
|
43
46
|
.max_reduce()
|
|
44
47
|
.map_err(RbPolarsErr::from)?
|
|
45
48
|
.as_any_value(),
|
|
46
49
|
)
|
|
47
|
-
.
|
|
50
|
+
.into_value_with(ruby))
|
|
48
51
|
}
|
|
49
52
|
|
|
50
|
-
pub fn mean(&
|
|
51
|
-
match
|
|
53
|
+
pub fn mean(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
54
|
+
match rb_self.series.borrow().dtype() {
|
|
52
55
|
DataType::Boolean => Ok(Wrap(
|
|
53
|
-
|
|
56
|
+
rb_self
|
|
57
|
+
.series
|
|
54
58
|
.borrow()
|
|
55
59
|
.cast(&DataType::UInt8)
|
|
56
60
|
.unwrap()
|
|
57
61
|
.mean_reduce()
|
|
62
|
+
.map_err(to_rb_err)?
|
|
58
63
|
.as_any_value(),
|
|
59
64
|
)
|
|
60
|
-
.
|
|
65
|
+
.into_value_with(ruby)),
|
|
61
66
|
// For non-numeric output types we require mean_reduce.
|
|
62
|
-
dt if dt.is_temporal() =>
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
67
|
+
dt if dt.is_temporal() => Ok(Wrap(
|
|
68
|
+
rb_self
|
|
69
|
+
.series
|
|
70
|
+
.borrow()
|
|
71
|
+
.mean_reduce()
|
|
72
|
+
.map_err(to_rb_err)?
|
|
73
|
+
.as_any_value(),
|
|
74
|
+
)
|
|
75
|
+
.into_value_with(ruby)),
|
|
76
|
+
_ => Ok(rb_self.series.borrow().mean().into_value_with(ruby)),
|
|
66
77
|
}
|
|
67
78
|
}
|
|
68
79
|
|
|
69
|
-
pub fn median(&
|
|
70
|
-
match
|
|
80
|
+
pub fn median(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
81
|
+
match rb_self.series.borrow().dtype() {
|
|
71
82
|
DataType::Boolean => Ok(Wrap(
|
|
72
|
-
|
|
83
|
+
rb_self
|
|
84
|
+
.series
|
|
73
85
|
.borrow()
|
|
74
86
|
.cast(&DataType::UInt8)
|
|
75
87
|
.unwrap()
|
|
@@ -77,50 +89,58 @@ impl RbSeries {
|
|
|
77
89
|
.map_err(RbPolarsErr::from)?
|
|
78
90
|
.as_any_value(),
|
|
79
91
|
)
|
|
80
|
-
.
|
|
92
|
+
.into_value_with(ruby)),
|
|
81
93
|
// For non-numeric output types we require median_reduce.
|
|
82
94
|
dt if dt.is_temporal() => Ok(Wrap(
|
|
83
|
-
|
|
95
|
+
rb_self
|
|
96
|
+
.series
|
|
84
97
|
.borrow()
|
|
85
98
|
.median_reduce()
|
|
86
99
|
.map_err(RbPolarsErr::from)?
|
|
87
100
|
.as_any_value(),
|
|
88
101
|
)
|
|
89
|
-
.
|
|
90
|
-
_ => Ok(
|
|
102
|
+
.into_value_with(ruby)),
|
|
103
|
+
_ => Ok(rb_self.series.borrow().median().into_value_with(ruby)),
|
|
91
104
|
}
|
|
92
105
|
}
|
|
93
106
|
|
|
94
|
-
pub fn min(&
|
|
107
|
+
pub fn min(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
95
108
|
Ok(Wrap(
|
|
96
|
-
|
|
109
|
+
rb_self
|
|
110
|
+
.series
|
|
97
111
|
.borrow()
|
|
98
112
|
.min_reduce()
|
|
99
113
|
.map_err(RbPolarsErr::from)?
|
|
100
114
|
.as_any_value(),
|
|
101
115
|
)
|
|
102
|
-
.
|
|
116
|
+
.into_value_with(ruby))
|
|
103
117
|
}
|
|
104
118
|
|
|
105
|
-
pub fn quantile(
|
|
106
|
-
|
|
119
|
+
pub fn quantile(
|
|
120
|
+
ruby: &Ruby,
|
|
121
|
+
rb_self: &Self,
|
|
122
|
+
quantile: f64,
|
|
123
|
+
interpolation: Wrap<QuantileMethod>,
|
|
124
|
+
) -> RbResult<Value> {
|
|
125
|
+
let bind = rb_self
|
|
107
126
|
.series
|
|
108
127
|
.borrow()
|
|
109
128
|
.quantile_reduce(quantile, interpolation.0);
|
|
110
129
|
let sc = bind.map_err(RbPolarsErr::from)?;
|
|
111
130
|
|
|
112
|
-
Ok(Wrap(sc.as_any_value()).
|
|
131
|
+
Ok(Wrap(sc.as_any_value()).into_value_with(ruby))
|
|
113
132
|
}
|
|
114
133
|
|
|
115
|
-
pub fn sum(&
|
|
134
|
+
pub fn sum(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
116
135
|
Ok(Wrap(
|
|
117
|
-
|
|
136
|
+
rb_self
|
|
137
|
+
.series
|
|
118
138
|
.borrow()
|
|
119
139
|
.sum_reduce()
|
|
120
140
|
.map_err(RbPolarsErr::from)?
|
|
121
141
|
.as_any_value(),
|
|
122
142
|
)
|
|
123
|
-
.
|
|
143
|
+
.into_value_with(ruby))
|
|
124
144
|
}
|
|
125
145
|
|
|
126
146
|
pub fn first(&self) -> RbResult<Value> {
|
|
@@ -75,10 +75,12 @@ init_method_opt!(new_opt_u8, UInt8Type, u8);
|
|
|
75
75
|
init_method_opt!(new_opt_u16, UInt16Type, u16);
|
|
76
76
|
init_method_opt!(new_opt_u32, UInt32Type, u32);
|
|
77
77
|
init_method_opt!(new_opt_u64, UInt64Type, u64);
|
|
78
|
+
init_method_opt!(new_opt_u128, UInt128Type, u128);
|
|
78
79
|
init_method_opt!(new_opt_i8, Int8Type, i8);
|
|
79
80
|
init_method_opt!(new_opt_i16, Int16Type, i16);
|
|
80
81
|
init_method_opt!(new_opt_i32, Int32Type, i32);
|
|
81
82
|
init_method_opt!(new_opt_i64, Int64Type, i64);
|
|
83
|
+
init_method_opt!(new_opt_i128, Int128Type, i128);
|
|
82
84
|
init_method_opt!(new_opt_f32, Float32Type, f32);
|
|
83
85
|
init_method_opt!(new_opt_f64, Float64Type, f64);
|
|
84
86
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{IntoValue,
|
|
1
|
+
use magnus::{IntoValue, Ruby, Value, value::ReprValue};
|
|
2
2
|
use polars_core::prelude::*;
|
|
3
3
|
|
|
4
4
|
use crate::RbSeries;
|
|
@@ -11,42 +11,44 @@ impl RbSeries {
|
|
|
11
11
|
let series = &self.series.borrow();
|
|
12
12
|
|
|
13
13
|
fn to_a_recursive(series: &Series) -> Value {
|
|
14
|
+
let ruby = Ruby::get().unwrap();
|
|
14
15
|
let rblist = match series.dtype() {
|
|
15
|
-
DataType::Boolean =>
|
|
16
|
-
DataType::UInt8 =>
|
|
17
|
-
DataType::UInt16 =>
|
|
18
|
-
DataType::UInt32 =>
|
|
19
|
-
DataType::UInt64 =>
|
|
20
|
-
DataType::
|
|
21
|
-
DataType::
|
|
22
|
-
DataType::
|
|
23
|
-
DataType::
|
|
24
|
-
DataType::
|
|
25
|
-
DataType::
|
|
26
|
-
DataType::
|
|
16
|
+
DataType::Boolean => ruby.ary_from_iter(series.bool().unwrap()).as_value(),
|
|
17
|
+
DataType::UInt8 => ruby.ary_from_iter(series.u8().unwrap()).as_value(),
|
|
18
|
+
DataType::UInt16 => ruby.ary_from_iter(series.u16().unwrap()).as_value(),
|
|
19
|
+
DataType::UInt32 => ruby.ary_from_iter(series.u32().unwrap()).as_value(),
|
|
20
|
+
DataType::UInt64 => ruby.ary_from_iter(series.u64().unwrap()).as_value(),
|
|
21
|
+
DataType::UInt128 => ruby.ary_from_iter(series.u128().unwrap()).as_value(),
|
|
22
|
+
DataType::Int8 => ruby.ary_from_iter(series.i8().unwrap()).as_value(),
|
|
23
|
+
DataType::Int16 => ruby.ary_from_iter(series.i16().unwrap()).as_value(),
|
|
24
|
+
DataType::Int32 => ruby.ary_from_iter(series.i32().unwrap()).as_value(),
|
|
25
|
+
DataType::Int64 => ruby.ary_from_iter(series.i64().unwrap()).as_value(),
|
|
26
|
+
DataType::Int128 => ruby.ary_from_iter(series.i128().unwrap()).as_value(),
|
|
27
|
+
DataType::Float32 => ruby.ary_from_iter(series.f32().unwrap()).as_value(),
|
|
28
|
+
DataType::Float64 => ruby.ary_from_iter(series.f64().unwrap()).as_value(),
|
|
27
29
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
|
28
30
|
with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
|
|
29
|
-
|
|
31
|
+
ruby.ary_from_iter(series.cat::<$C>().unwrap().iter_str()).as_value()
|
|
30
32
|
})
|
|
31
33
|
}
|
|
32
34
|
DataType::Object(_) => {
|
|
33
|
-
let v =
|
|
35
|
+
let v = ruby.ary_new_capa(series.len());
|
|
34
36
|
for i in 0..series.len() {
|
|
35
37
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
|
36
38
|
match obj {
|
|
37
39
|
Some(val) => v.push(val.to_value()).unwrap(),
|
|
38
|
-
None => v.push(qnil()).unwrap(),
|
|
40
|
+
None => v.push(ruby.qnil()).unwrap(),
|
|
39
41
|
};
|
|
40
42
|
}
|
|
41
|
-
v.
|
|
43
|
+
v.as_value()
|
|
42
44
|
}
|
|
43
45
|
DataType::List(_) => {
|
|
44
|
-
let v =
|
|
46
|
+
let v = ruby.ary_new();
|
|
45
47
|
let ca = series.list().unwrap();
|
|
46
48
|
for opt_s in ca.amortized_iter() {
|
|
47
49
|
match opt_s {
|
|
48
50
|
None => {
|
|
49
|
-
v.push(qnil()).unwrap();
|
|
51
|
+
v.push(ruby.qnil()).unwrap();
|
|
50
52
|
}
|
|
51
53
|
Some(s) => {
|
|
52
54
|
let rblst = to_a_recursive(s.as_ref());
|
|
@@ -54,15 +56,15 @@ impl RbSeries {
|
|
|
54
56
|
}
|
|
55
57
|
}
|
|
56
58
|
}
|
|
57
|
-
v.
|
|
59
|
+
v.as_value()
|
|
58
60
|
}
|
|
59
61
|
DataType::Array(_, _) => {
|
|
60
|
-
let v =
|
|
62
|
+
let v = ruby.ary_new();
|
|
61
63
|
let ca = series.array().unwrap();
|
|
62
64
|
for opt_s in ca.amortized_iter() {
|
|
63
65
|
match opt_s {
|
|
64
66
|
None => {
|
|
65
|
-
v.push(qnil()).unwrap();
|
|
67
|
+
v.push(ruby.qnil()).unwrap();
|
|
66
68
|
}
|
|
67
69
|
Some(s) => {
|
|
68
70
|
let rblst = to_a_recursive(s.as_ref());
|
|
@@ -70,39 +72,39 @@ impl RbSeries {
|
|
|
70
72
|
}
|
|
71
73
|
}
|
|
72
74
|
}
|
|
73
|
-
v.
|
|
75
|
+
v.as_value()
|
|
74
76
|
}
|
|
75
77
|
DataType::Date => {
|
|
76
78
|
let ca = series.date().unwrap();
|
|
77
|
-
return Wrap(ca).
|
|
79
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
78
80
|
}
|
|
79
81
|
DataType::Time => {
|
|
80
82
|
let ca = series.time().unwrap();
|
|
81
|
-
return Wrap(ca).
|
|
83
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
82
84
|
}
|
|
83
85
|
DataType::Datetime(_, _) => {
|
|
84
86
|
let ca = series.datetime().unwrap();
|
|
85
|
-
return Wrap(ca).
|
|
87
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
86
88
|
}
|
|
87
89
|
DataType::Decimal(_, _) => {
|
|
88
90
|
let ca = series.decimal().unwrap();
|
|
89
|
-
return Wrap(ca).
|
|
91
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
90
92
|
}
|
|
91
93
|
DataType::String => {
|
|
92
94
|
let ca = series.str().unwrap();
|
|
93
|
-
return Wrap(ca).
|
|
95
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
94
96
|
}
|
|
95
97
|
DataType::Struct(_) => {
|
|
96
98
|
let ca = series.struct_().unwrap();
|
|
97
|
-
return Wrap(ca).
|
|
99
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
98
100
|
}
|
|
99
101
|
DataType::Duration(_) => {
|
|
100
102
|
let ca = series.duration().unwrap();
|
|
101
|
-
return Wrap(ca).
|
|
103
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
102
104
|
}
|
|
103
105
|
DataType::Binary => {
|
|
104
106
|
let ca = series.binary().unwrap();
|
|
105
|
-
return Wrap(ca).
|
|
107
|
+
return Wrap(ca).into_value_with(&ruby);
|
|
106
108
|
}
|
|
107
109
|
DataType::Null => {
|
|
108
110
|
let null: Option<u8> = None;
|
|
@@ -125,7 +127,10 @@ impl RbSeries {
|
|
|
125
127
|
}
|
|
126
128
|
impl ExactSizeIterator for NullIter {}
|
|
127
129
|
|
|
128
|
-
|
|
130
|
+
Ruby::get()
|
|
131
|
+
.unwrap()
|
|
132
|
+
.ary_from_iter(NullIter { iter, n })
|
|
133
|
+
.as_value()
|
|
129
134
|
}
|
|
130
135
|
DataType::Unknown(_) => {
|
|
131
136
|
panic!("to_a not implemented for unknown")
|
|
@@ -134,7 +139,7 @@ impl RbSeries {
|
|
|
134
139
|
unreachable!()
|
|
135
140
|
}
|
|
136
141
|
};
|
|
137
|
-
rblist.
|
|
142
|
+
rblist.as_value()
|
|
138
143
|
}
|
|
139
144
|
|
|
140
145
|
to_a_recursive(series)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{Error, IntoValue, RArray,
|
|
1
|
+
use magnus::{Error, IntoValue, RArray, Ruby, Value, value::ReprValue};
|
|
2
2
|
use polars::prelude::*;
|
|
3
3
|
use polars::series::IsSorted;
|
|
4
4
|
use polars_core::utils::flatten::flatten_series;
|
|
@@ -6,7 +6,7 @@ use polars_core::utils::flatten::flatten_series;
|
|
|
6
6
|
use crate::conversion::*;
|
|
7
7
|
use crate::exceptions::RbIndexError;
|
|
8
8
|
use crate::rb_modules;
|
|
9
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
|
|
9
|
+
use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
|
|
10
10
|
|
|
11
11
|
impl RbSeries {
|
|
12
12
|
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
|
@@ -16,7 +16,6 @@ impl RbSeries {
|
|
|
16
16
|
Ok(df.into())
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
-
// TODO add to Ruby
|
|
20
19
|
pub fn struct_fields(&self) -> RbResult<Vec<String>> {
|
|
21
20
|
let binding = self.series.borrow();
|
|
22
21
|
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
|
@@ -87,8 +86,8 @@ impl RbSeries {
|
|
|
87
86
|
}
|
|
88
87
|
}
|
|
89
88
|
|
|
90
|
-
pub fn get_index(&
|
|
91
|
-
let binding =
|
|
89
|
+
pub fn get_index(ruby: &Ruby, rb_self: &Self, index: usize) -> RbResult<Value> {
|
|
90
|
+
let binding = rb_self.series.borrow();
|
|
92
91
|
let av = match binding.get(index) {
|
|
93
92
|
Ok(v) => v,
|
|
94
93
|
Err(PolarsError::OutOfBounds(err)) => {
|
|
@@ -100,26 +99,26 @@ impl RbSeries {
|
|
|
100
99
|
match av {
|
|
101
100
|
AnyValue::List(s) | AnyValue::Array(s, _) => {
|
|
102
101
|
let rbseries = RbSeries::new(s);
|
|
103
|
-
rb_modules::
|
|
102
|
+
rb_modules::pl_utils().funcall("wrap_s", (rbseries,))
|
|
104
103
|
}
|
|
105
|
-
_ => Ok(Wrap(av).
|
|
104
|
+
_ => Ok(Wrap(av).into_value_with(ruby)),
|
|
106
105
|
}
|
|
107
106
|
}
|
|
108
107
|
|
|
109
|
-
pub fn get_index_signed(&
|
|
108
|
+
pub fn get_index_signed(ruby: &Ruby, rb_self: &Self, index: isize) -> RbResult<Value> {
|
|
110
109
|
let index = if index < 0 {
|
|
111
|
-
match
|
|
110
|
+
match rb_self.len().checked_sub(index.unsigned_abs()) {
|
|
112
111
|
Some(v) => v,
|
|
113
112
|
None => {
|
|
114
113
|
return Err(RbIndexError::new_err(
|
|
115
|
-
polars_err!(oob = index,
|
|
114
|
+
polars_err!(oob = index, rb_self.len()).to_string(),
|
|
116
115
|
));
|
|
117
116
|
}
|
|
118
117
|
}
|
|
119
118
|
} else {
|
|
120
119
|
usize::try_from(index).unwrap()
|
|
121
120
|
};
|
|
122
|
-
|
|
121
|
+
Self::get_index(ruby, rb_self, index)
|
|
123
122
|
}
|
|
124
123
|
|
|
125
124
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
|
@@ -149,16 +148,17 @@ impl RbSeries {
|
|
|
149
148
|
self.series.borrow_mut().rename(name.into());
|
|
150
149
|
}
|
|
151
150
|
|
|
152
|
-
pub fn dtype(&
|
|
153
|
-
Wrap(
|
|
151
|
+
pub fn dtype(ruby: &Ruby, rb_self: &Self) -> Value {
|
|
152
|
+
Wrap(rb_self.series.borrow().dtype().clone()).into_value_with(ruby)
|
|
154
153
|
}
|
|
155
154
|
|
|
156
|
-
pub fn inner_dtype(&
|
|
157
|
-
|
|
155
|
+
pub fn inner_dtype(ruby: &Ruby, rb_self: &Self) -> Option<Value> {
|
|
156
|
+
rb_self
|
|
157
|
+
.series
|
|
158
158
|
.borrow()
|
|
159
159
|
.dtype()
|
|
160
160
|
.inner_dtype()
|
|
161
|
-
.map(|dt| Wrap(dt.clone()).
|
|
161
|
+
.map(|dt| Wrap(dt.clone()).into_value_with(ruby))
|
|
162
162
|
}
|
|
163
163
|
|
|
164
164
|
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
|
@@ -175,11 +175,11 @@ impl RbSeries {
|
|
|
175
175
|
self.series.borrow().n_chunks()
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
-
pub fn append(&
|
|
179
|
-
let mut binding =
|
|
178
|
+
pub fn append(ruby: &Ruby, rb_self: &Self, other: &RbSeries) -> RbResult<()> {
|
|
179
|
+
let mut binding = rb_self.series.borrow_mut();
|
|
180
180
|
let res = binding.append(&other.series.borrow());
|
|
181
181
|
if let Err(e) = res {
|
|
182
|
-
Err(Error::new(
|
|
182
|
+
Err(Error::new(ruby.exception_runtime_error(), e.to_string()))
|
|
183
183
|
} else {
|
|
184
184
|
Ok(())
|
|
185
185
|
}
|
|
@@ -193,22 +193,30 @@ impl RbSeries {
|
|
|
193
193
|
Ok(())
|
|
194
194
|
}
|
|
195
195
|
|
|
196
|
-
pub fn new_from_index(
|
|
197
|
-
|
|
198
|
-
|
|
196
|
+
pub fn new_from_index(
|
|
197
|
+
ruby: &Ruby,
|
|
198
|
+
rb_self: &Self,
|
|
199
|
+
index: usize,
|
|
200
|
+
length: usize,
|
|
201
|
+
) -> RbResult<Self> {
|
|
202
|
+
if index >= rb_self.series.borrow().len() {
|
|
203
|
+
Err(Error::new(
|
|
204
|
+
ruby.exception_arg_error(),
|
|
205
|
+
"index is out of bounds",
|
|
206
|
+
))
|
|
199
207
|
} else {
|
|
200
|
-
Ok(
|
|
208
|
+
Ok(rb_self.series.borrow().new_from_index(index, length).into())
|
|
201
209
|
}
|
|
202
210
|
}
|
|
203
211
|
|
|
204
|
-
pub fn filter(&
|
|
212
|
+
pub fn filter(ruby: &Ruby, rb_self: &Self, filter: &RbSeries) -> RbResult<Self> {
|
|
205
213
|
let filter_series = &filter.series.borrow();
|
|
206
214
|
if let Ok(ca) = filter_series.bool() {
|
|
207
|
-
let series =
|
|
215
|
+
let series = rb_self.series.borrow().filter(ca).unwrap();
|
|
208
216
|
Ok(series.into())
|
|
209
217
|
} else {
|
|
210
218
|
Err(Error::new(
|
|
211
|
-
|
|
219
|
+
ruby.exception_runtime_error(),
|
|
212
220
|
"Expected a boolean mask".to_string(),
|
|
213
221
|
))
|
|
214
222
|
}
|
|
@@ -319,6 +327,35 @@ impl RbSeries {
|
|
|
319
327
|
Ok((!bool).into_series().into())
|
|
320
328
|
}
|
|
321
329
|
|
|
330
|
+
pub fn shrink_dtype(&self) -> RbResult<Self> {
|
|
331
|
+
self.series
|
|
332
|
+
.borrow()
|
|
333
|
+
.shrink_type()
|
|
334
|
+
.map(Into::into)
|
|
335
|
+
.map_err(RbPolarsErr::from)
|
|
336
|
+
.map_err(RbErr::from)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
pub fn str_to_decimal_infer(&self, inference_length: usize) -> RbResult<Self> {
|
|
340
|
+
let s = self.series.borrow();
|
|
341
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
|
342
|
+
ca.to_decimal_infer(inference_length)
|
|
343
|
+
.map(Into::into)
|
|
344
|
+
.map_err(RbPolarsErr::from)
|
|
345
|
+
.map_err(RbErr::from)
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
pub fn str_json_decode(&self, infer_schema_length: Option<usize>) -> RbResult<Self> {
|
|
349
|
+
let lock = self.series.borrow();
|
|
350
|
+
lock.str()
|
|
351
|
+
.map_err(RbPolarsErr::from)?
|
|
352
|
+
.json_decode(None, infer_schema_length)
|
|
353
|
+
.map(|s| s.with_name(lock.name().clone()))
|
|
354
|
+
.map(Into::into)
|
|
355
|
+
.map_err(RbPolarsErr::from)
|
|
356
|
+
.map_err(RbErr::from)
|
|
357
|
+
}
|
|
358
|
+
|
|
322
359
|
pub fn to_s(&self) -> String {
|
|
323
360
|
format!("{}", self.series.borrow())
|
|
324
361
|
}
|
|
@@ -404,11 +441,12 @@ impl RbSeries {
|
|
|
404
441
|
Ok(out.into())
|
|
405
442
|
}
|
|
406
443
|
|
|
407
|
-
pub fn get_chunks(&
|
|
408
|
-
|
|
409
|
-
.
|
|
410
|
-
|
|
411
|
-
|
|
444
|
+
pub fn get_chunks(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
|
445
|
+
ruby.ary_try_from_iter(
|
|
446
|
+
flatten_series(&rb_self.series.borrow())
|
|
447
|
+
.into_iter()
|
|
448
|
+
.map(|s| rb_modules::pl_utils().funcall::<_, _, Value>("wrap_s", (Self::new(s),))),
|
|
449
|
+
)
|
|
412
450
|
}
|
|
413
451
|
|
|
414
452
|
pub fn is_sorted(&self, descending: bool, nulls_last: bool) -> RbResult<bool> {
|
|
@@ -8,13 +8,14 @@ mod import;
|
|
|
8
8
|
mod map;
|
|
9
9
|
mod scatter;
|
|
10
10
|
|
|
11
|
-
use magnus::{RArray, prelude::*};
|
|
11
|
+
use magnus::{DataTypeFunctions, RArray, Ruby, TypedData, gc, prelude::*};
|
|
12
12
|
use polars::prelude::*;
|
|
13
13
|
use std::cell::RefCell;
|
|
14
14
|
|
|
15
|
-
use crate::RbResult;
|
|
15
|
+
use crate::{ObjectValue, RbResult};
|
|
16
16
|
|
|
17
|
-
#[
|
|
17
|
+
#[derive(TypedData)]
|
|
18
|
+
#[magnus(class = "Polars::RbSeries", mark)]
|
|
18
19
|
pub struct RbSeries {
|
|
19
20
|
pub series: RefCell<Series>,
|
|
20
21
|
}
|
|
@@ -42,9 +43,33 @@ pub fn to_series(rs: RArray) -> RbResult<Vec<Series>> {
|
|
|
42
43
|
}
|
|
43
44
|
|
|
44
45
|
pub fn to_rbseries(s: Vec<Column>) -> RArray {
|
|
45
|
-
|
|
46
|
+
Ruby::get().unwrap().ary_from_iter(
|
|
46
47
|
s.into_iter()
|
|
47
48
|
.map(|c| c.take_materialized_series())
|
|
48
49
|
.map(RbSeries::new),
|
|
49
50
|
)
|
|
50
51
|
}
|
|
52
|
+
|
|
53
|
+
pub fn mark_series(marker: &gc::Marker, series: &Series) {
|
|
54
|
+
if let DataType::Object(_) = series.dtype() {
|
|
55
|
+
for i in 0..series.len() {
|
|
56
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
|
57
|
+
if let Some(o) = obj {
|
|
58
|
+
marker.mark(o.inner);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
impl DataTypeFunctions for RbSeries {
|
|
65
|
+
fn mark(&self, marker: &gc::Marker) {
|
|
66
|
+
// this is not ideal, as objects will not be marked if unable to borrow
|
|
67
|
+
// this should never happen, but log for now to avoid panic,
|
|
68
|
+
// as most series will not use Object datatype
|
|
69
|
+
if let Ok(s) = &self.series.try_borrow() {
|
|
70
|
+
mark_series(marker, s);
|
|
71
|
+
} else {
|
|
72
|
+
eprintln!("[polars] Could not borrow!");
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
data/lib/polars/array_expr.rb
CHANGED
|
@@ -38,7 +38,7 @@ module Polars
|
|
|
38
38
|
# @param offset [Integer]
|
|
39
39
|
# Start index. Negative indexing is supported.
|
|
40
40
|
# @param length [Integer]
|
|
41
|
-
# Length of the slice. If set to `
|
|
41
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
|
42
42
|
# end of the list.
|
|
43
43
|
# @param as_array [Boolean]
|
|
44
44
|
# Return result as a fixed-length `Array`, otherwise as a `List`.
|