polars-df 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,12 +1,15 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::encoding::{self, EncodingCapable};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
|
4
|
+
};
|
2
5
|
use polars::lazy::dsl;
|
3
6
|
use polars::prelude::*;
|
4
7
|
|
5
|
-
use crate::apply::lazy::binary_lambda;
|
6
8
|
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
9
|
+
use crate::map::lazy::binary_lambda;
|
7
10
|
use crate::prelude::vec_extract_wrapped;
|
8
11
|
use crate::rb_exprs_to_exprs;
|
9
|
-
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
10
13
|
|
11
14
|
macro_rules! set_unwrapped_or_0 {
|
12
15
|
($($var:ident),+ $(,)?) => {
|
@@ -14,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
|
|
14
17
|
};
|
15
18
|
}
|
16
19
|
|
17
|
-
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
-
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
-
}
|
20
|
-
|
21
20
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
21
|
let by = rb_exprs_to_exprs(by)?;
|
23
22
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -29,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
29
28
|
|
30
29
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
30
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
-
Ok(dsl::as_struct(
|
31
|
+
Ok(dsl::as_struct(exprs).into())
|
33
32
|
}
|
34
33
|
|
35
34
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
@@ -44,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
|
|
44
43
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
44
|
let lfs = lfs
|
46
45
|
.each()
|
47
|
-
.map(|v|
|
46
|
+
.map(|v| <&RbLazyFrame>::try_convert(v?))
|
48
47
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
48
|
|
50
49
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
@@ -57,7 +56,12 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
57
56
|
dsl::cols(names).into()
|
58
57
|
}
|
59
58
|
|
60
|
-
pub fn concat_lf(
|
59
|
+
pub fn concat_lf(
|
60
|
+
lfs: Value,
|
61
|
+
rechunk: bool,
|
62
|
+
parallel: bool,
|
63
|
+
to_supertypes: bool,
|
64
|
+
) -> RbResult<RbLazyFrame> {
|
61
65
|
let (seq, len) = get_rbseq(lfs)?;
|
62
66
|
let mut lfs = Vec::with_capacity(len);
|
63
67
|
|
@@ -67,40 +71,77 @@ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFr
|
|
67
71
|
lfs.push(lf);
|
68
72
|
}
|
69
73
|
|
70
|
-
let lf =
|
74
|
+
let lf = dsl::concat(
|
75
|
+
lfs,
|
76
|
+
UnionArgs {
|
77
|
+
rechunk,
|
78
|
+
parallel,
|
79
|
+
to_supertypes,
|
80
|
+
},
|
81
|
+
)
|
82
|
+
.map_err(RbPolarsErr::from)?;
|
83
|
+
Ok(lf.into())
|
84
|
+
}
|
85
|
+
|
86
|
+
pub fn concat_lf_diagonal(
|
87
|
+
lfs: RArray,
|
88
|
+
rechunk: bool,
|
89
|
+
parallel: bool,
|
90
|
+
to_supertypes: bool,
|
91
|
+
) -> RbResult<RbLazyFrame> {
|
92
|
+
let iter = lfs.each();
|
93
|
+
|
94
|
+
let lfs = iter
|
95
|
+
.map(|item| {
|
96
|
+
let item = item?;
|
97
|
+
get_lf(item)
|
98
|
+
})
|
99
|
+
.collect::<RbResult<Vec<_>>>()?;
|
100
|
+
|
101
|
+
let lf = dsl::functions::concat_lf_diagonal(
|
102
|
+
lfs,
|
103
|
+
UnionArgs {
|
104
|
+
rechunk,
|
105
|
+
parallel,
|
106
|
+
to_supertypes,
|
107
|
+
},
|
108
|
+
)
|
109
|
+
.map_err(RbPolarsErr::from)?;
|
71
110
|
Ok(lf.into())
|
72
111
|
}
|
73
112
|
|
74
113
|
#[allow(clippy::too_many_arguments)]
|
75
114
|
pub fn duration(
|
115
|
+
weeks: Option<&RbExpr>,
|
76
116
|
days: Option<&RbExpr>,
|
117
|
+
hours: Option<&RbExpr>,
|
118
|
+
minutes: Option<&RbExpr>,
|
77
119
|
seconds: Option<&RbExpr>,
|
78
|
-
nanoseconds: Option<&RbExpr>,
|
79
|
-
microseconds: Option<&RbExpr>,
|
80
120
|
milliseconds: Option<&RbExpr>,
|
81
|
-
|
82
|
-
|
83
|
-
|
121
|
+
microseconds: Option<&RbExpr>,
|
122
|
+
nanoseconds: Option<&RbExpr>,
|
123
|
+
time_unit: Wrap<TimeUnit>,
|
84
124
|
) -> RbExpr {
|
85
125
|
set_unwrapped_or_0!(
|
126
|
+
weeks,
|
86
127
|
days,
|
128
|
+
hours,
|
129
|
+
minutes,
|
87
130
|
seconds,
|
88
|
-
nanoseconds,
|
89
|
-
microseconds,
|
90
131
|
milliseconds,
|
91
|
-
|
92
|
-
|
93
|
-
weeks,
|
132
|
+
microseconds,
|
133
|
+
nanoseconds,
|
94
134
|
);
|
95
135
|
let args = DurationArgs {
|
136
|
+
weeks,
|
96
137
|
days,
|
138
|
+
hours,
|
139
|
+
minutes,
|
97
140
|
seconds,
|
98
|
-
nanoseconds,
|
99
|
-
microseconds,
|
100
141
|
milliseconds,
|
101
|
-
|
102
|
-
|
103
|
-
|
142
|
+
microseconds,
|
143
|
+
nanoseconds,
|
144
|
+
time_unit: time_unit.0,
|
104
145
|
};
|
105
146
|
dsl::duration(args).into()
|
106
147
|
}
|
@@ -123,28 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
|
123
164
|
|
124
165
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
166
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
|
+
let lambda = Opaque::from(lambda);
|
126
168
|
|
127
|
-
let func =
|
169
|
+
let func =
|
170
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
128
171
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
172
|
}
|
130
173
|
|
131
174
|
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
175
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
176
|
+
let lambda = Opaque::from(lambda);
|
133
177
|
|
134
|
-
let func =
|
135
|
-
|
178
|
+
let func =
|
179
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
+
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
181
|
}
|
137
182
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
-
} else if let Some(v) = RString::from_value(value) {
|
145
|
-
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
-
} else if value.is_kind_of(class::integer()) {
|
147
|
-
match value.try_convert::<i64>() {
|
183
|
+
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
184
|
+
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
185
|
+
Ok(dsl::lit(bool::try_convert(value)?).into())
|
186
|
+
} else if let Some(v) = Integer::from_value(value) {
|
187
|
+
match v.to_i64() {
|
148
188
|
Ok(val) => {
|
149
189
|
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
190
|
Ok(dsl::lit(val as i32).into())
|
@@ -153,21 +193,51 @@ pub fn lit(value: Value) -> RbResult<RbExpr> {
|
|
153
193
|
}
|
154
194
|
}
|
155
195
|
_ => {
|
156
|
-
let val =
|
196
|
+
let val = v.to_u64()?;
|
157
197
|
Ok(dsl::lit(val).into())
|
158
198
|
}
|
159
199
|
}
|
200
|
+
} else if let Some(v) = Float::from_value(value) {
|
201
|
+
Ok(dsl::lit(v.to_f64()).into())
|
202
|
+
} else if let Some(v) = RString::from_value(value) {
|
203
|
+
if v.enc_get() == encoding::Index::utf8() {
|
204
|
+
Ok(dsl::lit(v.to_string()?).into())
|
205
|
+
} else {
|
206
|
+
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
207
|
+
}
|
208
|
+
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
209
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
210
|
+
} else if value.is_nil() {
|
211
|
+
Ok(dsl::lit(Null {}).into())
|
212
|
+
} else if allow_object {
|
213
|
+
todo!()
|
160
214
|
} else {
|
161
|
-
|
215
|
+
Err(RbValueError::new_err(format!(
|
216
|
+
"could not convert value {:?} as a Literal",
|
217
|
+
value.to_string()
|
218
|
+
)))
|
162
219
|
}
|
163
220
|
}
|
164
221
|
|
165
|
-
pub fn repeat(value:
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
222
|
+
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
223
|
+
let mut value = value.inner.clone();
|
224
|
+
let n = n.inner.clone();
|
225
|
+
|
226
|
+
if let Some(dtype) = dtype {
|
227
|
+
value = value.cast(dtype.0);
|
228
|
+
}
|
229
|
+
|
230
|
+
if let Expr::Literal(lv) = &value {
|
231
|
+
let av = lv.to_anyvalue().unwrap();
|
232
|
+
// Integer inputs that fit in Int32 are parsed as such
|
233
|
+
if let DataType::Int64 = av.dtype() {
|
234
|
+
let int_value = av.try_extract::<i64>().unwrap();
|
235
|
+
if int_value >= i32::MIN as i64 && int_value <= i32::MAX as i64 {
|
236
|
+
value = value.cast(DataType::Int32);
|
237
|
+
}
|
238
|
+
}
|
170
239
|
}
|
240
|
+
Ok(dsl::repeat(value, n).into())
|
171
241
|
}
|
172
242
|
|
173
243
|
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
@@ -179,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
179
249
|
.into()
|
180
250
|
}
|
181
251
|
|
182
|
-
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
252
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
184
254
|
}
|
185
255
|
|
186
256
|
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
@@ -197,13 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
|
197
267
|
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
268
|
let dtypes = dtypes
|
199
269
|
.each()
|
200
|
-
.map(|v|
|
270
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
201
271
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
272
|
let dtypes = vec_extract_wrapped(dtypes);
|
203
273
|
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
274
|
}
|
205
|
-
|
206
|
-
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
-
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
-
}
|
@@ -1,8 +1,38 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
|
+
use polars_core;
|
3
|
+
use polars_core::fmt::FloatFmt;
|
2
4
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
+
use polars_core::POOL;
|
3
6
|
|
4
7
|
use crate::conversion::Wrap;
|
8
|
+
use crate::{RbResult, RbValueError};
|
5
9
|
|
6
10
|
pub fn get_idx_type() -> Value {
|
7
11
|
Wrap(IDX_DTYPE).into_value()
|
8
12
|
}
|
13
|
+
|
14
|
+
pub fn threadpool_size() -> usize {
|
15
|
+
POOL.current_num_threads()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
19
|
+
let fmt = match fmt.as_str() {
|
20
|
+
"full" => FloatFmt::Full,
|
21
|
+
"mixed" => FloatFmt::Mixed,
|
22
|
+
e => {
|
23
|
+
return Err(RbValueError::new_err(format!(
|
24
|
+
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
+
)))
|
26
|
+
}
|
27
|
+
};
|
28
|
+
polars_core::fmt::set_float_fmt(fmt);
|
29
|
+
Ok(())
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn get_float_fmt() -> RbResult<String> {
|
33
|
+
let strfmt = match polars_core::fmt::get_float_fmt() {
|
34
|
+
FloatFmt::Full => "full",
|
35
|
+
FloatFmt::Mixed => "mixed",
|
36
|
+
};
|
37
|
+
Ok(strfmt.to_string())
|
38
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::prelude::*;
|
6
|
+
use crate::RbExpr;
|
7
|
+
|
8
|
+
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let dtype = dtype.0;
|
10
|
+
|
11
|
+
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
+
|
13
|
+
if dtype != DataType::Int64 {
|
14
|
+
result = result.cast(dtype)
|
15
|
+
}
|
16
|
+
|
17
|
+
result.into()
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
21
|
+
let dtype = dtype.0;
|
22
|
+
|
23
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
24
|
+
|
25
|
+
if dtype != DataType::Int64 {
|
26
|
+
result = result.cast(DataType::List(Box::new(dtype)))
|
27
|
+
}
|
28
|
+
|
29
|
+
result.into()
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn date_range(
|
33
|
+
start: &RbExpr,
|
34
|
+
end: &RbExpr,
|
35
|
+
every: String,
|
36
|
+
closed: Wrap<ClosedWindow>,
|
37
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
38
|
+
time_zone: Option<TimeZone>,
|
39
|
+
) -> RbExpr {
|
40
|
+
let start = start.inner.clone();
|
41
|
+
let end = end.inner.clone();
|
42
|
+
let every = Duration::parse(&every);
|
43
|
+
let closed = closed.0;
|
44
|
+
let time_unit = time_unit.map(|x| x.0);
|
45
|
+
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
46
|
+
}
|
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
|
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
18
|
#[derive(Clone)]
|
19
|
-
pub struct
|
20
|
-
pub inner: dsl::
|
19
|
+
pub struct RbThen {
|
20
|
+
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
impl From<dsl::
|
24
|
-
fn from(inner: dsl::
|
25
|
-
|
23
|
+
impl From<dsl::Then> for RbThen {
|
24
|
+
fn from(inner: dsl::Then) -> Self {
|
25
|
+
RbThen { inner }
|
26
26
|
}
|
27
27
|
}
|
28
28
|
|
29
29
|
impl RbWhen {
|
30
|
-
pub fn then(&self, expr: &RbExpr) ->
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbThen {
|
31
31
|
self.inner.clone().then(expr.inner.clone()).into()
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
impl
|
35
|
+
impl RbThen {
|
36
36
|
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
37
|
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
38
|
}
|