polars-df 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,12 +1,15 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::encoding::{self, EncodingCapable};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
|
4
|
+
};
|
2
5
|
use polars::lazy::dsl;
|
3
6
|
use polars::prelude::*;
|
4
7
|
|
5
|
-
use crate::apply::lazy::binary_lambda;
|
6
8
|
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
9
|
+
use crate::map::lazy::binary_lambda;
|
7
10
|
use crate::prelude::vec_extract_wrapped;
|
8
11
|
use crate::rb_exprs_to_exprs;
|
9
|
-
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
12
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
10
13
|
|
11
14
|
macro_rules! set_unwrapped_or_0 {
|
12
15
|
($($var:ident),+ $(,)?) => {
|
@@ -14,10 +17,6 @@ macro_rules! set_unwrapped_or_0 {
|
|
14
17
|
};
|
15
18
|
}
|
16
19
|
|
17
|
-
pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
|
18
|
-
dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
|
19
|
-
}
|
20
|
-
|
21
20
|
pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
|
22
21
|
let by = rb_exprs_to_exprs(by)?;
|
23
22
|
Ok(dsl::arg_sort_by(by, &descending).into())
|
@@ -29,7 +28,7 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
29
28
|
|
30
29
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
31
30
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
32
|
-
Ok(dsl::as_struct(
|
31
|
+
Ok(dsl::as_struct(exprs).into())
|
33
32
|
}
|
34
33
|
|
35
34
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
@@ -44,7 +43,7 @@ pub fn col(name: String) -> RbExpr {
|
|
44
43
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
45
44
|
let lfs = lfs
|
46
45
|
.each()
|
47
|
-
.map(|v|
|
46
|
+
.map(|v| <&RbLazyFrame>::try_convert(v?))
|
48
47
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
49
48
|
|
50
49
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
@@ -57,7 +56,12 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
57
56
|
dsl::cols(names).into()
|
58
57
|
}
|
59
58
|
|
60
|
-
pub fn concat_lf(
|
59
|
+
pub fn concat_lf(
|
60
|
+
lfs: Value,
|
61
|
+
rechunk: bool,
|
62
|
+
parallel: bool,
|
63
|
+
to_supertypes: bool,
|
64
|
+
) -> RbResult<RbLazyFrame> {
|
61
65
|
let (seq, len) = get_rbseq(lfs)?;
|
62
66
|
let mut lfs = Vec::with_capacity(len);
|
63
67
|
|
@@ -67,40 +71,77 @@ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFr
|
|
67
71
|
lfs.push(lf);
|
68
72
|
}
|
69
73
|
|
70
|
-
let lf =
|
74
|
+
let lf = dsl::concat(
|
75
|
+
lfs,
|
76
|
+
UnionArgs {
|
77
|
+
rechunk,
|
78
|
+
parallel,
|
79
|
+
to_supertypes,
|
80
|
+
},
|
81
|
+
)
|
82
|
+
.map_err(RbPolarsErr::from)?;
|
83
|
+
Ok(lf.into())
|
84
|
+
}
|
85
|
+
|
86
|
+
pub fn concat_lf_diagonal(
|
87
|
+
lfs: RArray,
|
88
|
+
rechunk: bool,
|
89
|
+
parallel: bool,
|
90
|
+
to_supertypes: bool,
|
91
|
+
) -> RbResult<RbLazyFrame> {
|
92
|
+
let iter = lfs.each();
|
93
|
+
|
94
|
+
let lfs = iter
|
95
|
+
.map(|item| {
|
96
|
+
let item = item?;
|
97
|
+
get_lf(item)
|
98
|
+
})
|
99
|
+
.collect::<RbResult<Vec<_>>>()?;
|
100
|
+
|
101
|
+
let lf = dsl::functions::concat_lf_diagonal(
|
102
|
+
lfs,
|
103
|
+
UnionArgs {
|
104
|
+
rechunk,
|
105
|
+
parallel,
|
106
|
+
to_supertypes,
|
107
|
+
},
|
108
|
+
)
|
109
|
+
.map_err(RbPolarsErr::from)?;
|
71
110
|
Ok(lf.into())
|
72
111
|
}
|
73
112
|
|
74
113
|
#[allow(clippy::too_many_arguments)]
|
75
114
|
pub fn duration(
|
115
|
+
weeks: Option<&RbExpr>,
|
76
116
|
days: Option<&RbExpr>,
|
117
|
+
hours: Option<&RbExpr>,
|
118
|
+
minutes: Option<&RbExpr>,
|
77
119
|
seconds: Option<&RbExpr>,
|
78
|
-
nanoseconds: Option<&RbExpr>,
|
79
|
-
microseconds: Option<&RbExpr>,
|
80
120
|
milliseconds: Option<&RbExpr>,
|
81
|
-
|
82
|
-
|
83
|
-
|
121
|
+
microseconds: Option<&RbExpr>,
|
122
|
+
nanoseconds: Option<&RbExpr>,
|
123
|
+
time_unit: Wrap<TimeUnit>,
|
84
124
|
) -> RbExpr {
|
85
125
|
set_unwrapped_or_0!(
|
126
|
+
weeks,
|
86
127
|
days,
|
128
|
+
hours,
|
129
|
+
minutes,
|
87
130
|
seconds,
|
88
|
-
nanoseconds,
|
89
|
-
microseconds,
|
90
131
|
milliseconds,
|
91
|
-
|
92
|
-
|
93
|
-
weeks,
|
132
|
+
microseconds,
|
133
|
+
nanoseconds,
|
94
134
|
);
|
95
135
|
let args = DurationArgs {
|
136
|
+
weeks,
|
96
137
|
days,
|
138
|
+
hours,
|
139
|
+
minutes,
|
97
140
|
seconds,
|
98
|
-
nanoseconds,
|
99
|
-
microseconds,
|
100
141
|
milliseconds,
|
101
|
-
|
102
|
-
|
103
|
-
|
142
|
+
microseconds,
|
143
|
+
nanoseconds,
|
144
|
+
time_unit: time_unit.0,
|
104
145
|
};
|
105
146
|
dsl::duration(args).into()
|
106
147
|
}
|
@@ -123,28 +164,27 @@ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
|
123
164
|
|
124
165
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
125
166
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
167
|
+
let lambda = Opaque::from(lambda);
|
126
168
|
|
127
|
-
let func =
|
169
|
+
let func =
|
170
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
128
171
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
129
172
|
}
|
130
173
|
|
131
174
|
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
132
175
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
176
|
+
let lambda = Opaque::from(lambda);
|
133
177
|
|
134
|
-
let func =
|
135
|
-
|
178
|
+
let func =
|
179
|
+
move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
|
180
|
+
Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
136
181
|
}
|
137
182
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
Ok(dsl::lit(series.series.borrow().clone()).into())
|
144
|
-
} else if let Some(v) = RString::from_value(value) {
|
145
|
-
Ok(dsl::lit(v.try_convert::<String>()?).into())
|
146
|
-
} else if value.is_kind_of(class::integer()) {
|
147
|
-
match value.try_convert::<i64>() {
|
183
|
+
pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
184
|
+
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
185
|
+
Ok(dsl::lit(bool::try_convert(value)?).into())
|
186
|
+
} else if let Some(v) = Integer::from_value(value) {
|
187
|
+
match v.to_i64() {
|
148
188
|
Ok(val) => {
|
149
189
|
if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
|
150
190
|
Ok(dsl::lit(val as i32).into())
|
@@ -153,21 +193,51 @@ pub fn lit(value: Value) -> RbResult<RbExpr> {
|
|
153
193
|
}
|
154
194
|
}
|
155
195
|
_ => {
|
156
|
-
let val =
|
196
|
+
let val = v.to_u64()?;
|
157
197
|
Ok(dsl::lit(val).into())
|
158
198
|
}
|
159
199
|
}
|
200
|
+
} else if let Some(v) = Float::from_value(value) {
|
201
|
+
Ok(dsl::lit(v.to_f64()).into())
|
202
|
+
} else if let Some(v) = RString::from_value(value) {
|
203
|
+
if v.enc_get() == encoding::Index::utf8() {
|
204
|
+
Ok(dsl::lit(v.to_string()?).into())
|
205
|
+
} else {
|
206
|
+
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
207
|
+
}
|
208
|
+
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
209
|
+
Ok(dsl::lit(series.series.borrow().clone()).into())
|
210
|
+
} else if value.is_nil() {
|
211
|
+
Ok(dsl::lit(Null {}).into())
|
212
|
+
} else if allow_object {
|
213
|
+
todo!()
|
160
214
|
} else {
|
161
|
-
|
215
|
+
Err(RbValueError::new_err(format!(
|
216
|
+
"could not convert value {:?} as a Literal",
|
217
|
+
value.to_string()
|
218
|
+
)))
|
162
219
|
}
|
163
220
|
}
|
164
221
|
|
165
|
-
pub fn repeat(value:
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
222
|
+
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
223
|
+
let mut value = value.inner.clone();
|
224
|
+
let n = n.inner.clone();
|
225
|
+
|
226
|
+
if let Some(dtype) = dtype {
|
227
|
+
value = value.cast(dtype.0);
|
228
|
+
}
|
229
|
+
|
230
|
+
if let Expr::Literal(lv) = &value {
|
231
|
+
let av = lv.to_anyvalue().unwrap();
|
232
|
+
// Integer inputs that fit in Int32 are parsed as such
|
233
|
+
if let DataType::Int64 = av.dtype() {
|
234
|
+
let int_value = av.try_extract::<i64>().unwrap();
|
235
|
+
if int_value >= i32::MIN as i64 && int_value <= i32::MAX as i64 {
|
236
|
+
value = value.cast(DataType::Int32);
|
237
|
+
}
|
238
|
+
}
|
170
239
|
}
|
240
|
+
Ok(dsl::repeat(value, n).into())
|
171
241
|
}
|
172
242
|
|
173
243
|
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
@@ -179,8 +249,8 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
179
249
|
.into()
|
180
250
|
}
|
181
251
|
|
182
|
-
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
183
|
-
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
252
|
+
pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
253
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
|
184
254
|
}
|
185
255
|
|
186
256
|
pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
|
@@ -197,13 +267,8 @@ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
|
|
197
267
|
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
198
268
|
let dtypes = dtypes
|
199
269
|
.each()
|
200
|
-
.map(|v|
|
270
|
+
.map(|v| Wrap::<DataType>::try_convert(v?))
|
201
271
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
202
272
|
let dtypes = vec_extract_wrapped(dtypes);
|
203
273
|
Ok(crate::functions::lazy::dtype_cols(dtypes))
|
204
274
|
}
|
205
|
-
|
206
|
-
pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
207
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
208
|
-
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
209
|
-
}
|
@@ -1,8 +1,38 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
|
+
use polars_core;
|
3
|
+
use polars_core::fmt::FloatFmt;
|
2
4
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
+
use polars_core::POOL;
|
3
6
|
|
4
7
|
use crate::conversion::Wrap;
|
8
|
+
use crate::{RbResult, RbValueError};
|
5
9
|
|
6
10
|
pub fn get_idx_type() -> Value {
|
7
11
|
Wrap(IDX_DTYPE).into_value()
|
8
12
|
}
|
13
|
+
|
14
|
+
pub fn threadpool_size() -> usize {
|
15
|
+
POOL.current_num_threads()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
19
|
+
let fmt = match fmt.as_str() {
|
20
|
+
"full" => FloatFmt::Full,
|
21
|
+
"mixed" => FloatFmt::Mixed,
|
22
|
+
e => {
|
23
|
+
return Err(RbValueError::new_err(format!(
|
24
|
+
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
+
)))
|
26
|
+
}
|
27
|
+
};
|
28
|
+
polars_core::fmt::set_float_fmt(fmt);
|
29
|
+
Ok(())
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn get_float_fmt() -> RbResult<String> {
|
33
|
+
let strfmt = match polars_core::fmt::get_float_fmt() {
|
34
|
+
FloatFmt::Full => "full",
|
35
|
+
FloatFmt::Mixed => "mixed",
|
36
|
+
};
|
37
|
+
Ok(strfmt.to_string())
|
38
|
+
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::lazy::dsl;
|
2
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::prelude::*;
|
6
|
+
use crate::RbExpr;
|
7
|
+
|
8
|
+
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
9
|
+
let dtype = dtype.0;
|
10
|
+
|
11
|
+
let mut result = dsl::int_range(start.inner.clone(), end.inner.clone(), step);
|
12
|
+
|
13
|
+
if dtype != DataType::Int64 {
|
14
|
+
result = result.cast(dtype)
|
15
|
+
}
|
16
|
+
|
17
|
+
result.into()
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
21
|
+
let dtype = dtype.0;
|
22
|
+
|
23
|
+
let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step);
|
24
|
+
|
25
|
+
if dtype != DataType::Int64 {
|
26
|
+
result = result.cast(DataType::List(Box::new(dtype)))
|
27
|
+
}
|
28
|
+
|
29
|
+
result.into()
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn date_range(
|
33
|
+
start: &RbExpr,
|
34
|
+
end: &RbExpr,
|
35
|
+
every: String,
|
36
|
+
closed: Wrap<ClosedWindow>,
|
37
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
38
|
+
time_zone: Option<TimeZone>,
|
39
|
+
) -> RbExpr {
|
40
|
+
let start = start.inner.clone();
|
41
|
+
let end = end.inner.clone();
|
42
|
+
let every = Duration::parse(&every);
|
43
|
+
let closed = closed.0;
|
44
|
+
let time_unit = time_unit.map(|x| x.0);
|
45
|
+
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
46
|
+
}
|
@@ -16,23 +16,23 @@ impl From<dsl::When> for RbWhen {
|
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
18
18
|
#[derive(Clone)]
|
19
|
-
pub struct
|
20
|
-
pub inner: dsl::
|
19
|
+
pub struct RbThen {
|
20
|
+
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
impl From<dsl::
|
24
|
-
fn from(inner: dsl::
|
25
|
-
|
23
|
+
impl From<dsl::Then> for RbThen {
|
24
|
+
fn from(inner: dsl::Then) -> Self {
|
25
|
+
RbThen { inner }
|
26
26
|
}
|
27
27
|
}
|
28
28
|
|
29
29
|
impl RbWhen {
|
30
|
-
pub fn then(&self, expr: &RbExpr) ->
|
30
|
+
pub fn then(&self, expr: &RbExpr) -> RbThen {
|
31
31
|
self.inner.clone().then(expr.inner.clone()).into()
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
35
|
-
impl
|
35
|
+
impl RbThen {
|
36
36
|
pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
|
37
37
|
self.inner.clone().otherwise(expr.inner.clone()).into()
|
38
38
|
}
|