polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -9,8 +9,8 @@ use crate::{RbDataFrame, RbResult, RbSeries};
|
|
9
9
|
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
10
10
|
use polars_core::error::PolarsResult;
|
11
11
|
|
12
|
-
let mut iter = seq.each();
|
13
|
-
let first = iter.next().unwrap()?;
|
12
|
+
let mut iter = seq.into_iter();
|
13
|
+
let first = iter.next().unwrap();
|
14
14
|
|
15
15
|
let first_rdf = get_df(first)?;
|
16
16
|
let identity_df = first_rdf.slice(0, 0);
|
@@ -18,7 +18,7 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
18
18
|
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
19
19
|
|
20
20
|
for item in iter {
|
21
|
-
let rdf = get_df(item?)?;
|
21
|
+
let rdf = get_df(item)?;
|
22
22
|
rdfs.push(Ok(rdf));
|
23
23
|
}
|
24
24
|
|
@@ -37,13 +37,13 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
40
|
-
let mut iter = seq.each();
|
41
|
-
let first = iter.next().unwrap()?;
|
40
|
+
let mut iter = seq.into_iter();
|
41
|
+
let first = iter.next().unwrap();
|
42
42
|
|
43
43
|
let mut s = get_series(first)?;
|
44
44
|
|
45
45
|
for res in iter {
|
46
|
-
let item = res?;
|
46
|
+
let item = res;
|
47
47
|
let item = get_series(item)?;
|
48
48
|
s.append(&item).map_err(RbPolarsErr::from)?;
|
49
49
|
}
|
@@ -52,8 +52,8 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
|
52
52
|
|
53
53
|
pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
54
54
|
let mut dfs = Vec::new();
|
55
|
-
for item in seq.each() {
|
56
|
-
dfs.push(get_df(item?)?);
|
55
|
+
for item in seq.into_iter() {
|
56
|
+
dfs.push(get_df(item)?);
|
57
57
|
}
|
58
58
|
let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
|
59
59
|
Ok(df.into())
|
@@ -61,8 +61,8 @@ pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
|
61
61
|
|
62
62
|
pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
63
63
|
let mut dfs = Vec::new();
|
64
|
-
for item in seq.each() {
|
65
|
-
dfs.push(get_df(item?)?);
|
64
|
+
for item in seq.into_iter() {
|
65
|
+
dfs.push(get_df(item)?);
|
66
66
|
}
|
67
67
|
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
68
68
|
Ok(df.into())
|
@@ -58,7 +58,7 @@ pub fn rolling_cov(
|
|
58
58
|
pub fn arg_sort_by(
|
59
59
|
by: RArray,
|
60
60
|
descending: Vec<bool>,
|
61
|
-
nulls_last: bool,
|
61
|
+
nulls_last: Vec<bool>,
|
62
62
|
multithreaded: bool,
|
63
63
|
maintain_order: bool,
|
64
64
|
) -> RbResult<RbExpr> {
|
@@ -95,12 +95,12 @@ pub fn col(name: String) -> RbExpr {
|
|
95
95
|
|
96
96
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
97
|
let lfs = lfs
|
98
|
-
.
|
99
|
-
.map(
|
98
|
+
.into_iter()
|
99
|
+
.map(<&RbLazyFrame>::try_convert)
|
100
100
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
101
101
|
|
102
102
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
|
-
let df = lf.ldf.clone().collect().unwrap();
|
103
|
+
let df = lf.ldf.borrow().clone().collect().unwrap();
|
104
104
|
RbDataFrame::new(df)
|
105
105
|
})))
|
106
106
|
}
|
@@ -118,8 +118,8 @@ pub fn concat_lf(
|
|
118
118
|
let (seq, len) = get_rbseq(lfs)?;
|
119
119
|
let mut lfs = Vec::with_capacity(len);
|
120
120
|
|
121
|
-
for res in seq.each() {
|
122
|
-
let item = res?;
|
121
|
+
for res in seq.into_iter() {
|
122
|
+
let item = res;
|
123
123
|
let lf = get_lf(item)?;
|
124
124
|
lfs.push(lf);
|
125
125
|
}
|
@@ -184,14 +184,9 @@ pub fn concat_lf_diagonal(
|
|
184
184
|
parallel: bool,
|
185
185
|
to_supertypes: bool,
|
186
186
|
) -> RbResult<RbLazyFrame> {
|
187
|
-
let iter = lfs.each();
|
187
|
+
let iter = lfs.into_iter();
|
188
188
|
|
189
|
-
let lfs = iter
|
190
|
-
.map(|item| {
|
191
|
-
let item = item?;
|
192
|
-
get_lf(item)
|
193
|
-
})
|
194
|
-
.collect::<RbResult<Vec<_>>>()?;
|
189
|
+
let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
|
195
190
|
|
196
191
|
let lf = dsl::functions::concat_lf_diagonal(
|
197
192
|
lfs,
|
@@ -206,17 +201,22 @@ pub fn concat_lf_diagonal(
|
|
206
201
|
Ok(lf.into())
|
207
202
|
}
|
208
203
|
|
209
|
-
pub fn dtype_cols(dtypes:
|
210
|
-
dsl::dtype_cols(dtypes).into()
|
211
|
-
}
|
212
|
-
|
213
|
-
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
204
|
+
pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
214
205
|
let dtypes = dtypes
|
215
|
-
.
|
216
|
-
.map(
|
206
|
+
.into_iter()
|
207
|
+
.map(Wrap::<DataType>::try_convert)
|
217
208
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
218
209
|
let dtypes = vec_extract_wrapped(dtypes);
|
219
|
-
Ok(
|
210
|
+
Ok(dsl::dtype_cols(dtypes).into())
|
211
|
+
}
|
212
|
+
|
213
|
+
pub fn index_cols(indices: Vec<i64>) -> RbExpr {
|
214
|
+
if indices.len() == 1 {
|
215
|
+
dsl::nth(indices[0])
|
216
|
+
} else {
|
217
|
+
dsl::index_cols(indices)
|
218
|
+
}
|
219
|
+
.into()
|
220
220
|
}
|
221
221
|
|
222
222
|
#[allow(clippy::too_many_arguments)]
|
@@ -27,33 +27,27 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
|
|
27
27
|
pub fn date_range(
|
28
28
|
start: &RbExpr,
|
29
29
|
end: &RbExpr,
|
30
|
-
|
30
|
+
interval: String,
|
31
31
|
closed: Wrap<ClosedWindow>,
|
32
|
-
time_unit: Option<Wrap<TimeUnit>>,
|
33
|
-
time_zone: Option<TimeZone>,
|
34
32
|
) -> RbExpr {
|
35
33
|
let start = start.inner.clone();
|
36
34
|
let end = end.inner.clone();
|
37
|
-
let every = Duration::parse(&every);
|
35
|
+
let interval = Duration::parse(&interval);
|
38
36
|
let closed = closed.0;
|
39
|
-
|
40
|
-
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
37
|
+
dsl::date_range(start, end, interval, closed).into()
|
41
38
|
}
|
42
39
|
|
43
40
|
pub fn date_ranges(
|
44
41
|
start: &RbExpr,
|
45
42
|
end: &RbExpr,
|
46
|
-
|
43
|
+
interval: String,
|
47
44
|
closed: Wrap<ClosedWindow>,
|
48
|
-
time_unit: Option<Wrap<TimeUnit>>,
|
49
|
-
time_zone: Option<TimeZone>,
|
50
45
|
) -> RbExpr {
|
51
46
|
let start = start.inner.clone();
|
52
47
|
let end = end.inner.clone();
|
53
|
-
let every = Duration::parse(&every);
|
48
|
+
let interval = Duration::parse(&interval);
|
54
49
|
let closed = closed.0;
|
55
|
-
|
56
|
-
dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
|
50
|
+
dsl::date_ranges(start, end, interval, closed).into()
|
57
51
|
}
|
58
52
|
|
59
53
|
pub fn datetime_range(
|
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars::series::BitRepr;
|
2
3
|
use polars_core::prelude::*;
|
3
4
|
|
4
5
|
use crate::error::RbPolarsErr;
|
@@ -23,7 +24,7 @@ impl RbSeries {
|
|
23
24
|
.funcall("cast", (np_arr,))
|
24
25
|
}
|
25
26
|
dt if dt.is_numeric() => {
|
26
|
-
if s.
|
27
|
+
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
27
28
|
let s = s.cast(&DataType::Float64).unwrap();
|
28
29
|
let ca = s.f64().unwrap();
|
29
30
|
// TODO make more efficient
|