polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -9,8 +9,8 @@ use crate::{RbDataFrame, RbResult, RbSeries};
|
|
9
9
|
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
10
10
|
use polars_core::error::PolarsResult;
|
11
11
|
|
12
|
-
let mut iter = seq.
|
13
|
-
let first = iter.next().unwrap()
|
12
|
+
let mut iter = seq.into_iter();
|
13
|
+
let first = iter.next().unwrap();
|
14
14
|
|
15
15
|
let first_rdf = get_df(first)?;
|
16
16
|
let identity_df = first_rdf.slice(0, 0);
|
@@ -18,7 +18,7 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
18
18
|
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
19
19
|
|
20
20
|
for item in iter {
|
21
|
-
let rdf = get_df(item
|
21
|
+
let rdf = get_df(item)?;
|
22
22
|
rdfs.push(Ok(rdf));
|
23
23
|
}
|
24
24
|
|
@@ -37,13 +37,13 @@ pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
40
|
-
let mut iter = seq.
|
41
|
-
let first = iter.next().unwrap()
|
40
|
+
let mut iter = seq.into_iter();
|
41
|
+
let first = iter.next().unwrap();
|
42
42
|
|
43
43
|
let mut s = get_series(first)?;
|
44
44
|
|
45
45
|
for res in iter {
|
46
|
-
let item = res
|
46
|
+
let item = res;
|
47
47
|
let item = get_series(item)?;
|
48
48
|
s.append(&item).map_err(RbPolarsErr::from)?;
|
49
49
|
}
|
@@ -52,8 +52,8 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
|
52
52
|
|
53
53
|
pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
54
54
|
let mut dfs = Vec::new();
|
55
|
-
for item in seq.
|
56
|
-
dfs.push(get_df(item
|
55
|
+
for item in seq.into_iter() {
|
56
|
+
dfs.push(get_df(item)?);
|
57
57
|
}
|
58
58
|
let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
|
59
59
|
Ok(df.into())
|
@@ -61,8 +61,8 @@ pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
|
61
61
|
|
62
62
|
pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
63
63
|
let mut dfs = Vec::new();
|
64
|
-
for item in seq.
|
65
|
-
dfs.push(get_df(item
|
64
|
+
for item in seq.into_iter() {
|
65
|
+
dfs.push(get_df(item)?);
|
66
66
|
}
|
67
67
|
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
68
68
|
Ok(df.into())
|
@@ -58,7 +58,7 @@ pub fn rolling_cov(
|
|
58
58
|
pub fn arg_sort_by(
|
59
59
|
by: RArray,
|
60
60
|
descending: Vec<bool>,
|
61
|
-
nulls_last: bool
|
61
|
+
nulls_last: Vec<bool>,
|
62
62
|
multithreaded: bool,
|
63
63
|
maintain_order: bool,
|
64
64
|
) -> RbResult<RbExpr> {
|
@@ -95,12 +95,12 @@ pub fn col(name: String) -> RbExpr {
|
|
95
95
|
|
96
96
|
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
97
97
|
let lfs = lfs
|
98
|
-
.
|
99
|
-
.map(
|
98
|
+
.into_iter()
|
99
|
+
.map(<&RbLazyFrame>::try_convert)
|
100
100
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
101
101
|
|
102
102
|
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
103
|
-
let df = lf.ldf.clone().collect().unwrap();
|
103
|
+
let df = lf.ldf.borrow().clone().collect().unwrap();
|
104
104
|
RbDataFrame::new(df)
|
105
105
|
})))
|
106
106
|
}
|
@@ -118,8 +118,8 @@ pub fn concat_lf(
|
|
118
118
|
let (seq, len) = get_rbseq(lfs)?;
|
119
119
|
let mut lfs = Vec::with_capacity(len);
|
120
120
|
|
121
|
-
for res in seq.
|
122
|
-
let item = res
|
121
|
+
for res in seq.into_iter() {
|
122
|
+
let item = res;
|
123
123
|
let lf = get_lf(item)?;
|
124
124
|
lfs.push(lf);
|
125
125
|
}
|
@@ -184,14 +184,9 @@ pub fn concat_lf_diagonal(
|
|
184
184
|
parallel: bool,
|
185
185
|
to_supertypes: bool,
|
186
186
|
) -> RbResult<RbLazyFrame> {
|
187
|
-
let iter = lfs.
|
187
|
+
let iter = lfs.into_iter();
|
188
188
|
|
189
|
-
let lfs = iter
|
190
|
-
.map(|item| {
|
191
|
-
let item = item?;
|
192
|
-
get_lf(item)
|
193
|
-
})
|
194
|
-
.collect::<RbResult<Vec<_>>>()?;
|
189
|
+
let lfs = iter.map(get_lf).collect::<RbResult<Vec<_>>>()?;
|
195
190
|
|
196
191
|
let lf = dsl::functions::concat_lf_diagonal(
|
197
192
|
lfs,
|
@@ -206,17 +201,22 @@ pub fn concat_lf_diagonal(
|
|
206
201
|
Ok(lf.into())
|
207
202
|
}
|
208
203
|
|
209
|
-
pub fn dtype_cols(dtypes:
|
210
|
-
dsl::dtype_cols(dtypes).into()
|
211
|
-
}
|
212
|
-
|
213
|
-
pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
|
204
|
+
pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
214
205
|
let dtypes = dtypes
|
215
|
-
.
|
216
|
-
.map(
|
206
|
+
.into_iter()
|
207
|
+
.map(Wrap::<DataType>::try_convert)
|
217
208
|
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
218
209
|
let dtypes = vec_extract_wrapped(dtypes);
|
219
|
-
Ok(
|
210
|
+
Ok(dsl::dtype_cols(dtypes).into())
|
211
|
+
}
|
212
|
+
|
213
|
+
pub fn index_cols(indices: Vec<i64>) -> RbExpr {
|
214
|
+
if indices.len() == 1 {
|
215
|
+
dsl::nth(indices[0])
|
216
|
+
} else {
|
217
|
+
dsl::index_cols(indices)
|
218
|
+
}
|
219
|
+
.into()
|
220
220
|
}
|
221
221
|
|
222
222
|
#[allow(clippy::too_many_arguments)]
|
@@ -27,33 +27,27 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
|
|
27
27
|
pub fn date_range(
|
28
28
|
start: &RbExpr,
|
29
29
|
end: &RbExpr,
|
30
|
-
|
30
|
+
interval: String,
|
31
31
|
closed: Wrap<ClosedWindow>,
|
32
|
-
time_unit: Option<Wrap<TimeUnit>>,
|
33
|
-
time_zone: Option<TimeZone>,
|
34
32
|
) -> RbExpr {
|
35
33
|
let start = start.inner.clone();
|
36
34
|
let end = end.inner.clone();
|
37
|
-
let
|
35
|
+
let interval = Duration::parse(&interval);
|
38
36
|
let closed = closed.0;
|
39
|
-
|
40
|
-
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
37
|
+
dsl::date_range(start, end, interval, closed).into()
|
41
38
|
}
|
42
39
|
|
43
40
|
pub fn date_ranges(
|
44
41
|
start: &RbExpr,
|
45
42
|
end: &RbExpr,
|
46
|
-
|
43
|
+
interval: String,
|
47
44
|
closed: Wrap<ClosedWindow>,
|
48
|
-
time_unit: Option<Wrap<TimeUnit>>,
|
49
|
-
time_zone: Option<TimeZone>,
|
50
45
|
) -> RbExpr {
|
51
46
|
let start = start.inner.clone();
|
52
47
|
let end = end.inner.clone();
|
53
|
-
let
|
48
|
+
let interval = Duration::parse(&interval);
|
54
49
|
let closed = closed.0;
|
55
|
-
|
56
|
-
dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
|
50
|
+
dsl::date_ranges(start, end, interval, closed).into()
|
57
51
|
}
|
58
52
|
|
59
53
|
pub fn datetime_range(
|
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars::series::BitRepr;
|
2
3
|
use polars_core::prelude::*;
|
3
4
|
|
4
5
|
use crate::error::RbPolarsErr;
|
@@ -23,7 +24,7 @@ impl RbSeries {
|
|
23
24
|
.funcall("cast", (np_arr,))
|
24
25
|
}
|
25
26
|
dt if dt.is_numeric() => {
|
26
|
-
if s.
|
27
|
+
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
27
28
|
let s = s.cast(&DataType::Float64).unwrap();
|
28
29
|
let ca = s.f64().unwrap();
|
29
30
|
// TODO make more efficient
|