polars-df 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/README.md +7 -6
- data/ext/polars/Cargo.toml +10 -6
- data/ext/polars/src/batched_csv.rs +53 -50
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +31 -67
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +13 -4
- data/ext/polars/src/expr/general.rs +50 -9
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +12 -33
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +20 -3
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +111 -56
- data/ext/polars/src/lib.rs +68 -34
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +13 -133
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +23 -8
- data/ext/polars/src/dataframe.rs +0 -1182
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -39,3 +39,77 @@ pub fn date_range(
|
|
39
39
|
let time_unit = time_unit.map(|x| x.0);
|
40
40
|
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
41
41
|
}
|
42
|
+
|
43
|
+
pub fn date_ranges(
|
44
|
+
start: &RbExpr,
|
45
|
+
end: &RbExpr,
|
46
|
+
every: String,
|
47
|
+
closed: Wrap<ClosedWindow>,
|
48
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
49
|
+
time_zone: Option<TimeZone>,
|
50
|
+
) -> RbExpr {
|
51
|
+
let start = start.inner.clone();
|
52
|
+
let end = end.inner.clone();
|
53
|
+
let every = Duration::parse(&every);
|
54
|
+
let closed = closed.0;
|
55
|
+
let time_unit = time_unit.map(|x| x.0);
|
56
|
+
dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn datetime_range(
|
60
|
+
start: &RbExpr,
|
61
|
+
end: &RbExpr,
|
62
|
+
every: String,
|
63
|
+
closed: Wrap<ClosedWindow>,
|
64
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
65
|
+
time_zone: Option<TimeZone>,
|
66
|
+
) -> RbExpr {
|
67
|
+
let start = start.inner.clone();
|
68
|
+
let end = end.inner.clone();
|
69
|
+
let every = Duration::parse(&every);
|
70
|
+
let closed = closed.0;
|
71
|
+
let time_unit = time_unit.map(|x| x.0);
|
72
|
+
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
73
|
+
}
|
74
|
+
|
75
|
+
pub fn datetime_ranges(
|
76
|
+
start: &RbExpr,
|
77
|
+
end: &RbExpr,
|
78
|
+
every: String,
|
79
|
+
closed: Wrap<ClosedWindow>,
|
80
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
81
|
+
time_zone: Option<TimeZone>,
|
82
|
+
) -> RbExpr {
|
83
|
+
let start = start.inner.clone();
|
84
|
+
let end = end.inner.clone();
|
85
|
+
let every = Duration::parse(&every);
|
86
|
+
let closed = closed.0;
|
87
|
+
let time_unit = time_unit.map(|x| x.0);
|
88
|
+
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
89
|
+
}
|
90
|
+
|
91
|
+
pub fn time_range(
|
92
|
+
start: &RbExpr,
|
93
|
+
end: &RbExpr,
|
94
|
+
every: String,
|
95
|
+
closed: Wrap<ClosedWindow>,
|
96
|
+
) -> RbExpr {
|
97
|
+
let start = start.inner.clone();
|
98
|
+
let end = end.inner.clone();
|
99
|
+
let every = Duration::parse(&every);
|
100
|
+
let closed = closed.0;
|
101
|
+
dsl::time_range(start, end, every, closed).into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn time_ranges(
|
105
|
+
start: &RbExpr,
|
106
|
+
end: &RbExpr,
|
107
|
+
every: String,
|
108
|
+
closed: Wrap<ClosedWindow>,
|
109
|
+
) -> RbExpr {
|
110
|
+
let start = start.inner.clone();
|
111
|
+
let end = end.inner.clone();
|
112
|
+
let every = Duration::parse(&every);
|
113
|
+
let closed = closed.0;
|
114
|
+
dsl::time_ranges(start, end, every, closed).into()
|
115
|
+
}
|
@@ -2,42 +2,72 @@ use polars::lazy::dsl;
|
|
2
2
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
|
+
pub fn when(condition: &RbExpr) -> RbWhen {
|
6
|
+
RbWhen {
|
7
|
+
inner: dsl::when(condition.inner.clone()),
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
5
11
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
12
|
#[derive(Clone)]
|
7
13
|
pub struct RbWhen {
|
8
14
|
pub inner: dsl::When,
|
9
15
|
}
|
10
16
|
|
11
|
-
|
12
|
-
fn from(inner: dsl::When) -> Self {
|
13
|
-
RbWhen { inner }
|
14
|
-
}
|
15
|
-
}
|
16
|
-
|
17
|
-
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
17
|
+
#[magnus::wrap(class = "Polars::RbThen")]
|
18
18
|
#[derive(Clone)]
|
19
19
|
pub struct RbThen {
|
20
20
|
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
#[magnus::wrap(class = "Polars::RbChainedWhen")]
|
24
|
+
#[derive(Clone)]
|
25
|
+
pub struct RbChainedWhen {
|
26
|
+
pub inner: dsl::ChainedWhen,
|
27
|
+
}
|
28
|
+
|
29
|
+
#[magnus::wrap(class = "Polars::RbChainedThen")]
|
30
|
+
#[derive(Clone)]
|
31
|
+
pub struct RbChainedThen {
|
32
|
+
pub inner: dsl::ChainedThen,
|
27
33
|
}
|
28
34
|
|
29
35
|
impl RbWhen {
|
30
|
-
pub fn then(&self,
|
31
|
-
|
36
|
+
pub fn then(&self, statement: &RbExpr) -> RbThen {
|
37
|
+
RbThen {
|
38
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
39
|
+
}
|
32
40
|
}
|
33
41
|
}
|
34
42
|
|
35
43
|
impl RbThen {
|
36
|
-
pub fn
|
37
|
-
|
44
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
45
|
+
RbChainedWhen {
|
46
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
51
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
38
52
|
}
|
39
53
|
}
|
40
54
|
|
41
|
-
|
42
|
-
|
55
|
+
impl RbChainedWhen {
|
56
|
+
pub fn then(&self, statement: &RbExpr) -> RbChainedThen {
|
57
|
+
RbChainedThen {
|
58
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
impl RbChainedThen {
|
64
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
65
|
+
RbChainedWhen {
|
66
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
71
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
72
|
+
}
|
43
73
|
}
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod numo;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars_core::utils::try_get_supertype;
|
3
|
+
|
4
|
+
use crate::dataframe::RbDataFrame;
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn to_numo(&self) -> Option<Value> {
|
8
|
+
let mut st = None;
|
9
|
+
for s in self.df.borrow().iter() {
|
10
|
+
let dt_i = s.dtype();
|
11
|
+
match st {
|
12
|
+
None => st = Some(dt_i.clone()),
|
13
|
+
Some(ref mut st) => {
|
14
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
18
|
+
let _st = st?;
|
19
|
+
|
20
|
+
// TODO
|
21
|
+
None
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,60 @@
|
|
1
|
+
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::error::RbPolarsErr;
|
5
|
+
use crate::raise_err;
|
6
|
+
use crate::series::RbSeries;
|
7
|
+
use crate::RbResult;
|
8
|
+
|
9
|
+
impl RbSeries {
|
10
|
+
/// For numeric types, this should only be called for Series with null types.
|
11
|
+
/// This will cast to floats so that `nil = NAN`
|
12
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
13
|
+
let s = &self.series.borrow();
|
14
|
+
match s.dtype() {
|
15
|
+
DataType::String => {
|
16
|
+
let ca = s.str().unwrap();
|
17
|
+
|
18
|
+
// TODO make more efficient
|
19
|
+
let np_arr = RArray::from_iter(ca);
|
20
|
+
class::object()
|
21
|
+
.const_get::<_, RModule>("Numo")?
|
22
|
+
.const_get::<_, RClass>("RObject")?
|
23
|
+
.funcall("cast", (np_arr,))
|
24
|
+
}
|
25
|
+
dt if dt.is_numeric() => {
|
26
|
+
if s.bit_repr_is_large() {
|
27
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
28
|
+
let ca = s.f64().unwrap();
|
29
|
+
// TODO make more efficient
|
30
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
31
|
+
Some(v) => v,
|
32
|
+
None => f64::NAN,
|
33
|
+
}));
|
34
|
+
class::object()
|
35
|
+
.const_get::<_, RModule>("Numo")?
|
36
|
+
.const_get::<_, RClass>("DFloat")?
|
37
|
+
.funcall("cast", (np_arr,))
|
38
|
+
} else {
|
39
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
40
|
+
let ca = s.f32().unwrap();
|
41
|
+
// TODO make more efficient
|
42
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
43
|
+
Some(v) => v,
|
44
|
+
None => f32::NAN,
|
45
|
+
}));
|
46
|
+
class::object()
|
47
|
+
.const_get::<_, RModule>("Numo")?
|
48
|
+
.const_get::<_, RClass>("SFloat")?
|
49
|
+
.funcall("cast", (np_arr,))
|
50
|
+
}
|
51
|
+
}
|
52
|
+
dt => {
|
53
|
+
raise_err!(
|
54
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
55
|
+
ComputeError
|
56
|
+
);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::RowIndex;
|
2
|
+
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
@@ -48,7 +48,7 @@ impl RbLazyFrame {
|
|
48
48
|
// in this scope
|
49
49
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
50
50
|
|
51
|
-
let lp = serde_json::from_str::<
|
51
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
52
52
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
53
53
|
Ok(LazyFrame::from(lp).into())
|
54
54
|
}
|
@@ -63,7 +63,10 @@ impl RbLazyFrame {
|
|
63
63
|
row_index: Option<(String, IdxSize)>,
|
64
64
|
) -> RbResult<Self> {
|
65
65
|
let batch_size = batch_size.map(|v| v.0);
|
66
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
66
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
67
|
+
name: Arc::from(name.as_str()),
|
68
|
+
offset,
|
69
|
+
});
|
67
70
|
|
68
71
|
let lf = LazyJsonLineReader::new(path)
|
69
72
|
.with_infer_schema_length(infer_schema_length)
|
@@ -100,14 +103,17 @@ impl RbLazyFrame {
|
|
100
103
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
101
104
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
102
105
|
let eol_char = String::try_convert(arguments[19])?;
|
106
|
+
let truncate_ragged_lines = bool::try_convert(arguments[20])?;
|
103
107
|
// end arguments
|
104
108
|
|
105
109
|
let null_values = null_values.map(|w| w.0);
|
106
110
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
107
111
|
let separator = separator.as_bytes()[0];
|
108
112
|
let eol_char = eol_char.as_bytes()[0];
|
109
|
-
|
110
|
-
|
113
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
114
|
+
name: Arc::from(name.as_str()),
|
115
|
+
offset,
|
116
|
+
});
|
111
117
|
|
112
118
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
113
119
|
overwrite_dtype
|
@@ -115,25 +121,29 @@ impl RbLazyFrame {
|
|
115
121
|
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
116
122
|
.collect::<Schema>()
|
117
123
|
});
|
124
|
+
|
118
125
|
let r = LazyCsvReader::new(path)
|
119
126
|
.with_infer_schema_length(infer_schema_length)
|
120
127
|
.with_separator(separator)
|
121
|
-
.
|
128
|
+
.with_has_header(has_header)
|
122
129
|
.with_ignore_errors(ignore_errors)
|
123
130
|
.with_skip_rows(skip_rows)
|
124
131
|
.with_n_rows(n_rows)
|
125
132
|
.with_cache(cache)
|
126
|
-
.with_dtype_overwrite(overwrite_dtype.
|
127
|
-
|
133
|
+
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
134
|
+
// TODO add with_schema
|
135
|
+
.with_low_memory(low_memory)
|
128
136
|
.with_comment_prefix(comment_prefix.as_deref())
|
129
137
|
.with_quote_char(quote_char)
|
130
|
-
.
|
138
|
+
.with_eol_char(eol_char)
|
131
139
|
.with_rechunk(rechunk)
|
132
140
|
.with_skip_rows_after_header(skip_rows_after_header)
|
133
141
|
.with_encoding(encoding.0)
|
134
142
|
.with_row_index(row_index)
|
135
143
|
.with_try_parse_dates(try_parse_dates)
|
136
|
-
.with_null_values(null_values)
|
144
|
+
.with_null_values(null_values)
|
145
|
+
// TODO add with_missing_is_null
|
146
|
+
.with_truncate_ragged_lines(truncate_ragged_lines);
|
137
147
|
|
138
148
|
if let Some(_lambda) = with_schema_modify {
|
139
149
|
todo!();
|
@@ -144,7 +154,8 @@ impl RbLazyFrame {
|
|
144
154
|
|
145
155
|
#[allow(clippy::too_many_arguments)]
|
146
156
|
pub fn new_from_parquet(
|
147
|
-
path:
|
157
|
+
path: Option<PathBuf>,
|
158
|
+
paths: Vec<PathBuf>,
|
148
159
|
n_rows: Option<usize>,
|
149
160
|
cache: bool,
|
150
161
|
parallel: Wrap<ParallelStrategy>,
|
@@ -153,21 +164,48 @@ impl RbLazyFrame {
|
|
153
164
|
low_memory: bool,
|
154
165
|
use_statistics: bool,
|
155
166
|
hive_partitioning: bool,
|
167
|
+
hive_schema: Option<Wrap<Schema>>,
|
168
|
+
glob: bool,
|
156
169
|
) -> RbResult<Self> {
|
157
|
-
let
|
170
|
+
let parallel = parallel.0;
|
171
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
172
|
+
|
173
|
+
let first_path = if let Some(path) = &path {
|
174
|
+
path
|
175
|
+
} else {
|
176
|
+
paths
|
177
|
+
.first()
|
178
|
+
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
179
|
+
};
|
180
|
+
|
181
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
182
|
+
name: Arc::from(name.as_str()),
|
183
|
+
offset,
|
184
|
+
});
|
185
|
+
let hive_options = HiveOptions {
|
186
|
+
enabled: hive_partitioning,
|
187
|
+
schema: hive_schema,
|
188
|
+
};
|
189
|
+
|
158
190
|
let args = ScanArgsParquet {
|
159
191
|
n_rows,
|
160
192
|
cache,
|
161
|
-
parallel
|
193
|
+
parallel,
|
162
194
|
rechunk,
|
163
195
|
row_index,
|
164
196
|
low_memory,
|
165
|
-
// TODO support cloud options
|
166
197
|
cloud_options: None,
|
167
198
|
use_statistics,
|
168
|
-
|
199
|
+
hive_options,
|
200
|
+
glob,
|
169
201
|
};
|
170
|
-
|
202
|
+
|
203
|
+
let lf = if path.is_some() {
|
204
|
+
LazyFrame::scan_parquet(first_path, args)
|
205
|
+
} else {
|
206
|
+
LazyFrame::scan_parquet_files(Arc::from(paths), args)
|
207
|
+
}
|
208
|
+
.map_err(RbPolarsErr::from)?;
|
171
209
|
Ok(lf.into())
|
172
210
|
}
|
173
211
|
|
@@ -179,13 +217,18 @@ impl RbLazyFrame {
|
|
179
217
|
row_index: Option<(String, IdxSize)>,
|
180
218
|
memory_map: bool,
|
181
219
|
) -> RbResult<Self> {
|
182
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
220
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
221
|
+
name: Arc::from(name.as_str()),
|
222
|
+
offset,
|
223
|
+
});
|
224
|
+
|
183
225
|
let args = ScanArgsIpc {
|
184
226
|
n_rows,
|
185
227
|
cache,
|
186
228
|
rechunk,
|
187
229
|
row_index,
|
188
|
-
|
230
|
+
memory_map,
|
231
|
+
cloud_options: None,
|
189
232
|
};
|
190
233
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
191
234
|
Ok(lf.into())
|
@@ -198,8 +241,11 @@ impl RbLazyFrame {
|
|
198
241
|
Ok(())
|
199
242
|
}
|
200
243
|
|
201
|
-
pub fn describe_plan(&self) -> String {
|
202
|
-
self.ldf
|
244
|
+
pub fn describe_plan(&self) -> RbResult<String> {
|
245
|
+
self.ldf
|
246
|
+
.describe_plan()
|
247
|
+
.map_err(RbPolarsErr::from)
|
248
|
+
.map_err(Into::into)
|
203
249
|
}
|
204
250
|
|
205
251
|
pub fn describe_optimized_plan(&self) -> RbResult<String> {
|
@@ -242,17 +288,18 @@ impl RbLazyFrame {
|
|
242
288
|
pub fn sort(
|
243
289
|
&self,
|
244
290
|
by_column: String,
|
245
|
-
|
291
|
+
descending: bool,
|
246
292
|
nulls_last: bool,
|
247
293
|
maintain_order: bool,
|
294
|
+
multithreaded: bool,
|
248
295
|
) -> Self {
|
249
296
|
let ldf = self.ldf.clone();
|
250
297
|
ldf.sort(
|
251
|
-
&by_column,
|
252
|
-
|
253
|
-
descending:
|
298
|
+
[&by_column],
|
299
|
+
SortMultipleOptions {
|
300
|
+
descending: vec![descending],
|
254
301
|
nulls_last,
|
255
|
-
multithreaded
|
302
|
+
multithreaded,
|
256
303
|
maintain_order,
|
257
304
|
},
|
258
305
|
)
|
@@ -261,15 +308,24 @@ impl RbLazyFrame {
|
|
261
308
|
|
262
309
|
pub fn sort_by_exprs(
|
263
310
|
&self,
|
264
|
-
|
265
|
-
|
311
|
+
by: RArray,
|
312
|
+
descending: Vec<bool>,
|
266
313
|
nulls_last: bool,
|
267
314
|
maintain_order: bool,
|
315
|
+
multithreaded: bool,
|
268
316
|
) -> RbResult<Self> {
|
269
317
|
let ldf = self.ldf.clone();
|
270
|
-
let exprs = rb_exprs_to_exprs(
|
318
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
271
319
|
Ok(ldf
|
272
|
-
.sort_by_exprs(
|
320
|
+
.sort_by_exprs(
|
321
|
+
exprs,
|
322
|
+
SortMultipleOptions {
|
323
|
+
descending,
|
324
|
+
nulls_last,
|
325
|
+
maintain_order,
|
326
|
+
multithreaded,
|
327
|
+
},
|
328
|
+
)
|
273
329
|
.into())
|
274
330
|
}
|
275
331
|
|
@@ -326,6 +382,7 @@ impl RbLazyFrame {
|
|
326
382
|
Ok(())
|
327
383
|
}
|
328
384
|
|
385
|
+
#[allow(clippy::too_many_arguments)]
|
329
386
|
pub fn sink_csv(
|
330
387
|
&self,
|
331
388
|
path: PathBuf,
|
@@ -427,7 +484,7 @@ impl RbLazyFrame {
|
|
427
484
|
let closed_window = closed.0;
|
428
485
|
let ldf = self.ldf.clone();
|
429
486
|
let by = rb_exprs_to_exprs(by)?;
|
430
|
-
let lazy_gb = ldf.
|
487
|
+
let lazy_gb = ldf.rolling(
|
431
488
|
index_column.inner.clone(),
|
432
489
|
by,
|
433
490
|
RollingGroupOptions {
|
@@ -602,58 +659,56 @@ impl RbLazyFrame {
|
|
602
659
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
603
660
|
}
|
604
661
|
|
605
|
-
pub fn min(&self) ->
|
662
|
+
pub fn min(&self) -> Self {
|
606
663
|
let ldf = self.ldf.clone();
|
607
|
-
let out = ldf.min()
|
608
|
-
|
664
|
+
let out = ldf.min();
|
665
|
+
out.into()
|
609
666
|
}
|
610
667
|
|
611
|
-
pub fn max(&self) ->
|
668
|
+
pub fn max(&self) -> Self {
|
612
669
|
let ldf = self.ldf.clone();
|
613
|
-
let out = ldf.max()
|
614
|
-
|
670
|
+
let out = ldf.max();
|
671
|
+
out.into()
|
615
672
|
}
|
616
673
|
|
617
|
-
pub fn sum(&self) ->
|
674
|
+
pub fn sum(&self) -> Self {
|
618
675
|
let ldf = self.ldf.clone();
|
619
|
-
let out = ldf.sum()
|
620
|
-
|
676
|
+
let out = ldf.sum();
|
677
|
+
out.into()
|
621
678
|
}
|
622
679
|
|
623
|
-
pub fn mean(&self) ->
|
680
|
+
pub fn mean(&self) -> Self {
|
624
681
|
let ldf = self.ldf.clone();
|
625
|
-
let out = ldf.mean()
|
626
|
-
|
682
|
+
let out = ldf.mean();
|
683
|
+
out.into()
|
627
684
|
}
|
628
685
|
|
629
|
-
pub fn std(&self, ddof: u8) ->
|
686
|
+
pub fn std(&self, ddof: u8) -> Self {
|
630
687
|
let ldf = self.ldf.clone();
|
631
|
-
let out = ldf.std(ddof)
|
632
|
-
|
688
|
+
let out = ldf.std(ddof);
|
689
|
+
out.into()
|
633
690
|
}
|
634
691
|
|
635
|
-
pub fn var(&self, ddof: u8) ->
|
692
|
+
pub fn var(&self, ddof: u8) -> Self {
|
636
693
|
let ldf = self.ldf.clone();
|
637
|
-
let out = ldf.var(ddof)
|
638
|
-
|
694
|
+
let out = ldf.var(ddof);
|
695
|
+
out.into()
|
639
696
|
}
|
640
697
|
|
641
|
-
pub fn median(&self) ->
|
698
|
+
pub fn median(&self) -> Self {
|
642
699
|
let ldf = self.ldf.clone();
|
643
|
-
let out = ldf.median()
|
644
|
-
|
700
|
+
let out = ldf.median();
|
701
|
+
out.into()
|
645
702
|
}
|
646
703
|
|
647
704
|
pub fn quantile(
|
648
705
|
&self,
|
649
706
|
quantile: &RbExpr,
|
650
707
|
interpolation: Wrap<QuantileInterpolOptions>,
|
651
|
-
) ->
|
708
|
+
) -> Self {
|
652
709
|
let ldf = self.ldf.clone();
|
653
|
-
let out = ldf
|
654
|
-
|
655
|
-
.map_err(RbPolarsErr::from)?;
|
656
|
-
Ok(out.into())
|
710
|
+
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
711
|
+
out.into()
|
657
712
|
}
|
658
713
|
|
659
714
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|