polars-df 0.9.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/README.md +7 -6
- data/ext/polars/Cargo.toml +10 -6
- data/ext/polars/src/batched_csv.rs +53 -50
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +31 -67
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +13 -4
- data/ext/polars/src/expr/general.rs +50 -9
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +12 -33
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +20 -3
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +111 -56
- data/ext/polars/src/lib.rs +68 -34
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +13 -133
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +23 -8
- data/ext/polars/src/dataframe.rs +0 -1182
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -39,3 +39,77 @@ pub fn date_range(
|
|
39
39
|
let time_unit = time_unit.map(|x| x.0);
|
40
40
|
dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
|
41
41
|
}
|
42
|
+
|
43
|
+
pub fn date_ranges(
|
44
|
+
start: &RbExpr,
|
45
|
+
end: &RbExpr,
|
46
|
+
every: String,
|
47
|
+
closed: Wrap<ClosedWindow>,
|
48
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
49
|
+
time_zone: Option<TimeZone>,
|
50
|
+
) -> RbExpr {
|
51
|
+
let start = start.inner.clone();
|
52
|
+
let end = end.inner.clone();
|
53
|
+
let every = Duration::parse(&every);
|
54
|
+
let closed = closed.0;
|
55
|
+
let time_unit = time_unit.map(|x| x.0);
|
56
|
+
dsl::date_ranges(start, end, every, closed, time_unit, time_zone).into()
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn datetime_range(
|
60
|
+
start: &RbExpr,
|
61
|
+
end: &RbExpr,
|
62
|
+
every: String,
|
63
|
+
closed: Wrap<ClosedWindow>,
|
64
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
65
|
+
time_zone: Option<TimeZone>,
|
66
|
+
) -> RbExpr {
|
67
|
+
let start = start.inner.clone();
|
68
|
+
let end = end.inner.clone();
|
69
|
+
let every = Duration::parse(&every);
|
70
|
+
let closed = closed.0;
|
71
|
+
let time_unit = time_unit.map(|x| x.0);
|
72
|
+
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
73
|
+
}
|
74
|
+
|
75
|
+
pub fn datetime_ranges(
|
76
|
+
start: &RbExpr,
|
77
|
+
end: &RbExpr,
|
78
|
+
every: String,
|
79
|
+
closed: Wrap<ClosedWindow>,
|
80
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
81
|
+
time_zone: Option<TimeZone>,
|
82
|
+
) -> RbExpr {
|
83
|
+
let start = start.inner.clone();
|
84
|
+
let end = end.inner.clone();
|
85
|
+
let every = Duration::parse(&every);
|
86
|
+
let closed = closed.0;
|
87
|
+
let time_unit = time_unit.map(|x| x.0);
|
88
|
+
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
89
|
+
}
|
90
|
+
|
91
|
+
pub fn time_range(
|
92
|
+
start: &RbExpr,
|
93
|
+
end: &RbExpr,
|
94
|
+
every: String,
|
95
|
+
closed: Wrap<ClosedWindow>,
|
96
|
+
) -> RbExpr {
|
97
|
+
let start = start.inner.clone();
|
98
|
+
let end = end.inner.clone();
|
99
|
+
let every = Duration::parse(&every);
|
100
|
+
let closed = closed.0;
|
101
|
+
dsl::time_range(start, end, every, closed).into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn time_ranges(
|
105
|
+
start: &RbExpr,
|
106
|
+
end: &RbExpr,
|
107
|
+
every: String,
|
108
|
+
closed: Wrap<ClosedWindow>,
|
109
|
+
) -> RbExpr {
|
110
|
+
let start = start.inner.clone();
|
111
|
+
let end = end.inner.clone();
|
112
|
+
let every = Duration::parse(&every);
|
113
|
+
let closed = closed.0;
|
114
|
+
dsl::time_ranges(start, end, every, closed).into()
|
115
|
+
}
|
@@ -2,42 +2,72 @@ use polars::lazy::dsl;
|
|
2
2
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
|
+
pub fn when(condition: &RbExpr) -> RbWhen {
|
6
|
+
RbWhen {
|
7
|
+
inner: dsl::when(condition.inner.clone()),
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
5
11
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
12
|
#[derive(Clone)]
|
7
13
|
pub struct RbWhen {
|
8
14
|
pub inner: dsl::When,
|
9
15
|
}
|
10
16
|
|
11
|
-
|
12
|
-
fn from(inner: dsl::When) -> Self {
|
13
|
-
RbWhen { inner }
|
14
|
-
}
|
15
|
-
}
|
16
|
-
|
17
|
-
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
17
|
+
#[magnus::wrap(class = "Polars::RbThen")]
|
18
18
|
#[derive(Clone)]
|
19
19
|
pub struct RbThen {
|
20
20
|
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
#[magnus::wrap(class = "Polars::RbChainedWhen")]
|
24
|
+
#[derive(Clone)]
|
25
|
+
pub struct RbChainedWhen {
|
26
|
+
pub inner: dsl::ChainedWhen,
|
27
|
+
}
|
28
|
+
|
29
|
+
#[magnus::wrap(class = "Polars::RbChainedThen")]
|
30
|
+
#[derive(Clone)]
|
31
|
+
pub struct RbChainedThen {
|
32
|
+
pub inner: dsl::ChainedThen,
|
27
33
|
}
|
28
34
|
|
29
35
|
impl RbWhen {
|
30
|
-
pub fn then(&self,
|
31
|
-
|
36
|
+
pub fn then(&self, statement: &RbExpr) -> RbThen {
|
37
|
+
RbThen {
|
38
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
39
|
+
}
|
32
40
|
}
|
33
41
|
}
|
34
42
|
|
35
43
|
impl RbThen {
|
36
|
-
pub fn
|
37
|
-
|
44
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
45
|
+
RbChainedWhen {
|
46
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
51
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
38
52
|
}
|
39
53
|
}
|
40
54
|
|
41
|
-
|
42
|
-
|
55
|
+
impl RbChainedWhen {
|
56
|
+
pub fn then(&self, statement: &RbExpr) -> RbChainedThen {
|
57
|
+
RbChainedThen {
|
58
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
impl RbChainedThen {
|
64
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
65
|
+
RbChainedWhen {
|
66
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
71
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
72
|
+
}
|
43
73
|
}
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod numo;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars_core::utils::try_get_supertype;
|
3
|
+
|
4
|
+
use crate::dataframe::RbDataFrame;
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn to_numo(&self) -> Option<Value> {
|
8
|
+
let mut st = None;
|
9
|
+
for s in self.df.borrow().iter() {
|
10
|
+
let dt_i = s.dtype();
|
11
|
+
match st {
|
12
|
+
None => st = Some(dt_i.clone()),
|
13
|
+
Some(ref mut st) => {
|
14
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
18
|
+
let _st = st?;
|
19
|
+
|
20
|
+
// TODO
|
21
|
+
None
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,60 @@
|
|
1
|
+
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::error::RbPolarsErr;
|
5
|
+
use crate::raise_err;
|
6
|
+
use crate::series::RbSeries;
|
7
|
+
use crate::RbResult;
|
8
|
+
|
9
|
+
impl RbSeries {
|
10
|
+
/// For numeric types, this should only be called for Series with null types.
|
11
|
+
/// This will cast to floats so that `nil = NAN`
|
12
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
13
|
+
let s = &self.series.borrow();
|
14
|
+
match s.dtype() {
|
15
|
+
DataType::String => {
|
16
|
+
let ca = s.str().unwrap();
|
17
|
+
|
18
|
+
// TODO make more efficient
|
19
|
+
let np_arr = RArray::from_iter(ca);
|
20
|
+
class::object()
|
21
|
+
.const_get::<_, RModule>("Numo")?
|
22
|
+
.const_get::<_, RClass>("RObject")?
|
23
|
+
.funcall("cast", (np_arr,))
|
24
|
+
}
|
25
|
+
dt if dt.is_numeric() => {
|
26
|
+
if s.bit_repr_is_large() {
|
27
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
28
|
+
let ca = s.f64().unwrap();
|
29
|
+
// TODO make more efficient
|
30
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
31
|
+
Some(v) => v,
|
32
|
+
None => f64::NAN,
|
33
|
+
}));
|
34
|
+
class::object()
|
35
|
+
.const_get::<_, RModule>("Numo")?
|
36
|
+
.const_get::<_, RClass>("DFloat")?
|
37
|
+
.funcall("cast", (np_arr,))
|
38
|
+
} else {
|
39
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
40
|
+
let ca = s.f32().unwrap();
|
41
|
+
// TODO make more efficient
|
42
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
43
|
+
Some(v) => v,
|
44
|
+
None => f32::NAN,
|
45
|
+
}));
|
46
|
+
class::object()
|
47
|
+
.const_get::<_, RModule>("Numo")?
|
48
|
+
.const_get::<_, RClass>("SFloat")?
|
49
|
+
.funcall("cast", (np_arr,))
|
50
|
+
}
|
51
|
+
}
|
52
|
+
dt => {
|
53
|
+
raise_err!(
|
54
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
55
|
+
ComputeError
|
56
|
+
);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::RowIndex;
|
2
|
+
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
@@ -48,7 +48,7 @@ impl RbLazyFrame {
|
|
48
48
|
// in this scope
|
49
49
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
50
50
|
|
51
|
-
let lp = serde_json::from_str::<
|
51
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
52
52
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
53
53
|
Ok(LazyFrame::from(lp).into())
|
54
54
|
}
|
@@ -63,7 +63,10 @@ impl RbLazyFrame {
|
|
63
63
|
row_index: Option<(String, IdxSize)>,
|
64
64
|
) -> RbResult<Self> {
|
65
65
|
let batch_size = batch_size.map(|v| v.0);
|
66
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
66
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
67
|
+
name: Arc::from(name.as_str()),
|
68
|
+
offset,
|
69
|
+
});
|
67
70
|
|
68
71
|
let lf = LazyJsonLineReader::new(path)
|
69
72
|
.with_infer_schema_length(infer_schema_length)
|
@@ -100,14 +103,17 @@ impl RbLazyFrame {
|
|
100
103
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
101
104
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
102
105
|
let eol_char = String::try_convert(arguments[19])?;
|
106
|
+
let truncate_ragged_lines = bool::try_convert(arguments[20])?;
|
103
107
|
// end arguments
|
104
108
|
|
105
109
|
let null_values = null_values.map(|w| w.0);
|
106
110
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
107
111
|
let separator = separator.as_bytes()[0];
|
108
112
|
let eol_char = eol_char.as_bytes()[0];
|
109
|
-
|
110
|
-
|
113
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
114
|
+
name: Arc::from(name.as_str()),
|
115
|
+
offset,
|
116
|
+
});
|
111
117
|
|
112
118
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
113
119
|
overwrite_dtype
|
@@ -115,25 +121,29 @@ impl RbLazyFrame {
|
|
115
121
|
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
116
122
|
.collect::<Schema>()
|
117
123
|
});
|
124
|
+
|
118
125
|
let r = LazyCsvReader::new(path)
|
119
126
|
.with_infer_schema_length(infer_schema_length)
|
120
127
|
.with_separator(separator)
|
121
|
-
.
|
128
|
+
.with_has_header(has_header)
|
122
129
|
.with_ignore_errors(ignore_errors)
|
123
130
|
.with_skip_rows(skip_rows)
|
124
131
|
.with_n_rows(n_rows)
|
125
132
|
.with_cache(cache)
|
126
|
-
.with_dtype_overwrite(overwrite_dtype.
|
127
|
-
|
133
|
+
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
134
|
+
// TODO add with_schema
|
135
|
+
.with_low_memory(low_memory)
|
128
136
|
.with_comment_prefix(comment_prefix.as_deref())
|
129
137
|
.with_quote_char(quote_char)
|
130
|
-
.
|
138
|
+
.with_eol_char(eol_char)
|
131
139
|
.with_rechunk(rechunk)
|
132
140
|
.with_skip_rows_after_header(skip_rows_after_header)
|
133
141
|
.with_encoding(encoding.0)
|
134
142
|
.with_row_index(row_index)
|
135
143
|
.with_try_parse_dates(try_parse_dates)
|
136
|
-
.with_null_values(null_values)
|
144
|
+
.with_null_values(null_values)
|
145
|
+
// TODO add with_missing_is_null
|
146
|
+
.with_truncate_ragged_lines(truncate_ragged_lines);
|
137
147
|
|
138
148
|
if let Some(_lambda) = with_schema_modify {
|
139
149
|
todo!();
|
@@ -144,7 +154,8 @@ impl RbLazyFrame {
|
|
144
154
|
|
145
155
|
#[allow(clippy::too_many_arguments)]
|
146
156
|
pub fn new_from_parquet(
|
147
|
-
path:
|
157
|
+
path: Option<PathBuf>,
|
158
|
+
paths: Vec<PathBuf>,
|
148
159
|
n_rows: Option<usize>,
|
149
160
|
cache: bool,
|
150
161
|
parallel: Wrap<ParallelStrategy>,
|
@@ -153,21 +164,48 @@ impl RbLazyFrame {
|
|
153
164
|
low_memory: bool,
|
154
165
|
use_statistics: bool,
|
155
166
|
hive_partitioning: bool,
|
167
|
+
hive_schema: Option<Wrap<Schema>>,
|
168
|
+
glob: bool,
|
156
169
|
) -> RbResult<Self> {
|
157
|
-
let
|
170
|
+
let parallel = parallel.0;
|
171
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
172
|
+
|
173
|
+
let first_path = if let Some(path) = &path {
|
174
|
+
path
|
175
|
+
} else {
|
176
|
+
paths
|
177
|
+
.first()
|
178
|
+
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
179
|
+
};
|
180
|
+
|
181
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
182
|
+
name: Arc::from(name.as_str()),
|
183
|
+
offset,
|
184
|
+
});
|
185
|
+
let hive_options = HiveOptions {
|
186
|
+
enabled: hive_partitioning,
|
187
|
+
schema: hive_schema,
|
188
|
+
};
|
189
|
+
|
158
190
|
let args = ScanArgsParquet {
|
159
191
|
n_rows,
|
160
192
|
cache,
|
161
|
-
parallel
|
193
|
+
parallel,
|
162
194
|
rechunk,
|
163
195
|
row_index,
|
164
196
|
low_memory,
|
165
|
-
// TODO support cloud options
|
166
197
|
cloud_options: None,
|
167
198
|
use_statistics,
|
168
|
-
|
199
|
+
hive_options,
|
200
|
+
glob,
|
169
201
|
};
|
170
|
-
|
202
|
+
|
203
|
+
let lf = if path.is_some() {
|
204
|
+
LazyFrame::scan_parquet(first_path, args)
|
205
|
+
} else {
|
206
|
+
LazyFrame::scan_parquet_files(Arc::from(paths), args)
|
207
|
+
}
|
208
|
+
.map_err(RbPolarsErr::from)?;
|
171
209
|
Ok(lf.into())
|
172
210
|
}
|
173
211
|
|
@@ -179,13 +217,18 @@ impl RbLazyFrame {
|
|
179
217
|
row_index: Option<(String, IdxSize)>,
|
180
218
|
memory_map: bool,
|
181
219
|
) -> RbResult<Self> {
|
182
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
220
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
221
|
+
name: Arc::from(name.as_str()),
|
222
|
+
offset,
|
223
|
+
});
|
224
|
+
|
183
225
|
let args = ScanArgsIpc {
|
184
226
|
n_rows,
|
185
227
|
cache,
|
186
228
|
rechunk,
|
187
229
|
row_index,
|
188
|
-
|
230
|
+
memory_map,
|
231
|
+
cloud_options: None,
|
189
232
|
};
|
190
233
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
191
234
|
Ok(lf.into())
|
@@ -198,8 +241,11 @@ impl RbLazyFrame {
|
|
198
241
|
Ok(())
|
199
242
|
}
|
200
243
|
|
201
|
-
pub fn describe_plan(&self) -> String {
|
202
|
-
self.ldf
|
244
|
+
pub fn describe_plan(&self) -> RbResult<String> {
|
245
|
+
self.ldf
|
246
|
+
.describe_plan()
|
247
|
+
.map_err(RbPolarsErr::from)
|
248
|
+
.map_err(Into::into)
|
203
249
|
}
|
204
250
|
|
205
251
|
pub fn describe_optimized_plan(&self) -> RbResult<String> {
|
@@ -242,17 +288,18 @@ impl RbLazyFrame {
|
|
242
288
|
pub fn sort(
|
243
289
|
&self,
|
244
290
|
by_column: String,
|
245
|
-
|
291
|
+
descending: bool,
|
246
292
|
nulls_last: bool,
|
247
293
|
maintain_order: bool,
|
294
|
+
multithreaded: bool,
|
248
295
|
) -> Self {
|
249
296
|
let ldf = self.ldf.clone();
|
250
297
|
ldf.sort(
|
251
|
-
&by_column,
|
252
|
-
|
253
|
-
descending:
|
298
|
+
[&by_column],
|
299
|
+
SortMultipleOptions {
|
300
|
+
descending: vec![descending],
|
254
301
|
nulls_last,
|
255
|
-
multithreaded
|
302
|
+
multithreaded,
|
256
303
|
maintain_order,
|
257
304
|
},
|
258
305
|
)
|
@@ -261,15 +308,24 @@ impl RbLazyFrame {
|
|
261
308
|
|
262
309
|
pub fn sort_by_exprs(
|
263
310
|
&self,
|
264
|
-
|
265
|
-
|
311
|
+
by: RArray,
|
312
|
+
descending: Vec<bool>,
|
266
313
|
nulls_last: bool,
|
267
314
|
maintain_order: bool,
|
315
|
+
multithreaded: bool,
|
268
316
|
) -> RbResult<Self> {
|
269
317
|
let ldf = self.ldf.clone();
|
270
|
-
let exprs = rb_exprs_to_exprs(
|
318
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
271
319
|
Ok(ldf
|
272
|
-
.sort_by_exprs(
|
320
|
+
.sort_by_exprs(
|
321
|
+
exprs,
|
322
|
+
SortMultipleOptions {
|
323
|
+
descending,
|
324
|
+
nulls_last,
|
325
|
+
maintain_order,
|
326
|
+
multithreaded,
|
327
|
+
},
|
328
|
+
)
|
273
329
|
.into())
|
274
330
|
}
|
275
331
|
|
@@ -326,6 +382,7 @@ impl RbLazyFrame {
|
|
326
382
|
Ok(())
|
327
383
|
}
|
328
384
|
|
385
|
+
#[allow(clippy::too_many_arguments)]
|
329
386
|
pub fn sink_csv(
|
330
387
|
&self,
|
331
388
|
path: PathBuf,
|
@@ -427,7 +484,7 @@ impl RbLazyFrame {
|
|
427
484
|
let closed_window = closed.0;
|
428
485
|
let ldf = self.ldf.clone();
|
429
486
|
let by = rb_exprs_to_exprs(by)?;
|
430
|
-
let lazy_gb = ldf.
|
487
|
+
let lazy_gb = ldf.rolling(
|
431
488
|
index_column.inner.clone(),
|
432
489
|
by,
|
433
490
|
RollingGroupOptions {
|
@@ -602,58 +659,56 @@ impl RbLazyFrame {
|
|
602
659
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
603
660
|
}
|
604
661
|
|
605
|
-
pub fn min(&self) ->
|
662
|
+
pub fn min(&self) -> Self {
|
606
663
|
let ldf = self.ldf.clone();
|
607
|
-
let out = ldf.min()
|
608
|
-
|
664
|
+
let out = ldf.min();
|
665
|
+
out.into()
|
609
666
|
}
|
610
667
|
|
611
|
-
pub fn max(&self) ->
|
668
|
+
pub fn max(&self) -> Self {
|
612
669
|
let ldf = self.ldf.clone();
|
613
|
-
let out = ldf.max()
|
614
|
-
|
670
|
+
let out = ldf.max();
|
671
|
+
out.into()
|
615
672
|
}
|
616
673
|
|
617
|
-
pub fn sum(&self) ->
|
674
|
+
pub fn sum(&self) -> Self {
|
618
675
|
let ldf = self.ldf.clone();
|
619
|
-
let out = ldf.sum()
|
620
|
-
|
676
|
+
let out = ldf.sum();
|
677
|
+
out.into()
|
621
678
|
}
|
622
679
|
|
623
|
-
pub fn mean(&self) ->
|
680
|
+
pub fn mean(&self) -> Self {
|
624
681
|
let ldf = self.ldf.clone();
|
625
|
-
let out = ldf.mean()
|
626
|
-
|
682
|
+
let out = ldf.mean();
|
683
|
+
out.into()
|
627
684
|
}
|
628
685
|
|
629
|
-
pub fn std(&self, ddof: u8) ->
|
686
|
+
pub fn std(&self, ddof: u8) -> Self {
|
630
687
|
let ldf = self.ldf.clone();
|
631
|
-
let out = ldf.std(ddof)
|
632
|
-
|
688
|
+
let out = ldf.std(ddof);
|
689
|
+
out.into()
|
633
690
|
}
|
634
691
|
|
635
|
-
pub fn var(&self, ddof: u8) ->
|
692
|
+
pub fn var(&self, ddof: u8) -> Self {
|
636
693
|
let ldf = self.ldf.clone();
|
637
|
-
let out = ldf.var(ddof)
|
638
|
-
|
694
|
+
let out = ldf.var(ddof);
|
695
|
+
out.into()
|
639
696
|
}
|
640
697
|
|
641
|
-
pub fn median(&self) ->
|
698
|
+
pub fn median(&self) -> Self {
|
642
699
|
let ldf = self.ldf.clone();
|
643
|
-
let out = ldf.median()
|
644
|
-
|
700
|
+
let out = ldf.median();
|
701
|
+
out.into()
|
645
702
|
}
|
646
703
|
|
647
704
|
pub fn quantile(
|
648
705
|
&self,
|
649
706
|
quantile: &RbExpr,
|
650
707
|
interpolation: Wrap<QuantileInterpolOptions>,
|
651
|
-
) ->
|
708
|
+
) -> Self {
|
652
709
|
let ldf = self.ldf.clone();
|
653
|
-
let out = ldf
|
654
|
-
|
655
|
-
.map_err(RbPolarsErr::from)?;
|
656
|
-
Ok(out.into())
|
710
|
+
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
711
|
+
out.into()
|
657
712
|
}
|
658
713
|
|
659
714
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|