polars-df 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/conversion.rs +237 -43
- data/ext/polars/src/dataframe.rs +278 -1
- data/ext/polars/src/lazy/dataframe.rs +304 -10
- data/ext/polars/src/lazy/dsl.rs +1096 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +313 -0
- data/ext/polars/src/series.rs +168 -5
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +312 -7
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +488 -0
- data/lib/polars/lazy_frame.rb +184 -6
- data/lib/polars/lazy_functions.rb +4 -0
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +513 -11
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +27 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -1
- metadata +10 -2
data/Cargo.toml
ADDED
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.0"
|
3
|
+
version = "0.1.1"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -15,21 +15,51 @@ serde_json = "1"
|
|
15
15
|
[dependencies.polars]
|
16
16
|
version = "0.25.1"
|
17
17
|
features = [
|
18
|
+
"abs",
|
18
19
|
"arange",
|
20
|
+
"concat_str",
|
19
21
|
"csv-file",
|
20
22
|
"cum_agg",
|
23
|
+
"cumulative_eval",
|
24
|
+
"date_offset",
|
21
25
|
"diagonal_concat",
|
26
|
+
"diff",
|
27
|
+
"dot_product",
|
22
28
|
"dtype-full",
|
29
|
+
"dynamic_groupby",
|
30
|
+
"ewma",
|
23
31
|
"fmt",
|
24
32
|
"horizontal_concat",
|
25
33
|
"interpolate",
|
26
34
|
"ipc",
|
35
|
+
"is_first",
|
36
|
+
"is_in",
|
27
37
|
"json",
|
28
38
|
"lazy",
|
29
39
|
"lazy_regex",
|
40
|
+
"list_eval",
|
41
|
+
"log",
|
42
|
+
"meta",
|
43
|
+
"mode",
|
44
|
+
"moment",
|
30
45
|
"parquet",
|
46
|
+
"partition_by",
|
47
|
+
"pct_change",
|
48
|
+
"product",
|
49
|
+
"random",
|
50
|
+
"rank",
|
51
|
+
"repeat_by",
|
52
|
+
"rolling_window",
|
53
|
+
"round_series",
|
54
|
+
"search_sorted",
|
31
55
|
"semi_anti_join",
|
32
56
|
"serde-lazy",
|
57
|
+
"sign",
|
58
|
+
"string_justify",
|
33
59
|
"strings",
|
60
|
+
"timezones",
|
61
|
+
"to_dummies",
|
62
|
+
"top_k",
|
34
63
|
"trigonometry",
|
64
|
+
"unique_counts",
|
35
65
|
]
|
@@ -1,27 +1,245 @@
|
|
1
|
-
use magnus::{Value, QNIL};
|
1
|
+
use magnus::{TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
3
|
use polars::datatypes::AnyValue;
|
4
4
|
use polars::frame::DataFrame;
|
5
5
|
use polars::prelude::*;
|
6
|
+
use polars::series::ops::NullBehavior;
|
6
7
|
|
7
|
-
use crate::{RbDataFrame, RbResult, RbValueError};
|
8
|
-
|
9
|
-
pub
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
8
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
9
|
+
|
10
|
+
pub struct Wrap<T>(pub T);
|
11
|
+
|
12
|
+
impl<T> From<T> for Wrap<T> {
|
13
|
+
fn from(t: T) -> Self {
|
14
|
+
Wrap(t)
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
19
|
+
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
20
|
+
Ok(rbdf.df.borrow().clone())
|
21
|
+
}
|
22
|
+
|
23
|
+
impl Into<Value> for Wrap<AnyValue<'_>> {
|
24
|
+
fn into(self) -> Value {
|
25
|
+
match self.0 {
|
26
|
+
AnyValue::UInt8(v) => Value::from(v),
|
27
|
+
AnyValue::UInt16(v) => Value::from(v),
|
28
|
+
AnyValue::UInt32(v) => Value::from(v),
|
29
|
+
AnyValue::UInt64(v) => Value::from(v),
|
30
|
+
AnyValue::Int8(v) => Value::from(v),
|
31
|
+
AnyValue::Int16(v) => Value::from(v),
|
32
|
+
AnyValue::Int32(v) => Value::from(v),
|
33
|
+
AnyValue::Int64(v) => Value::from(v),
|
34
|
+
AnyValue::Float32(v) => Value::from(v),
|
35
|
+
AnyValue::Float64(v) => Value::from(v),
|
36
|
+
AnyValue::Null => *QNIL,
|
37
|
+
AnyValue::Boolean(v) => Value::from(v),
|
38
|
+
AnyValue::Utf8(v) => Value::from(v),
|
39
|
+
_ => todo!(),
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
impl TryConvert for Wrap<DataType> {
|
45
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
46
|
+
let dtype = match ob.try_convert::<String>()?.as_str() {
|
47
|
+
"u8" => DataType::UInt8,
|
48
|
+
"u16" => DataType::UInt16,
|
49
|
+
"u32" => DataType::UInt32,
|
50
|
+
"u64" => DataType::UInt64,
|
51
|
+
"i8" => DataType::Int8,
|
52
|
+
"i16" => DataType::Int16,
|
53
|
+
"i32" => DataType::Int32,
|
54
|
+
"i64" => DataType::Int64,
|
55
|
+
"str" => DataType::Utf8,
|
56
|
+
"bool" => DataType::Boolean,
|
57
|
+
"f32" => DataType::Float32,
|
58
|
+
"f64" => DataType::Float64,
|
59
|
+
"date" => DataType::Date,
|
60
|
+
_ => {
|
61
|
+
return Err(RbValueError::new_err(format!(
|
62
|
+
"{} is not a supported DataType.",
|
63
|
+
ob
|
64
|
+
)))
|
65
|
+
}
|
66
|
+
};
|
67
|
+
Ok(Wrap(dtype))
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
72
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
73
|
+
// TODO improve
|
74
|
+
if let Ok(v) = ob.try_convert::<i64>() {
|
75
|
+
Ok(AnyValue::Int64(v).into())
|
76
|
+
} else if let Ok(v) = ob.try_convert::<f64>() {
|
77
|
+
Ok(AnyValue::Float64(v).into())
|
78
|
+
} else {
|
79
|
+
Err(RbPolarsErr::other(format!(
|
80
|
+
"object type not supported {:?}",
|
81
|
+
ob
|
82
|
+
)))
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
impl TryConvert for Wrap<CategoricalOrdering> {
|
88
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
89
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
90
|
+
"physical" => CategoricalOrdering::Physical,
|
91
|
+
"lexical" => CategoricalOrdering::Lexical,
|
92
|
+
v => {
|
93
|
+
return Err(RbValueError::new_err(format!(
|
94
|
+
"ordering must be one of {{'physical', 'lexical'}}, got {}",
|
95
|
+
v
|
96
|
+
)))
|
97
|
+
}
|
98
|
+
};
|
99
|
+
Ok(Wrap(parsed))
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
impl TryConvert for Wrap<ClosedWindow> {
|
104
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
105
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
106
|
+
"left" => ClosedWindow::Left,
|
107
|
+
"right" => ClosedWindow::Right,
|
108
|
+
"both" => ClosedWindow::Both,
|
109
|
+
"none" => ClosedWindow::None,
|
110
|
+
v => {
|
111
|
+
return Err(RbValueError::new_err(format!(
|
112
|
+
"closed must be one of {{'left', 'right', 'both', 'none'}}, got {}",
|
113
|
+
v
|
114
|
+
)))
|
115
|
+
}
|
116
|
+
};
|
117
|
+
Ok(Wrap(parsed))
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
impl TryConvert for Wrap<JoinType> {
|
122
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
123
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
124
|
+
"inner" => JoinType::Inner,
|
125
|
+
"left" => JoinType::Left,
|
126
|
+
"outer" => JoinType::Outer,
|
127
|
+
"semi" => JoinType::Semi,
|
128
|
+
"anti" => JoinType::Anti,
|
129
|
+
// #[cfg(feature = "cross_join")]
|
130
|
+
// "cross" => JoinType::Cross,
|
131
|
+
v => {
|
132
|
+
return Err(RbValueError::new_err(format!(
|
133
|
+
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
134
|
+
v
|
135
|
+
)))
|
136
|
+
}
|
137
|
+
};
|
138
|
+
Ok(Wrap(parsed))
|
139
|
+
}
|
140
|
+
}
|
141
|
+
|
142
|
+
impl TryConvert for Wrap<NullBehavior> {
|
143
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
144
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
145
|
+
"drop" => NullBehavior::Drop,
|
146
|
+
"ignore" => NullBehavior::Ignore,
|
147
|
+
v => {
|
148
|
+
return Err(RbValueError::new_err(format!(
|
149
|
+
"null behavior must be one of {{'drop', 'ignore'}}, got {}",
|
150
|
+
v
|
151
|
+
)))
|
152
|
+
}
|
153
|
+
};
|
154
|
+
Ok(Wrap(parsed))
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
158
|
+
impl TryConvert for Wrap<NullStrategy> {
|
159
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
160
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
161
|
+
"ignore" => NullStrategy::Ignore,
|
162
|
+
"propagate" => NullStrategy::Propagate,
|
163
|
+
v => {
|
164
|
+
return Err(RbValueError::new_err(format!(
|
165
|
+
"null strategy must be one of {{'ignore', 'propagate'}}, got {}",
|
166
|
+
v
|
167
|
+
)))
|
168
|
+
}
|
169
|
+
};
|
170
|
+
Ok(Wrap(parsed))
|
171
|
+
}
|
172
|
+
}
|
173
|
+
|
174
|
+
impl TryConvert for Wrap<QuantileInterpolOptions> {
|
175
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
176
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
177
|
+
"lower" => QuantileInterpolOptions::Lower,
|
178
|
+
"higher" => QuantileInterpolOptions::Higher,
|
179
|
+
"nearest" => QuantileInterpolOptions::Nearest,
|
180
|
+
"linear" => QuantileInterpolOptions::Linear,
|
181
|
+
"midpoint" => QuantileInterpolOptions::Midpoint,
|
182
|
+
v => {
|
183
|
+
return Err(RbValueError::new_err(format!(
|
184
|
+
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
185
|
+
v
|
186
|
+
)))
|
187
|
+
}
|
188
|
+
};
|
189
|
+
Ok(Wrap(parsed))
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
impl TryConvert for Wrap<RankMethod> {
|
194
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
195
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
196
|
+
"min" => RankMethod::Min,
|
197
|
+
"max" => RankMethod::Max,
|
198
|
+
"average" => RankMethod::Average,
|
199
|
+
"dense" => RankMethod::Dense,
|
200
|
+
"ordinal" => RankMethod::Ordinal,
|
201
|
+
"random" => RankMethod::Random,
|
202
|
+
v => {
|
203
|
+
return Err(RbValueError::new_err(format!(
|
204
|
+
"method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {}",
|
205
|
+
v
|
206
|
+
)))
|
207
|
+
}
|
208
|
+
};
|
209
|
+
Ok(Wrap(parsed))
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
impl TryConvert for Wrap<TimeUnit> {
|
214
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
216
|
+
"ns" => TimeUnit::Nanoseconds,
|
217
|
+
"us" => TimeUnit::Microseconds,
|
218
|
+
"ms" => TimeUnit::Milliseconds,
|
219
|
+
v => {
|
220
|
+
return Err(RbValueError::new_err(format!(
|
221
|
+
"time unit must be one of {{'ns', 'us', 'ms'}}, got {}",
|
222
|
+
v
|
223
|
+
)))
|
224
|
+
}
|
225
|
+
};
|
226
|
+
Ok(Wrap(parsed))
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
230
|
+
impl TryConvert for Wrap<UniqueKeepStrategy> {
|
231
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
232
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
233
|
+
"first" => UniqueKeepStrategy::First,
|
234
|
+
"last" => UniqueKeepStrategy::Last,
|
235
|
+
v => {
|
236
|
+
return Err(RbValueError::new_err(format!(
|
237
|
+
"keep must be one of {{'first', 'last'}}, got {}",
|
238
|
+
v
|
239
|
+
)))
|
240
|
+
}
|
241
|
+
};
|
242
|
+
Ok(Wrap(parsed))
|
25
243
|
}
|
26
244
|
}
|
27
245
|
|
@@ -47,30 +265,6 @@ pub fn parse_fill_null_strategy(
|
|
47
265
|
Ok(parsed)
|
48
266
|
}
|
49
267
|
|
50
|
-
pub fn wrap_join_type(ob: &str) -> RbResult<JoinType> {
|
51
|
-
let parsed = match ob {
|
52
|
-
"inner" => JoinType::Inner,
|
53
|
-
"left" => JoinType::Left,
|
54
|
-
"outer" => JoinType::Outer,
|
55
|
-
"semi" => JoinType::Semi,
|
56
|
-
"anti" => JoinType::Anti,
|
57
|
-
// #[cfg(feature = "cross_join")]
|
58
|
-
// "cross" => JoinType::Cross,
|
59
|
-
v => {
|
60
|
-
return Err(RbValueError::new_err(format!(
|
61
|
-
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
62
|
-
v
|
63
|
-
)))
|
64
|
-
}
|
65
|
-
};
|
66
|
-
Ok(parsed)
|
67
|
-
}
|
68
|
-
|
69
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
70
|
-
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
71
|
-
Ok(rbdf.df.borrow().clone())
|
72
|
-
}
|
73
|
-
|
74
268
|
pub fn parse_parquet_compression(
|
75
269
|
compression: &str,
|
76
270
|
compression_level: Option<i32>,
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -7,8 +7,9 @@ use std::io::{BufReader, BufWriter, Cursor};
|
|
7
7
|
use std::ops::Deref;
|
8
8
|
use std::path::PathBuf;
|
9
9
|
|
10
|
-
use crate::conversion
|
10
|
+
use crate::conversion::*;
|
11
11
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
12
|
+
use crate::series::to_rbseries_collection;
|
12
13
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
14
|
|
14
15
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -38,6 +39,10 @@ impl RbDataFrame {
|
|
38
39
|
Ok(RbDataFrame::new(df))
|
39
40
|
}
|
40
41
|
|
42
|
+
pub fn estimated_size(&self) -> usize {
|
43
|
+
self.df.borrow().estimated_size()
|
44
|
+
}
|
45
|
+
|
41
46
|
pub fn read_csv(rb_f: Value, has_header: bool) -> RbResult<Self> {
|
42
47
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
43
48
|
let df = CsvReader::new(mmap_bytes_r)
|
@@ -213,6 +218,11 @@ impl RbDataFrame {
|
|
213
218
|
format!("{}", self.df.borrow())
|
214
219
|
}
|
215
220
|
|
221
|
+
pub fn get_columns(&self) -> Vec<RbSeries> {
|
222
|
+
let cols = self.df.borrow().get_columns().clone();
|
223
|
+
to_rbseries_collection(cols)
|
224
|
+
}
|
225
|
+
|
216
226
|
pub fn columns(&self) -> Vec<String> {
|
217
227
|
self.df
|
218
228
|
.borrow()
|
@@ -222,6 +232,14 @@ impl RbDataFrame {
|
|
222
232
|
.collect()
|
223
233
|
}
|
224
234
|
|
235
|
+
pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
|
236
|
+
self.df
|
237
|
+
.borrow_mut()
|
238
|
+
.set_column_names(&names)
|
239
|
+
.map_err(RbPolarsErr::from)?;
|
240
|
+
Ok(())
|
241
|
+
}
|
242
|
+
|
225
243
|
pub fn dtypes(&self) -> Vec<String> {
|
226
244
|
self.df
|
227
245
|
.borrow()
|
@@ -230,6 +248,11 @@ impl RbDataFrame {
|
|
230
248
|
.collect()
|
231
249
|
}
|
232
250
|
|
251
|
+
pub fn n_chunks(&self) -> RbResult<usize> {
|
252
|
+
let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
|
253
|
+
Ok(n)
|
254
|
+
}
|
255
|
+
|
233
256
|
pub fn shape(&self) -> (usize, usize) {
|
234
257
|
self.df.borrow().shape()
|
235
258
|
}
|
@@ -258,6 +281,28 @@ impl RbDataFrame {
|
|
258
281
|
.map_err(RbPolarsErr::from)
|
259
282
|
}
|
260
283
|
|
284
|
+
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
285
|
+
let df = self
|
286
|
+
.df
|
287
|
+
.borrow()
|
288
|
+
.select(selection)
|
289
|
+
.map_err(RbPolarsErr::from)?;
|
290
|
+
Ok(RbDataFrame::new(df))
|
291
|
+
}
|
292
|
+
|
293
|
+
pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
294
|
+
let indices = IdxCa::from_vec("", indices);
|
295
|
+
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
296
|
+
Ok(RbDataFrame::new(df))
|
297
|
+
}
|
298
|
+
|
299
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
300
|
+
let binding = indices.series.borrow();
|
301
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
302
|
+
let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
303
|
+
Ok(RbDataFrame::new(df))
|
304
|
+
}
|
305
|
+
|
261
306
|
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
|
262
307
|
let df = self
|
263
308
|
.df
|
@@ -273,6 +318,38 @@ impl RbDataFrame {
|
|
273
318
|
Ok(RbDataFrame::new(df))
|
274
319
|
}
|
275
320
|
|
321
|
+
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
322
|
+
self.df
|
323
|
+
.borrow_mut()
|
324
|
+
.replace(&column, new_col.series.borrow().clone())
|
325
|
+
.map_err(RbPolarsErr::from)?;
|
326
|
+
Ok(())
|
327
|
+
}
|
328
|
+
|
329
|
+
pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
330
|
+
self.df
|
331
|
+
.borrow_mut()
|
332
|
+
.replace_at_idx(index, new_col.series.borrow().clone())
|
333
|
+
.map_err(RbPolarsErr::from)?;
|
334
|
+
Ok(())
|
335
|
+
}
|
336
|
+
|
337
|
+
pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
338
|
+
self.df
|
339
|
+
.borrow_mut()
|
340
|
+
.insert_at_idx(index, new_col.series.borrow().clone())
|
341
|
+
.map_err(RbPolarsErr::from)?;
|
342
|
+
Ok(())
|
343
|
+
}
|
344
|
+
|
345
|
+
pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
|
346
|
+
let df = self.df.borrow().slice(
|
347
|
+
offset as i64,
|
348
|
+
length.unwrap_or_else(|| self.df.borrow().height()),
|
349
|
+
);
|
350
|
+
df.into()
|
351
|
+
}
|
352
|
+
|
276
353
|
pub fn head(&self, length: Option<usize>) -> Self {
|
277
354
|
self.df.borrow().head(length).into()
|
278
355
|
}
|
@@ -281,6 +358,20 @@ impl RbDataFrame {
|
|
281
358
|
self.df.borrow().tail(length).into()
|
282
359
|
}
|
283
360
|
|
361
|
+
pub fn is_unique(&self) -> RbResult<RbSeries> {
|
362
|
+
let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
|
363
|
+
Ok(mask.into_series().into())
|
364
|
+
}
|
365
|
+
|
366
|
+
pub fn is_duplicated(&self) -> RbResult<RbSeries> {
|
367
|
+
let mask = self
|
368
|
+
.df
|
369
|
+
.borrow()
|
370
|
+
.is_duplicated()
|
371
|
+
.map_err(RbPolarsErr::from)?;
|
372
|
+
Ok(mask.into_series().into())
|
373
|
+
}
|
374
|
+
|
284
375
|
pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
285
376
|
if null_equal {
|
286
377
|
self.df.borrow().frame_equal_missing(&other.df.borrow())
|
@@ -289,16 +380,202 @@ impl RbDataFrame {
|
|
289
380
|
}
|
290
381
|
}
|
291
382
|
|
383
|
+
pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
384
|
+
let df = self
|
385
|
+
.df
|
386
|
+
.borrow()
|
387
|
+
.with_row_count(&name, offset)
|
388
|
+
.map_err(RbPolarsErr::from)?;
|
389
|
+
Ok(df.into())
|
390
|
+
}
|
391
|
+
|
392
|
+
pub fn clone(&self) -> Self {
|
393
|
+
RbDataFrame::new(self.df.borrow().clone())
|
394
|
+
}
|
395
|
+
|
396
|
+
pub fn melt(
|
397
|
+
&self,
|
398
|
+
id_vars: Vec<String>,
|
399
|
+
value_vars: Vec<String>,
|
400
|
+
value_name: Option<String>,
|
401
|
+
variable_name: Option<String>,
|
402
|
+
) -> RbResult<Self> {
|
403
|
+
let args = MeltArgs {
|
404
|
+
id_vars,
|
405
|
+
value_vars,
|
406
|
+
value_name,
|
407
|
+
variable_name,
|
408
|
+
};
|
409
|
+
|
410
|
+
let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
|
411
|
+
Ok(RbDataFrame::new(df))
|
412
|
+
}
|
413
|
+
|
414
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
415
|
+
let out = if stable {
|
416
|
+
self.df.borrow().partition_by_stable(groups)
|
417
|
+
} else {
|
418
|
+
self.df.borrow().partition_by(groups)
|
419
|
+
}
|
420
|
+
.map_err(RbPolarsErr::from)?;
|
421
|
+
Ok(out.into_iter().map(|v| RbDataFrame::new(v)).collect())
|
422
|
+
}
|
423
|
+
|
424
|
+
pub fn shift(&self, periods: i64) -> Self {
|
425
|
+
self.df.borrow().shift(periods).into()
|
426
|
+
}
|
427
|
+
|
428
|
+
pub fn unique(
|
429
|
+
&self,
|
430
|
+
maintain_order: bool,
|
431
|
+
subset: Option<Vec<String>>,
|
432
|
+
keep: Wrap<UniqueKeepStrategy>,
|
433
|
+
) -> RbResult<Self> {
|
434
|
+
let subset = subset.as_ref().map(|v| v.as_ref());
|
435
|
+
let df = match maintain_order {
|
436
|
+
true => self.df.borrow().unique_stable(subset, keep.0),
|
437
|
+
false => self.df.borrow().unique(subset, keep.0),
|
438
|
+
}
|
439
|
+
.map_err(RbPolarsErr::from)?;
|
440
|
+
Ok(df.into())
|
441
|
+
}
|
442
|
+
|
292
443
|
pub fn lazy(&self) -> RbLazyFrame {
|
293
444
|
self.df.borrow().clone().lazy().into()
|
294
445
|
}
|
295
446
|
|
447
|
+
pub fn max(&self) -> Self {
|
448
|
+
self.df.borrow().max().into()
|
449
|
+
}
|
450
|
+
|
451
|
+
pub fn min(&self) -> Self {
|
452
|
+
self.df.borrow().min().into()
|
453
|
+
}
|
454
|
+
|
455
|
+
pub fn sum(&self) -> Self {
|
456
|
+
self.df.borrow().sum().into()
|
457
|
+
}
|
458
|
+
|
296
459
|
pub fn mean(&self) -> Self {
|
297
460
|
self.df.borrow().mean().into()
|
298
461
|
}
|
299
462
|
|
463
|
+
pub fn std(&self, ddof: u8) -> Self {
|
464
|
+
self.df.borrow().std(ddof).into()
|
465
|
+
}
|
466
|
+
|
467
|
+
pub fn var(&self, ddof: u8) -> Self {
|
468
|
+
self.df.borrow().var(ddof).into()
|
469
|
+
}
|
470
|
+
|
471
|
+
pub fn median(&self) -> Self {
|
472
|
+
self.df.borrow().median().into()
|
473
|
+
}
|
474
|
+
|
475
|
+
pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
476
|
+
let s = self
|
477
|
+
.df
|
478
|
+
.borrow()
|
479
|
+
.hmean(null_strategy.0)
|
480
|
+
.map_err(RbPolarsErr::from)?;
|
481
|
+
Ok(s.map(|s| s.into()))
|
482
|
+
}
|
483
|
+
|
484
|
+
pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
|
485
|
+
let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
|
486
|
+
Ok(s.map(|s| s.into()))
|
487
|
+
}
|
488
|
+
|
489
|
+
pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
|
490
|
+
let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
|
491
|
+
Ok(s.map(|s| s.into()))
|
492
|
+
}
|
493
|
+
|
494
|
+
pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
495
|
+
let s = self
|
496
|
+
.df
|
497
|
+
.borrow()
|
498
|
+
.hsum(null_strategy.0)
|
499
|
+
.map_err(RbPolarsErr::from)?;
|
500
|
+
Ok(s.map(|s| s.into()))
|
501
|
+
}
|
502
|
+
|
503
|
+
pub fn quantile(
|
504
|
+
&self,
|
505
|
+
quantile: f64,
|
506
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
507
|
+
) -> RbResult<Self> {
|
508
|
+
let df = self
|
509
|
+
.df
|
510
|
+
.borrow()
|
511
|
+
.quantile(quantile, interpolation.0)
|
512
|
+
.map_err(RbPolarsErr::from)?;
|
513
|
+
Ok(df.into())
|
514
|
+
}
|
515
|
+
|
516
|
+
pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
|
517
|
+
let df = match columns {
|
518
|
+
Some(cols) => self
|
519
|
+
.df
|
520
|
+
.borrow()
|
521
|
+
.columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
|
522
|
+
None => self.df.borrow().to_dummies(),
|
523
|
+
}
|
524
|
+
.map_err(RbPolarsErr::from)?;
|
525
|
+
Ok(df.into())
|
526
|
+
}
|
527
|
+
|
300
528
|
pub fn null_count(&self) -> Self {
|
301
529
|
let df = self.df.borrow().null_count();
|
302
530
|
df.into()
|
303
531
|
}
|
532
|
+
|
533
|
+
pub fn shrink_to_fit(&self) {
|
534
|
+
self.df.borrow_mut().shrink_to_fit();
|
535
|
+
}
|
536
|
+
|
537
|
+
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
538
|
+
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
539
|
+
if include_header {
|
540
|
+
let s = Utf8Chunked::from_iter_values(
|
541
|
+
&names,
|
542
|
+
self.df.borrow().get_columns().iter().map(|s| s.name()),
|
543
|
+
)
|
544
|
+
.into_series();
|
545
|
+
df.insert_at_idx(0, s).unwrap();
|
546
|
+
}
|
547
|
+
Ok(df.into())
|
548
|
+
}
|
549
|
+
|
550
|
+
pub fn upsample(
|
551
|
+
&self,
|
552
|
+
by: Vec<String>,
|
553
|
+
index_column: String,
|
554
|
+
every: String,
|
555
|
+
offset: String,
|
556
|
+
stable: bool,
|
557
|
+
) -> RbResult<Self> {
|
558
|
+
let out = if stable {
|
559
|
+
self.df.borrow().upsample_stable(
|
560
|
+
by,
|
561
|
+
&index_column,
|
562
|
+
Duration::parse(&every),
|
563
|
+
Duration::parse(&offset),
|
564
|
+
)
|
565
|
+
} else {
|
566
|
+
self.df.borrow().upsample(
|
567
|
+
by,
|
568
|
+
&index_column,
|
569
|
+
Duration::parse(&every),
|
570
|
+
Duration::parse(&offset),
|
571
|
+
)
|
572
|
+
};
|
573
|
+
let out = out.map_err(RbPolarsErr::from)?;
|
574
|
+
Ok(out.into())
|
575
|
+
}
|
576
|
+
|
577
|
+
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
578
|
+
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
579
|
+
Ok(df.into())
|
580
|
+
}
|
304
581
|
}
|