polars-df 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +336 -42
- data/ext/polars/src/dataframe.rs +409 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +436 -10
- data/ext/polars/src/lazy/dsl.rs +1134 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +2 -0
- data/ext/polars/src/lib.rs +390 -3
- data/ext/polars/src/series.rs +175 -13
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +892 -21
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +503 -0
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +338 -6
- data/lib/polars/lazy_functions.rb +158 -11
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +1304 -14
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +60 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -1
- metadata +13 -2
@@ -0,0 +1,41 @@
|
|
1
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn meta_pop(&self) -> Vec<RbExpr> {
|
5
|
+
self.inner
|
6
|
+
.clone()
|
7
|
+
.meta()
|
8
|
+
.pop()
|
9
|
+
.into_iter()
|
10
|
+
.map(RbExpr::from)
|
11
|
+
.collect()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn meta_eq(&self, other: &RbExpr) -> bool {
|
15
|
+
self.inner == other.inner
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn meta_roots(&self) -> Vec<String> {
|
19
|
+
self.inner
|
20
|
+
.clone()
|
21
|
+
.meta()
|
22
|
+
.root_names()
|
23
|
+
.iter()
|
24
|
+
.map(|name| name.to_string())
|
25
|
+
.collect()
|
26
|
+
}
|
27
|
+
|
28
|
+
pub fn meta_output_name(&self) -> RbResult<String> {
|
29
|
+
let name = self
|
30
|
+
.inner
|
31
|
+
.clone()
|
32
|
+
.meta()
|
33
|
+
.output_name()
|
34
|
+
.map_err(RbPolarsErr::from)?;
|
35
|
+
Ok(name.to_string())
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn meta_undo_aliases(&self) -> RbExpr {
|
39
|
+
self.inner.clone().meta().undo_aliases().into()
|
40
|
+
}
|
41
|
+
}
|
data/ext/polars/src/lazy/mod.rs
CHANGED
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod batched_csv;
|
1
2
|
mod conversion;
|
2
3
|
mod dataframe;
|
3
4
|
mod error;
|
@@ -5,14 +6,18 @@ mod file;
|
|
5
6
|
mod lazy;
|
6
7
|
mod series;
|
7
8
|
|
8
|
-
use
|
9
|
+
use batched_csv::RbBatchedCsv;
|
10
|
+
use conversion::*;
|
9
11
|
use dataframe::RbDataFrame;
|
10
12
|
use error::{RbPolarsErr, RbValueError};
|
13
|
+
use file::get_file_like;
|
11
14
|
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
12
15
|
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
13
16
|
use magnus::{
|
14
|
-
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RModule,
|
17
|
+
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
|
18
|
+
Value,
|
15
19
|
};
|
20
|
+
use polars::datatypes::DataType;
|
16
21
|
use polars::error::PolarsResult;
|
17
22
|
use polars::frame::DataFrame;
|
18
23
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
@@ -34,37 +39,92 @@ fn init() -> RbResult<()> {
|
|
34
39
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
35
40
|
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
36
41
|
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
42
|
+
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
43
|
+
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
44
|
+
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
45
|
+
|
46
|
+
let class = module.define_class("RbBatchedCsv", Default::default())?;
|
47
|
+
class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
|
48
|
+
class.define_method("next_batches", method!(RbBatchedCsv::next_batches, 1))?;
|
37
49
|
|
38
50
|
let class = module.define_class("RbDataFrame", Default::default())?;
|
39
51
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
40
|
-
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv,
|
52
|
+
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
41
53
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 1))?;
|
54
|
+
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
42
55
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
43
56
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
44
57
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
58
|
+
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
45
59
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
46
60
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
47
61
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
62
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
48
63
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
49
64
|
class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
|
50
65
|
class.define_method("to_s", method!(RbDataFrame::to_s, 0))?;
|
66
|
+
class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
|
51
67
|
class.define_method("columns", method!(RbDataFrame::columns, 0))?;
|
68
|
+
class.define_method(
|
69
|
+
"set_column_names",
|
70
|
+
method!(RbDataFrame::set_column_names, 1),
|
71
|
+
)?;
|
52
72
|
class.define_method("dtypes", method!(RbDataFrame::dtypes, 0))?;
|
73
|
+
class.define_method("n_chunks", method!(RbDataFrame::n_chunks, 0))?;
|
53
74
|
class.define_method("shape", method!(RbDataFrame::shape, 0))?;
|
54
75
|
class.define_method("height", method!(RbDataFrame::height, 0))?;
|
55
76
|
class.define_method("width", method!(RbDataFrame::width, 0))?;
|
56
77
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
57
78
|
class.define_method("column", method!(RbDataFrame::column, 1))?;
|
79
|
+
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
80
|
+
class.define_method("take", method!(RbDataFrame::take, 1))?;
|
81
|
+
class.define_method(
|
82
|
+
"take_with_series",
|
83
|
+
method!(RbDataFrame::take_with_series, 1),
|
84
|
+
)?;
|
58
85
|
class.define_method("sort", method!(RbDataFrame::sort, 3))?;
|
86
|
+
class.define_method("replace", method!(RbDataFrame::replace, 2))?;
|
87
|
+
class.define_method("replace_at_idx", method!(RbDataFrame::replace_at_idx, 2))?;
|
88
|
+
class.define_method("insert_at_idx", method!(RbDataFrame::insert_at_idx, 2))?;
|
89
|
+
class.define_method("slice", method!(RbDataFrame::slice, 2))?;
|
59
90
|
class.define_method("head", method!(RbDataFrame::head, 1))?;
|
60
91
|
class.define_method("tail", method!(RbDataFrame::tail, 1))?;
|
92
|
+
class.define_method("is_unique", method!(RbDataFrame::is_unique, 0))?;
|
93
|
+
class.define_method("is_duplicated", method!(RbDataFrame::is_duplicated, 0))?;
|
61
94
|
class.define_method("frame_equal", method!(RbDataFrame::frame_equal, 2))?;
|
95
|
+
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
96
|
+
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
97
|
+
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
98
|
+
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
99
|
+
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
100
|
+
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
62
101
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
102
|
+
class.define_method("max", method!(RbDataFrame::max, 0))?;
|
103
|
+
class.define_method("min", method!(RbDataFrame::min, 0))?;
|
104
|
+
class.define_method("sum", method!(RbDataFrame::sum, 0))?;
|
63
105
|
class.define_method("mean", method!(RbDataFrame::mean, 0))?;
|
106
|
+
class.define_method("std", method!(RbDataFrame::std, 1))?;
|
107
|
+
class.define_method("var", method!(RbDataFrame::var, 1))?;
|
108
|
+
class.define_method("median", method!(RbDataFrame::median, 0))?;
|
109
|
+
class.define_method("hmean", method!(RbDataFrame::hmean, 1))?;
|
110
|
+
class.define_method("hmax", method!(RbDataFrame::hmax, 0))?;
|
111
|
+
class.define_method("hmin", method!(RbDataFrame::hmin, 0))?;
|
112
|
+
class.define_method("hsum", method!(RbDataFrame::hsum, 1))?;
|
113
|
+
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
114
|
+
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
|
64
115
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
116
|
+
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
117
|
+
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
118
|
+
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
119
|
+
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
65
120
|
|
66
121
|
let class = module.define_class("RbExpr", Default::default())?;
|
122
|
+
class.define_method("+", method!(RbExpr::add, 1))?;
|
123
|
+
class.define_method("-", method!(RbExpr::sub, 1))?;
|
67
124
|
class.define_method("*", method!(RbExpr::mul, 1))?;
|
125
|
+
class.define_method("/", method!(RbExpr::truediv, 1))?;
|
126
|
+
class.define_method("%", method!(RbExpr::_mod, 1))?;
|
127
|
+
class.define_method("floordiv", method!(RbExpr::floordiv, 1))?;
|
68
128
|
class.define_method("to_str", method!(RbExpr::to_str, 0))?;
|
69
129
|
class.define_method("eq", method!(RbExpr::eq, 1))?;
|
70
130
|
class.define_method("neq", method!(RbExpr::neq, 1))?;
|
@@ -76,20 +136,43 @@ fn init() -> RbResult<()> {
|
|
76
136
|
class.define_method("is_not", method!(RbExpr::is_not, 0))?;
|
77
137
|
class.define_method("is_null", method!(RbExpr::is_null, 0))?;
|
78
138
|
class.define_method("is_not_null", method!(RbExpr::is_not_null, 0))?;
|
139
|
+
class.define_method("is_infinite", method!(RbExpr::is_infinite, 0))?;
|
140
|
+
class.define_method("is_finite", method!(RbExpr::is_finite, 0))?;
|
141
|
+
class.define_method("is_nan", method!(RbExpr::is_nan, 0))?;
|
142
|
+
class.define_method("is_not_nan", method!(RbExpr::is_not_nan, 0))?;
|
79
143
|
class.define_method("min", method!(RbExpr::min, 0))?;
|
80
144
|
class.define_method("max", method!(RbExpr::max, 0))?;
|
145
|
+
class.define_method("nan_max", method!(RbExpr::nan_max, 0))?;
|
146
|
+
class.define_method("nan_min", method!(RbExpr::nan_min, 0))?;
|
81
147
|
class.define_method("mean", method!(RbExpr::mean, 0))?;
|
82
148
|
class.define_method("median", method!(RbExpr::median, 0))?;
|
83
149
|
class.define_method("sum", method!(RbExpr::sum, 0))?;
|
84
150
|
class.define_method("n_unique", method!(RbExpr::n_unique, 0))?;
|
151
|
+
class.define_method("arg_unique", method!(RbExpr::arg_unique, 0))?;
|
85
152
|
class.define_method("unique", method!(RbExpr::unique, 0))?;
|
86
153
|
class.define_method("unique_stable", method!(RbExpr::unique_stable, 0))?;
|
87
154
|
class.define_method("first", method!(RbExpr::first, 0))?;
|
88
155
|
class.define_method("last", method!(RbExpr::last, 0))?;
|
89
156
|
class.define_method("list", method!(RbExpr::list, 0))?;
|
157
|
+
class.define_method("quantile", method!(RbExpr::quantile, 2))?;
|
158
|
+
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
90
159
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
160
|
+
class.define_method("value_counts", method!(RbExpr::value_counts, 2))?;
|
161
|
+
class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
|
162
|
+
class.define_method("null_count", method!(RbExpr::null_count, 0))?;
|
163
|
+
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
91
164
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
165
|
+
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
166
|
+
class.define_method("top_k", method!(RbExpr::top_k, 2))?;
|
167
|
+
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
168
|
+
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
169
|
+
class.define_method("search_sorted", method!(RbExpr::search_sorted, 1))?;
|
170
|
+
class.define_method("take", method!(RbExpr::take, 1))?;
|
92
171
|
class.define_method("sort_by", method!(RbExpr::sort_by, 2))?;
|
172
|
+
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
173
|
+
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
174
|
+
class.define_method("shift", method!(RbExpr::shift, 1))?;
|
175
|
+
class.define_method("shift_and_fill", method!(RbExpr::shift_and_fill, 2))?;
|
93
176
|
class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
|
94
177
|
class.define_method(
|
95
178
|
"fill_null_with_strategy",
|
@@ -102,39 +185,276 @@ fn init() -> RbResult<()> {
|
|
102
185
|
class.define_method("reverse", method!(RbExpr::reverse, 0))?;
|
103
186
|
class.define_method("std", method!(RbExpr::std, 1))?;
|
104
187
|
class.define_method("var", method!(RbExpr::var, 1))?;
|
188
|
+
class.define_method("is_unique", method!(RbExpr::is_unique, 0))?;
|
189
|
+
class.define_method("is_first", method!(RbExpr::is_first, 0))?;
|
190
|
+
class.define_method("explode", method!(RbExpr::explode, 0))?;
|
191
|
+
class.define_method("take_every", method!(RbExpr::take_every, 1))?;
|
105
192
|
class.define_method("tail", method!(RbExpr::tail, 1))?;
|
106
193
|
class.define_method("head", method!(RbExpr::head, 1))?;
|
194
|
+
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
195
|
+
class.define_method("append", method!(RbExpr::append, 2))?;
|
196
|
+
class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
|
197
|
+
class.define_method("round", method!(RbExpr::round, 1))?;
|
198
|
+
class.define_method("floor", method!(RbExpr::floor, 0))?;
|
199
|
+
class.define_method("ceil", method!(RbExpr::ceil, 0))?;
|
200
|
+
class.define_method("clip", method!(RbExpr::clip, 2))?;
|
201
|
+
class.define_method("clip_min", method!(RbExpr::clip_min, 1))?;
|
202
|
+
class.define_method("clip_max", method!(RbExpr::clip_max, 1))?;
|
203
|
+
class.define_method("abs", method!(RbExpr::abs, 0))?;
|
204
|
+
class.define_method("sin", method!(RbExpr::sin, 0))?;
|
205
|
+
class.define_method("cos", method!(RbExpr::cos, 0))?;
|
206
|
+
class.define_method("tan", method!(RbExpr::tan, 0))?;
|
207
|
+
class.define_method("arcsin", method!(RbExpr::arcsin, 0))?;
|
208
|
+
class.define_method("arccos", method!(RbExpr::arccos, 0))?;
|
209
|
+
class.define_method("arctan", method!(RbExpr::arctan, 0))?;
|
210
|
+
class.define_method("sinh", method!(RbExpr::sinh, 0))?;
|
211
|
+
class.define_method("cosh", method!(RbExpr::cosh, 0))?;
|
212
|
+
class.define_method("tanh", method!(RbExpr::tanh, 0))?;
|
213
|
+
class.define_method("arcsinh", method!(RbExpr::arcsinh, 0))?;
|
214
|
+
class.define_method("arccosh", method!(RbExpr::arccosh, 0))?;
|
215
|
+
class.define_method("arctanh", method!(RbExpr::arctanh, 0))?;
|
216
|
+
class.define_method("sign", method!(RbExpr::sign, 0))?;
|
217
|
+
class.define_method("is_duplicated", method!(RbExpr::is_duplicated, 0))?;
|
107
218
|
class.define_method("over", method!(RbExpr::over, 1))?;
|
108
219
|
class.define_method("_and", method!(RbExpr::_and, 1))?;
|
109
220
|
class.define_method("_xor", method!(RbExpr::_xor, 1))?;
|
110
221
|
class.define_method("_or", method!(RbExpr::_or, 1))?;
|
222
|
+
class.define_method("is_in", method!(RbExpr::is_in, 1))?;
|
223
|
+
class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
|
224
|
+
class.define_method("pow", method!(RbExpr::pow, 1))?;
|
225
|
+
class.define_method("cumsum", method!(RbExpr::cumsum, 1))?;
|
226
|
+
class.define_method("cummax", method!(RbExpr::cummax, 1))?;
|
227
|
+
class.define_method("cummin", method!(RbExpr::cummin, 1))?;
|
228
|
+
class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
|
111
229
|
class.define_method("product", method!(RbExpr::product, 0))?;
|
230
|
+
class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
|
231
|
+
class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 3))?;
|
232
|
+
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 3))?;
|
233
|
+
class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 3))?;
|
234
|
+
class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
|
235
|
+
class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
|
236
|
+
class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
|
237
|
+
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
238
|
+
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
239
|
+
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
112
240
|
class.define_method("str_lengths", method!(RbExpr::str_lengths, 0))?;
|
241
|
+
class.define_method("str_n_chars", method!(RbExpr::str_n_chars, 0))?;
|
242
|
+
class.define_method("str_replace", method!(RbExpr::str_replace, 3))?;
|
243
|
+
class.define_method("str_replace_all", method!(RbExpr::str_replace_all, 3))?;
|
244
|
+
class.define_method("str_zfill", method!(RbExpr::str_zfill, 1))?;
|
245
|
+
class.define_method("str_ljust", method!(RbExpr::str_ljust, 2))?;
|
246
|
+
class.define_method("str_rjust", method!(RbExpr::str_rjust, 2))?;
|
113
247
|
class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
|
248
|
+
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
249
|
+
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
250
|
+
class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
|
251
|
+
class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
|
252
|
+
class.define_method("count_match", method!(RbExpr::count_match, 1))?;
|
253
|
+
class.define_method("strftime", method!(RbExpr::strftime, 1))?;
|
254
|
+
class.define_method("str_split", method!(RbExpr::str_split, 1))?;
|
255
|
+
class.define_method(
|
256
|
+
"str_split_inclusive",
|
257
|
+
method!(RbExpr::str_split_inclusive, 1),
|
258
|
+
)?;
|
259
|
+
class.define_method("str_split_exact", method!(RbExpr::str_split_exact, 2))?;
|
260
|
+
class.define_method(
|
261
|
+
"str_split_exact_inclusive",
|
262
|
+
method!(RbExpr::str_split_exact_inclusive, 2),
|
263
|
+
)?;
|
264
|
+
class.define_method("str_splitn", method!(RbExpr::str_splitn, 2))?;
|
265
|
+
class.define_method("arr_lengths", method!(RbExpr::arr_lengths, 0))?;
|
266
|
+
class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
|
267
|
+
class.define_method("year", method!(RbExpr::year, 0))?;
|
268
|
+
class.define_method("iso_year", method!(RbExpr::iso_year, 0))?;
|
269
|
+
class.define_method("quarter", method!(RbExpr::quarter, 0))?;
|
270
|
+
class.define_method("month", method!(RbExpr::month, 0))?;
|
271
|
+
class.define_method("week", method!(RbExpr::week, 0))?;
|
272
|
+
class.define_method("weekday", method!(RbExpr::weekday, 0))?;
|
273
|
+
class.define_method("day", method!(RbExpr::day, 0))?;
|
274
|
+
class.define_method("ordinal_day", method!(RbExpr::ordinal_day, 0))?;
|
275
|
+
class.define_method("hour", method!(RbExpr::hour, 0))?;
|
276
|
+
class.define_method("minute", method!(RbExpr::minute, 0))?;
|
277
|
+
class.define_method("second", method!(RbExpr::second, 0))?;
|
278
|
+
class.define_method("millisecond", method!(RbExpr::millisecond, 0))?;
|
279
|
+
class.define_method("microsecond", method!(RbExpr::microsecond, 0))?;
|
280
|
+
class.define_method("nanosecond", method!(RbExpr::nanosecond, 0))?;
|
281
|
+
class.define_method("duration_days", method!(RbExpr::duration_days, 0))?;
|
282
|
+
class.define_method("duration_hours", method!(RbExpr::duration_hours, 0))?;
|
283
|
+
class.define_method("duration_minutes", method!(RbExpr::duration_minutes, 0))?;
|
284
|
+
class.define_method("duration_seconds", method!(RbExpr::duration_seconds, 0))?;
|
285
|
+
class.define_method(
|
286
|
+
"duration_nanoseconds",
|
287
|
+
method!(RbExpr::duration_nanoseconds, 0),
|
288
|
+
)?;
|
289
|
+
class.define_method(
|
290
|
+
"duration_microseconds",
|
291
|
+
method!(RbExpr::duration_microseconds, 0),
|
292
|
+
)?;
|
293
|
+
class.define_method(
|
294
|
+
"duration_milliseconds",
|
295
|
+
method!(RbExpr::duration_milliseconds, 0),
|
296
|
+
)?;
|
297
|
+
class.define_method("timestamp", method!(RbExpr::timestamp, 1))?;
|
298
|
+
class.define_method("dt_offset_by", method!(RbExpr::dt_offset_by, 1))?;
|
299
|
+
class.define_method("dt_epoch_seconds", method!(RbExpr::dt_epoch_seconds, 0))?;
|
300
|
+
class.define_method("dt_with_time_unit", method!(RbExpr::dt_with_time_unit, 1))?;
|
301
|
+
class.define_method("dt_with_time_zone", method!(RbExpr::dt_with_time_zone, 1))?;
|
302
|
+
class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
|
303
|
+
class.define_method("dt_cast_time_zone", method!(RbExpr::dt_cast_time_zone, 1))?;
|
304
|
+
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
305
|
+
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
306
|
+
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
307
|
+
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
308
|
+
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
114
309
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
115
310
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
311
|
+
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
116
312
|
class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
|
313
|
+
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
314
|
+
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
315
|
+
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
316
|
+
class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 6))?;
|
317
|
+
class.define_method("rolling_std", method!(RbExpr::rolling_std, 6))?;
|
318
|
+
class.define_method("rolling_var", method!(RbExpr::rolling_var, 6))?;
|
319
|
+
class.define_method("rolling_median", method!(RbExpr::rolling_median, 6))?;
|
320
|
+
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 8))?;
|
321
|
+
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
322
|
+
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
323
|
+
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
324
|
+
class.define_method("lst_max", method!(RbExpr::lst_max, 0))?;
|
325
|
+
class.define_method("lst_min", method!(RbExpr::lst_min, 0))?;
|
326
|
+
class.define_method("lst_sum", method!(RbExpr::lst_sum, 0))?;
|
327
|
+
class.define_method("lst_mean", method!(RbExpr::lst_mean, 0))?;
|
328
|
+
class.define_method("lst_sort", method!(RbExpr::lst_sort, 1))?;
|
329
|
+
class.define_method("lst_reverse", method!(RbExpr::lst_reverse, 0))?;
|
330
|
+
class.define_method("lst_unique", method!(RbExpr::lst_unique, 0))?;
|
331
|
+
class.define_method("lst_get", method!(RbExpr::lst_get, 1))?;
|
332
|
+
class.define_method("lst_join", method!(RbExpr::lst_join, 1))?;
|
333
|
+
class.define_method("lst_arg_min", method!(RbExpr::lst_arg_min, 0))?;
|
334
|
+
class.define_method("lst_arg_max", method!(RbExpr::lst_arg_max, 0))?;
|
335
|
+
class.define_method("lst_diff", method!(RbExpr::lst_diff, 2))?;
|
336
|
+
class.define_method("lst_shift", method!(RbExpr::lst_shift, 1))?;
|
337
|
+
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
338
|
+
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
339
|
+
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
340
|
+
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
341
|
+
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
342
|
+
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
343
|
+
class.define_method("skew", method!(RbExpr::skew, 1))?;
|
344
|
+
class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
|
345
|
+
class.define_method("str_concat", method!(RbExpr::str_concat, 1))?;
|
346
|
+
class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
|
347
|
+
class.define_method("reshape", method!(RbExpr::reshape, 1))?;
|
348
|
+
class.define_method("cumcount", method!(RbExpr::cumcount, 1))?;
|
349
|
+
class.define_method("to_physical", method!(RbExpr::to_physical, 0))?;
|
350
|
+
class.define_method("shuffle", method!(RbExpr::shuffle, 1))?;
|
351
|
+
class.define_method("sample_n", method!(RbExpr::sample_n, 4))?;
|
352
|
+
class.define_method("sample_frac", method!(RbExpr::sample_frac, 4))?;
|
353
|
+
class.define_method("ewm_mean", method!(RbExpr::ewm_mean, 3))?;
|
354
|
+
class.define_method("ewm_std", method!(RbExpr::ewm_std, 4))?;
|
355
|
+
class.define_method("ewm_var", method!(RbExpr::ewm_var, 4))?;
|
356
|
+
class.define_method("any", method!(RbExpr::any, 0))?;
|
357
|
+
class.define_method("all", method!(RbExpr::all, 0))?;
|
358
|
+
class.define_method(
|
359
|
+
"struct_field_by_name",
|
360
|
+
method!(RbExpr::struct_field_by_name, 1),
|
361
|
+
)?;
|
362
|
+
class.define_method(
|
363
|
+
"struct_field_by_index",
|
364
|
+
method!(RbExpr::struct_field_by_index, 1),
|
365
|
+
)?;
|
366
|
+
class.define_method(
|
367
|
+
"struct_rename_fields",
|
368
|
+
method!(RbExpr::struct_rename_fields, 1),
|
369
|
+
)?;
|
370
|
+
class.define_method("log", method!(RbExpr::log, 1))?;
|
371
|
+
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
372
|
+
|
373
|
+
// meta
|
374
|
+
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
375
|
+
class.define_method("meta_eq", method!(RbExpr::meta_eq, 1))?;
|
376
|
+
class.define_method("meta_roots", method!(RbExpr::meta_roots, 0))?;
|
377
|
+
class.define_method("meta_output_name", method!(RbExpr::meta_output_name, 0))?;
|
378
|
+
class.define_method("meta_undo_aliases", method!(RbExpr::meta_undo_aliases, 0))?;
|
117
379
|
|
118
380
|
// maybe add to different class
|
119
381
|
class.define_singleton_method("col", function!(crate::lazy::dsl::col, 1))?;
|
382
|
+
class.define_singleton_method("count", function!(crate::lazy::dsl::count, 0))?;
|
383
|
+
class.define_singleton_method("first", function!(crate::lazy::dsl::first, 0))?;
|
384
|
+
class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
|
385
|
+
class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
|
386
|
+
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
120
387
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
121
388
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
122
389
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
390
|
+
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
391
|
+
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
123
392
|
|
124
393
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
394
|
+
class.define_singleton_method(
|
395
|
+
"new_from_ndjson",
|
396
|
+
function!(RbLazyFrame::new_from_ndjson, 7),
|
397
|
+
)?;
|
398
|
+
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
399
|
+
class.define_singleton_method(
|
400
|
+
"new_from_parquet",
|
401
|
+
function!(RbLazyFrame::new_from_parquet, 7),
|
402
|
+
)?;
|
403
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
404
|
+
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
405
|
+
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
406
|
+
class.define_method(
|
407
|
+
"describe_optimized_plan",
|
408
|
+
method!(RbLazyFrame::describe_optimized_plan, 0),
|
409
|
+
)?;
|
125
410
|
class.define_method(
|
126
411
|
"optimization_toggle",
|
127
412
|
method!(RbLazyFrame::optimization_toggle, 7),
|
128
413
|
)?;
|
414
|
+
class.define_method("sort", method!(RbLazyFrame::sort, 3))?;
|
415
|
+
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
|
416
|
+
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
129
417
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
418
|
+
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
130
419
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
131
420
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
132
421
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
422
|
+
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
423
|
+
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
|
133
424
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
134
425
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
426
|
+
class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
|
427
|
+
class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
|
428
|
+
class.define_method("shift", method!(RbLazyFrame::shift, 1))?;
|
429
|
+
class.define_method("shift_and_fill", method!(RbLazyFrame::shift_and_fill, 2))?;
|
430
|
+
class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
|
431
|
+
class.define_method("min", method!(RbLazyFrame::min, 0))?;
|
432
|
+
class.define_method("max", method!(RbLazyFrame::max, 0))?;
|
433
|
+
class.define_method("sum", method!(RbLazyFrame::sum, 0))?;
|
434
|
+
class.define_method("mean", method!(RbLazyFrame::mean, 0))?;
|
435
|
+
class.define_method("std", method!(RbLazyFrame::std, 1))?;
|
436
|
+
class.define_method("var", method!(RbLazyFrame::var, 1))?;
|
437
|
+
class.define_method("median", method!(RbLazyFrame::median, 0))?;
|
438
|
+
class.define_method("quantile", method!(RbLazyFrame::quantile, 2))?;
|
439
|
+
class.define_method("explode", method!(RbLazyFrame::explode, 1))?;
|
440
|
+
class.define_method("unique", method!(RbLazyFrame::unique, 3))?;
|
441
|
+
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
442
|
+
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
443
|
+
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
444
|
+
class.define_method("melt", method!(RbLazyFrame::melt, 4))?;
|
445
|
+
class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
|
446
|
+
class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
|
447
|
+
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
448
|
+
class.define_method("columns", method!(RbLazyFrame::columns, 0))?;
|
449
|
+
class.define_method("dtypes", method!(RbLazyFrame::dtypes, 0))?;
|
450
|
+
class.define_method("schema", method!(RbLazyFrame::schema, 0))?;
|
451
|
+
class.define_method("unnest", method!(RbLazyFrame::unnest, 1))?;
|
452
|
+
class.define_method("width", method!(RbLazyFrame::width, 0))?;
|
135
453
|
|
136
454
|
let class = module.define_class("RbLazyGroupBy", Default::default())?;
|
137
455
|
class.define_method("agg", method!(RbLazyGroupBy::agg, 1))?;
|
456
|
+
class.define_method("head", method!(RbLazyGroupBy::head, 1))?;
|
457
|
+
class.define_method("tail", method!(RbLazyGroupBy::tail, 1))?;
|
138
458
|
|
139
459
|
let class = module.define_class("RbSeries", Default::default())?;
|
140
460
|
class.define_singleton_method("new_opt_bool", function!(RbSeries::new_opt_bool, 3))?;
|
@@ -149,7 +469,15 @@ fn init() -> RbResult<()> {
|
|
149
469
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
150
470
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
151
471
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
472
|
+
class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
|
473
|
+
class.define_method(
|
474
|
+
"is_sorted_reverse_flag",
|
475
|
+
method!(RbSeries::is_sorted_reverse_flag, 0),
|
476
|
+
)?;
|
477
|
+
class.define_method("estimated_size", method!(RbSeries::estimated_size, 0))?;
|
478
|
+
class.define_method("get_fmt", method!(RbSeries::get_fmt, 2))?;
|
152
479
|
class.define_method("rechunk", method!(RbSeries::rechunk, 1))?;
|
480
|
+
class.define_method("get_idx", method!(RbSeries::get_idx, 1))?;
|
153
481
|
class.define_method("bitand", method!(RbSeries::bitand, 1))?;
|
154
482
|
class.define_method("bitor", method!(RbSeries::bitor, 1))?;
|
155
483
|
class.define_method("bitxor", method!(RbSeries::bitxor, 1))?;
|
@@ -194,11 +522,28 @@ fn init() -> RbResult<()> {
|
|
194
522
|
class.define_method("len", method!(RbSeries::len, 0))?;
|
195
523
|
class.define_method("to_a", method!(RbSeries::to_a, 0))?;
|
196
524
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
525
|
+
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
526
|
+
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
527
|
+
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
528
|
+
class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
|
529
|
+
class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
|
530
|
+
class.define_method("peak_min", method!(RbSeries::peak_min, 0))?;
|
531
|
+
class.define_method("n_unique", method!(RbSeries::n_unique, 0))?;
|
532
|
+
class.define_method("floor", method!(RbSeries::floor, 0))?;
|
533
|
+
class.define_method("shrink_to_fit", method!(RbSeries::shrink_to_fit, 0))?;
|
534
|
+
class.define_method("dot", method!(RbSeries::dot, 1))?;
|
535
|
+
class.define_method("skew", method!(RbSeries::skew, 1))?;
|
536
|
+
class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
|
537
|
+
class.define_method("cast", method!(RbSeries::cast, 2))?;
|
538
|
+
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
197
539
|
// rest
|
198
540
|
class.define_method("cumsum", method!(RbSeries::cumsum, 1))?;
|
199
541
|
class.define_method("cummax", method!(RbSeries::cummax, 1))?;
|
200
542
|
class.define_method("cummin", method!(RbSeries::cummin, 1))?;
|
543
|
+
class.define_method("cumprod", method!(RbSeries::cumprod, 1))?;
|
201
544
|
class.define_method("slice", method!(RbSeries::slice, 2))?;
|
545
|
+
class.define_method("ceil", method!(RbSeries::ceil, 0))?;
|
546
|
+
class.define_method("round", method!(RbSeries::round, 1))?;
|
202
547
|
|
203
548
|
let class = module.define_class("RbWhen", Default::default())?;
|
204
549
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
@@ -254,3 +599,45 @@ fn rb_hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
254
599
|
let df = hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
255
600
|
Ok(df.into())
|
256
601
|
}
|
602
|
+
|
603
|
+
fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
604
|
+
let mut iter = seq.each();
|
605
|
+
let first = iter.next().unwrap()?;
|
606
|
+
|
607
|
+
let mut s = get_series(first)?;
|
608
|
+
|
609
|
+
for res in iter {
|
610
|
+
let item = res?;
|
611
|
+
let item = get_series(item)?;
|
612
|
+
s.append(&item).map_err(RbPolarsErr::from)?;
|
613
|
+
}
|
614
|
+
Ok(s.into())
|
615
|
+
}
|
616
|
+
|
617
|
+
fn ipc_schema(rb_f: Value) -> RbResult<Value> {
|
618
|
+
use polars::export::arrow::io::ipc::read::read_file_metadata;
|
619
|
+
let mut r = get_file_like(rb_f, false)?;
|
620
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
621
|
+
|
622
|
+
let dict = RHash::new();
|
623
|
+
for field in metadata.schema.fields {
|
624
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
625
|
+
dict.aset(field.name, dt)?;
|
626
|
+
}
|
627
|
+
Ok(dict.into())
|
628
|
+
}
|
629
|
+
|
630
|
+
fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
631
|
+
use polars::export::arrow::io::parquet::read::{infer_schema, read_metadata};
|
632
|
+
|
633
|
+
let mut r = get_file_like(rb_f, false)?;
|
634
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
635
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
|
636
|
+
|
637
|
+
let dict = RHash::new();
|
638
|
+
for field in arrow_schema.fields {
|
639
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
640
|
+
dict.aset(field.name, dt)?;
|
641
|
+
}
|
642
|
+
Ok(dict.into())
|
643
|
+
}
|