polars-df 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +90 -45
- data/README.md +1 -0
- data/ext/polars/Cargo.toml +8 -6
- data/ext/polars/src/batched_csv.rs +3 -1
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +18 -7
- data/ext/polars/src/dataframe.rs +40 -14
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +7 -2
- data/ext/polars/src/expr/general.rs +22 -3
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/string.rs +3 -3
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +18 -3
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/lazyframe/mod.rs +58 -19
- data/ext/polars/src/lib.rs +23 -14
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/mod.rs +12 -2
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/data_frame.rb +148 -74
- data/lib/polars/date_time_expr.rb +10 -4
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/expr.rb +37 -34
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/io.rb +18 -6
- data/lib/polars/lazy_frame.rb +39 -36
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/series.rb +12 -10
- data/lib/polars/string_expr.rb +1 -0
- data/lib/polars/utils.rb +54 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +1 -2
- metadata +4 -5
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -267,7 +267,7 @@ impl RbExpr {
|
|
267
267
|
pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
|
268
268
|
self.clone()
|
269
269
|
.inner
|
270
|
-
.
|
270
|
+
.sort(SortOptions {
|
271
271
|
descending,
|
272
272
|
nulls_last,
|
273
273
|
multithreaded: true,
|
@@ -323,9 +323,28 @@ impl RbExpr {
|
|
323
323
|
self.clone().inner.gather(idx.inner.clone()).into()
|
324
324
|
}
|
325
325
|
|
326
|
-
pub fn sort_by(
|
326
|
+
pub fn sort_by(
|
327
|
+
&self,
|
328
|
+
by: RArray,
|
329
|
+
descending: Vec<bool>,
|
330
|
+
nulls_last: bool,
|
331
|
+
multithreaded: bool,
|
332
|
+
maintain_order: bool,
|
333
|
+
) -> RbResult<Self> {
|
327
334
|
let by = rb_exprs_to_exprs(by)?;
|
328
|
-
Ok(self
|
335
|
+
Ok(self
|
336
|
+
.clone()
|
337
|
+
.inner
|
338
|
+
.sort_by(
|
339
|
+
by,
|
340
|
+
SortMultipleOptions {
|
341
|
+
descending,
|
342
|
+
nulls_last,
|
343
|
+
multithreaded,
|
344
|
+
maintain_order,
|
345
|
+
},
|
346
|
+
)
|
347
|
+
.into())
|
329
348
|
}
|
330
349
|
|
331
350
|
pub fn backward_fill(&self, limit: FillNullLimit) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -51,8 +51,12 @@ impl RbExpr {
|
|
51
51
|
.into()
|
52
52
|
}
|
53
53
|
|
54
|
-
pub fn list_get(&self, index: &RbExpr) -> Self {
|
55
|
-
self.inner
|
54
|
+
pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
55
|
+
self.inner
|
56
|
+
.clone()
|
57
|
+
.list()
|
58
|
+
.get(index.inner.clone(), null_on_oob)
|
59
|
+
.into()
|
56
60
|
}
|
57
61
|
|
58
62
|
pub fn list_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
|
@@ -244,12 +244,12 @@ impl RbExpr {
|
|
244
244
|
.into()
|
245
245
|
}
|
246
246
|
|
247
|
-
pub fn str_to_integer(&self, base:
|
247
|
+
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
248
248
|
self.inner
|
249
249
|
.clone()
|
250
250
|
.str()
|
251
|
-
.to_integer(base, strict)
|
252
|
-
.with_fmt("str.
|
251
|
+
.to_integer(base.inner.clone(), strict)
|
252
|
+
.with_fmt("str.to_integer")
|
253
253
|
.into()
|
254
254
|
}
|
255
255
|
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,20 +1,167 @@
|
|
1
|
-
use magnus::{exception, prelude::*, Error, RString, Value};
|
2
|
-
use polars::io::mmap::MmapBytesReader;
|
3
1
|
use std::fs::File;
|
4
|
-
use std::io
|
2
|
+
use std::io;
|
3
|
+
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
5
4
|
use std::path::PathBuf;
|
6
5
|
|
6
|
+
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::mmap::MmapBytesReader;
|
8
|
+
|
9
|
+
use crate::error::RbPolarsErr;
|
10
|
+
use crate::prelude::resolve_homedir;
|
7
11
|
use crate::RbResult;
|
8
12
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
#[derive(Clone)]
|
14
|
+
pub struct RbFileLikeObject {
|
15
|
+
inner: Value,
|
16
|
+
}
|
17
|
+
|
18
|
+
/// Wraps a `Value`, and implements read, seek, and write for it.
|
19
|
+
impl RbFileLikeObject {
|
20
|
+
/// Creates an instance of a `RbFileLikeObject` from a `Value`.
|
21
|
+
/// To assert the object has the required methods,
|
22
|
+
/// instantiate it with `RbFileLikeObject::with_requirements`
|
23
|
+
pub fn new(object: Value) -> Self {
|
24
|
+
RbFileLikeObject { inner: object }
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
|
28
|
+
let data = self.as_file_buffer().into_inner();
|
29
|
+
std::io::Cursor::new(data)
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
33
|
+
let bytes = self
|
34
|
+
.inner
|
35
|
+
.funcall::<_, _, RString>("read", ())
|
36
|
+
.expect("no read method found");
|
37
|
+
|
38
|
+
let buf = unsafe { bytes.as_slice() }.to_vec();
|
39
|
+
|
40
|
+
Cursor::new(buf)
|
41
|
+
}
|
42
|
+
|
43
|
+
/// Same as `RbFileLikeObject::new`, but validates that the underlying
|
44
|
+
/// Ruby object has the `read`, `write`, and `seek` methods requested by the corresponding parameters.
|
45
|
+
/// Returns a `TypeError` if the object lacks any of the requested `read`, `seek`, or `write` methods.
|
46
|
+
pub fn with_requirements(object: Value, read: bool, write: bool, seek: bool) -> RbResult<Self> {
|
47
|
+
if read && !object.respond_to("read", false)? {
|
48
|
+
return Err(Error::new(
|
49
|
+
exception::type_error(),
|
50
|
+
"Object does not have a .read() method.",
|
51
|
+
));
|
52
|
+
}
|
53
|
+
|
54
|
+
if seek && !object.respond_to("seek", false)? {
|
55
|
+
return Err(Error::new(
|
56
|
+
exception::type_error(),
|
57
|
+
"Object does not have a .seek() method.",
|
58
|
+
));
|
59
|
+
}
|
60
|
+
|
61
|
+
if write && !object.respond_to("write", false)? {
|
62
|
+
return Err(Error::new(
|
63
|
+
exception::type_error(),
|
64
|
+
"Object does not have a .write() method.",
|
65
|
+
));
|
66
|
+
}
|
67
|
+
|
68
|
+
Ok(RbFileLikeObject::new(object))
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
/// Extracts a string repr from the Ruby error, and returns an IO error to send back to Rust.
|
73
|
+
fn rberr_to_io_err(e: Error) -> io::Error {
|
74
|
+
io::Error::new(io::ErrorKind::Other, e.to_string())
|
75
|
+
}
|
76
|
+
|
77
|
+
impl Read for RbFileLikeObject {
|
78
|
+
fn read(&mut self, mut buf: &mut [u8]) -> Result<usize, io::Error> {
|
79
|
+
let bytes = self
|
80
|
+
.inner
|
81
|
+
.funcall::<_, _, RString>("read", (buf.len(),))
|
82
|
+
.map_err(rberr_to_io_err)?;
|
83
|
+
|
84
|
+
buf.write_all(unsafe { bytes.as_slice() })?;
|
85
|
+
|
86
|
+
Ok(bytes.len())
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
impl Write for RbFileLikeObject {
|
91
|
+
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
92
|
+
let rbbytes = RString::from_slice(buf);
|
93
|
+
|
94
|
+
let number_bytes_written = self
|
95
|
+
.inner
|
96
|
+
.funcall::<_, _, usize>("write", (rbbytes,))
|
97
|
+
.map_err(rberr_to_io_err)?;
|
98
|
+
|
99
|
+
Ok(number_bytes_written)
|
100
|
+
}
|
101
|
+
|
102
|
+
fn flush(&mut self) -> Result<(), io::Error> {
|
103
|
+
self.inner
|
104
|
+
.funcall::<_, _, Value>("flush", ())
|
105
|
+
.map_err(rberr_to_io_err)?;
|
106
|
+
|
107
|
+
Ok(())
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
impl Seek for RbFileLikeObject {
|
112
|
+
fn seek(&mut self, pos: SeekFrom) -> Result<u64, io::Error> {
|
113
|
+
let (whence, offset) = match pos {
|
114
|
+
SeekFrom::Start(i) => (0, i as i64),
|
115
|
+
SeekFrom::Current(i) => (1, i),
|
116
|
+
SeekFrom::End(i) => (2, i),
|
117
|
+
};
|
118
|
+
|
119
|
+
let new_position = self
|
120
|
+
.inner
|
121
|
+
.funcall("seek", (offset, whence))
|
122
|
+
.map_err(rberr_to_io_err)?;
|
123
|
+
|
124
|
+
Ok(new_position)
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
pub trait FileLike: Read + Write + Seek {}
|
129
|
+
|
130
|
+
impl FileLike for File {}
|
131
|
+
impl FileLike for RbFileLikeObject {}
|
132
|
+
|
133
|
+
pub enum EitherRustRubyFile {
|
134
|
+
Rb(RbFileLikeObject),
|
135
|
+
Rust(BufReader<File>),
|
136
|
+
}
|
137
|
+
|
138
|
+
///
|
139
|
+
/// # Arguments
|
140
|
+
/// * `truncate` - if true, create (or truncate) the file for writing; otherwise open an existing file.
|
141
|
+
pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFile> {
|
142
|
+
if let Ok(rstring) = RString::try_convert(rb_f) {
|
143
|
+
let s = unsafe { rstring.as_str() }?;
|
144
|
+
let file_path = std::path::Path::new(&s);
|
145
|
+
let file_path = resolve_homedir(file_path);
|
146
|
+
let f = if truncate {
|
147
|
+
File::create(file_path).map_err(RbPolarsErr::io)?
|
148
|
+
} else {
|
149
|
+
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
|
+
};
|
151
|
+
let reader = BufReader::new(f);
|
152
|
+
Ok(EitherRustRubyFile::Rust(reader))
|
14
153
|
} else {
|
15
|
-
|
16
|
-
|
17
|
-
|
154
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
|
+
Ok(EitherRustRubyFile::Rb(f))
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
+
use EitherRustRubyFile::*;
|
161
|
+
match get_either_file(f, truncate)? {
|
162
|
+
Rb(f) => Ok(Box::new(f)),
|
163
|
+
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
+
}
|
18
165
|
}
|
19
166
|
|
20
167
|
pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
|
@@ -55,9 +55,24 @@ pub fn rolling_cov(
|
|
55
55
|
.into()
|
56
56
|
}
|
57
57
|
|
58
|
-
pub fn arg_sort_by(
|
58
|
+
pub fn arg_sort_by(
|
59
|
+
by: RArray,
|
60
|
+
descending: Vec<bool>,
|
61
|
+
nulls_last: bool,
|
62
|
+
multithreaded: bool,
|
63
|
+
maintain_order: bool,
|
64
|
+
) -> RbResult<RbExpr> {
|
59
65
|
let by = rb_exprs_to_exprs(by)?;
|
60
|
-
Ok(dsl::arg_sort_by(
|
66
|
+
Ok(dsl::arg_sort_by(
|
67
|
+
by,
|
68
|
+
SortMultipleOptions {
|
69
|
+
descending,
|
70
|
+
nulls_last,
|
71
|
+
multithreaded,
|
72
|
+
maintain_order,
|
73
|
+
},
|
74
|
+
)
|
75
|
+
.into())
|
61
76
|
}
|
62
77
|
|
63
78
|
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
@@ -324,6 +339,6 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
324
339
|
}
|
325
340
|
|
326
341
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
327
|
-
let expr = polars::sql::sql_expr(
|
342
|
+
let expr = polars::sql::sql_expr(sql).map_err(RbPolarsErr::from)?;
|
328
343
|
Ok(expr.into())
|
329
344
|
}
|
@@ -2,42 +2,72 @@ use polars::lazy::dsl;
|
|
2
2
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
|
+
pub fn when(condition: &RbExpr) -> RbWhen {
|
6
|
+
RbWhen {
|
7
|
+
inner: dsl::when(condition.inner.clone()),
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
5
11
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
12
|
#[derive(Clone)]
|
7
13
|
pub struct RbWhen {
|
8
14
|
pub inner: dsl::When,
|
9
15
|
}
|
10
16
|
|
11
|
-
|
12
|
-
fn from(inner: dsl::When) -> Self {
|
13
|
-
RbWhen { inner }
|
14
|
-
}
|
15
|
-
}
|
16
|
-
|
17
|
-
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
17
|
+
#[magnus::wrap(class = "Polars::RbThen")]
|
18
18
|
#[derive(Clone)]
|
19
19
|
pub struct RbThen {
|
20
20
|
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
#[magnus::wrap(class = "Polars::RbChainedWhen")]
|
24
|
+
#[derive(Clone)]
|
25
|
+
pub struct RbChainedWhen {
|
26
|
+
pub inner: dsl::ChainedWhen,
|
27
|
+
}
|
28
|
+
|
29
|
+
#[magnus::wrap(class = "Polars::RbChainedThen")]
|
30
|
+
#[derive(Clone)]
|
31
|
+
pub struct RbChainedThen {
|
32
|
+
pub inner: dsl::ChainedThen,
|
27
33
|
}
|
28
34
|
|
29
35
|
impl RbWhen {
|
30
|
-
pub fn then(&self,
|
31
|
-
|
36
|
+
pub fn then(&self, statement: &RbExpr) -> RbThen {
|
37
|
+
RbThen {
|
38
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
39
|
+
}
|
32
40
|
}
|
33
41
|
}
|
34
42
|
|
35
43
|
impl RbThen {
|
36
|
-
pub fn
|
37
|
-
|
44
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
45
|
+
RbChainedWhen {
|
46
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
51
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
38
52
|
}
|
39
53
|
}
|
40
54
|
|
41
|
-
|
42
|
-
|
55
|
+
impl RbChainedWhen {
|
56
|
+
pub fn then(&self, statement: &RbExpr) -> RbChainedThen {
|
57
|
+
RbChainedThen {
|
58
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
impl RbChainedThen {
|
64
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
65
|
+
RbChainedWhen {
|
66
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
71
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
72
|
+
}
|
43
73
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::RowIndex;
|
2
|
+
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
@@ -100,13 +100,13 @@ impl RbLazyFrame {
|
|
100
100
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
101
101
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
102
102
|
let eol_char = String::try_convert(arguments[19])?;
|
103
|
+
let truncate_ragged_lines = bool::try_convert(arguments[20])?;
|
103
104
|
// end arguments
|
104
105
|
|
105
106
|
let null_values = null_values.map(|w| w.0);
|
106
107
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
107
108
|
let separator = separator.as_bytes()[0];
|
108
109
|
let eol_char = eol_char.as_bytes()[0];
|
109
|
-
|
110
110
|
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
111
111
|
|
112
112
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
@@ -115,6 +115,7 @@ impl RbLazyFrame {
|
|
115
115
|
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
116
116
|
.collect::<Schema>()
|
117
117
|
});
|
118
|
+
|
118
119
|
let r = LazyCsvReader::new(path)
|
119
120
|
.with_infer_schema_length(infer_schema_length)
|
120
121
|
.with_separator(separator)
|
@@ -124,6 +125,7 @@ impl RbLazyFrame {
|
|
124
125
|
.with_n_rows(n_rows)
|
125
126
|
.with_cache(cache)
|
126
127
|
.with_dtype_overwrite(overwrite_dtype.as_ref())
|
128
|
+
// TODO add with_schema
|
127
129
|
.low_memory(low_memory)
|
128
130
|
.with_comment_prefix(comment_prefix.as_deref())
|
129
131
|
.with_quote_char(quote_char)
|
@@ -133,7 +135,9 @@ impl RbLazyFrame {
|
|
133
135
|
.with_encoding(encoding.0)
|
134
136
|
.with_row_index(row_index)
|
135
137
|
.with_try_parse_dates(try_parse_dates)
|
136
|
-
.with_null_values(null_values)
|
138
|
+
.with_null_values(null_values)
|
139
|
+
// TODO add with_missing_is_null
|
140
|
+
.truncate_ragged_lines(truncate_ragged_lines);
|
137
141
|
|
138
142
|
if let Some(_lambda) = with_schema_modify {
|
139
143
|
todo!();
|
@@ -144,7 +148,8 @@ impl RbLazyFrame {
|
|
144
148
|
|
145
149
|
#[allow(clippy::too_many_arguments)]
|
146
150
|
pub fn new_from_parquet(
|
147
|
-
path:
|
151
|
+
path: Option<PathBuf>,
|
152
|
+
paths: Vec<PathBuf>,
|
148
153
|
n_rows: Option<usize>,
|
149
154
|
cache: bool,
|
150
155
|
parallel: Wrap<ParallelStrategy>,
|
@@ -153,21 +158,43 @@ impl RbLazyFrame {
|
|
153
158
|
low_memory: bool,
|
154
159
|
use_statistics: bool,
|
155
160
|
hive_partitioning: bool,
|
161
|
+
hive_schema: Option<Wrap<Schema>>,
|
156
162
|
) -> RbResult<Self> {
|
163
|
+
let parallel = parallel.0;
|
164
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
165
|
+
|
166
|
+
let first_path = if let Some(path) = &path {
|
167
|
+
path
|
168
|
+
} else {
|
169
|
+
paths
|
170
|
+
.first()
|
171
|
+
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
172
|
+
};
|
173
|
+
|
157
174
|
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
175
|
+
let hive_options = HiveOptions {
|
176
|
+
enabled: hive_partitioning,
|
177
|
+
schema: hive_schema,
|
178
|
+
};
|
179
|
+
|
158
180
|
let args = ScanArgsParquet {
|
159
181
|
n_rows,
|
160
182
|
cache,
|
161
|
-
parallel
|
183
|
+
parallel,
|
162
184
|
rechunk,
|
163
185
|
row_index,
|
164
186
|
low_memory,
|
165
|
-
// TODO support cloud options
|
166
187
|
cloud_options: None,
|
167
188
|
use_statistics,
|
168
|
-
|
189
|
+
hive_options,
|
169
190
|
};
|
170
|
-
|
191
|
+
|
192
|
+
let lf = if path.is_some() {
|
193
|
+
LazyFrame::scan_parquet(first_path, args)
|
194
|
+
} else {
|
195
|
+
LazyFrame::scan_parquet_files(Arc::from(paths), args)
|
196
|
+
}
|
197
|
+
.map_err(RbPolarsErr::from)?;
|
171
198
|
Ok(lf.into())
|
172
199
|
}
|
173
200
|
|
@@ -185,7 +212,8 @@ impl RbLazyFrame {
|
|
185
212
|
cache,
|
186
213
|
rechunk,
|
187
214
|
row_index,
|
188
|
-
|
215
|
+
memory_map,
|
216
|
+
cloud_options: None,
|
189
217
|
};
|
190
218
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
191
219
|
Ok(lf.into())
|
@@ -242,17 +270,18 @@ impl RbLazyFrame {
|
|
242
270
|
pub fn sort(
|
243
271
|
&self,
|
244
272
|
by_column: String,
|
245
|
-
|
273
|
+
descending: bool,
|
246
274
|
nulls_last: bool,
|
247
275
|
maintain_order: bool,
|
276
|
+
multithreaded: bool,
|
248
277
|
) -> Self {
|
249
278
|
let ldf = self.ldf.clone();
|
250
279
|
ldf.sort(
|
251
|
-
&by_column,
|
252
|
-
|
253
|
-
descending:
|
280
|
+
[&by_column],
|
281
|
+
SortMultipleOptions {
|
282
|
+
descending: vec![descending],
|
254
283
|
nulls_last,
|
255
|
-
multithreaded
|
284
|
+
multithreaded,
|
256
285
|
maintain_order,
|
257
286
|
},
|
258
287
|
)
|
@@ -261,15 +290,24 @@ impl RbLazyFrame {
|
|
261
290
|
|
262
291
|
pub fn sort_by_exprs(
|
263
292
|
&self,
|
264
|
-
|
265
|
-
|
293
|
+
by: RArray,
|
294
|
+
descending: Vec<bool>,
|
266
295
|
nulls_last: bool,
|
267
296
|
maintain_order: bool,
|
297
|
+
multithreaded: bool,
|
268
298
|
) -> RbResult<Self> {
|
269
299
|
let ldf = self.ldf.clone();
|
270
|
-
let exprs = rb_exprs_to_exprs(
|
300
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
271
301
|
Ok(ldf
|
272
|
-
.sort_by_exprs(
|
302
|
+
.sort_by_exprs(
|
303
|
+
exprs,
|
304
|
+
SortMultipleOptions {
|
305
|
+
descending,
|
306
|
+
nulls_last,
|
307
|
+
maintain_order,
|
308
|
+
multithreaded,
|
309
|
+
},
|
310
|
+
)
|
273
311
|
.into())
|
274
312
|
}
|
275
313
|
|
@@ -326,6 +364,7 @@ impl RbLazyFrame {
|
|
326
364
|
Ok(())
|
327
365
|
}
|
328
366
|
|
367
|
+
#[allow(clippy::too_many_arguments)]
|
329
368
|
pub fn sink_csv(
|
330
369
|
&self,
|
331
370
|
path: PathBuf,
|
@@ -427,7 +466,7 @@ impl RbLazyFrame {
|
|
427
466
|
let closed_window = closed.0;
|
428
467
|
let ldf = self.ldf.clone();
|
429
468
|
let by = rb_exprs_to_exprs(by)?;
|
430
|
-
let lazy_gb = ldf.
|
469
|
+
let lazy_gb = ldf.rolling(
|
431
470
|
index_column.inner.clone(),
|
432
471
|
by,
|
433
472
|
RollingGroupOptions {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -23,7 +23,7 @@ use error::{RbPolarsErr, RbTypeError, RbValueError};
|
|
23
23
|
use expr::rb_exprs_to_exprs;
|
24
24
|
use expr::RbExpr;
|
25
25
|
use functions::string_cache::RbStringCacheHolder;
|
26
|
-
use functions::whenthen::{RbThen, RbWhen};
|
26
|
+
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
27
27
|
use lazyframe::RbLazyFrame;
|
28
28
|
use lazygroupby::RbLazyGroupBy;
|
29
29
|
use magnus::{define_module, function, method, prelude::*, Error, Ruby};
|
@@ -74,7 +74,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
74
74
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
75
75
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
76
76
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
77
|
-
class.define_method("write_parquet", method!(RbDataFrame::write_parquet,
|
77
|
+
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
78
78
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
79
79
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
80
80
|
class.define_method("div", method!(RbDataFrame::div, 1))?;
|
@@ -213,7 +213,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
213
213
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
214
214
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
215
215
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
216
|
-
class.define_method("sort_by", method!(RbExpr::sort_by,
|
216
|
+
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
217
217
|
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
218
218
|
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
219
219
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
@@ -312,7 +312,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
312
312
|
class.define_method("arr_reverse", method!(RbExpr::arr_reverse, 0))?;
|
313
313
|
class.define_method("arr_arg_min", method!(RbExpr::arr_arg_min, 0))?;
|
314
314
|
class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
|
315
|
-
class.define_method("arr_get", method!(RbExpr::arr_get,
|
315
|
+
class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
|
316
316
|
class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
|
317
317
|
class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
|
318
318
|
class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
|
@@ -406,7 +406,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
406
406
|
class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
|
407
407
|
class.define_method(
|
408
408
|
"dt_replace_time_zone",
|
409
|
-
method!(RbExpr::dt_replace_time_zone,
|
409
|
+
method!(RbExpr::dt_replace_time_zone, 3),
|
410
410
|
)?;
|
411
411
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
412
412
|
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
@@ -448,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
448
448
|
class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
|
449
449
|
class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
|
450
450
|
class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
|
451
|
-
class.define_method("list_get", method!(RbExpr::list_get,
|
451
|
+
class.define_method("list_get", method!(RbExpr::list_get, 2))?;
|
452
452
|
class.define_method("list_join", method!(RbExpr::list_join, 2))?;
|
453
453
|
class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
|
454
454
|
class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?;
|
@@ -554,7 +554,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
554
554
|
class.define_singleton_method("arctan2d", function!(functions::lazy::arctan2d, 2))?;
|
555
555
|
class.define_singleton_method("rolling_corr", function!(functions::lazy::rolling_corr, 5))?;
|
556
556
|
class.define_singleton_method("rolling_cov", function!(functions::lazy::rolling_cov, 5))?;
|
557
|
-
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by,
|
557
|
+
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 5))?;
|
558
558
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
559
559
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
560
560
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
@@ -689,7 +689,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
689
689
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
690
690
|
class.define_singleton_method(
|
691
691
|
"new_from_parquet",
|
692
|
-
function!(RbLazyFrame::new_from_parquet,
|
692
|
+
function!(RbLazyFrame::new_from_parquet, 11),
|
693
693
|
)?;
|
694
694
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
695
695
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -702,8 +702,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
702
702
|
"optimization_toggle",
|
703
703
|
method!(RbLazyFrame::optimization_toggle, 9),
|
704
704
|
)?;
|
705
|
-
class.define_method("sort", method!(RbLazyFrame::sort,
|
706
|
-
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs,
|
705
|
+
class.define_method("sort", method!(RbLazyFrame::sort, 5))?;
|
706
|
+
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
707
707
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
708
708
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
709
709
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
@@ -835,7 +835,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
835
835
|
class.define_method("mul", method!(RbSeries::mul, 1))?;
|
836
836
|
class.define_method("div", method!(RbSeries::div, 1))?;
|
837
837
|
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
838
|
-
class.define_method("sort", method!(RbSeries::sort,
|
838
|
+
class.define_method("sort", method!(RbSeries::sort, 3))?;
|
839
839
|
class.define_method("value_counts", method!(RbSeries::value_counts, 1))?;
|
840
840
|
class.define_method("any", method!(RbSeries::any, 1))?;
|
841
841
|
class.define_method("all", method!(RbSeries::all, 1))?;
|
@@ -1032,11 +1032,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1032
1032
|
// extra
|
1033
1033
|
class.define_method("extend_constant", method!(RbSeries::extend_constant, 2))?;
|
1034
1034
|
|
1035
|
+
// when then
|
1035
1036
|
let class = module.define_class("RbWhen", ruby.class_object())?;
|
1036
|
-
class.define_method("
|
1037
|
+
class.define_method("then", method!(RbWhen::then, 1))?;
|
1037
1038
|
|
1038
|
-
let class = module.define_class("
|
1039
|
-
class.define_method("
|
1039
|
+
let class = module.define_class("RbThen", ruby.class_object())?;
|
1040
|
+
class.define_method("when", method!(RbThen::when, 1))?;
|
1041
|
+
class.define_method("otherwise", method!(RbThen::otherwise, 1))?;
|
1042
|
+
|
1043
|
+
let class = module.define_class("RbChainedWhen", ruby.class_object())?;
|
1044
|
+
class.define_method("then", method!(RbChainedWhen::then, 1))?;
|
1045
|
+
|
1046
|
+
let class = module.define_class("RbChainedThen", ruby.class_object())?;
|
1047
|
+
class.define_method("when", method!(RbChainedThen::when, 1))?;
|
1048
|
+
class.define_method("otherwise", method!(RbChainedThen::otherwise, 1))?;
|
1040
1049
|
|
1041
1050
|
// sql
|
1042
1051
|
let class = module.define_class("RbSQLContext", ruby.class_object())?;
|