polars-df 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +90 -45
- data/README.md +1 -0
- data/ext/polars/Cargo.toml +8 -6
- data/ext/polars/src/batched_csv.rs +3 -1
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +18 -7
- data/ext/polars/src/dataframe.rs +40 -14
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +7 -2
- data/ext/polars/src/expr/general.rs +22 -3
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/string.rs +3 -3
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +18 -3
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/lazyframe/mod.rs +58 -19
- data/ext/polars/src/lib.rs +23 -14
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/mod.rs +12 -2
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/data_frame.rb +148 -74
- data/lib/polars/date_time_expr.rb +10 -4
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/expr.rb +37 -34
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/io.rb +18 -6
- data/lib/polars/lazy_frame.rb +39 -36
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/series.rb +12 -10
- data/lib/polars/string_expr.rb +1 -0
- data/lib/polars/utils.rb +54 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +1 -2
- metadata +4 -5
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -267,7 +267,7 @@ impl RbExpr {
|
|
267
267
|
pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
|
268
268
|
self.clone()
|
269
269
|
.inner
|
270
|
-
.
|
270
|
+
.sort(SortOptions {
|
271
271
|
descending,
|
272
272
|
nulls_last,
|
273
273
|
multithreaded: true,
|
@@ -323,9 +323,28 @@ impl RbExpr {
|
|
323
323
|
self.clone().inner.gather(idx.inner.clone()).into()
|
324
324
|
}
|
325
325
|
|
326
|
-
pub fn sort_by(
|
326
|
+
pub fn sort_by(
|
327
|
+
&self,
|
328
|
+
by: RArray,
|
329
|
+
descending: Vec<bool>,
|
330
|
+
nulls_last: bool,
|
331
|
+
multithreaded: bool,
|
332
|
+
maintain_order: bool,
|
333
|
+
) -> RbResult<Self> {
|
327
334
|
let by = rb_exprs_to_exprs(by)?;
|
328
|
-
Ok(self
|
335
|
+
Ok(self
|
336
|
+
.clone()
|
337
|
+
.inner
|
338
|
+
.sort_by(
|
339
|
+
by,
|
340
|
+
SortMultipleOptions {
|
341
|
+
descending,
|
342
|
+
nulls_last,
|
343
|
+
multithreaded,
|
344
|
+
maintain_order,
|
345
|
+
},
|
346
|
+
)
|
347
|
+
.into())
|
329
348
|
}
|
330
349
|
|
331
350
|
pub fn backward_fill(&self, limit: FillNullLimit) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -51,8 +51,12 @@ impl RbExpr {
|
|
51
51
|
.into()
|
52
52
|
}
|
53
53
|
|
54
|
-
pub fn list_get(&self, index: &RbExpr) -> Self {
|
55
|
-
self.inner
|
54
|
+
pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
55
|
+
self.inner
|
56
|
+
.clone()
|
57
|
+
.list()
|
58
|
+
.get(index.inner.clone(), null_on_oob)
|
59
|
+
.into()
|
56
60
|
}
|
57
61
|
|
58
62
|
pub fn list_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
|
@@ -244,12 +244,12 @@ impl RbExpr {
|
|
244
244
|
.into()
|
245
245
|
}
|
246
246
|
|
247
|
-
pub fn str_to_integer(&self, base:
|
247
|
+
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
248
248
|
self.inner
|
249
249
|
.clone()
|
250
250
|
.str()
|
251
|
-
.to_integer(base, strict)
|
252
|
-
.with_fmt("str.
|
251
|
+
.to_integer(base.inner.clone(), strict)
|
252
|
+
.with_fmt("str.to_integer")
|
253
253
|
.into()
|
254
254
|
}
|
255
255
|
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,20 +1,167 @@
|
|
1
|
-
use magnus::{exception, prelude::*, Error, RString, Value};
|
2
|
-
use polars::io::mmap::MmapBytesReader;
|
3
1
|
use std::fs::File;
|
4
|
-
use std::io
|
2
|
+
use std::io;
|
3
|
+
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
5
4
|
use std::path::PathBuf;
|
6
5
|
|
6
|
+
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::mmap::MmapBytesReader;
|
8
|
+
|
9
|
+
use crate::error::RbPolarsErr;
|
10
|
+
use crate::prelude::resolve_homedir;
|
7
11
|
use crate::RbResult;
|
8
12
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
#[derive(Clone)]
|
14
|
+
pub struct RbFileLikeObject {
|
15
|
+
inner: Value,
|
16
|
+
}
|
17
|
+
|
18
|
+
/// Wraps a `Value`, and implements read, seek, and write for it.
|
19
|
+
impl RbFileLikeObject {
|
20
|
+
/// Creates an instance of a `RbFileLikeObject` from a `Value`.
|
21
|
+
/// To assert the object has the required methods,
|
22
|
+
/// instantiate it with `RbFileLikeObject::require`
|
23
|
+
pub fn new(object: Value) -> Self {
|
24
|
+
RbFileLikeObject { inner: object }
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
|
28
|
+
let data = self.as_file_buffer().into_inner();
|
29
|
+
std::io::Cursor::new(data)
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
|
33
|
+
let bytes = self
|
34
|
+
.inner
|
35
|
+
.funcall::<_, _, RString>("read", ())
|
36
|
+
.expect("no read method found");
|
37
|
+
|
38
|
+
let buf = unsafe { bytes.as_slice() }.to_vec();
|
39
|
+
|
40
|
+
Cursor::new(buf)
|
41
|
+
}
|
42
|
+
|
43
|
+
/// Same as `RbFileLikeObject::new`, but validates that the underlying
|
44
|
+
/// Ruby object has the `read`, `write`, and `seek` methods requested by the parameters.
|
45
|
+
/// Returns a `TypeError` if the object lacks any of the requested `read`, `seek`, or `write` methods.
|
46
|
+
pub fn with_requirements(object: Value, read: bool, write: bool, seek: bool) -> RbResult<Self> {
|
47
|
+
if read && !object.respond_to("read", false)? {
|
48
|
+
return Err(Error::new(
|
49
|
+
exception::type_error(),
|
50
|
+
"Object does not have a .read() method.",
|
51
|
+
));
|
52
|
+
}
|
53
|
+
|
54
|
+
if seek && !object.respond_to("seek", false)? {
|
55
|
+
return Err(Error::new(
|
56
|
+
exception::type_error(),
|
57
|
+
"Object does not have a .seek() method.",
|
58
|
+
));
|
59
|
+
}
|
60
|
+
|
61
|
+
if write && !object.respond_to("write", false)? {
|
62
|
+
return Err(Error::new(
|
63
|
+
exception::type_error(),
|
64
|
+
"Object does not have a .write() method.",
|
65
|
+
));
|
66
|
+
}
|
67
|
+
|
68
|
+
Ok(RbFileLikeObject::new(object))
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
/// Extracts a string repr from the given error, and returns an IO error to send back to Rust.
|
73
|
+
fn rberr_to_io_err(e: Error) -> io::Error {
|
74
|
+
io::Error::new(io::ErrorKind::Other, e.to_string())
|
75
|
+
}
|
76
|
+
|
77
|
+
impl Read for RbFileLikeObject {
|
78
|
+
fn read(&mut self, mut buf: &mut [u8]) -> Result<usize, io::Error> {
|
79
|
+
let bytes = self
|
80
|
+
.inner
|
81
|
+
.funcall::<_, _, RString>("read", (buf.len(),))
|
82
|
+
.map_err(rberr_to_io_err)?;
|
83
|
+
|
84
|
+
buf.write_all(unsafe { bytes.as_slice() })?;
|
85
|
+
|
86
|
+
Ok(bytes.len())
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
impl Write for RbFileLikeObject {
|
91
|
+
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
92
|
+
let rbbytes = RString::from_slice(buf);
|
93
|
+
|
94
|
+
let number_bytes_written = self
|
95
|
+
.inner
|
96
|
+
.funcall::<_, _, usize>("write", (rbbytes,))
|
97
|
+
.map_err(rberr_to_io_err)?;
|
98
|
+
|
99
|
+
Ok(number_bytes_written)
|
100
|
+
}
|
101
|
+
|
102
|
+
fn flush(&mut self) -> Result<(), io::Error> {
|
103
|
+
self.inner
|
104
|
+
.funcall::<_, _, Value>("flush", ())
|
105
|
+
.map_err(rberr_to_io_err)?;
|
106
|
+
|
107
|
+
Ok(())
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
impl Seek for RbFileLikeObject {
|
112
|
+
fn seek(&mut self, pos: SeekFrom) -> Result<u64, io::Error> {
|
113
|
+
let (whence, offset) = match pos {
|
114
|
+
SeekFrom::Start(i) => (0, i as i64),
|
115
|
+
SeekFrom::Current(i) => (1, i),
|
116
|
+
SeekFrom::End(i) => (2, i),
|
117
|
+
};
|
118
|
+
|
119
|
+
let new_position = self
|
120
|
+
.inner
|
121
|
+
.funcall("seek", (offset, whence))
|
122
|
+
.map_err(rberr_to_io_err)?;
|
123
|
+
|
124
|
+
Ok(new_position)
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
pub trait FileLike: Read + Write + Seek {}
|
129
|
+
|
130
|
+
impl FileLike for File {}
|
131
|
+
impl FileLike for RbFileLikeObject {}
|
132
|
+
|
133
|
+
pub enum EitherRustRubyFile {
|
134
|
+
Rb(RbFileLikeObject),
|
135
|
+
Rust(BufReader<File>),
|
136
|
+
}
|
137
|
+
|
138
|
+
///
|
139
|
+
/// # Arguments
|
140
|
+
/// * `truncate` - if true, create the file (truncating it if it already exists) instead of opening it.
|
141
|
+
pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFile> {
|
142
|
+
if let Ok(rstring) = RString::try_convert(rb_f) {
|
143
|
+
let s = unsafe { rstring.as_str() }?;
|
144
|
+
let file_path = std::path::Path::new(&s);
|
145
|
+
let file_path = resolve_homedir(file_path);
|
146
|
+
let f = if truncate {
|
147
|
+
File::create(file_path).map_err(RbPolarsErr::io)?
|
148
|
+
} else {
|
149
|
+
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
|
150
|
+
};
|
151
|
+
let reader = BufReader::new(f);
|
152
|
+
Ok(EitherRustRubyFile::Rust(reader))
|
14
153
|
} else {
|
15
|
-
|
16
|
-
|
17
|
-
|
154
|
+
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
|
155
|
+
Ok(EitherRustRubyFile::Rb(f))
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
|
160
|
+
use EitherRustRubyFile::*;
|
161
|
+
match get_either_file(f, truncate)? {
|
162
|
+
Rb(f) => Ok(Box::new(f)),
|
163
|
+
Rust(f) => Ok(Box::new(f.into_inner())),
|
164
|
+
}
|
18
165
|
}
|
19
166
|
|
20
167
|
pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
|
@@ -55,9 +55,24 @@ pub fn rolling_cov(
|
|
55
55
|
.into()
|
56
56
|
}
|
57
57
|
|
58
|
-
pub fn arg_sort_by(
|
58
|
+
pub fn arg_sort_by(
|
59
|
+
by: RArray,
|
60
|
+
descending: Vec<bool>,
|
61
|
+
nulls_last: bool,
|
62
|
+
multithreaded: bool,
|
63
|
+
maintain_order: bool,
|
64
|
+
) -> RbResult<RbExpr> {
|
59
65
|
let by = rb_exprs_to_exprs(by)?;
|
60
|
-
Ok(dsl::arg_sort_by(
|
66
|
+
Ok(dsl::arg_sort_by(
|
67
|
+
by,
|
68
|
+
SortMultipleOptions {
|
69
|
+
descending,
|
70
|
+
nulls_last,
|
71
|
+
multithreaded,
|
72
|
+
maintain_order,
|
73
|
+
},
|
74
|
+
)
|
75
|
+
.into())
|
61
76
|
}
|
62
77
|
|
63
78
|
pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
@@ -324,6 +339,6 @@ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool
|
|
324
339
|
}
|
325
340
|
|
326
341
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
327
|
-
let expr = polars::sql::sql_expr(
|
342
|
+
let expr = polars::sql::sql_expr(sql).map_err(RbPolarsErr::from)?;
|
328
343
|
Ok(expr.into())
|
329
344
|
}
|
@@ -2,42 +2,72 @@ use polars::lazy::dsl;
|
|
2
2
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
|
+
pub fn when(condition: &RbExpr) -> RbWhen {
|
6
|
+
RbWhen {
|
7
|
+
inner: dsl::when(condition.inner.clone()),
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
5
11
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
6
12
|
#[derive(Clone)]
|
7
13
|
pub struct RbWhen {
|
8
14
|
pub inner: dsl::When,
|
9
15
|
}
|
10
16
|
|
11
|
-
|
12
|
-
fn from(inner: dsl::When) -> Self {
|
13
|
-
RbWhen { inner }
|
14
|
-
}
|
15
|
-
}
|
16
|
-
|
17
|
-
#[magnus::wrap(class = "Polars::RbWhenThen")]
|
17
|
+
#[magnus::wrap(class = "Polars::RbThen")]
|
18
18
|
#[derive(Clone)]
|
19
19
|
pub struct RbThen {
|
20
20
|
pub inner: dsl::Then,
|
21
21
|
}
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
#[magnus::wrap(class = "Polars::RbChainedWhen")]
|
24
|
+
#[derive(Clone)]
|
25
|
+
pub struct RbChainedWhen {
|
26
|
+
pub inner: dsl::ChainedWhen,
|
27
|
+
}
|
28
|
+
|
29
|
+
#[magnus::wrap(class = "Polars::RbChainedThen")]
|
30
|
+
#[derive(Clone)]
|
31
|
+
pub struct RbChainedThen {
|
32
|
+
pub inner: dsl::ChainedThen,
|
27
33
|
}
|
28
34
|
|
29
35
|
impl RbWhen {
|
30
|
-
pub fn then(&self,
|
31
|
-
|
36
|
+
pub fn then(&self, statement: &RbExpr) -> RbThen {
|
37
|
+
RbThen {
|
38
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
39
|
+
}
|
32
40
|
}
|
33
41
|
}
|
34
42
|
|
35
43
|
impl RbThen {
|
36
|
-
pub fn
|
37
|
-
|
44
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
45
|
+
RbChainedWhen {
|
46
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
51
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
38
52
|
}
|
39
53
|
}
|
40
54
|
|
41
|
-
|
42
|
-
|
55
|
+
impl RbChainedWhen {
|
56
|
+
pub fn then(&self, statement: &RbExpr) -> RbChainedThen {
|
57
|
+
RbChainedThen {
|
58
|
+
inner: self.inner.clone().then(statement.inner.clone()),
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
impl RbChainedThen {
|
64
|
+
pub fn when(&self, condition: &RbExpr) -> RbChainedWhen {
|
65
|
+
RbChainedWhen {
|
66
|
+
inner: self.inner.clone().when(condition.inner.clone()),
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn otherwise(&self, statement: &RbExpr) -> RbExpr {
|
71
|
+
self.inner.clone().otherwise(statement.inner.clone()).into()
|
72
|
+
}
|
43
73
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
|
-
use polars::io::RowIndex;
|
2
|
+
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
@@ -100,13 +100,13 @@ impl RbLazyFrame {
|
|
100
100
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
101
101
|
let try_parse_dates = bool::try_convert(arguments[18])?;
|
102
102
|
let eol_char = String::try_convert(arguments[19])?;
|
103
|
+
let truncate_ragged_lines = bool::try_convert(arguments[20])?;
|
103
104
|
// end arguments
|
104
105
|
|
105
106
|
let null_values = null_values.map(|w| w.0);
|
106
107
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
107
108
|
let separator = separator.as_bytes()[0];
|
108
109
|
let eol_char = eol_char.as_bytes()[0];
|
109
|
-
|
110
110
|
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
111
111
|
|
112
112
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
@@ -115,6 +115,7 @@ impl RbLazyFrame {
|
|
115
115
|
.map(|(name, dtype)| Field::new(&name, dtype.0))
|
116
116
|
.collect::<Schema>()
|
117
117
|
});
|
118
|
+
|
118
119
|
let r = LazyCsvReader::new(path)
|
119
120
|
.with_infer_schema_length(infer_schema_length)
|
120
121
|
.with_separator(separator)
|
@@ -124,6 +125,7 @@ impl RbLazyFrame {
|
|
124
125
|
.with_n_rows(n_rows)
|
125
126
|
.with_cache(cache)
|
126
127
|
.with_dtype_overwrite(overwrite_dtype.as_ref())
|
128
|
+
// TODO add with_schema
|
127
129
|
.low_memory(low_memory)
|
128
130
|
.with_comment_prefix(comment_prefix.as_deref())
|
129
131
|
.with_quote_char(quote_char)
|
@@ -133,7 +135,9 @@ impl RbLazyFrame {
|
|
133
135
|
.with_encoding(encoding.0)
|
134
136
|
.with_row_index(row_index)
|
135
137
|
.with_try_parse_dates(try_parse_dates)
|
136
|
-
.with_null_values(null_values)
|
138
|
+
.with_null_values(null_values)
|
139
|
+
// TODO add with_missing_is_null
|
140
|
+
.truncate_ragged_lines(truncate_ragged_lines);
|
137
141
|
|
138
142
|
if let Some(_lambda) = with_schema_modify {
|
139
143
|
todo!();
|
@@ -144,7 +148,8 @@ impl RbLazyFrame {
|
|
144
148
|
|
145
149
|
#[allow(clippy::too_many_arguments)]
|
146
150
|
pub fn new_from_parquet(
|
147
|
-
path:
|
151
|
+
path: Option<PathBuf>,
|
152
|
+
paths: Vec<PathBuf>,
|
148
153
|
n_rows: Option<usize>,
|
149
154
|
cache: bool,
|
150
155
|
parallel: Wrap<ParallelStrategy>,
|
@@ -153,21 +158,43 @@ impl RbLazyFrame {
|
|
153
158
|
low_memory: bool,
|
154
159
|
use_statistics: bool,
|
155
160
|
hive_partitioning: bool,
|
161
|
+
hive_schema: Option<Wrap<Schema>>,
|
156
162
|
) -> RbResult<Self> {
|
163
|
+
let parallel = parallel.0;
|
164
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
165
|
+
|
166
|
+
let first_path = if let Some(path) = &path {
|
167
|
+
path
|
168
|
+
} else {
|
169
|
+
paths
|
170
|
+
.first()
|
171
|
+
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
172
|
+
};
|
173
|
+
|
157
174
|
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
175
|
+
let hive_options = HiveOptions {
|
176
|
+
enabled: hive_partitioning,
|
177
|
+
schema: hive_schema,
|
178
|
+
};
|
179
|
+
|
158
180
|
let args = ScanArgsParquet {
|
159
181
|
n_rows,
|
160
182
|
cache,
|
161
|
-
parallel
|
183
|
+
parallel,
|
162
184
|
rechunk,
|
163
185
|
row_index,
|
164
186
|
low_memory,
|
165
|
-
// TODO support cloud options
|
166
187
|
cloud_options: None,
|
167
188
|
use_statistics,
|
168
|
-
|
189
|
+
hive_options,
|
169
190
|
};
|
170
|
-
|
191
|
+
|
192
|
+
let lf = if path.is_some() {
|
193
|
+
LazyFrame::scan_parquet(first_path, args)
|
194
|
+
} else {
|
195
|
+
LazyFrame::scan_parquet_files(Arc::from(paths), args)
|
196
|
+
}
|
197
|
+
.map_err(RbPolarsErr::from)?;
|
171
198
|
Ok(lf.into())
|
172
199
|
}
|
173
200
|
|
@@ -185,7 +212,8 @@ impl RbLazyFrame {
|
|
185
212
|
cache,
|
186
213
|
rechunk,
|
187
214
|
row_index,
|
188
|
-
|
215
|
+
memory_map,
|
216
|
+
cloud_options: None,
|
189
217
|
};
|
190
218
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
191
219
|
Ok(lf.into())
|
@@ -242,17 +270,18 @@ impl RbLazyFrame {
|
|
242
270
|
pub fn sort(
|
243
271
|
&self,
|
244
272
|
by_column: String,
|
245
|
-
|
273
|
+
descending: bool,
|
246
274
|
nulls_last: bool,
|
247
275
|
maintain_order: bool,
|
276
|
+
multithreaded: bool,
|
248
277
|
) -> Self {
|
249
278
|
let ldf = self.ldf.clone();
|
250
279
|
ldf.sort(
|
251
|
-
&by_column,
|
252
|
-
|
253
|
-
descending:
|
280
|
+
[&by_column],
|
281
|
+
SortMultipleOptions {
|
282
|
+
descending: vec![descending],
|
254
283
|
nulls_last,
|
255
|
-
multithreaded
|
284
|
+
multithreaded,
|
256
285
|
maintain_order,
|
257
286
|
},
|
258
287
|
)
|
@@ -261,15 +290,24 @@ impl RbLazyFrame {
|
|
261
290
|
|
262
291
|
pub fn sort_by_exprs(
|
263
292
|
&self,
|
264
|
-
|
265
|
-
|
293
|
+
by: RArray,
|
294
|
+
descending: Vec<bool>,
|
266
295
|
nulls_last: bool,
|
267
296
|
maintain_order: bool,
|
297
|
+
multithreaded: bool,
|
268
298
|
) -> RbResult<Self> {
|
269
299
|
let ldf = self.ldf.clone();
|
270
|
-
let exprs = rb_exprs_to_exprs(
|
300
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
271
301
|
Ok(ldf
|
272
|
-
.sort_by_exprs(
|
302
|
+
.sort_by_exprs(
|
303
|
+
exprs,
|
304
|
+
SortMultipleOptions {
|
305
|
+
descending,
|
306
|
+
nulls_last,
|
307
|
+
maintain_order,
|
308
|
+
multithreaded,
|
309
|
+
},
|
310
|
+
)
|
273
311
|
.into())
|
274
312
|
}
|
275
313
|
|
@@ -326,6 +364,7 @@ impl RbLazyFrame {
|
|
326
364
|
Ok(())
|
327
365
|
}
|
328
366
|
|
367
|
+
#[allow(clippy::too_many_arguments)]
|
329
368
|
pub fn sink_csv(
|
330
369
|
&self,
|
331
370
|
path: PathBuf,
|
@@ -427,7 +466,7 @@ impl RbLazyFrame {
|
|
427
466
|
let closed_window = closed.0;
|
428
467
|
let ldf = self.ldf.clone();
|
429
468
|
let by = rb_exprs_to_exprs(by)?;
|
430
|
-
let lazy_gb = ldf.
|
469
|
+
let lazy_gb = ldf.rolling(
|
431
470
|
index_column.inner.clone(),
|
432
471
|
by,
|
433
472
|
RollingGroupOptions {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -23,7 +23,7 @@ use error::{RbPolarsErr, RbTypeError, RbValueError};
|
|
23
23
|
use expr::rb_exprs_to_exprs;
|
24
24
|
use expr::RbExpr;
|
25
25
|
use functions::string_cache::RbStringCacheHolder;
|
26
|
-
use functions::whenthen::{RbThen, RbWhen};
|
26
|
+
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
27
27
|
use lazyframe::RbLazyFrame;
|
28
28
|
use lazygroupby::RbLazyGroupBy;
|
29
29
|
use magnus::{define_module, function, method, prelude::*, Error, Ruby};
|
@@ -74,7 +74,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
74
74
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
75
75
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
76
76
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
77
|
-
class.define_method("write_parquet", method!(RbDataFrame::write_parquet,
|
77
|
+
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
78
78
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
79
79
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
80
80
|
class.define_method("div", method!(RbDataFrame::div, 1))?;
|
@@ -213,7 +213,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
213
213
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
214
214
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
215
215
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
216
|
-
class.define_method("sort_by", method!(RbExpr::sort_by,
|
216
|
+
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
217
217
|
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
218
218
|
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
219
219
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
@@ -312,7 +312,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
312
312
|
class.define_method("arr_reverse", method!(RbExpr::arr_reverse, 0))?;
|
313
313
|
class.define_method("arr_arg_min", method!(RbExpr::arr_arg_min, 0))?;
|
314
314
|
class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
|
315
|
-
class.define_method("arr_get", method!(RbExpr::arr_get,
|
315
|
+
class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
|
316
316
|
class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
|
317
317
|
class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
|
318
318
|
class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
|
@@ -406,7 +406,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
406
406
|
class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
|
407
407
|
class.define_method(
|
408
408
|
"dt_replace_time_zone",
|
409
|
-
method!(RbExpr::dt_replace_time_zone,
|
409
|
+
method!(RbExpr::dt_replace_time_zone, 3),
|
410
410
|
)?;
|
411
411
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
412
412
|
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
@@ -448,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
448
448
|
class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
|
449
449
|
class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
|
450
450
|
class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
|
451
|
-
class.define_method("list_get", method!(RbExpr::list_get,
|
451
|
+
class.define_method("list_get", method!(RbExpr::list_get, 2))?;
|
452
452
|
class.define_method("list_join", method!(RbExpr::list_join, 2))?;
|
453
453
|
class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
|
454
454
|
class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?;
|
@@ -554,7 +554,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
554
554
|
class.define_singleton_method("arctan2d", function!(functions::lazy::arctan2d, 2))?;
|
555
555
|
class.define_singleton_method("rolling_corr", function!(functions::lazy::rolling_corr, 5))?;
|
556
556
|
class.define_singleton_method("rolling_cov", function!(functions::lazy::rolling_cov, 5))?;
|
557
|
-
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by,
|
557
|
+
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 5))?;
|
558
558
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
559
559
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
560
560
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
@@ -689,7 +689,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
689
689
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
690
690
|
class.define_singleton_method(
|
691
691
|
"new_from_parquet",
|
692
|
-
function!(RbLazyFrame::new_from_parquet,
|
692
|
+
function!(RbLazyFrame::new_from_parquet, 11),
|
693
693
|
)?;
|
694
694
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
695
695
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -702,8 +702,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
702
702
|
"optimization_toggle",
|
703
703
|
method!(RbLazyFrame::optimization_toggle, 9),
|
704
704
|
)?;
|
705
|
-
class.define_method("sort", method!(RbLazyFrame::sort,
|
706
|
-
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs,
|
705
|
+
class.define_method("sort", method!(RbLazyFrame::sort, 5))?;
|
706
|
+
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
707
707
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
708
708
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
709
709
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
@@ -835,7 +835,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
835
835
|
class.define_method("mul", method!(RbSeries::mul, 1))?;
|
836
836
|
class.define_method("div", method!(RbSeries::div, 1))?;
|
837
837
|
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
838
|
-
class.define_method("sort", method!(RbSeries::sort,
|
838
|
+
class.define_method("sort", method!(RbSeries::sort, 3))?;
|
839
839
|
class.define_method("value_counts", method!(RbSeries::value_counts, 1))?;
|
840
840
|
class.define_method("any", method!(RbSeries::any, 1))?;
|
841
841
|
class.define_method("all", method!(RbSeries::all, 1))?;
|
@@ -1032,11 +1032,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1032
1032
|
// extra
|
1033
1033
|
class.define_method("extend_constant", method!(RbSeries::extend_constant, 2))?;
|
1034
1034
|
|
1035
|
+
// when then
|
1035
1036
|
let class = module.define_class("RbWhen", ruby.class_object())?;
|
1036
|
-
class.define_method("
|
1037
|
+
class.define_method("then", method!(RbWhen::then, 1))?;
|
1037
1038
|
|
1038
|
-
let class = module.define_class("
|
1039
|
-
class.define_method("
|
1039
|
+
let class = module.define_class("RbThen", ruby.class_object())?;
|
1040
|
+
class.define_method("when", method!(RbThen::when, 1))?;
|
1041
|
+
class.define_method("otherwise", method!(RbThen::otherwise, 1))?;
|
1042
|
+
|
1043
|
+
let class = module.define_class("RbChainedWhen", ruby.class_object())?;
|
1044
|
+
class.define_method("then", method!(RbChainedWhen::then, 1))?;
|
1045
|
+
|
1046
|
+
let class = module.define_class("RbChainedThen", ruby.class_object())?;
|
1047
|
+
class.define_method("when", method!(RbChainedThen::when, 1))?;
|
1048
|
+
class.define_method("otherwise", method!(RbChainedThen::otherwise, 1))?;
|
1040
1049
|
|
1041
1050
|
// sql
|
1042
1051
|
let class = module.define_class("RbSQLContext", ruby.class_object())?;
|