polars-df 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +374 -222
- data/Cargo.toml +2 -2
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +4 -3
- data/ext/polars/src/apply/dataframe.rs +24 -13
- data/ext/polars/src/apply/mod.rs +3 -4
- data/ext/polars/src/conversion.rs +155 -31
- data/ext/polars/src/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dataframe.rs +8 -2
- data/ext/polars/src/lazy/dsl.rs +54 -18
- data/ext/polars/src/lib.rs +19 -11
- data/ext/polars/src/series.rs +32 -10
- data/lib/polars/data_frame.rb +25 -23
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +5 -2
- data/lib/polars/expr.rb +4 -3
- data/lib/polars/functions.rb +2 -2
- data/lib/polars/group_by.rb +33 -33
- data/lib/polars/lazy_frame.rb +8 -5
- data/lib/polars/lazy_functions.rb +8 -3
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/series.rb +64 -21
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +9 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +34 -33
- metadata +5 -4
data/Cargo.toml
CHANGED
@@ -3,8 +3,8 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
|
7
|
-
|
6
|
+
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -12,8 +12,6 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem "polars-df"
|
13
13
|
```
|
14
14
|
|
15
|
-
Note: Rust is currently required for installation, and it can take 15-20 minutes to compile the extension.
|
16
|
-
|
17
15
|
## Getting Started
|
18
16
|
|
19
17
|
This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.2.0"
|
4
|
+
license = "MIT"
|
4
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
6
|
edition = "2021"
|
6
7
|
publish = false
|
@@ -11,11 +12,11 @@ crate-type = ["cdylib"]
|
|
11
12
|
[dependencies]
|
12
13
|
ahash = "0.8"
|
13
14
|
magnus = "0.4"
|
14
|
-
polars-core = "0.
|
15
|
+
polars-core = "0.26.1"
|
15
16
|
serde_json = "1"
|
16
17
|
|
17
18
|
[dependencies.polars]
|
18
|
-
version = "0.
|
19
|
+
version = "0.26.1"
|
19
20
|
features = [
|
20
21
|
"abs",
|
21
22
|
"arange",
|
@@ -1,20 +1,32 @@
|
|
1
1
|
use magnus::{class, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
use polars_core::series::SeriesIter;
|
4
5
|
|
5
6
|
use super::*;
|
6
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
8
|
|
9
|
+
/// Creates one value iterator per column of `df`, in column order.
///
/// Callers advance every iterator in lock-step to read the frame row by row.
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for column in columns {
        iters.push(column.iter());
    }
    iters
}
|
12
|
+
|
13
|
+
/// Creates one value iterator per column of `df`, each already advanced past
/// its first `skip` values.
///
/// Like `get_iters`, the result is in column order and is meant to be stepped
/// in lock-step to read rows starting at index `skip`.
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for column in columns {
        iters.push(column.iter().skip(skip));
    }
    iters
}
|
19
|
+
|
8
20
|
pub fn apply_lambda_unknown<'a>(
|
9
21
|
df: &'a DataFrame,
|
10
22
|
lambda: Value,
|
11
23
|
inference_size: usize,
|
12
24
|
) -> RbResult<(Value, bool)> {
|
13
|
-
let columns = df.get_columns();
|
14
25
|
let mut null_count = 0;
|
26
|
+
let mut iters = get_iters(df);
|
15
27
|
|
16
|
-
for
|
17
|
-
let iter =
|
28
|
+
for _ in 0..df.height() {
|
29
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
18
30
|
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
31
|
let out: Value = lambda.funcall("call", arg)?;
|
20
32
|
|
@@ -126,9 +138,9 @@ fn apply_iter<T>(
|
|
126
138
|
where
|
127
139
|
T: TryConvert,
|
128
140
|
{
|
129
|
-
let
|
130
|
-
((init_null_count + skip)..df.height()).map(move |
|
131
|
-
let iter =
|
141
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
|
+
((init_null_count + skip)..df.height()).map(move |_| {
|
143
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
132
144
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
@@ -197,14 +209,13 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
197
209
|
first_value: Option<&Series>,
|
198
210
|
dt: &DataType,
|
199
211
|
) -> RbResult<ListChunked> {
|
200
|
-
let columns = df.get_columns();
|
201
|
-
|
202
212
|
let skip = usize::from(first_value.is_some());
|
203
213
|
if init_null_count == df.height() {
|
204
214
|
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
215
|
} else {
|
206
|
-
let
|
207
|
-
|
216
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
|
+
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
208
219
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
@@ -234,15 +245,15 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
234
245
|
first_value: Row<'a>,
|
235
246
|
inference_size: usize,
|
236
247
|
) -> PolarsResult<DataFrame> {
|
237
|
-
let columns = df.get_columns();
|
238
248
|
let width = first_value.0.len();
|
239
249
|
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
250
|
|
241
251
|
let mut row_buf = Row::default();
|
242
252
|
|
243
253
|
let skip = 1;
|
244
|
-
let mut
|
245
|
-
|
254
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
246
257
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
259
|
Ok(val) => {
|
data/ext/polars/src/apply/mod.rs
CHANGED
@@ -31,12 +31,11 @@ fn iterator_to_struct(
|
|
31
31
|
capacity: usize,
|
32
32
|
) -> RbResult<RbSeries> {
|
33
33
|
let (vals, flds) = match &first_value {
|
34
|
-
AnyValue::Struct(
|
35
|
-
AnyValue::StructOwned(payload) => (
|
34
|
+
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
35
|
+
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
36
36
|
_ => {
|
37
37
|
return Err(crate::error::ComputeError::new_err(format!(
|
38
|
-
"expected struct got {:?}",
|
39
|
-
first_value
|
38
|
+
"expected struct got {first_value:?}",
|
40
39
|
)))
|
41
40
|
}
|
42
41
|
};
|
@@ -1,9 +1,9 @@
|
|
1
|
-
use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
5
|
use polars::frame::row::Row;
|
6
|
-
use polars::frame::
|
6
|
+
use polars::frame::NullStrategy;
|
7
7
|
use polars::io::avro::AvroCompression;
|
8
8
|
use polars::prelude::*;
|
9
9
|
use polars::series::ops::NullBehavior;
|
@@ -18,6 +18,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
18
18
|
unsafe { std::mem::transmute(slice) }
|
19
19
|
}
|
20
20
|
|
21
|
+
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
22
|
+
// Safety:
|
23
|
+
// Wrap is transparent.
|
24
|
+
unsafe { std::mem::transmute(buf) }
|
25
|
+
}
|
26
|
+
|
21
27
|
#[repr(transparent)]
|
22
28
|
pub struct Wrap<T>(pub T);
|
23
29
|
|
@@ -138,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
138
144
|
|
139
145
|
impl From<Wrap<DataType>> for Value {
|
140
146
|
fn from(w: Wrap<DataType>) -> Self {
|
141
|
-
|
147
|
+
let pl = crate::module();
|
148
|
+
|
149
|
+
match &w.0 {
|
150
|
+
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
151
|
+
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
152
|
+
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
153
|
+
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
154
|
+
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
155
|
+
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
156
|
+
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
157
|
+
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
158
|
+
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
159
|
+
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
160
|
+
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
161
|
+
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
162
|
+
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
163
|
+
DataType::List(inner) => {
|
164
|
+
let inner = Wrap(*inner.clone());
|
165
|
+
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
166
|
+
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
167
|
+
}
|
168
|
+
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
169
|
+
DataType::Datetime(tu, tz) => {
|
170
|
+
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
171
|
+
datetime_class
|
172
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
|
173
|
+
.unwrap()
|
174
|
+
}
|
175
|
+
DataType::Duration(tu) => {
|
176
|
+
let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
|
177
|
+
duration_class
|
178
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
179
|
+
.unwrap()
|
180
|
+
}
|
181
|
+
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
182
|
+
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
183
|
+
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
184
|
+
DataType::Struct(fields) => {
|
185
|
+
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
186
|
+
let iter = fields.iter().map(|fld| {
|
187
|
+
let name = fld.name().clone();
|
188
|
+
let dtype = Wrap(fld.data_type().clone());
|
189
|
+
field_class
|
190
|
+
.funcall::<_, _, Value>("new", (name, dtype))
|
191
|
+
.unwrap()
|
192
|
+
});
|
193
|
+
let fields = RArray::from_iter(iter);
|
194
|
+
let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
|
195
|
+
struct_class
|
196
|
+
.funcall::<_, _, Value>("new", (fields,))
|
197
|
+
.unwrap()
|
198
|
+
}
|
199
|
+
DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
|
200
|
+
DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
|
201
|
+
}
|
142
202
|
}
|
143
203
|
}
|
144
204
|
|
145
205
|
impl TryConvert for Wrap<DataType> {
|
146
206
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
147
|
-
let dtype =
|
148
|
-
"
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
207
|
+
let dtype = if ob.is_kind_of(class::class()) {
|
208
|
+
let name = ob.funcall::<_, _, String>("name", ())?;
|
209
|
+
match name.as_str() {
|
210
|
+
"Polars::UInt8" => DataType::UInt8,
|
211
|
+
"Polars::UInt16" => DataType::UInt16,
|
212
|
+
"Polars::UInt32" => DataType::UInt32,
|
213
|
+
"Polars::UInt64" => DataType::UInt64,
|
214
|
+
"Polars::Int8" => DataType::Int8,
|
215
|
+
"Polars::Int16" => DataType::Int16,
|
216
|
+
"Polars::Int32" => DataType::Int32,
|
217
|
+
"Polars::Int64" => DataType::Int64,
|
218
|
+
"Polars::Utf8" => DataType::Utf8,
|
219
|
+
"Polars::Binary" => DataType::Binary,
|
220
|
+
"Polars::Boolean" => DataType::Boolean,
|
221
|
+
"Polars::Categorical" => DataType::Categorical(None),
|
222
|
+
"Polars::Date" => DataType::Date,
|
223
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
224
|
+
"Polars::Time" => DataType::Time,
|
225
|
+
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
226
|
+
"Polars::Float32" => DataType::Float32,
|
227
|
+
"Polars::Float64" => DataType::Float64,
|
228
|
+
// "Polars::Object" => DataType::Object(OBJECT_NAME),
|
229
|
+
"Polars::List" => DataType::List(Box::new(DataType::Boolean)),
|
230
|
+
"Polars::Null" => DataType::Null,
|
231
|
+
"Polars::Unknown" => DataType::Unknown,
|
232
|
+
dt => {
|
233
|
+
return Err(RbValueError::new_err(format!(
|
234
|
+
"{dt} is not a correct polars DataType.",
|
235
|
+
)))
|
236
|
+
}
|
237
|
+
}
|
238
|
+
} else {
|
239
|
+
match ob.try_convert::<String>()?.as_str() {
|
240
|
+
"u8" => DataType::UInt8,
|
241
|
+
"u16" => DataType::UInt16,
|
242
|
+
"u32" => DataType::UInt32,
|
243
|
+
"u64" => DataType::UInt64,
|
244
|
+
"i8" => DataType::Int8,
|
245
|
+
"i16" => DataType::Int16,
|
246
|
+
"i32" => DataType::Int32,
|
247
|
+
"i64" => DataType::Int64,
|
248
|
+
"str" => DataType::Utf8,
|
249
|
+
"bin" => DataType::Binary,
|
250
|
+
"bool" => DataType::Boolean,
|
251
|
+
"cat" => DataType::Categorical(None),
|
252
|
+
"date" => DataType::Date,
|
253
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
254
|
+
"f32" => DataType::Float32,
|
255
|
+
"time" => DataType::Time,
|
256
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
257
|
+
"f64" => DataType::Float64,
|
258
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
259
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
260
|
+
"null" => DataType::Null,
|
261
|
+
"unk" => DataType::Unknown,
|
262
|
+
_ => {
|
263
|
+
return Err(RbValueError::new_err(format!(
|
264
|
+
"{} is not a supported DataType.",
|
265
|
+
ob
|
266
|
+
)))
|
267
|
+
}
|
175
268
|
}
|
176
269
|
};
|
177
270
|
Ok(Wrap(dtype))
|
@@ -210,6 +303,21 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
210
303
|
}
|
211
304
|
}
|
212
305
|
|
306
|
+
impl TryConvert for Wrap<InterpolationMethod> {
|
307
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
308
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
309
|
+
"linear" => InterpolationMethod::Linear,
|
310
|
+
"nearest" => InterpolationMethod::Nearest,
|
311
|
+
v => {
|
312
|
+
return Err(RbValueError::new_err(format!(
|
313
|
+
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
314
|
+
)))
|
315
|
+
}
|
316
|
+
};
|
317
|
+
Ok(Wrap(parsed))
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
213
321
|
impl TryConvert for Wrap<Option<AvroCompression>> {
|
214
322
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
323
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -243,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
|
|
243
351
|
}
|
244
352
|
}
|
245
353
|
|
354
|
+
impl TryConvert for Wrap<StartBy> {
|
355
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
356
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
357
|
+
"window" => StartBy::WindowBound,
|
358
|
+
"datapoint" => StartBy::DataPoint,
|
359
|
+
"monday" => StartBy::Monday,
|
360
|
+
v => {
|
361
|
+
return Err(RbValueError::new_err(format!(
|
362
|
+
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
363
|
+
)))
|
364
|
+
}
|
365
|
+
};
|
366
|
+
Ok(Wrap(parsed))
|
367
|
+
}
|
368
|
+
}
|
369
|
+
|
246
370
|
impl TryConvert for Wrap<ClosedWindow> {
|
247
371
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
248
372
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::frame::NullStrategy;
|
2
3
|
use polars::io::avro::AvroCompression;
|
3
4
|
use polars::io::mmap::ReaderBytes;
|
4
5
|
use polars::io::RowCount;
|
@@ -397,7 +398,7 @@ impl RbDataFrame {
|
|
397
398
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
398
399
|
obj.unwrap().to_object()
|
399
400
|
}
|
400
|
-
_ => Wrap(s.get(idx)).into(),
|
401
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
401
402
|
})
|
402
403
|
.collect(),
|
403
404
|
)
|
@@ -420,7 +421,7 @@ impl RbDataFrame {
|
|
420
421
|
s.get_object(idx).map(|any| any.into());
|
421
422
|
obj.unwrap().to_object()
|
422
423
|
}
|
423
|
-
_ => Wrap(s.get(idx)).into(),
|
424
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
424
425
|
})
|
425
426
|
.collect(),
|
426
427
|
)
|
@@ -573,9 +574,8 @@ impl RbDataFrame {
|
|
573
574
|
.collect()
|
574
575
|
}
|
575
576
|
|
576
|
-
pub fn n_chunks(&self) ->
|
577
|
-
|
578
|
-
Ok(n)
|
577
|
+
pub fn n_chunks(&self) -> usize {
|
578
|
+
self.df.borrow().n_chunks()
|
579
579
|
}
|
580
580
|
|
581
581
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -348,6 +348,7 @@ impl RbLazyFrame {
|
|
348
348
|
include_boundaries: bool,
|
349
349
|
closed: Wrap<ClosedWindow>,
|
350
350
|
by: RArray,
|
351
|
+
start_by: Wrap<StartBy>,
|
351
352
|
) -> RbResult<RbLazyGroupBy> {
|
352
353
|
let closed_window = closed.0;
|
353
354
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -362,6 +363,7 @@ impl RbLazyFrame {
|
|
362
363
|
truncate,
|
363
364
|
include_boundaries,
|
364
365
|
closed_window,
|
366
|
+
start_by: start_by.0,
|
365
367
|
},
|
366
368
|
);
|
367
369
|
|
@@ -514,9 +516,13 @@ impl RbLazyFrame {
|
|
514
516
|
ldf.median().into()
|
515
517
|
}
|
516
518
|
|
517
|
-
pub fn quantile(
|
519
|
+
pub fn quantile(
|
520
|
+
&self,
|
521
|
+
quantile: &RbExpr,
|
522
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
523
|
+
) -> Self {
|
518
524
|
let ldf = self.ldf.clone();
|
519
|
-
ldf.quantile(quantile, interpolation.0).into()
|
525
|
+
ldf.quantile(quantile.inner.clone(), interpolation.0).into()
|
520
526
|
}
|
521
527
|
|
522
528
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -164,10 +164,14 @@ impl RbExpr {
|
|
164
164
|
self.clone().inner.list().into()
|
165
165
|
}
|
166
166
|
|
167
|
-
pub fn quantile(
|
167
|
+
pub fn quantile(
|
168
|
+
&self,
|
169
|
+
quantile: &RbExpr,
|
170
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
171
|
+
) -> Self {
|
168
172
|
self.clone()
|
169
173
|
.inner
|
170
|
-
.quantile(quantile, interpolation.0)
|
174
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
171
175
|
.into()
|
172
176
|
}
|
173
177
|
|
@@ -506,7 +510,13 @@ impl RbExpr {
|
|
506
510
|
self.inner.clone().shrink_dtype().into()
|
507
511
|
}
|
508
512
|
|
509
|
-
pub fn str_parse_date(
|
513
|
+
pub fn str_parse_date(
|
514
|
+
&self,
|
515
|
+
fmt: Option<String>,
|
516
|
+
strict: bool,
|
517
|
+
exact: bool,
|
518
|
+
cache: bool,
|
519
|
+
) -> Self {
|
510
520
|
self.inner
|
511
521
|
.clone()
|
512
522
|
.str()
|
@@ -515,11 +525,20 @@ impl RbExpr {
|
|
515
525
|
fmt,
|
516
526
|
strict,
|
517
527
|
exact,
|
528
|
+
cache,
|
529
|
+
tz_aware: false,
|
518
530
|
})
|
519
531
|
.into()
|
520
532
|
}
|
521
533
|
|
522
|
-
pub fn str_parse_datetime(
|
534
|
+
pub fn str_parse_datetime(
|
535
|
+
&self,
|
536
|
+
fmt: Option<String>,
|
537
|
+
strict: bool,
|
538
|
+
exact: bool,
|
539
|
+
cache: bool,
|
540
|
+
tz_aware: bool,
|
541
|
+
) -> Self {
|
523
542
|
let tu = match fmt {
|
524
543
|
Some(ref fmt) => {
|
525
544
|
if fmt.contains("%.9f")
|
@@ -544,11 +563,19 @@ impl RbExpr {
|
|
544
563
|
fmt,
|
545
564
|
strict,
|
546
565
|
exact,
|
566
|
+
cache,
|
567
|
+
tz_aware,
|
547
568
|
})
|
548
569
|
.into()
|
549
570
|
}
|
550
571
|
|
551
|
-
pub fn str_parse_time(
|
572
|
+
pub fn str_parse_time(
|
573
|
+
&self,
|
574
|
+
fmt: Option<String>,
|
575
|
+
strict: bool,
|
576
|
+
exact: bool,
|
577
|
+
cache: bool,
|
578
|
+
) -> Self {
|
552
579
|
self.inner
|
553
580
|
.clone()
|
554
581
|
.str()
|
@@ -557,6 +584,8 @@ impl RbExpr {
|
|
557
584
|
fmt,
|
558
585
|
strict,
|
559
586
|
exact,
|
587
|
+
cache,
|
588
|
+
tz_aware: false,
|
560
589
|
})
|
561
590
|
.into()
|
562
591
|
}
|
@@ -723,8 +752,12 @@ impl RbExpr {
|
|
723
752
|
self.inner.clone().str().extract(&pat, group_index).into()
|
724
753
|
}
|
725
754
|
|
726
|
-
pub fn str_extract_all(&self, pat:
|
727
|
-
self.inner
|
755
|
+
/// Builds a string `extract_all` expression on this expression, using `pat`
/// (itself an expression) as the pattern argument.
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
    let target = self.inner.clone();
    let pattern = pat.inner.clone();
    target.str().extract_all(pattern).into()
}
|
729
762
|
|
730
763
|
pub fn count_match(&self, pat: String) -> Self {
|
@@ -989,14 +1022,12 @@ impl RbExpr {
|
|
989
1022
|
.clone()
|
990
1023
|
.map_alias(move |name| {
|
991
1024
|
let out = lambda.call::<_, String>((name,));
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
// )),
|
999
|
-
// }
|
1025
|
+
match out {
|
1026
|
+
Ok(out) => Ok(out),
|
1027
|
+
Err(e) => Err(PolarsError::ComputeError(
|
1028
|
+
format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
|
1029
|
+
)),
|
1030
|
+
}
|
1000
1031
|
})
|
1001
1032
|
.into()
|
1002
1033
|
}
|
@@ -1005,8 +1036,8 @@ impl RbExpr {
|
|
1005
1036
|
self.inner.clone().exclude(columns).into()
|
1006
1037
|
}
|
1007
1038
|
|
1008
|
-
pub fn interpolate(&self) -> Self {
|
1009
|
-
self.inner.clone().interpolate().into()
|
1039
|
+
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
1040
|
+
self.inner.clone().interpolate(method.0).into()
|
1010
1041
|
}
|
1011
1042
|
|
1012
1043
|
pub fn rolling_sum(
|
@@ -1297,6 +1328,7 @@ impl RbExpr {
|
|
1297
1328
|
&self,
|
1298
1329
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
1299
1330
|
_name_gen: Option<Value>,
|
1331
|
+
upper_bound: usize,
|
1300
1332
|
) -> RbResult<Self> {
|
1301
1333
|
// TODO fix
|
1302
1334
|
let name_gen = None;
|
@@ -1311,7 +1343,7 @@ impl RbExpr {
|
|
1311
1343
|
.inner
|
1312
1344
|
.clone()
|
1313
1345
|
.arr()
|
1314
|
-
.to_struct(width_strat.0, name_gen)
|
1346
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
1315
1347
|
.into())
|
1316
1348
|
}
|
1317
1349
|
|
@@ -1498,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
1498
1530
|
dsl::cols(names).into()
|
1499
1531
|
}
|
1500
1532
|
|
1533
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
1534
|
+
dsl::dtype_cols(dtypes).into()
|
1535
|
+
}
|
1536
|
+
|
1501
1537
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
1502
1538
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
1503
1539
|
|