polars-df 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +374 -222
- data/Cargo.toml +2 -2
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +4 -3
- data/ext/polars/src/apply/dataframe.rs +24 -13
- data/ext/polars/src/apply/mod.rs +3 -4
- data/ext/polars/src/conversion.rs +155 -31
- data/ext/polars/src/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dataframe.rs +8 -2
- data/ext/polars/src/lazy/dsl.rs +54 -18
- data/ext/polars/src/lib.rs +19 -11
- data/ext/polars/src/series.rs +32 -10
- data/lib/polars/data_frame.rb +25 -23
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +5 -2
- data/lib/polars/expr.rb +4 -3
- data/lib/polars/functions.rb +2 -2
- data/lib/polars/group_by.rb +33 -33
- data/lib/polars/lazy_frame.rb +8 -5
- data/lib/polars/lazy_functions.rb +8 -3
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/series.rb +64 -21
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +9 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +34 -33
- metadata +5 -4
data/Cargo.toml
CHANGED
@@ -3,8 +3,8 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
|
7
|
-
|
6
|
+
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "9f36b2b97446e6dd495473e4361a70d863ac8027" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -12,8 +12,6 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem "polars-df"
|
13
13
|
```
|
14
14
|
|
15
|
-
Note: Rust is currently required for installation, and it can take 15-20 minutes to compile the extension.
|
16
|
-
|
17
15
|
## Getting Started
|
18
16
|
|
19
17
|
This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.2.0"
|
4
|
+
license = "MIT"
|
4
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
6
|
edition = "2021"
|
6
7
|
publish = false
|
@@ -11,11 +12,11 @@ crate-type = ["cdylib"]
|
|
11
12
|
[dependencies]
|
12
13
|
ahash = "0.8"
|
13
14
|
magnus = "0.4"
|
14
|
-
polars-core = "0.
|
15
|
+
polars-core = "0.26.1"
|
15
16
|
serde_json = "1"
|
16
17
|
|
17
18
|
[dependencies.polars]
|
18
|
-
version = "0.
|
19
|
+
version = "0.26.1"
|
19
20
|
features = [
|
20
21
|
"abs",
|
21
22
|
"arange",
|
@@ -1,20 +1,32 @@
|
|
1
1
|
use magnus::{class, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
|
+
use polars_core::series::SeriesIter;
|
4
5
|
|
5
6
|
use super::*;
|
6
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
7
8
|
|
9
|
+
/// Builds one value iterator per column of `df`, in column order.
///
/// Callers advance every iterator in lock-step to walk the frame row by row.
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for series in columns {
        iters.push(series.iter());
    }
    iters
}
|
12
|
+
|
13
|
+
/// Builds one value iterator per column of `df`, each wrapped so that its
/// first `skip` values are passed over before anything is yielded.
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for series in columns {
        iters.push(series.iter().skip(skip));
    }
    iters
}
|
19
|
+
|
8
20
|
pub fn apply_lambda_unknown<'a>(
|
9
21
|
df: &'a DataFrame,
|
10
22
|
lambda: Value,
|
11
23
|
inference_size: usize,
|
12
24
|
) -> RbResult<(Value, bool)> {
|
13
|
-
let columns = df.get_columns();
|
14
25
|
let mut null_count = 0;
|
26
|
+
let mut iters = get_iters(df);
|
15
27
|
|
16
|
-
for
|
17
|
-
let iter =
|
28
|
+
for _ in 0..df.height() {
|
29
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
18
30
|
let arg = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
19
31
|
let out: Value = lambda.funcall("call", arg)?;
|
20
32
|
|
@@ -126,9 +138,9 @@ fn apply_iter<T>(
|
|
126
138
|
where
|
127
139
|
T: TryConvert,
|
128
140
|
{
|
129
|
-
let
|
130
|
-
((init_null_count + skip)..df.height()).map(move |
|
131
|
-
let iter =
|
141
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
|
+
((init_null_count + skip)..df.height()).map(move |_| {
|
143
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
132
144
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
133
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
134
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
@@ -197,14 +209,13 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
197
209
|
first_value: Option<&Series>,
|
198
210
|
dt: &DataType,
|
199
211
|
) -> RbResult<ListChunked> {
|
200
|
-
let columns = df.get_columns();
|
201
|
-
|
202
212
|
let skip = usize::from(first_value.is_some());
|
203
213
|
if init_null_count == df.height() {
|
204
214
|
Ok(ChunkedArray::full_null("apply", df.height()))
|
205
215
|
} else {
|
206
|
-
let
|
207
|
-
|
216
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
|
+
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
208
219
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
209
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
210
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
@@ -234,15 +245,15 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
234
245
|
first_value: Row<'a>,
|
235
246
|
inference_size: usize,
|
236
247
|
) -> PolarsResult<DataFrame> {
|
237
|
-
let columns = df.get_columns();
|
238
248
|
let width = first_value.0.len();
|
239
249
|
let null_row = Row::new(vec![AnyValue::Null; width]);
|
240
250
|
|
241
251
|
let mut row_buf = Row::default();
|
242
252
|
|
243
253
|
let skip = 1;
|
244
|
-
let mut
|
245
|
-
|
254
|
+
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
|
+
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
|
+
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
246
257
|
let tpl = (iter.collect::<Vec<Wrap<AnyValue>>>(),);
|
247
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
248
259
|
Ok(val) => {
|
data/ext/polars/src/apply/mod.rs
CHANGED
@@ -31,12 +31,11 @@ fn iterator_to_struct(
|
|
31
31
|
capacity: usize,
|
32
32
|
) -> RbResult<RbSeries> {
|
33
33
|
let (vals, flds) = match &first_value {
|
34
|
-
AnyValue::Struct(
|
35
|
-
AnyValue::StructOwned(payload) => (
|
34
|
+
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
35
|
+
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
36
36
|
_ => {
|
37
37
|
return Err(crate::error::ComputeError::new_err(format!(
|
38
|
-
"expected struct got {:?}",
|
39
|
-
first_value
|
38
|
+
"expected struct got {first_value:?}",
|
40
39
|
)))
|
41
40
|
}
|
42
41
|
};
|
@@ -1,9 +1,9 @@
|
|
1
|
-
use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, Module, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
5
|
use polars::frame::row::Row;
|
6
|
-
use polars::frame::
|
6
|
+
use polars::frame::NullStrategy;
|
7
7
|
use polars::io::avro::AvroCompression;
|
8
8
|
use polars::prelude::*;
|
9
9
|
use polars::series::ops::NullBehavior;
|
@@ -18,6 +18,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
18
18
|
unsafe { std::mem::transmute(slice) }
|
19
19
|
}
|
20
20
|
|
21
|
+
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
22
|
+
// Safety:
|
23
|
+
// Wrap is transparent.
|
24
|
+
unsafe { std::mem::transmute(buf) }
|
25
|
+
}
|
26
|
+
|
21
27
|
#[repr(transparent)]
|
22
28
|
pub struct Wrap<T>(pub T);
|
23
29
|
|
@@ -138,40 +144,127 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
138
144
|
|
139
145
|
impl From<Wrap<DataType>> for Value {
|
140
146
|
fn from(w: Wrap<DataType>) -> Self {
|
141
|
-
|
147
|
+
let pl = crate::module();
|
148
|
+
|
149
|
+
match &w.0 {
|
150
|
+
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
151
|
+
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
152
|
+
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
153
|
+
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
154
|
+
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
155
|
+
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
156
|
+
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
157
|
+
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
158
|
+
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
159
|
+
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
160
|
+
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
161
|
+
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
162
|
+
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
163
|
+
DataType::List(inner) => {
|
164
|
+
let inner = Wrap(*inner.clone());
|
165
|
+
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
166
|
+
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
167
|
+
}
|
168
|
+
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
169
|
+
DataType::Datetime(tu, tz) => {
|
170
|
+
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
171
|
+
datetime_class
|
172
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.clone()))
|
173
|
+
.unwrap()
|
174
|
+
}
|
175
|
+
DataType::Duration(tu) => {
|
176
|
+
let duration_class = pl.const_get::<_, Value>("Duration").unwrap();
|
177
|
+
duration_class
|
178
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
179
|
+
.unwrap()
|
180
|
+
}
|
181
|
+
DataType::Object(_) => pl.const_get::<_, Value>("Object").unwrap(),
|
182
|
+
DataType::Categorical(_) => pl.const_get::<_, Value>("Categorical").unwrap(),
|
183
|
+
DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
|
184
|
+
DataType::Struct(fields) => {
|
185
|
+
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
186
|
+
let iter = fields.iter().map(|fld| {
|
187
|
+
let name = fld.name().clone();
|
188
|
+
let dtype = Wrap(fld.data_type().clone());
|
189
|
+
field_class
|
190
|
+
.funcall::<_, _, Value>("new", (name, dtype))
|
191
|
+
.unwrap()
|
192
|
+
});
|
193
|
+
let fields = RArray::from_iter(iter);
|
194
|
+
let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
|
195
|
+
struct_class
|
196
|
+
.funcall::<_, _, Value>("new", (fields,))
|
197
|
+
.unwrap()
|
198
|
+
}
|
199
|
+
DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
|
200
|
+
DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
|
201
|
+
}
|
142
202
|
}
|
143
203
|
}
|
144
204
|
|
145
205
|
impl TryConvert for Wrap<DataType> {
|
146
206
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
147
|
-
let dtype =
|
148
|
-
"
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
207
|
+
let dtype = if ob.is_kind_of(class::class()) {
|
208
|
+
let name = ob.funcall::<_, _, String>("name", ())?;
|
209
|
+
match name.as_str() {
|
210
|
+
"Polars::UInt8" => DataType::UInt8,
|
211
|
+
"Polars::UInt16" => DataType::UInt16,
|
212
|
+
"Polars::UInt32" => DataType::UInt32,
|
213
|
+
"Polars::UInt64" => DataType::UInt64,
|
214
|
+
"Polars::Int8" => DataType::Int8,
|
215
|
+
"Polars::Int16" => DataType::Int16,
|
216
|
+
"Polars::Int32" => DataType::Int32,
|
217
|
+
"Polars::Int64" => DataType::Int64,
|
218
|
+
"Polars::Utf8" => DataType::Utf8,
|
219
|
+
"Polars::Binary" => DataType::Binary,
|
220
|
+
"Polars::Boolean" => DataType::Boolean,
|
221
|
+
"Polars::Categorical" => DataType::Categorical(None),
|
222
|
+
"Polars::Date" => DataType::Date,
|
223
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
224
|
+
"Polars::Time" => DataType::Time,
|
225
|
+
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
226
|
+
"Polars::Float32" => DataType::Float32,
|
227
|
+
"Polars::Float64" => DataType::Float64,
|
228
|
+
// "Polars::Object" => DataType::Object(OBJECT_NAME),
|
229
|
+
"Polars::List" => DataType::List(Box::new(DataType::Boolean)),
|
230
|
+
"Polars::Null" => DataType::Null,
|
231
|
+
"Polars::Unknown" => DataType::Unknown,
|
232
|
+
dt => {
|
233
|
+
return Err(RbValueError::new_err(format!(
|
234
|
+
"{dt} is not a correct polars DataType.",
|
235
|
+
)))
|
236
|
+
}
|
237
|
+
}
|
238
|
+
} else {
|
239
|
+
match ob.try_convert::<String>()?.as_str() {
|
240
|
+
"u8" => DataType::UInt8,
|
241
|
+
"u16" => DataType::UInt16,
|
242
|
+
"u32" => DataType::UInt32,
|
243
|
+
"u64" => DataType::UInt64,
|
244
|
+
"i8" => DataType::Int8,
|
245
|
+
"i16" => DataType::Int16,
|
246
|
+
"i32" => DataType::Int32,
|
247
|
+
"i64" => DataType::Int64,
|
248
|
+
"str" => DataType::Utf8,
|
249
|
+
"bin" => DataType::Binary,
|
250
|
+
"bool" => DataType::Boolean,
|
251
|
+
"cat" => DataType::Categorical(None),
|
252
|
+
"date" => DataType::Date,
|
253
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
254
|
+
"f32" => DataType::Float32,
|
255
|
+
"time" => DataType::Time,
|
256
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
257
|
+
"f64" => DataType::Float64,
|
258
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
259
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
260
|
+
"null" => DataType::Null,
|
261
|
+
"unk" => DataType::Unknown,
|
262
|
+
_ => {
|
263
|
+
return Err(RbValueError::new_err(format!(
|
264
|
+
"{} is not a supported DataType.",
|
265
|
+
ob
|
266
|
+
)))
|
267
|
+
}
|
175
268
|
}
|
176
269
|
};
|
177
270
|
Ok(Wrap(dtype))
|
@@ -210,6 +303,21 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
210
303
|
}
|
211
304
|
}
|
212
305
|
|
306
|
+
impl TryConvert for Wrap<InterpolationMethod> {
|
307
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
308
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
309
|
+
"linear" => InterpolationMethod::Linear,
|
310
|
+
"nearest" => InterpolationMethod::Nearest,
|
311
|
+
v => {
|
312
|
+
return Err(RbValueError::new_err(format!(
|
313
|
+
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
314
|
+
)))
|
315
|
+
}
|
316
|
+
};
|
317
|
+
Ok(Wrap(parsed))
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
213
321
|
impl TryConvert for Wrap<Option<AvroCompression>> {
|
214
322
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
323
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -243,6 +351,22 @@ impl TryConvert for Wrap<CategoricalOrdering> {
|
|
243
351
|
}
|
244
352
|
}
|
245
353
|
|
354
|
+
impl TryConvert for Wrap<StartBy> {
|
355
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
356
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
357
|
+
"window" => StartBy::WindowBound,
|
358
|
+
"datapoint" => StartBy::DataPoint,
|
359
|
+
"monday" => StartBy::Monday,
|
360
|
+
v => {
|
361
|
+
return Err(RbValueError::new_err(format!(
|
362
|
+
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
363
|
+
)))
|
364
|
+
}
|
365
|
+
};
|
366
|
+
Ok(Wrap(parsed))
|
367
|
+
}
|
368
|
+
}
|
369
|
+
|
246
370
|
impl TryConvert for Wrap<ClosedWindow> {
|
247
371
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
248
372
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::frame::NullStrategy;
|
2
3
|
use polars::io::avro::AvroCompression;
|
3
4
|
use polars::io::mmap::ReaderBytes;
|
4
5
|
use polars::io::RowCount;
|
@@ -397,7 +398,7 @@ impl RbDataFrame {
|
|
397
398
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
398
399
|
obj.unwrap().to_object()
|
399
400
|
}
|
400
|
-
_ => Wrap(s.get(idx)).into(),
|
401
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
401
402
|
})
|
402
403
|
.collect(),
|
403
404
|
)
|
@@ -420,7 +421,7 @@ impl RbDataFrame {
|
|
420
421
|
s.get_object(idx).map(|any| any.into());
|
421
422
|
obj.unwrap().to_object()
|
422
423
|
}
|
423
|
-
_ => Wrap(s.get(idx)).into(),
|
424
|
+
_ => Wrap(s.get(idx).unwrap()).into(),
|
424
425
|
})
|
425
426
|
.collect(),
|
426
427
|
)
|
@@ -573,9 +574,8 @@ impl RbDataFrame {
|
|
573
574
|
.collect()
|
574
575
|
}
|
575
576
|
|
576
|
-
pub fn n_chunks(&self) ->
|
577
|
-
|
578
|
-
Ok(n)
|
577
|
+
pub fn n_chunks(&self) -> usize {
|
578
|
+
self.df.borrow().n_chunks()
|
579
579
|
}
|
580
580
|
|
581
581
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -348,6 +348,7 @@ impl RbLazyFrame {
|
|
348
348
|
include_boundaries: bool,
|
349
349
|
closed: Wrap<ClosedWindow>,
|
350
350
|
by: RArray,
|
351
|
+
start_by: Wrap<StartBy>,
|
351
352
|
) -> RbResult<RbLazyGroupBy> {
|
352
353
|
let closed_window = closed.0;
|
353
354
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -362,6 +363,7 @@ impl RbLazyFrame {
|
|
362
363
|
truncate,
|
363
364
|
include_boundaries,
|
364
365
|
closed_window,
|
366
|
+
start_by: start_by.0,
|
365
367
|
},
|
366
368
|
);
|
367
369
|
|
@@ -514,9 +516,13 @@ impl RbLazyFrame {
|
|
514
516
|
ldf.median().into()
|
515
517
|
}
|
516
518
|
|
517
|
-
pub fn quantile(
|
519
|
+
pub fn quantile(
|
520
|
+
&self,
|
521
|
+
quantile: &RbExpr,
|
522
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
523
|
+
) -> Self {
|
518
524
|
let ldf = self.ldf.clone();
|
519
|
-
ldf.quantile(quantile, interpolation.0).into()
|
525
|
+
ldf.quantile(quantile.inner.clone(), interpolation.0).into()
|
520
526
|
}
|
521
527
|
|
522
528
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -164,10 +164,14 @@ impl RbExpr {
|
|
164
164
|
self.clone().inner.list().into()
|
165
165
|
}
|
166
166
|
|
167
|
-
pub fn quantile(
|
167
|
+
pub fn quantile(
|
168
|
+
&self,
|
169
|
+
quantile: &RbExpr,
|
170
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
171
|
+
) -> Self {
|
168
172
|
self.clone()
|
169
173
|
.inner
|
170
|
-
.quantile(quantile, interpolation.0)
|
174
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
171
175
|
.into()
|
172
176
|
}
|
173
177
|
|
@@ -506,7 +510,13 @@ impl RbExpr {
|
|
506
510
|
self.inner.clone().shrink_dtype().into()
|
507
511
|
}
|
508
512
|
|
509
|
-
pub fn str_parse_date(
|
513
|
+
pub fn str_parse_date(
|
514
|
+
&self,
|
515
|
+
fmt: Option<String>,
|
516
|
+
strict: bool,
|
517
|
+
exact: bool,
|
518
|
+
cache: bool,
|
519
|
+
) -> Self {
|
510
520
|
self.inner
|
511
521
|
.clone()
|
512
522
|
.str()
|
@@ -515,11 +525,20 @@ impl RbExpr {
|
|
515
525
|
fmt,
|
516
526
|
strict,
|
517
527
|
exact,
|
528
|
+
cache,
|
529
|
+
tz_aware: false,
|
518
530
|
})
|
519
531
|
.into()
|
520
532
|
}
|
521
533
|
|
522
|
-
pub fn str_parse_datetime(
|
534
|
+
pub fn str_parse_datetime(
|
535
|
+
&self,
|
536
|
+
fmt: Option<String>,
|
537
|
+
strict: bool,
|
538
|
+
exact: bool,
|
539
|
+
cache: bool,
|
540
|
+
tz_aware: bool,
|
541
|
+
) -> Self {
|
523
542
|
let tu = match fmt {
|
524
543
|
Some(ref fmt) => {
|
525
544
|
if fmt.contains("%.9f")
|
@@ -544,11 +563,19 @@ impl RbExpr {
|
|
544
563
|
fmt,
|
545
564
|
strict,
|
546
565
|
exact,
|
566
|
+
cache,
|
567
|
+
tz_aware,
|
547
568
|
})
|
548
569
|
.into()
|
549
570
|
}
|
550
571
|
|
551
|
-
pub fn str_parse_time(
|
572
|
+
pub fn str_parse_time(
|
573
|
+
&self,
|
574
|
+
fmt: Option<String>,
|
575
|
+
strict: bool,
|
576
|
+
exact: bool,
|
577
|
+
cache: bool,
|
578
|
+
) -> Self {
|
552
579
|
self.inner
|
553
580
|
.clone()
|
554
581
|
.str()
|
@@ -557,6 +584,8 @@ impl RbExpr {
|
|
557
584
|
fmt,
|
558
585
|
strict,
|
559
586
|
exact,
|
587
|
+
cache,
|
588
|
+
tz_aware: false,
|
560
589
|
})
|
561
590
|
.into()
|
562
591
|
}
|
@@ -723,8 +752,12 @@ impl RbExpr {
|
|
723
752
|
self.inner.clone().str().extract(&pat, group_index).into()
|
724
753
|
}
|
725
754
|
|
726
|
-
pub fn str_extract_all(&self, pat:
|
727
|
-
self.inner
|
755
|
+
/// Builds a `str().extract_all(..)` expression from a clone of this
/// expression, with the pattern supplied as another expression.
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
    let pattern = pat.inner.clone();
    let extracted = self.inner.clone().str().extract_all(pattern);
    extracted.into()
}
|
729
762
|
|
730
763
|
pub fn count_match(&self, pat: String) -> Self {
|
@@ -989,14 +1022,12 @@ impl RbExpr {
|
|
989
1022
|
.clone()
|
990
1023
|
.map_alias(move |name| {
|
991
1024
|
let out = lambda.call::<_, String>((name,));
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
// )),
|
999
|
-
// }
|
1025
|
+
match out {
|
1026
|
+
Ok(out) => Ok(out),
|
1027
|
+
Err(e) => Err(PolarsError::ComputeError(
|
1028
|
+
format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
|
1029
|
+
)),
|
1030
|
+
}
|
1000
1031
|
})
|
1001
1032
|
.into()
|
1002
1033
|
}
|
@@ -1005,8 +1036,8 @@ impl RbExpr {
|
|
1005
1036
|
self.inner.clone().exclude(columns).into()
|
1006
1037
|
}
|
1007
1038
|
|
1008
|
-
pub fn interpolate(&self) -> Self {
|
1009
|
-
self.inner.clone().interpolate().into()
|
1039
|
+
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
1040
|
+
self.inner.clone().interpolate(method.0).into()
|
1010
1041
|
}
|
1011
1042
|
|
1012
1043
|
pub fn rolling_sum(
|
@@ -1297,6 +1328,7 @@ impl RbExpr {
|
|
1297
1328
|
&self,
|
1298
1329
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
1299
1330
|
_name_gen: Option<Value>,
|
1331
|
+
upper_bound: usize,
|
1300
1332
|
) -> RbResult<Self> {
|
1301
1333
|
// TODO fix
|
1302
1334
|
let name_gen = None;
|
@@ -1311,7 +1343,7 @@ impl RbExpr {
|
|
1311
1343
|
.inner
|
1312
1344
|
.clone()
|
1313
1345
|
.arr()
|
1314
|
-
.to_struct(width_strat.0, name_gen)
|
1346
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
1315
1347
|
.into())
|
1316
1348
|
}
|
1317
1349
|
|
@@ -1498,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
1498
1530
|
dsl::cols(names).into()
|
1499
1531
|
}
|
1500
1532
|
|
1533
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
1534
|
+
dsl::dtype_cols(dtypes).into()
|
1535
|
+
}
|
1536
|
+
|
1501
1537
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
1502
1538
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
1503
1539
|
|